From 21209e2bdd48a16cca4d74d000d2f0494c78d3f5 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Mon, 1 Jul 2024 13:17:38 -0600 Subject: [PATCH 01/22] Extract the `Doc` lexer into a top-level function --- unison-syntax/src/Unison/Syntax/Lexer.hs | 857 ++++++++++++----------- 1 file changed, 429 insertions(+), 428 deletions(-) diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index 14fe31f9a7..144ccd95c3 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -400,6 +400,435 @@ restoreStack lbl p = do S.put (s2 {layout = layout1}) pure $ p <> closes +-- | The `Doc` lexer as documented on unison-lang.org +doc2 :: P [Token Lexeme] +doc2 = do + -- Ensure we're at a doc before we start consuming tokens + P.lookAhead (lit "{{") + openStart <- posP + -- Produce any layout tokens, such as closing the last open block or virtual semicolons + -- We don't use 'token' on "{{" directly because we don't want to duplicate layout + -- tokens if we do the rewrite hack for type-docs below. + beforeStartToks <- token' ignore (pure ()) + void $ lit "{{" + openEnd <- posP + CP.space + -- Construct the token for opening the doc block. + let openTok = Token (Open "syntax.docUntitledSection") openStart openEnd + env0 <- S.get + -- Disable layout while parsing the doc block and reset the section number + (bodyToks0, closeTok) <- local + ( \env -> + env + { inLayout = False, + parentSections = 0 : (parentSections env0) + } + ) + do + bodyToks <- body + closeStart <- posP + lit "}}" + closeEnd <- posP + pure (bodyToks, Token Close closeStart closeEnd) + let docToks = beforeStartToks <> [openTok] <> bodyToks0 <> [closeTok] + -- Parse any layout tokens after the doc block, e.g. virtual semicolon + endToks <- token' ignore (pure ()) + -- Hack to allow anonymous doc blocks before type decls + -- {{ Some docs }} Foo.doc = {{ Some docs }} + -- ability Foo where => ability Foo where + tn <- subsequentTypeName + pure $ case (tn) of + -- If we're followed by a type, we rewrite the doc block to be a named doc block. + (Just (WordyId tname)) + | isTopLevel -> + beforeStartToks + <> [WordyId (HQ'.fromName (Name.snoc (HQ'.toName tname) NameSegment.docSegment)) <$ openTok, Open "=" <$ openTok] + <> [openTok] + <> bodyToks0 + <> [closeTok] + -- We need an extra 'Close' here because we added an extra Open above. + <> [closeTok] + <> endToks + where + isTopLevel = length (layout env0) + maybe 0 (const 1) (opening env0) == 1 + _ -> docToks <> endToks + where + wordyKw kw = separated wordySep (lit kw) + subsequentTypeName = P.lookAhead . P.optional $ do + let lit' s = lit s <* sp + let modifier = typeModifiersAlt (lit' . Text.unpack) + let typeOrAbility' = typeOrAbilityAlt (wordyKw . Text.unpack) + _ <- optional modifier *> typeOrAbility' *> sp + Token name start stop <- tokenP identifierP + if Name.isSymboly (HQ'.toName name) + then P.customFailure (Token (InvalidSymbolyId (Text.unpack (HQ'.toTextWith Name.toText name))) start stop) + else pure (WordyId name) + ignore _ _ _ = [] + body = join <$> P.many (sectionElem <* CP.space) + sectionElem = section <|> fencedBlock <|> list <|> paragraph + paragraph = wrap "syntax.docParagraph" $ join <$> spaced leaf + reserved word = List.isPrefixOf "}}" word || all (== '#') word + + wordy closing = wrap "syntax.docWord" . tok . fmap Textual . 
P.try $ do + let end = + P.lookAhead $ + void docClose + <|> void docOpen + <|> void (P.satisfy isSpace) + <|> void closing + word <- P.manyTill (P.satisfy (\ch -> not (isSpace ch))) end + guard (not $ reserved word || null word) + pure word + + leafy closing = groupy closing gs + where + gs = + link + <|> externalLink + <|> exampleInline + <|> expr + <|> boldOrItalicOrStrikethrough closing + <|> verbatim + <|> atDoc + <|> wordy closing + + leaf = leafy mzero + + atDoc = src <|> evalInline <|> signature <|> signatureInline + where + comma = lit "," <* CP.space + src = + src' "syntax.docSource" "@source" + <|> src' "syntax.docFoldedSource" "@foldedSource" + srcElem = + wrap "syntax.docSourceElement" $ + (typeLink <|> termLink) + <+> ( fmap (fromMaybe []) . P.optional $ + (tok (Reserved <$> lit "@") <+> (CP.space *> annotations)) + ) + where + annotation = tok identifierLexemeP <|> expr <* CP.space + annotations = + join <$> P.some (wrap "syntax.docEmbedAnnotation" annotation) + src' name atName = wrap name $ do + _ <- lit atName *> (lit " {" <|> lit "{") *> CP.space + s <- P.sepBy1 srcElem comma + _ <- lit "}" + pure (join s) + signature = wrap "syntax.docSignature" $ do + _ <- (lit "@signatures" <|> lit "@signature") *> (lit " {" <|> lit "{") *> CP.space + s <- join <$> P.sepBy1 signatureLink comma + _ <- lit "}" + pure s + signatureInline = wrap "syntax.docSignatureInline" $ do + _ <- lit "@inlineSignature" *> (lit " {" <|> lit "{") *> CP.space + s <- signatureLink + _ <- lit "}" + pure s + evalInline = wrap "syntax.docEvalInline" $ do + _ <- lit "@eval" *> (lit " {" <|> lit "{") *> CP.space + let inlineEvalClose = [] <$ lit "}" + s <- lexemes' inlineEvalClose + pure s + + typeLink = wrap "syntax.docEmbedTypeLink" do + _ <- typeOrAbilityAlt (wordyKw . Text.unpack) <* CP.space + tok identifierLexemeP <* CP.space + + termLink = + wrap "syntax.docEmbedTermLink" $ + tok identifierLexemeP <* CP.space + + signatureLink = + wrap "syntax.docEmbedSignatureLink" $ + tok identifierLexemeP <* CP.space + + groupy closing p = do + Token p start stop <- tokenP p + after <- P.optional . P.try $ leafy closing + pure $ case after of + Nothing -> p + Just after -> + [ Token (Open "syntax.docGroup") start stop', + Token (Open "syntax.docJoin") start stop' + ] + <> p + <> after + <> (take 2 $ repeat (Token Close stop' stop')) + where + stop' = maybe stop end (lastMay after) + + verbatim = + P.label "code (examples: ''**unformatted**'', `words` or '''_words_''')" $ do + Token originalText start stop <- tokenP do + -- a single backtick followed by a non-backtick is treated as monospaced + let tick = P.try (lit "`" <* P.lookAhead (P.satisfy (/= '`'))) + -- also two or more ' followed by that number of closing ' + quotes <- tick <|> (lit "''" <+> P.takeWhileP Nothing (== '\'')) + P.someTill P.anySingle (lit quotes) + let isMultiLine = line start /= line stop + if isMultiLine + then do + let trimmed = (trimAroundDelimiters originalText) + let txt = trimIndentFromVerbatimBlock (column start - 1) trimmed + -- If it's a multi-line verbatim block we trim any whitespace representing + -- indentation from the pretty-printer. 
See 'trimIndentFromVerbatimBlock' + wrap "syntax.docVerbatim" $ + wrap "syntax.docWord" $ + pure [Token (Textual txt) start stop] + else + wrap "syntax.docCode" $ + wrap "syntax.docWord" $ + pure [Token (Textual originalText) start stop] + + exampleInline = + P.label "inline code (examples: ``List.map f xs``, ``[1] :+ 2``)" $ + wrap "syntax.docExample" $ do + n <- P.try $ do + _ <- lit "`" + length <$> P.takeWhile1P (Just "backticks") (== '`') + let end :: P [Token Lexeme] = [] <$ lit (replicate (n + 1) '`') + ex <- CP.space *> lexemes' end + pure ex + + docClose = [] <$ lit "}}" + docOpen = [] <$ lit "{{" + + link = + P.label "link (examples: {type List}, {Nat.+})" $ + wrap "syntax.docLink" $ + P.try $ + lit "{" *> (typeLink <|> termLink) <* lit "}" + + expr = + P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $ + openAs "{{" "syntax.docTransclude" + <+> do + env0 <- S.get + -- we re-allow layout within a transclusion, then restore it to its + -- previous state after + S.put (env0 {inLayout = True}) + -- Note: this P.lookAhead ensures the }} isn't consumed, + -- so it can be consumed below by the `close` which will + -- pop items off the layout stack up to the nearest enclosing + -- syntax.docTransclude. + ts <- lexemes' (P.lookAhead ([] <$ lit "}}")) + S.modify (\env -> env {inLayout = inLayout env0}) + pure ts + <+> close ["syntax.docTransclude"] (lit "}}") + + nonNewlineSpace ch = isSpace ch && ch /= '\n' && ch /= '\r' + nonNewlineSpaces = P.takeWhileP Nothing nonNewlineSpace + + -- Allows whitespace or a newline, but not more than two newlines in a row. + whitespaceWithoutParagraphBreak :: P () + whitespaceWithoutParagraphBreak = void do + void nonNewlineSpaces + optional newline >>= \case + Just _ -> void nonNewlineSpaces + Nothing -> pure () + + fencedBlock = + P.label "block eval (syntax: a fenced code block)" $ + evalUnison <|> exampleBlock <|> other + where + evalUnison = wrap "syntax.docEval" $ do + -- commit after seeing that ``` is on its own line + fence <- P.try $ do + fence <- lit "```" <+> P.takeWhileP Nothing (== '`') + b <- all isSpace <$> P.lookAhead (P.takeWhileP Nothing (/= '\n')) + fence <$ guard b + CP.space + *> local + (\env -> env {inLayout = True, opening = Just "docEval"}) + (restoreStack "docEval" $ lexemes' ([] <$ lit fence)) + + exampleBlock = wrap "syntax.docExampleBlock" $ do + void $ lit "@typecheck" <* CP.space + fence <- lit "```" <+> P.takeWhileP Nothing (== '`') + local + (\env -> env {inLayout = True, opening = Just "docExampleBlock"}) + (restoreStack "docExampleBlock" $ lexemes' ([] <$ lit fence)) + + uncolumn column tabWidth s = + let skip col r | col < 1 = r + skip col s@('\t' : _) | col < tabWidth = s + skip col ('\t' : r) = skip (col - tabWidth) r + skip col (c : r) + | isSpace c && (not $ isControl c) = + skip (col - 1) r + skip _ s = s + in List.intercalate "\n" $ skip column <$> lines s + + other = wrap "syntax.docCodeBlock" $ do + column <- (\x -> x - 1) . toInteger . P.unPos <$> LP.indentLevel + let tabWidth = toInteger . P.unPos $ P.defaultTabWidth + fence <- lit "```" <+> P.takeWhileP Nothing (== '`') + name <- + P.takeWhileP Nothing nonNewlineSpace + *> tok (Textual <$> P.takeWhile1P Nothing (not . isSpace)) + <* P.takeWhileP Nothing nonNewlineSpace + _ <- void CP.eol + verbatim <- + tok $ + Textual . uncolumn column tabWidth . 
trimAroundDelimiters + <$> P.someTill P.anySingle ([] <$ lit fence) + pure (name <> verbatim) + + boldOrItalicOrStrikethrough closing = do + let start = + some (P.satisfy (== '*')) + <|> some (P.satisfy (== '_')) + <|> some + (P.satisfy (== '~')) + name s = + if take 1 s == "~" + then "syntax.docStrikethrough" + else if take 1 s == "*" then "syntax.docBold" else "syntax.docItalic" + end <- P.try $ do + end <- start + P.lookAhead (P.satisfy (not . isSpace)) + pure end + wrap (name end) . wrap "syntax.docParagraph" $ + join + <$> P.someTill + (leafy (closing <|> (void $ lit end)) <* whitespaceWithoutParagraphBreak) + (lit end) + + externalLink = + P.label "hyperlink (example: [link name](https://destination.com))" $ + wrap "syntax.docNamedLink" $ do + _ <- lit "[" + p <- leafies (void $ char ']') + _ <- lit "]" + _ <- lit "(" + target <- + wrap "syntax.docGroup" . wrap "syntax.docJoin" $ + link <|> fmap join (P.some (expr <|> wordy (char ')'))) + _ <- lit ")" + pure (p <> target) + + -- newline = P.optional (lit "\r") *> lit "\n" + + sp = P.try $ do + spaces <- P.takeWhile1P (Just "space") isSpace + close <- P.optional (P.lookAhead (lit "}}")) + case close of + Nothing -> guard $ ok spaces + Just _ -> pure () + pure spaces + where + ok s = length [() | '\n' <- s] < 2 + + spaced p = P.some (p <* P.optional sp) + leafies close = wrap "syntax.docParagraph" $ join <$> spaced (leafy close) + + list = bulletedList <|> numberedList + + bulletedList = wrap "syntax.docBulletedList" $ join <$> P.sepBy1 bullet listSep + numberedList = wrap "syntax.docNumberedList" $ join <$> P.sepBy1 numberedItem listSep + + listSep = P.try $ newline *> nonNewlineSpaces *> P.lookAhead (bulletedStart <|> numberedStart) + + bulletedStart = P.try $ do + r <- listItemStart' $ [] <$ P.satisfy bulletChar + P.lookAhead (P.satisfy isSpace) + pure r + where + bulletChar ch = ch == '*' || ch == '-' || ch == '+' + + listItemStart' gutter = P.try $ do + nonNewlineSpaces + col <- column <$> posP + parentCol <- S.gets parentListColumn + guard (col > parentCol) + (col,) <$> gutter + + numberedStart = + listItemStart' $ P.try (tok . fmap num $ LP.decimal <* lit ".") + where + num :: Word -> Lexeme + num n = Numeric (show n) + + listItemParagraph = wrap "syntax.docParagraph" $ do + col <- column <$> posP + join <$> P.some (leaf <* sep col) + where + -- Trickiness here to support hard line breaks inside of + -- a bulleted list, so for instance this parses as expected: + -- + -- * uno dos + -- tres quatro + -- * alice bob + -- carol dave eve + sep col = do + _ <- nonNewlineSpaces + _ <- + P.optional . P.try $ + newline + *> nonNewlineSpaces + *> do + col2 <- column <$> posP + guard $ col2 >= col + (P.notFollowedBy $ numberedStart <|> bulletedStart) + pure () + + numberedItem = P.label msg $ do + (col, s) <- numberedStart + pure s + <+> ( wrap "syntax.docColumn" $ do + p <- nonNewlineSpaces *> listItemParagraph + subList <- + local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> list) + pure (p <> fromMaybe [] subList) + ) + where + msg = "numbered list (examples: 1. item1, 8. start numbering at '8')" + + bullet = wrap "syntax.docColumn" . P.label "bullet (examples: * item1, - item2)" $ do + (col, _) <- bulletedStart + p <- nonNewlineSpaces *> listItemParagraph + subList <- + local + (\e -> e {parentListColumn = col}) + (P.optional $ listSep *> list) + pure (p <> fromMaybe [] subList) + + newline = P.label "newline" $ lit "\n" <|> lit "\r\n" + + -- ## Section title + -- + -- A paragraph under this section. 
+ -- Part of the same paragraph. Blanklines separate paragraphs. + -- + -- ### A subsection title + -- + -- A paragraph under this subsection. + + -- # A section title (not a subsection) + section :: P [Token Lexeme] + section = wrap "syntax.docSection" $ do + ns <- S.gets parentSections + hashes <- P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp + title <- paragraph <* CP.space + let m = length hashes + head ns + body <- + local (\env -> env {parentSections = (m : (tail ns))}) $ + P.many (sectionElem <* CP.space) + pure $ title <> join body + + wrap :: String -> P [Token Lexeme] -> P [Token Lexeme] + wrap o p = do + start <- posP + lexemes <- p + pure $ go start lexemes + where + go start [] = [Token (Open o) start start, Token Close start start] + go start ts@(Token _ x _ : _) = + Token (Open o) start x : (ts ++ [Token Close (end final) (end final)]) + where + final = last ts + lexemes' :: P [Token Lexeme] -> P [Token Lexeme] lexemes' eof = P.optional space >> do @@ -418,434 +847,6 @@ lexemes' eof = <|> token identifierLexemeP <|> (asum . map token) [semi, textual, hash] - doc2 :: P [Token Lexeme] - doc2 = do - -- Ensure we're at a doc before we start consuming tokens - P.lookAhead (lit "{{") - openStart <- posP - -- Produce any layout tokens, such as closing the last open block or virtual semicolons - -- We don't use 'token' on "{{" directly because we don't want to duplicate layout - -- tokens if we do the rewrite hack for type-docs below. - beforeStartToks <- token' ignore (pure ()) - void $ lit "{{" - openEnd <- posP - CP.space - -- Construct the token for opening the doc block. - let openTok = Token (Open "syntax.docUntitledSection") openStart openEnd - env0 <- S.get - -- Disable layout while parsing the doc block and reset the section number - (bodyToks0, closeTok) <- local - ( \env -> - env - { inLayout = False, - parentSections = 0 : (parentSections env0) - } - ) - do - bodyToks <- body - closeStart <- posP - lit "}}" - closeEnd <- posP - pure (bodyToks, Token Close closeStart closeEnd) - let docToks = beforeStartToks <> [openTok] <> bodyToks0 <> [closeTok] - -- Parse any layout tokens after the doc block, e.g. virtual semicolon - endToks <- token' ignore (pure ()) - -- Hack to allow anonymous doc blocks before type decls - -- {{ Some docs }} Foo.doc = {{ Some docs }} - -- ability Foo where => ability Foo where - tn <- subsequentTypeName - pure $ case (tn) of - -- If we're followed by a type, we rewrite the doc block to be a named doc block. - (Just (WordyId tname)) - | isTopLevel -> - beforeStartToks - <> [WordyId (HQ'.fromName (Name.snoc (HQ'.toName tname) NameSegment.docSegment)) <$ openTok, Open "=" <$ openTok] - <> [openTok] - <> bodyToks0 - <> [closeTok] - -- We need an extra 'Close' here because we added an extra Open above. - <> [closeTok] - <> endToks - where - isTopLevel = length (layout env0) + maybe 0 (const 1) (opening env0) == 1 - _ -> docToks <> endToks - where - wordyKw kw = separated wordySep (lit kw) - subsequentTypeName = P.lookAhead . P.optional $ do - let lit' s = lit s <* sp - let modifier = typeModifiersAlt (lit' . Text.unpack) - let typeOrAbility' = typeOrAbilityAlt (wordyKw . 
Text.unpack) - _ <- optional modifier *> typeOrAbility' *> sp - Token name start stop <- tokenP identifierP - if Name.isSymboly (HQ'.toName name) - then P.customFailure (Token (InvalidSymbolyId (Text.unpack (HQ'.toTextWith Name.toText name))) start stop) - else pure (WordyId name) - ignore _ _ _ = [] - body = join <$> P.many (sectionElem <* CP.space) - sectionElem = section <|> fencedBlock <|> list <|> paragraph - paragraph = wrap "syntax.docParagraph" $ join <$> spaced leaf - reserved word = List.isPrefixOf "}}" word || all (== '#') word - - wordy closing = wrap "syntax.docWord" . tok . fmap Textual . P.try $ do - let end = - P.lookAhead $ - void docClose - <|> void docOpen - <|> void (P.satisfy isSpace) - <|> void closing - word <- P.manyTill (P.satisfy (\ch -> not (isSpace ch))) end - guard (not $ reserved word || null word) - pure word - - leafy closing = groupy closing gs - where - gs = - link - <|> externalLink - <|> exampleInline - <|> expr - <|> boldOrItalicOrStrikethrough closing - <|> verbatim - <|> atDoc - <|> wordy closing - - leaf = leafy mzero - - atDoc = src <|> evalInline <|> signature <|> signatureInline - where - comma = lit "," <* CP.space - src = - src' "syntax.docSource" "@source" - <|> src' "syntax.docFoldedSource" "@foldedSource" - srcElem = - wrap "syntax.docSourceElement" $ - (typeLink <|> termLink) - <+> ( fmap (fromMaybe []) . P.optional $ - (tok (Reserved <$> lit "@") <+> (CP.space *> annotations)) - ) - where - annotation = tok identifierLexemeP <|> expr <* CP.space - annotations = - join <$> P.some (wrap "syntax.docEmbedAnnotation" annotation) - src' name atName = wrap name $ do - _ <- lit atName *> (lit " {" <|> lit "{") *> CP.space - s <- P.sepBy1 srcElem comma - _ <- lit "}" - pure (join s) - signature = wrap "syntax.docSignature" $ do - _ <- (lit "@signatures" <|> lit "@signature") *> (lit " {" <|> lit "{") *> CP.space - s <- join <$> P.sepBy1 signatureLink comma - _ <- lit "}" - pure s - signatureInline = wrap "syntax.docSignatureInline" $ do - _ <- lit "@inlineSignature" *> (lit " {" <|> lit "{") *> CP.space - s <- signatureLink - _ <- lit "}" - pure s - evalInline = wrap "syntax.docEvalInline" $ do - _ <- lit "@eval" *> (lit " {" <|> lit "{") *> CP.space - let inlineEvalClose = [] <$ lit "}" - s <- lexemes' inlineEvalClose - pure s - - typeLink = wrap "syntax.docEmbedTypeLink" do - _ <- typeOrAbilityAlt (wordyKw . Text.unpack) <* CP.space - tok identifierLexemeP <* CP.space - - termLink = - wrap "syntax.docEmbedTermLink" $ - tok identifierLexemeP <* CP.space - - signatureLink = - wrap "syntax.docEmbedSignatureLink" $ - tok identifierLexemeP <* CP.space - - groupy closing p = do - Token p start stop <- tokenP p - after <- P.optional . 
P.try $ leafy closing - pure $ case after of - Nothing -> p - Just after -> - [ Token (Open "syntax.docGroup") start stop', - Token (Open "syntax.docJoin") start stop' - ] - <> p - <> after - <> (take 2 $ repeat (Token Close stop' stop')) - where - stop' = maybe stop end (lastMay after) - - verbatim = - P.label "code (examples: ''**unformatted**'', `words` or '''_words_''')" $ do - Token originalText start stop <- tokenP do - -- a single backtick followed by a non-backtick is treated as monospaced - let tick = P.try (lit "`" <* P.lookAhead (P.satisfy (/= '`'))) - -- also two or more ' followed by that number of closing ' - quotes <- tick <|> (lit "''" <+> P.takeWhileP Nothing (== '\'')) - P.someTill P.anySingle (lit quotes) - let isMultiLine = line start /= line stop - if isMultiLine - then do - let trimmed = (trimAroundDelimiters originalText) - let txt = trimIndentFromVerbatimBlock (column start - 1) trimmed - -- If it's a multi-line verbatim block we trim any whitespace representing - -- indentation from the pretty-printer. See 'trimIndentFromVerbatimBlock' - wrap "syntax.docVerbatim" $ - wrap "syntax.docWord" $ - pure [Token (Textual txt) start stop] - else - wrap "syntax.docCode" $ - wrap "syntax.docWord" $ - pure [Token (Textual originalText) start stop] - - exampleInline = - P.label "inline code (examples: ``List.map f xs``, ``[1] :+ 2``)" $ - wrap "syntax.docExample" $ do - n <- P.try $ do - _ <- lit "`" - length <$> P.takeWhile1P (Just "backticks") (== '`') - let end :: P [Token Lexeme] = [] <$ lit (replicate (n + 1) '`') - ex <- CP.space *> lexemes' end - pure ex - - docClose = [] <$ lit "}}" - docOpen = [] <$ lit "{{" - - link = - P.label "link (examples: {type List}, {Nat.+})" $ - wrap "syntax.docLink" $ - P.try $ - lit "{" *> (typeLink <|> termLink) <* lit "}" - - expr = - P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $ - openAs "{{" "syntax.docTransclude" - <+> do - env0 <- S.get - -- we re-allow layout within a transclusion, then restore it to its - -- previous state after - S.put (env0 {inLayout = True}) - -- Note: this P.lookAhead ensures the }} isn't consumed, - -- so it can be consumed below by the `close` which will - -- pop items off the layout stack up to the nearest enclosing - -- syntax.docTransclude. - ts <- lexemes' (P.lookAhead ([] <$ lit "}}")) - S.modify (\env -> env {inLayout = inLayout env0}) - pure ts - <+> close ["syntax.docTransclude"] (lit "}}") - - nonNewlineSpace ch = isSpace ch && ch /= '\n' && ch /= '\r' - nonNewlineSpaces = P.takeWhileP Nothing nonNewlineSpace - - -- Allows whitespace or a newline, but not more than two newlines in a row. 
- whitespaceWithoutParagraphBreak :: P () - whitespaceWithoutParagraphBreak = void do - void nonNewlineSpaces - optional newline >>= \case - Just _ -> void nonNewlineSpaces - Nothing -> pure () - - fencedBlock = - P.label "block eval (syntax: a fenced code block)" $ - evalUnison <|> exampleBlock <|> other - where - evalUnison = wrap "syntax.docEval" $ do - -- commit after seeing that ``` is on its own line - fence <- P.try $ do - fence <- lit "```" <+> P.takeWhileP Nothing (== '`') - b <- all isSpace <$> P.lookAhead (P.takeWhileP Nothing (/= '\n')) - fence <$ guard b - CP.space - *> local - (\env -> env {inLayout = True, opening = Just "docEval"}) - (restoreStack "docEval" $ lexemes' ([] <$ lit fence)) - - exampleBlock = wrap "syntax.docExampleBlock" $ do - void $ lit "@typecheck" <* CP.space - fence <- lit "```" <+> P.takeWhileP Nothing (== '`') - local - (\env -> env {inLayout = True, opening = Just "docExampleBlock"}) - (restoreStack "docExampleBlock" $ lexemes' ([] <$ lit fence)) - - uncolumn column tabWidth s = - let skip col r | col < 1 = r - skip col s@('\t' : _) | col < tabWidth = s - skip col ('\t' : r) = skip (col - tabWidth) r - skip col (c : r) - | isSpace c && (not $ isControl c) = - skip (col - 1) r - skip _ s = s - in List.intercalate "\n" $ skip column <$> lines s - - other = wrap "syntax.docCodeBlock" $ do - column <- (\x -> x - 1) . toInteger . P.unPos <$> LP.indentLevel - let tabWidth = toInteger . P.unPos $ P.defaultTabWidth - fence <- lit "```" <+> P.takeWhileP Nothing (== '`') - name <- - P.takeWhileP Nothing nonNewlineSpace - *> tok (Textual <$> P.takeWhile1P Nothing (not . isSpace)) - <* P.takeWhileP Nothing nonNewlineSpace - _ <- void CP.eol - verbatim <- - tok $ - Textual . uncolumn column tabWidth . trimAroundDelimiters - <$> P.someTill P.anySingle ([] <$ lit fence) - pure (name <> verbatim) - - boldOrItalicOrStrikethrough closing = do - let start = - some (P.satisfy (== '*')) - <|> some (P.satisfy (== '_')) - <|> some - (P.satisfy (== '~')) - name s = - if take 1 s == "~" - then "syntax.docStrikethrough" - else if take 1 s == "*" then "syntax.docBold" else "syntax.docItalic" - end <- P.try $ do - end <- start - P.lookAhead (P.satisfy (not . isSpace)) - pure end - wrap (name end) . wrap "syntax.docParagraph" $ - join - <$> P.someTill - (leafy (closing <|> (void $ lit end)) <* whitespaceWithoutParagraphBreak) - (lit end) - - externalLink = - P.label "hyperlink (example: [link name](https://destination.com))" $ - wrap "syntax.docNamedLink" $ do - _ <- lit "[" - p <- leafies (void $ char ']') - _ <- lit "]" - _ <- lit "(" - target <- - wrap "syntax.docGroup" . 
wrap "syntax.docJoin" $ - link <|> fmap join (P.some (expr <|> wordy (char ')'))) - _ <- lit ")" - pure (p <> target) - - -- newline = P.optional (lit "\r") *> lit "\n" - - sp = P.try $ do - spaces <- P.takeWhile1P (Just "space") isSpace - close <- P.optional (P.lookAhead (lit "}}")) - case close of - Nothing -> guard $ ok spaces - Just _ -> pure () - pure spaces - where - ok s = length [() | '\n' <- s] < 2 - - spaced p = P.some (p <* P.optional sp) - leafies close = wrap "syntax.docParagraph" $ join <$> spaced (leafy close) - - list = bulletedList <|> numberedList - - bulletedList = wrap "syntax.docBulletedList" $ join <$> P.sepBy1 bullet listSep - numberedList = wrap "syntax.docNumberedList" $ join <$> P.sepBy1 numberedItem listSep - - listSep = P.try $ newline *> nonNewlineSpaces *> P.lookAhead (bulletedStart <|> numberedStart) - - bulletedStart = P.try $ do - r <- listItemStart' $ [] <$ P.satisfy bulletChar - P.lookAhead (P.satisfy isSpace) - pure r - where - bulletChar ch = ch == '*' || ch == '-' || ch == '+' - - listItemStart' gutter = P.try $ do - nonNewlineSpaces - col <- column <$> posP - parentCol <- S.gets parentListColumn - guard (col > parentCol) - (col,) <$> gutter - - numberedStart = - listItemStart' $ P.try (tok . fmap num $ LP.decimal <* lit ".") - where - num :: Word -> Lexeme - num n = Numeric (show n) - - listItemParagraph = wrap "syntax.docParagraph" $ do - col <- column <$> posP - join <$> P.some (leaf <* sep col) - where - -- Trickiness here to support hard line breaks inside of - -- a bulleted list, so for instance this parses as expected: - -- - -- * uno dos - -- tres quatro - -- * alice bob - -- carol dave eve - sep col = do - _ <- nonNewlineSpaces - _ <- - P.optional . P.try $ - newline - *> nonNewlineSpaces - *> do - col2 <- column <$> posP - guard $ col2 >= col - (P.notFollowedBy $ numberedStart <|> bulletedStart) - pure () - - numberedItem = P.label msg $ do - (col, s) <- numberedStart - pure s - <+> ( wrap "syntax.docColumn" $ do - p <- nonNewlineSpaces *> listItemParagraph - subList <- - local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> list) - pure (p <> fromMaybe [] subList) - ) - where - msg = "numbered list (examples: 1. item1, 8. start numbering at '8')" - - bullet = wrap "syntax.docColumn" . P.label "bullet (examples: * item1, - item2)" $ do - (col, _) <- bulletedStart - p <- nonNewlineSpaces *> listItemParagraph - subList <- - local - (\e -> e {parentListColumn = col}) - (P.optional $ listSep *> list) - pure (p <> fromMaybe [] subList) - - newline = P.label "newline" $ lit "\n" <|> lit "\r\n" - - -- ## Section title - -- - -- A paragraph under this section. - -- Part of the same paragraph. Blanklines separate paragraphs. - -- - -- ### A subsection title - -- - -- A paragraph under this subsection. 
-
-      -- # A section title (not a subsection)
-      section :: P [Token Lexeme]
-      section = wrap "syntax.docSection" $ do
-        ns <- S.gets parentSections
-        hashes <- P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp
-        title <- paragraph <* CP.space
-        let m = length hashes + head ns
-        body <-
-          local (\env -> env {parentSections = (m : (tail ns))}) $
-            P.many (sectionElem <* CP.space)
-        pure $ title <> join body
-
-      wrap :: String -> P [Token Lexeme] -> P [Token Lexeme]
-      wrap o p = do
-        start <- posP
-        lexemes <- p
-        pure $ go start lexemes
-        where
-          go start [] = [Token (Open o) start start, Token Close start start]
-          go start ts@(Token _ x _ : _) =
-            Token (Open o) start x : (ts ++ [Token Close (end final) (end final)])
-            where
-              final = last ts
-
 doc :: P [Token Lexeme]
 doc = open <+> (CP.space *> fmap fixup body) <+> (close <* space)
   where

From d1fe6d9429ac5ed69b101fde6bbc353195f2edf5 Mon Sep 17 00:00:00 2001
From: Greg Pfeil
Date: Mon, 1 Jul 2024 13:43:06 -0600
Subject: [PATCH 02/22] Separate the `Doc` lexer from the Unison lexer

`doc2` is a Unison lexer that traverses a `Doc`. `docBody` is the actual
`Doc` lexer that is ignorant of the fact that Unison wraps `Doc` blocks
in `{{`/`}}`.
---
 unison-syntax/src/Unison/Syntax/Lexer.hs | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs
index 144ccd95c3..77c91b8e84 100644
--- a/unison-syntax/src/Unison/Syntax/Lexer.hs
+++ b/unison-syntax/src/Unison/Syntax/Lexer.hs
@@ -425,7 +425,7 @@ doc2 = do
           }
       )
       do
-        bodyToks <- body
+        bodyToks <- docBody
         closeStart <- posP
         lit "}}"
         closeEnd <- posP
@@ -453,6 +453,7 @@ doc2 = do
           isTopLevel = length (layout env0) + maybe 0 (const 1) (opening env0) == 1
     _ -> docToks <> endToks
   where
+    -- DUPLICATED
     wordyKw kw = separated wordySep (lit kw)
     subsequentTypeName = P.lookAhead . P.optional $ do
      let lit' s = lit s <* sp
@@ -464,7 +465,23 @@ doc2 = do
          then P.customFailure (Token (InvalidSymbolyId (Text.unpack (HQ'.toTextWith Name.toText name))) start stop)
          else pure (WordyId name)
     ignore _ _ _ = []
-    body = join <$> P.many (sectionElem <* CP.space)
+    -- DUPLICATED
+    sp = P.try $ do
+      spaces <- P.takeWhile1P (Just "space") isSpace
+      close <- P.optional (P.lookAhead (lit "}}"))
+      case close of
+        Nothing -> guard $ ok spaces
+        Just _ -> pure ()
+      pure spaces
+      where
+        ok s = length [() | '\n' <- s] < 2
+
+-- | This is the actual `Doc` lexer. Unlike `doc2`, it doesn’t do any Unison-side lexing (i.e., it doesn’t know that
+-- Unison wraps `Doc` literals in `}}`).
+docBody :: P [Token Lexeme]
+docBody = join <$> P.many (sectionElem <* CP.space)
+  where
+    wordyKw kw = separated wordySep (lit kw)
     sectionElem = section <|> fencedBlock <|> list <|> paragraph

From 543daa36c74ab0bf1d4643b37b4baef30de88994 Mon Sep 17 00:00:00 2001
From: Greg Pfeil
Date: Wed, 3 Jul 2024 12:59:09 -0600
Subject: [PATCH 03/22] Move the `Annotated` class to the `Ann` module

This is in preparation for using `Ann` in the `Lexer` module, as that
module actually does some parsing.
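As a rough illustration of the payoff (a sketch, not code in this
patch): with `Annotated` living next to `Ann`, any component can compute
the source span of a parsed structure by folding annotations, e.g. a
hypothetical helper

    spanOf :: (Annotated a) => [a] -> Ann
    spanOf = ann -- the `[a]` instance `foldMap`s the element spans

with the lexer's new `Token` instance (`ann (Token _ s e) = Ann s e`)
supplying the base case.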
---
 unison-syntax/src/Unison/Parser/Ann.hs    | 21 +++++++++++++++++++++
 unison-syntax/src/Unison/Syntax/Lexer.hs  |  5 +++++
 unison-syntax/src/Unison/Syntax/Parser.hs | 16 ++--------------
 3 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/unison-syntax/src/Unison/Parser/Ann.hs b/unison-syntax/src/Unison/Parser/Ann.hs
index feec96279c..961bbcb30c 100644
--- a/unison-syntax/src/Unison/Parser/Ann.hs
+++ b/unison-syntax/src/Unison/Parser/Ann.hs
@@ -4,7 +4,10 @@
 module Unison.Parser.Ann where

+import Data.List.NonEmpty (NonEmpty)
+import Data.Void (absurd)
 import Unison.Lexer.Pos qualified as L
+import Unison.Prelude

 data Ann
   = -- Used for things like Builtins which don't have a source position.
@@ -79,3 +82,21 @@ encompasses (GeneratedFrom ann) other = encompasses ann other
 encompasses ann (GeneratedFrom other) = encompasses ann other
 encompasses (Ann start1 end1) (Ann start2 end2) =
   Just $ start1 <= start2 && end1 >= end2
+
+class Annotated a where
+  ann :: a -> Ann
+
+instance Annotated Ann where
+  ann = id
+
+instance (Annotated a) => Annotated [a] where
+  ann = foldMap ann
+
+instance (Annotated a) => Annotated (NonEmpty a) where
+  ann = foldMap ann
+
+instance (Annotated a) => Annotated (Maybe a) where
+  ann = foldMap ann
+
+instance Annotated Void where
+  ann = absurd
diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs
index 77c91b8e84..fa169e2d06 100644
--- a/unison-syntax/src/Unison/Syntax/Lexer.hs
+++ b/unison-syntax/src/Unison/Syntax/Lexer.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE TemplateHaskell #-}
+{-# OPTIONS_GHC -Wno-orphans #-}

 module Unison.Syntax.Lexer
   ( Token (..),
@@ -51,6 +52,7 @@ import Unison.Name qualified as Name
 import Unison.NameSegment (NameSegment)
 import Unison.NameSegment qualified as NameSegment (docSegment)
 import Unison.NameSegment.Internal qualified as NameSegment
+import Unison.Parser.Ann (Ann (Ann), Annotated (..))
 import Unison.Prelude
 import Unison.ShortHash (ShortHash)
 import Unison.ShortHash qualified as SH
@@ -64,6 +66,9 @@ import Unison.Syntax.ShortHash qualified as ShortHash (shortHashP)
 import Unison.Util.Bytes qualified as Bytes
 import Unison.Util.Monoid (intercalateMap)

+instance Annotated (Token a) where
+  ann (Token _ s e) = Ann s e
+
 type BlockName = String

 type Layout = [(BlockName, Column)]
diff --git a/unison-syntax/src/Unison/Syntax/Parser.hs b/unison-syntax/src/Unison/Syntax/Parser.hs
index affab5bf2c..4945f4347e 100644
--- a/unison-syntax/src/Unison/Syntax/Parser.hs
+++ b/unison-syntax/src/Unison/Syntax/Parser.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE UndecidableInstances #-}
+{-# OPTIONS_GHC -Wno-orphans #-}

 module Unison.Syntax.Parser
   ( Annotated (..),
@@ -77,7 +78,7 @@ import Unison.Hashable qualified as Hashable
 import Unison.Name as Name
 import Unison.Names (Names)
 import Unison.Names.ResolutionResult qualified as Names
-import Unison.Parser.Ann (Ann (..))
+import Unison.Parser.Ann (Ann (..), Annotated (..))
 import Unison.Pattern (Pattern)
 import Unison.Pattern qualified as Pattern
 import Unison.Prelude
@@ -177,25 +178,12 @@ newtype Input = Input {inputStream :: [L.Token L.Lexeme]}
   deriving stock (Eq, Ord, Show)
   deriving newtype (P.Stream, P.VisualStream)

-class Annotated a where
-  ann :: a -> Ann
-
-instance Annotated Ann where
-  ann = id
-
-instance Annotated (L.Token a) where
-  ann (L.Token _ s e) = Ann s e
-
 instance (Annotated a) => Annotated (ABT.Term f v a) where
   ann = ann . ABT.annotation

 instance (Annotated a) => Annotated (Pattern a) where
   ann = ann . Pattern.loc

-instance (Annotated a) => Annotated [a] where
-  ann [] = mempty
-  ann (h : t) = foldl' (\acc a -> acc <> ann a) (ann h) t
-
 instance (Annotated a, Annotated b) => Annotated (MatchCase a b) where
   ann (MatchCase p _ b) = ann p <> ann b

From 5f87b4152739662e20120810ee0525228f9a3363 Mon Sep 17 00:00:00 2001
From: Greg Pfeil
Date: Mon, 1 Jul 2024 21:11:53 -0600
Subject: [PATCH 04/22] Un-hiding the `Doc` parser
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`doc2` was a parser in lexer’s clothing. It would parse recursively, but
then return the result as a flat list of tokens.

This separates the parsing from the “unparsing” (which returns the
tokens), so now we have a parser that produces a recursive `Doc`
structure. The parser currently applies the unparser immediately, so it
should produce the same stream of tokens as the previous version.

Eventually, we should be able to avoid unparsing the `Doc` structure.
---
 unison-syntax/package.yaml               |   1 +
 unison-syntax/src/Unison/Syntax/Lexer.hs | 451 ++++++++++++++++-------
 unison-syntax/unison-syntax.cabal        |   2 +
 3 files changed, 330 insertions(+), 124 deletions(-)

diff --git a/unison-syntax/package.yaml b/unison-syntax/package.yaml
index 8e1a478baf..ccb1a057d7 100644
--- a/unison-syntax/package.yaml
+++ b/unison-syntax/package.yaml
@@ -10,6 +10,7 @@ dependencies:
   - containers
   - cryptonite
   - extra
+  - free
   - lens
   - megaparsec
   - mtl
diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs
index fa169e2d06..fd27118050 100644
--- a/unison-syntax/src/Unison/Syntax/Lexer.hs
+++ b/unison-syntax/src/Unison/Syntax/Lexer.hs
@@ -8,6 +8,16 @@ module Unison.Syntax.Lexer
     Err (..),
     Pos (..),
     Lexeme (..),
+    DocTree,
+    DocUntitledSection (..),
+    DocTop (..),
+    DocColumn (..),
+    DocLeaf (..),
+    DocEmbedLink (..),
+    DocSourceElement (..),
+    DocEmbedSignatureLink (..),
+    DocJoin (..),
+    DocEmbedAnnotation (..),
     lexer,
     escapeChars,
     debugFileLex,
@@ -28,16 +38,19 @@ module Unison.Syntax.Lexer
   )
 where

+import Control.Comonad.Cofree (Cofree ((:<)))
 import Control.Monad.State qualified as S
 import Data.Char (isAlphaNum, isControl, isDigit, isSpace, ord, toLower)
 import Data.Foldable qualified as Foldable
 import Data.List qualified as List
 import Data.List.Extra qualified as List
+import Data.List.NonEmpty (NonEmpty ((:|)))
 import Data.List.NonEmpty qualified as Nel
 import Data.List.NonEmpty qualified as NonEmpty
 import Data.Map.Strict qualified as Map
 import Data.Set qualified as Set
 import Data.Text qualified as Text
+import Data.Void (vacuous)
 import GHC.Exts (sortWith)
 import Text.Megaparsec qualified as P
 import Text.Megaparsec.Char (char)
@@ -52,7 +65,7 @@ import Unison.Name qualified as Name
 import Unison.NameSegment (NameSegment)
 import Unison.NameSegment qualified as NameSegment (docSegment)
 import Unison.NameSegment.Internal qualified as NameSegment
-import Unison.Parser.Ann (Ann (Ann), Annotated (..))
+import Unison.Parser.Ann (Ann (Ann, GeneratedFrom), Annotated (..))
 import Unison.Prelude
 import Unison.ShortHash (ShortHash)
 import Unison.ShortHash qualified as SH
@@ -66,6 +79,9 @@ import Unison.Syntax.ShortHash qualified as ShortHash (shortHashP)
 import Unison.Util.Bytes qualified as Bytes
 import Unison.Util.Monoid (intercalateMap)

+instance (Annotated a) => Annotated (Cofree f a) where
+  ann (a :< _) = ann a
+
 instance Annotated (Token a) where
   ann (Token _ s e) = Ann s e

@@ -418,11 +434,9 @@ doc2 = do
   void $ lit "{{"
   openEnd <- posP
   CP.space
-  -- Construct the token for opening the 
doc block. - let openTok = Token (Open "syntax.docUntitledSection") openStart openEnd env0 <- S.get -- Disable layout while parsing the doc block and reset the section number - (bodyToks0, closeTok) <- local + (docToks, closeTok) <- local ( \env -> env { inLayout = False, @@ -430,33 +444,30 @@ doc2 = do } ) do - bodyToks <- docBody + bodyToks <- docBody (lit "}}") closeStart <- posP lit "}}" closeEnd <- posP - pure (bodyToks, Token Close closeStart closeEnd) - let docToks = beforeStartToks <> [openTok] <> bodyToks0 <> [closeTok] + pure (docToLexemes (openStart, closeEnd) bodyToks, Token Close closeStart closeEnd) -- Parse any layout tokens after the doc block, e.g. virtual semicolon endToks <- token' ignore (pure ()) -- Hack to allow anonymous doc blocks before type decls -- {{ Some docs }} Foo.doc = {{ Some docs }} -- ability Foo where => ability Foo where tn <- subsequentTypeName - pure $ case (tn) of - -- If we're followed by a type, we rewrite the doc block to be a named doc block. - (Just (WordyId tname)) - | isTopLevel -> - beforeStartToks - <> [WordyId (HQ'.fromName (Name.snoc (HQ'.toName tname) NameSegment.docSegment)) <$ openTok, Open "=" <$ openTok] - <> [openTok] - <> bodyToks0 - <> [closeTok] - -- We need an extra 'Close' here because we added an extra Open above. - <> [closeTok] - <> endToks - where - isTopLevel = length (layout env0) + maybe 0 (const 1) (opening env0) == 1 - _ -> docToks <> endToks + pure $ + beforeStartToks <> case (tn) of + -- If we're followed by a type, we rewrite the doc block to be a named doc block. + Just (WordyId tname) + | isTopLevel -> + Token (WordyId (HQ'.fromName (Name.snoc (HQ'.toName tname) NameSegment.docSegment))) openStart openEnd + : Token (Open "=") openStart openEnd + : docToks + -- We need an extra 'Close' here because we added an extra Open above. + <> (closeTok : endToks) + where + isTopLevel = length (layout env0) + maybe 0 (const 1) (opening env0) == 1 + _ -> docToks <> endToks where -- DUPLICATED wordyKw kw = separated wordySep (lit kw) @@ -481,17 +492,221 @@ doc2 = do where ok s = length [() | '\n' <- s] < 2 +-- | Like `P.some`, but returns an actual `NonEmpty`. +some' :: P a -> P (NonEmpty a) +some' p = liftA2 (:|) p $ many p + +-- | Like `P.someTill`, but returns an actual `NonEmpty`. +someTill' :: P a -> P end -> P (NonEmpty a) +someTill' p end = liftA2 (:|) p $ P.manyTill p end + +-- | Like `P.sepBy1`, but returns an actual `NonEmpty`. +sepBy1' :: P a -> P sep -> P (NonEmpty a) +sepBy1' p sep = liftA2 (:|) p . many $ sep *> p + +newtype DocUntitledSection a = DocUntitledSection [a] + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +-- | Haskell parallel to @unison/base.Doc@. +-- +-- This is much more restricted than @unison/base.Doc@, but it covers everything we can parse from Haskell. The +-- mismatch with Unison is a problem, as someone can create a Unison Doc with explicit constructors or function calls, +-- have it rendered to a scratch file, and then we can’t parse it. Changing the types here to match Unison wouldn’t +-- fix the issue. We have to modify the types and parser in concert (in both Haskell and Unison) to bring them in +-- line. +-- +-- __NB__: Uses of @[`Token` `Lexeme`]@ here indicate a nested transition to the Unison lexer. 
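+--
+-- For intuition only (a schematic sketch; `Token` wrappers and `Ann` annotations are elided), a doc literal like
+--
+-- > {{ # Title
+-- >    Some words. }}
+--
+-- parses to something shaped like
+--
+-- > DocUntitledSection
+-- >   [DocSection (DocParagraph [DocWord "Title"])
+-- >      [DocParagraph [DocWord "Some", DocWord "words."]]]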
+data DocTop a + = -- | The first argument is always a Paragraph + DocSection a [a] + | DocEval [Token Lexeme] + | DocExampleBlock [Token Lexeme] + | DocCodeBlock (Token String) (Token String) + | DocBulletedList (NonEmpty (DocColumn a)) + | DocNumberedList (NonEmpty (Token Word64, DocColumn a)) + | DocParagraph (NonEmpty (DocLeaf a)) + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +data DocColumn a + = -- | The first is always a Paragraph, and the second a Bulleted or Numbered List + DocColumn a (Maybe a) + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +data DocLeaf a + = DocLink DocEmbedLink + | -- | first is a Paragraph, second is always a Group (which contains either a single Term/Type link or list of + -- Transcludes & Words) + DocNamedLink a (DocLeaf Void) + | DocExample [Token Lexeme] + | DocTransclude [Token Lexeme] + | -- | Always a Paragraph + DocBold a + | -- | Always a Paragraph + DocItalic a + | -- | Always a Paragraph + DocStrikethrough a + | -- | Always a Word + DocVerbatim (DocLeaf Void) + | -- | Always a Word + DocCode (DocLeaf Void) + | DocSource (NonEmpty DocSourceElement) + | DocFoldedSource (NonEmpty DocSourceElement) + | DocEvalInline [Token Lexeme] + | DocSignature (NonEmpty DocEmbedSignatureLink) + | DocSignatureInline DocEmbedSignatureLink + | DocWord (Token String) + | DocGroup (DocJoin a) + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +data DocEmbedLink + = DocEmbedTypeLink (Token (HQ'.HashQualified Name)) + | DocEmbedTermLink (Token (HQ'.HashQualified Name)) + deriving (Eq, Ord, Show) + +data DocSourceElement = DocSourceElement DocEmbedLink [DocEmbedAnnotation] + deriving (Eq, Ord, Show) + +newtype DocEmbedSignatureLink = DocEmbedSignatureLink (Token (HQ'.HashQualified Name)) + deriving (Eq, Ord, Show) + +newtype DocJoin a = DocJoin (NonEmpty (DocLeaf a)) + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +newtype DocEmbedAnnotation + = -- | Always a DocTransclude + DocEmbedAnnotation (Either (Token (HQ'.HashQualified Name)) (DocLeaf Void)) + deriving (Eq, Ord, Show) + +type DocTree = Cofree DocTop Ann + +instance (Annotated a) => Annotated (DocTop a) where + ann = \case + DocSection title body -> ann title <> ann body + DocEval code -> ann code + DocExampleBlock code -> ann code + DocCodeBlock label body -> ann label <> ann body + DocBulletedList items -> ann items + DocNumberedList items -> ann $ snd <$> items + DocParagraph leaves -> ann leaves + +instance (Annotated a) => Annotated (DocColumn a) where + ann (DocColumn para list) = ann para <> ann list + +instance (Annotated a) => Annotated (DocLeaf a) where + ann = \case + DocLink link -> ann link + DocNamedLink label target -> ann label <> ann target + DocExample code -> ann code + DocTransclude code -> ann code + DocBold para -> ann para + DocItalic para -> ann para + DocStrikethrough para -> ann para + DocVerbatim word -> ann word + DocCode word -> ann word + DocSource elems -> ann elems + DocFoldedSource elems -> ann elems + DocEvalInline code -> ann code + DocSignature links -> ann links + DocSignatureInline link -> ann link + DocWord text -> ann text + DocGroup (DocJoin leaves) -> ann leaves + +instance Annotated DocEmbedLink where + ann = \case + DocEmbedTypeLink name -> ann name + DocEmbedTermLink name -> ann name + +instance Annotated DocSourceElement where + ann (DocSourceElement link target) = ann link <> ann target + +instance Annotated DocEmbedSignatureLink where + ann (DocEmbedSignatureLink name) = ann name + +instance Annotated 
DocEmbedAnnotation where + ann (DocEmbedAnnotation a) = either ann ann a + +-- | This is a short-term hack to turn our parse tree back into the sequence of lexemes the current parser expects. +-- +-- The medium-term solution is to preserve @[`DocTree`]@ as its own lexeme type, and hand it to the parser without +-- flattening it back to tokens. Longer-term, maybe we add a real lexer for @Doc@, and then whatever is left of this +-- parser moves into the actual parser. +docToLexemes :: (Pos, Pos) -> DocUntitledSection DocTree -> [Token Lexeme] +docToLexemes (startDoc, endDoc) (DocUntitledSection tops) = + Token (Open "syntax.docUntitledSection") startDoc startDoc + : concatMap cata tops <> pure (Token Close endDoc endDoc) + where + wrap :: Ann -> String -> [Token Lexeme] -> [Token Lexeme] + wrap ann suffix lexemes = go (extractStart ann) lexemes + where + extractStart = \case + Ann start _ -> start + GeneratedFrom a -> extractStart a + a -> error $ "expected a good Pos! Got: " <> show a + o = "syntax.doc" <> suffix + go start [] = [Token (Open o) start start, Token Close start start] + go start ts@(Token _ x _ : _) = + Token (Open o) start x : (ts ++ [Token Close (end final) (end final)]) + where + final = last ts + cata :: DocTree -> [Token Lexeme] + cata (a :< top) = docTop a $ cata <$> top + docTop start = \case + DocSection title body -> wrap start "Section" $ title <> join body + DocEval code -> wrap start "Eval" code + DocExampleBlock code -> wrap start "ExampleBlock" code + DocCodeBlock label text -> wrap start "CodeBlock" [Textual <$> label, Textual <$> text] + DocBulletedList items -> wrap start "BulletedList" . concat $ (\col -> docColumn (ann col) col) <$> items + DocNumberedList items -> + wrap start "NumberedList" . concat $ + uncurry (:) . bimap (Numeric . show <$>) (\col -> docColumn (ann col) col) + <$> items + DocParagraph body -> wrap start "Paragraph" . concat $ (\l -> docLeaf (ann l) l) <$> body + docColumn start (DocColumn para mlist) = wrap start "Column" $ foldr (flip (<>)) para mlist + docLeaf start = \case + DocLink link -> wrap start "Link" $ docEmbedLink (ann link) link + DocNamedLink name target -> wrap start "NamedLink" $ name <> docLeaf (ann target) (vacuous target) + DocExample code -> wrap start "Example" code + DocTransclude code -> wrap start "Transclude" code + DocBold para -> wrap start "Bold" para + DocItalic para -> wrap start "Italic" para + DocStrikethrough para -> wrap start "Strikethrough" para + DocVerbatim word -> wrap start "Verbatim" . docLeaf (ann word) $ vacuous word + DocCode word -> wrap start "Code" . docLeaf (ann word) $ vacuous word + DocSource elems -> wrap start "Source" . concat $ (\e -> docSourceElement (ann e) e) <$> elems + DocFoldedSource elems -> wrap start "FoldedSource" . concat $ (\e -> docSourceElement (ann e) e) <$> elems + DocEvalInline code -> wrap start "EvalInline" code + DocSignature links -> wrap start "Signature" . concat $ (\l -> docEmbedSignatureLink (ann l) l) <$> links + DocSignatureInline link -> wrap start "SignatureInline" $ docEmbedSignatureLink (ann link) link + DocWord text -> wrap start "Word" . pure $ Textual <$> text + DocGroup (DocJoin leaves) -> + wrap start "Group" . wrap start "Join" . concat $ (\l -> docLeaf (ann l) l) <$> leaves + docEmbedLink start = \case + DocEmbedTypeLink ident -> wrap start "EmbedTypeLink" . pure $ identifierLexeme <$> ident + DocEmbedTermLink ident -> wrap start "EmbedTermLink" . 
pure $ identifierLexeme <$> ident + docSourceElement start (DocSourceElement link anns) = + wrap start "SourceElement" $ + docEmbedLink (ann link) link + <> maybe + [] + ((Token (Reserved "@") (Pos 0 0) (Pos 0 0) :) . concatMap (\a -> docEmbedAnnotation (ann a) a)) + (NonEmpty.nonEmpty anns) + docEmbedSignatureLink start (DocEmbedSignatureLink ident) = + wrap start "EmbedSignatureLink" . pure $ identifierLexeme <$> ident + docEmbedAnnotation start (DocEmbedAnnotation a) = + wrap start "EmbedAnnotation" $ either (pure . fmap identifierLexeme) (\l -> docLeaf (ann l) $ vacuous l) a + -- | This is the actual `Doc` lexer. Unlike `doc2`, it doesn’t do any Unison-side lexing (i.e., it doesn’t know that -- Unison wraps `Doc` literals in `}}`). -docBody :: P [Token Lexeme] -docBody = join <$> P.many (sectionElem <* CP.space) +docBody :: P end -> P (DocUntitledSection DocTree) +docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) where wordyKw kw = separated wordySep (lit kw) sectionElem = section <|> fencedBlock <|> list <|> paragraph - paragraph = wrap "syntax.docParagraph" $ join <$> spaced leaf + paragraph = wrap' . DocParagraph <$> spaced leaf reserved word = List.isPrefixOf "}}" word || all (== '#') word - wordy closing = wrap "syntax.docWord" . tok . fmap Textual . P.try $ do + wordy :: P end -> P (DocLeaf void) + wordy closing = fmap DocWord . tokenP . P.try $ do let end = P.lookAhead $ void docClose @@ -520,65 +735,61 @@ docBody = join <$> P.many (sectionElem <* CP.space) where comma = lit "," <* CP.space src = - src' "syntax.docSource" "@source" - <|> src' "syntax.docFoldedSource" "@foldedSource" + src' DocSource "@source" + <|> src' DocFoldedSource "@foldedSource" srcElem = - wrap "syntax.docSourceElement" $ - (typeLink <|> termLink) - <+> ( fmap (fromMaybe []) . P.optional $ - (tok (Reserved <$> lit "@") <+> (CP.space *> annotations)) - ) + DocSourceElement + <$> (typeLink <|> termLink) + <*> ( fmap (fromMaybe []) . P.optional $ + (lit "@") *> (CP.space *> annotations) + ) where - annotation = tok identifierLexemeP <|> expr <* CP.space + annotation = fmap Left (tokenP identifierP) <|> fmap Right expr <* CP.space annotations = - join <$> P.some (wrap "syntax.docEmbedAnnotation" annotation) - src' name atName = wrap name $ do + P.some (DocEmbedAnnotation <$> annotation) + src' name atName = fmap name $ do _ <- lit atName *> (lit " {" <|> lit "{") *> CP.space - s <- P.sepBy1 srcElem comma + s <- sepBy1' srcElem comma _ <- lit "}" - pure (join s) - signature = wrap "syntax.docSignature" $ do + pure s + signature = fmap DocSignature $ do _ <- (lit "@signatures" <|> lit "@signature") *> (lit " {" <|> lit "{") *> CP.space - s <- join <$> P.sepBy1 signatureLink comma + s <- sepBy1' signatureLink comma _ <- lit "}" pure s - signatureInline = wrap "syntax.docSignatureInline" $ do + signatureInline = fmap DocSignatureInline $ do _ <- lit "@inlineSignature" *> (lit " {" <|> lit "{") *> CP.space s <- signatureLink _ <- lit "}" pure s - evalInline = wrap "syntax.docEvalInline" $ do + evalInline = fmap DocEvalInline $ do _ <- lit "@eval" *> (lit " {" <|> lit "{") *> CP.space let inlineEvalClose = [] <$ lit "}" s <- lexemes' inlineEvalClose pure s - typeLink = wrap "syntax.docEmbedTypeLink" do + typeLink = fmap DocEmbedTypeLink $ do _ <- typeOrAbilityAlt (wordyKw . 
Text.unpack) <* CP.space - tok identifierLexemeP <* CP.space + tokenP identifierP <* CP.space termLink = - wrap "syntax.docEmbedTermLink" $ - tok identifierLexemeP <* CP.space + fmap DocEmbedTermLink $ + tokenP identifierP <* CP.space signatureLink = - wrap "syntax.docEmbedSignatureLink" $ - tok identifierLexemeP <* CP.space + fmap DocEmbedSignatureLink $ + tokenP identifierP <* CP.space groupy closing p = do - Token p start stop <- tokenP p + Token p _ _ <- tokenP p after <- P.optional . P.try $ leafy closing pure $ case after of Nothing -> p Just after -> - [ Token (Open "syntax.docGroup") start stop', - Token (Open "syntax.docJoin") start stop' - ] - <> p - <> after - <> (take 2 $ repeat (Token Close stop' stop')) - where - stop' = maybe stop end (lastMay after) + DocGroup + . DocJoin + $ p + :| pure after verbatim = P.label "code (examples: ''**unformatted**'', `words` or '''_words_''')" $ do @@ -595,17 +806,17 @@ docBody = join <$> P.many (sectionElem <* CP.space) let txt = trimIndentFromVerbatimBlock (column start - 1) trimmed -- If it's a multi-line verbatim block we trim any whitespace representing -- indentation from the pretty-printer. See 'trimIndentFromVerbatimBlock' - wrap "syntax.docVerbatim" $ - wrap "syntax.docWord" $ - pure [Token (Textual txt) start stop] + pure . DocVerbatim $ + DocWord $ + Token txt start stop else - wrap "syntax.docCode" $ - wrap "syntax.docWord" $ - pure [Token (Textual originalText) start stop] + pure . DocCode $ + DocWord $ + Token originalText start stop exampleInline = P.label "inline code (examples: ``List.map f xs``, ``[1] :+ 2``)" $ - wrap "syntax.docExample" $ do + fmap DocExample $ do n <- P.try $ do _ <- lit "`" length <$> P.takeWhile1P (Just "backticks") (== '`') @@ -613,19 +824,19 @@ docBody = join <$> P.many (sectionElem <* CP.space) ex <- CP.space *> lexemes' end pure ex - docClose = [] <$ lit "}}" + docClose = [] <$ docClose' docOpen = [] <$ lit "{{" link = P.label "link (examples: {type List}, {Nat.+})" $ - wrap "syntax.docLink" $ + fmap DocLink $ P.try $ lit "{" *> (typeLink <|> termLink) <* lit "}" expr = - P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $ + fmap DocTransclude . P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $ openAs "{{" "syntax.docTransclude" - <+> do + *> do env0 <- S.get -- we re-allow layout within a transclusion, then restore it to its -- previous state after @@ -637,7 +848,7 @@ docBody = join <$> P.many (sectionElem <* CP.space) ts <- lexemes' (P.lookAhead ([] <$ lit "}}")) S.modify (\env -> env {inLayout = inLayout env0}) pure ts - <+> close ["syntax.docTransclude"] (lit "}}") + <* close ["syntax.docTransclude"] (lit "}}") nonNewlineSpace ch = isSpace ch && ch /= '\n' && ch /= '\r' nonNewlineSpaces = P.takeWhileP Nothing nonNewlineSpace @@ -654,7 +865,7 @@ docBody = join <$> P.many (sectionElem <* CP.space) P.label "block eval (syntax: a fenced code block)" $ evalUnison <|> exampleBlock <|> other where - evalUnison = wrap "syntax.docEval" $ do + evalUnison = fmap (wrap' . DocEval) $ do -- commit after seeing that ``` is on its own line fence <- P.try $ do fence <- lit "```" <+> P.takeWhileP Nothing (== '`') @@ -665,7 +876,7 @@ docBody = join <$> P.many (sectionElem <* CP.space) (\env -> env {inLayout = True, opening = Just "docEval"}) (restoreStack "docEval" $ lexemes' ([] <$ lit fence)) - exampleBlock = wrap "syntax.docExampleBlock" $ do + exampleBlock = fmap (wrap' . 
DocExampleBlock) $ do void $ lit "@typecheck" <* CP.space fence <- lit "```" <+> P.takeWhileP Nothing (== '`') local @@ -682,20 +893,20 @@ docBody = join <$> P.many (sectionElem <* CP.space) skip _ s = s in List.intercalate "\n" $ skip column <$> lines s - other = wrap "syntax.docCodeBlock" $ do + other = fmap (uncurry $ wrapSimple2 DocCodeBlock) $ do column <- (\x -> x - 1) . toInteger . P.unPos <$> LP.indentLevel let tabWidth = toInteger . P.unPos $ P.defaultTabWidth fence <- lit "```" <+> P.takeWhileP Nothing (== '`') name <- P.takeWhileP Nothing nonNewlineSpace - *> tok (Textual <$> P.takeWhile1P Nothing (not . isSpace)) + *> tokenP (P.takeWhile1P Nothing (not . isSpace)) <* P.takeWhileP Nothing nonNewlineSpace _ <- void CP.eol verbatim <- - tok $ - Textual . uncolumn column tabWidth . trimAroundDelimiters + tokenP $ + uncolumn column tabWidth . trimAroundDelimiters <$> P.someTill P.anySingle ([] <$ lit fence) - pure (name <> verbatim) + pure (name, verbatim) boldOrItalicOrStrikethrough closing = do let start = @@ -705,30 +916,29 @@ docBody = join <$> P.many (sectionElem <* CP.space) (P.satisfy (== '~')) name s = if take 1 s == "~" - then "syntax.docStrikethrough" - else if take 1 s == "*" then "syntax.docBold" else "syntax.docItalic" + then DocStrikethrough + else if take 1 s == "*" then DocBold else DocItalic end <- P.try $ do end <- start P.lookAhead (P.satisfy (not . isSpace)) pure end - wrap (name end) . wrap "syntax.docParagraph" $ - join - <$> P.someTill - (leafy (closing <|> (void $ lit end)) <* whitespaceWithoutParagraphBreak) - (lit end) + name end . wrap' . DocParagraph + <$> someTill' + (leafy (closing <|> (void $ lit end)) <* whitespaceWithoutParagraphBreak) + (lit end) externalLink = P.label "hyperlink (example: [link name](https://destination.com))" $ - wrap "syntax.docNamedLink" $ do + fmap (uncurry DocNamedLink) $ do _ <- lit "[" p <- leafies (void $ char ']') _ <- lit "]" _ <- lit "(" target <- - wrap "syntax.docGroup" . wrap "syntax.docJoin" $ - link <|> fmap join (P.some (expr <|> wordy (char ')'))) + fmap (DocGroup . DocJoin) $ + fmap pure link <|> some' (expr <|> wordy (char ')')) _ <- lit ")" - pure (p <> target) + pure (p, target) -- newline = P.optional (lit "\r") *> lit "\n" @@ -742,15 +952,15 @@ docBody = join <$> P.many (sectionElem <* CP.space) where ok s = length [() | '\n' <- s] < 2 - spaced p = P.some (p <* P.optional sp) - leafies close = wrap "syntax.docParagraph" $ join <$> spaced (leafy close) + spaced p = some' (p <* P.optional sp) + leafies close = wrap' . DocParagraph <$> spaced (leafy close) list = bulletedList <|> numberedList - bulletedList = wrap "syntax.docBulletedList" $ join <$> P.sepBy1 bullet listSep - numberedList = wrap "syntax.docNumberedList" $ join <$> P.sepBy1 numberedItem listSep + bulletedList = wrap' . DocBulletedList <$> sepBy1' bullet listSep + numberedList = wrap' . 
DocNumberedList <$> sepBy1' numberedItem listSep - listSep = P.try $ newline *> nonNewlineSpaces *> P.lookAhead (bulletedStart <|> numberedStart) + listSep = P.try $ newline *> nonNewlineSpaces *> P.lookAhead (void bulletedStart <|> void numberedStart) bulletedStart = P.try $ do r <- listItemStart' $ [] <$ P.satisfy bulletChar @@ -759,6 +969,7 @@ docBody = join <$> P.many (sectionElem <* CP.space) where bulletChar ch = ch == '*' || ch == '-' || ch == '+' + listItemStart' :: P a -> P (Int, a) listItemStart' gutter = P.try $ do nonNewlineSpaces col <- column <$> posP @@ -767,14 +978,11 @@ docBody = join <$> P.many (sectionElem <* CP.space) (col,) <$> gutter numberedStart = - listItemStart' $ P.try (tok . fmap num $ LP.decimal <* lit ".") - where - num :: Word -> Lexeme - num n = Numeric (show n) + listItemStart' $ P.try (tokenP $ LP.decimal <* lit ".") - listItemParagraph = wrap "syntax.docParagraph" $ do + listItemParagraph = fmap (wrap' . DocParagraph) $ do col <- column <$> posP - join <$> P.some (leaf <* sep col) + some' (leaf <* sep col) where -- Trickiness here to support hard line breaks inside of -- a bulleted list, so for instance this parses as expected: @@ -792,29 +1000,29 @@ docBody = join <$> P.many (sectionElem <* CP.space) *> do col2 <- column <$> posP guard $ col2 >= col - (P.notFollowedBy $ numberedStart <|> bulletedStart) + (P.notFollowedBy $ void numberedStart <|> void bulletedStart) pure () numberedItem = P.label msg $ do (col, s) <- numberedStart - pure s - <+> ( wrap "syntax.docColumn" $ do + (s,) + <$> ( fmap (uncurry DocColumn) $ do p <- nonNewlineSpaces *> listItemParagraph subList <- local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> list) - pure (p <> fromMaybe [] subList) + pure (p, subList) ) where msg = "numbered list (examples: 1. item1, 8. start numbering at '8')" - bullet = wrap "syntax.docColumn" . P.label "bullet (examples: * item1, - item2)" $ do + bullet = fmap (uncurry DocColumn) . P.label "bullet (examples: * item1, - item2)" $ do (col, _) <- bulletedStart p <- nonNewlineSpaces *> listItemParagraph subList <- local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> list) - pure (p <> fromMaybe [] subList) + pure (p, subList) newline = P.label "newline" $ lit "\n" <|> lit "\r\n" @@ -828,8 +1036,8 @@ docBody = join <$> P.many (sectionElem <* CP.space) -- A paragraph under this subsection. -- # A section title (not a subsection) - section :: P [Token Lexeme] - section = wrap "syntax.docSection" $ do + section :: P DocTree + section = fmap (wrap' . 
uncurry DocSection) $ do ns <- S.gets parentSections hashes <- P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp title <- paragraph <* CP.space @@ -837,19 +1045,13 @@ docBody = join <$> P.many (sectionElem <* CP.space) let m = length hashes + head ns body <- local (\env -> env {parentSections = (m : (tail ns))}) $ P.many (sectionElem <* CP.space) - pure $ title <> join body + pure $ (title, body) - wrap :: String -> P [Token Lexeme] -> P [Token Lexeme] - wrap o p = do - start <- posP - lexemes <- p - pure $ go start lexemes - where - go start [] = [Token (Open o) start start, Token Close start start] - go start ts@(Token _ x _ : _) = - Token (Open o) start x : (ts ++ [Token Close (end final) (end final)]) - where - final = last ts + wrap' :: DocTop DocTree -> DocTree + wrap' doc = ann doc :< doc + + wrapSimple2 :: (Annotated a, Annotated b) => (a -> b -> DocTop DocTree) -> a -> b -> DocTree + wrapSimple2 fn a b = ann a <> ann b :< fn a b lexemes' :: P [Token Lexeme] -> P [Token Lexeme] lexemes' eof = @@ -1289,12 +1491,13 @@ identifierP = do -- .foo.++.doc -- `.`.`..` (This is a two-segment identifier without a leading dot: "." then "..") identifierLexemeP :: P Lexeme -identifierLexemeP = do - name <- identifierP - pure - if Name.isSymboly (HQ'.toName name) - then SymbolyId name - else WordyId name +identifierLexemeP = identifierLexeme <$> identifierP + +identifierLexeme :: HQ'.HashQualified Name -> Lexeme +identifierLexeme name = + if Name.isSymboly (HQ'.toName name) + then SymbolyId name + else WordyId name wordyIdSegP :: P NameSegment wordyIdSegP = diff --git a/unison-syntax/unison-syntax.cabal b/unison-syntax/unison-syntax.cabal index 888982134f..4b097e6021 100644 --- a/unison-syntax/unison-syntax.cabal +++ b/unison-syntax/unison-syntax.cabal @@ -69,6 +69,7 @@ library , containers , cryptonite , extra + , free , lens , megaparsec , mtl @@ -127,6 +128,7 @@ test-suite syntax-tests , cryptonite , easytest , extra + , free , lens , megaparsec , mtl From 227ff27cea21af3a636bb94d0ccadc80fcd37d9f Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Fri, 5 Jul 2024 14:15:05 -0600 Subject: [PATCH 05/22] =?UTF-8?q?Don=E2=80=99t=20=E2=80=9Cun-parse?= =?UTF-8?q?=E2=80=9D=20`Doc`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This removes the layer that makes the `Doc` parser look like a lexer and replaces it with a function that converts the Doc structure directly into Unison terms. 
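To make the shape of that conversion concrete, here is a toy sketch (an illustration only, not the real `DocTop`, which has many more constructors): the annotated `Doc` tree is a `Cofree`, and the conversion is an ordinary catamorphism over it, hence the dependency on the `free` package added just above.

```haskell
{-# LANGUAGE DeriveFunctor #-}

import Control.Comonad.Cofree (Cofree ((:<)))

-- A made-up analogue of `DocTop`: one layer of Doc structure per constructor.
data DocF a = Word String | Paragraph [a]
  deriving (Functor)

-- Fold the annotated tree bottom-up, ignoring the annotations; this is the
-- same `cata` that `doc2Block` defines locally in the diff below.
cata :: (Functor f) => (f a -> a) -> Cofree f x -> a
cata fn (_ :< fx) = fn (cata fn <$> fx)

-- Rendering to a String stands in for building `syntax.doc*` term applications.
render :: Cofree DocF x -> String
render = cata go
  where
    go (Word w) = w
    go (Paragraph ps) = unwords ps
```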
--- .../src/Unison/Syntax/TermParser.hs | 301 +++++++++--------- unison-syntax/src/Unison/Syntax/Lexer.hs | 103 ++---- unison-syntax/src/Unison/Syntax/Parser.hs | 9 +- 3 files changed, 178 insertions(+), 235 deletions(-) diff --git a/parser-typechecker/src/Unison/Syntax/TermParser.hs b/parser-typechecker/src/Unison/Syntax/TermParser.hs index 635a974d89..1f1dda24c1 100644 --- a/parser-typechecker/src/Unison/Syntax/TermParser.hs +++ b/parser-typechecker/src/Unison/Syntax/TermParser.hs @@ -12,6 +12,7 @@ module Unison.Syntax.TermParser ) where +import Control.Comonad.Cofree (Cofree ((:<))) import Control.Monad.Reader (asks, local) import Data.Char qualified as Char import Data.Foldable (foldrM) @@ -24,6 +25,7 @@ import Data.Sequence qualified as Sequence import Data.Set qualified as Set import Data.Text qualified as Text import Data.Tuple.Extra qualified as TupleE +import Data.Void (vacuous) import Text.Megaparsec qualified as P import U.Core.ABT qualified as ABT import Unison.ABT qualified as ABT @@ -38,7 +40,7 @@ import Unison.NameSegment qualified as NameSegment import Unison.Names (Names) import Unison.Names qualified as Names import Unison.NamesWithHistory qualified as Names -import Unison.Parser.Ann (Ann) +import Unison.Parser.Ann (Ann (Ann)) import Unison.Parser.Ann qualified as Ann import Unison.Pattern (Pattern) import Unison.Pattern qualified as Pattern @@ -113,8 +115,10 @@ rewriteBlock = do pure (DD.rewriteType (ann kw <> ann rhs) (L.payload <$> vs) lhs rhs) typeLink' :: (Monad m, Var v) => P v m (L.Token Reference) -typeLink' = do - id <- hqPrefixId +typeLink' = findUniqueType =<< hqPrefixId + +findUniqueType :: (Monad m, Var v) => L.Token (HQ.HashQualified Name) -> P v m (L.Token Reference) +findUniqueType id = do ns <- asks names case Names.lookupHQType Names.IncludeSuffixes (L.payload id) ns of s @@ -434,7 +438,7 @@ resolveHashQualified tok = do names <- asks names case L.payload tok of HQ.NameOnly n -> pure $ Term.var (ann tok) (Name.toVar n) - _ -> case Names.lookupHQTerm Names.IncludeSuffixes (L.payload tok) names of + hqn -> case Names.lookupHQTerm Names.IncludeSuffixes hqn names of s | Set.null s -> failCommitted $ UnknownTerm tok s | Set.size s > 1 -> failCommitted $ UnknownTerm tok s @@ -461,160 +465,155 @@ termLeaf = doc2Block <&> \(spanAnn, trm) -> trm {ABT.annotation = ABT.annotation trm <> spanAnn} ] --- Syntax for documentation v2 blocks, which are surrounded by {{ }}. +-- | Gives a parser an explicit stream to parse, so that it consumes nothing from the original stream when it runs. +-- +-- This is used inside the `Doc` -> `Term` conversion, where we have chunks of Unison code embedded that need to be +-- parsed. It’s a consequence of parsing Doc in the midst of the Unison lexer. +subParse :: (Ord v, Monad m) => P v m a -> [L.Token L.Lexeme] -> P v m a +subParse p toks = do + orig <- P.getInput + P.setInput $ Input toks + result <- p <* P.eof + P.setInput orig + pure result + +-- | Syntax for documentation v2 blocks, which are surrounded by @{{@ @}}@. -- The lexer does most of the heavy lifting so there's not a lot for -- the parser to do. For instance, in -- --- {{ --- Hi there! --- --- goodbye. --- }} +-- > {{ +-- > Hi there! +-- > +-- > goodbye. 
+-- > }} -- -- the lexer will produce: -- --- [Open "syntax.docUntitledSection", --- Open "syntax.docParagraph", --- Open "syntax.docWord", Textual "Hi", Close, --- Open "syntax.docWord", Textual "there!", Close, --- Close --- Open "syntax.docParagraph", --- Open "syntax.docWord", Textual "goodbye", Close, --- Close --- Close] +-- > [ Doc +-- > ( DocUntitledSection +-- > (DocParagraph (DocWord "Hi" :| [DocWord "there!"])) +-- > (DocParagraph (DocWord "goodbye" :| [])) +-- > ) +-- > ] -- -- The parser will parse this into the Unison expression: -- --- syntax.docUntitledSection [ --- syntax.docParagraph [syntax.docWord "Hi", syntax.docWord "there!"], --- syntax.docParagraph [syntax.docWord "goodbye"] --- ] +-- > syntax.docUntitledSection [ +-- > syntax.docParagraph [syntax.docWord "Hi", syntax.docWord "there!"], +-- > syntax.docParagraph [syntax.docWord "goodbye"] +-- > ] -- --- Where `syntax.doc{Paragraph, UntitledSection,...}` are all ordinary term +-- Where @syntax.doc{Paragraph, UntitledSection,...}@ are all ordinary term -- variables that will be looked up in the environment like anything else. This -- means that the documentation syntax can have its meaning changed by --- overriding what functions the names `syntax.doc*` correspond to. +-- overriding what functions the names @syntax.doc*@ correspond to. doc2Block :: forall m v. (Monad m, Var v) => P v m (Ann {- Annotation for the whole spanning block -}, Term v Ann) doc2Block = do - P.lookAhead (openBlockWith "syntax.docUntitledSection") *> elem + L.Token docContents startDoc endDoc <- doc + let docAnn = Ann startDoc endDoc + (docAnn,) . docUntitledSection (gann docAnn) <$> traverse (cata $ docTop <=< sequenceA) docContents where - -- For terms which aren't blocks the spanning annotation is the same as the - -- term annotation. - selfAnnotated :: Term v Ann -> (Ann, Term v Ann) - selfAnnotated t = (ann t, t) - elem :: P v m (Ann {- Annotation for the whole spanning block -}, Term v Ann) - elem = - (selfAnnotated <$> text) <|> do - startTok <- openBlock - let -- here, `t` will be something like `Open "syntax.docWord"` - -- so `f` will be a term var with the name "syntax.docWord". - f = f' startTok - f' t = Term.var (ann t) (Var.nameds (L.payload t)) - - -- follows are some common syntactic forms used for parsing child elements - - -- regular is parsed into `f child1 child2 child3` for however many children - regular = do - cs <- P.many (snd <$> elem) - endTok <- closeBlock - let trm = Term.apps' f cs - pure (ann startTok <> ann endTok, trm) - - -- variadic is parsed into: `f [child1, child2, ...]` - variadic = variadic' f - variadic' f = do - cs <- P.many (snd <$> elem) - endTok <- closeBlock - let trm = Term.apps' f [Term.list (ann cs) cs] - pure (ann startTok <> ann endTok, trm) - - -- sectionLike is parsed into: `f tm [child1, child2, ...]` - sectionLike = do - arg1 <- (snd <$> elem) - cs <- P.many (snd <$> elem) - endTok <- closeBlock - let trm = Term.apps' f [arg1, Term.list (ann cs) cs] - pure (ann startTok <> ann endTok, trm) - - evalLike wrap = do - tm <- term - endTok <- closeBlock - let trm = Term.apps' f [wrap tm] - pure (ann startTok <> ann endTok, trm) - - -- converts `tm` to `'tm` - -- - -- Embedded examples like ``1 + 1`` are represented as terms, - -- but are wrapped in delays so they are left unevaluated for the - -- code which renders documents. 
(We want the doc display to get - -- the unevaluated expression `1 + 1` and not `2`) - addDelay tm = Term.delay (ann tm) tm - case L.payload startTok of - "syntax.docJoin" -> variadic - "syntax.docUntitledSection" -> variadic - "syntax.docColumn" -> variadic - "syntax.docParagraph" -> variadic - "syntax.docSignature" -> variadic - "syntax.docSource" -> variadic - "syntax.docFoldedSource" -> variadic - "syntax.docBulletedList" -> variadic - "syntax.docSourceAnnotations" -> variadic - "syntax.docSourceElement" -> do - link <- (snd <$> elem) - anns <- P.optional $ reserved "@" *> (snd <$> elem) - endTok <- closeBlock - let trm = Term.apps' f [link, fromMaybe (Term.list (ann link) mempty) anns] - pure (ann startTok <> ann endTok, trm) - "syntax.docNumberedList" -> do - nitems@((n, _) : _) <- P.some nitem - endTok <- closeBlock - let items = snd <$> nitems - let trm = Term.apps' f [n, Term.list (ann items) items] - pure (ann startTok <> ann endTok, trm) - where - nitem = do - n <- number - t <- openBlockWith "syntax.docColumn" - let f = f' ("syntax.docColumn" <$ t) - (_spanAnn, child) <- variadic' f - pure (n, child) - "syntax.docSection" -> sectionLike - -- @source{ type Blah, foo, type Bar } - "syntax.docEmbedTermLink" -> do - tm <- addDelay <$> (hashQualifiedPrefixTerm <|> hashQualifiedInfixTerm) - endTok <- closeBlock - let trm = Term.apps' f [tm] - pure (ann startTok <> ann endTok, trm) - "syntax.docEmbedSignatureLink" -> do - tm <- addDelay <$> (hashQualifiedPrefixTerm <|> hashQualifiedInfixTerm) - endTok <- closeBlock - let trm = Term.apps' f [tm] - pure (ann startTok <> ann endTok, trm) - "syntax.docEmbedTypeLink" -> do - r <- typeLink' - endTok <- closeBlock - let trm = Term.apps' f [Term.typeLink (ann r) (L.payload r)] - pure (ann startTok <> ann endTok, trm) - "syntax.docExample" -> do - trm <- term - endTok <- closeBlock - let spanAnn = ann startTok <> ann endTok - pure . (spanAnn,) $ case trm of - tm@(Term.Apps' _ xs) -> - let fvs = List.Extra.nubOrd $ concatMap (toList . Term.freeVars) xs - n = Term.nat (ann tm) (fromIntegral (length fvs)) - lam = addDelay $ Term.lam' (ann tm) ((Ann.GeneratedFrom spanAnn,) <$> fvs) tm - in Term.apps' f [n, lam] - tm -> Term.apps' f [Term.nat (ann tm) 0, addDelay tm] - "syntax.docTransclude" -> evalLike id - "syntax.docEvalInline" -> evalLike addDelay - "syntax.docExampleBlock" -> do - (spanAnn, tm) <- block'' False True "syntax.docExampleBlock" (pure (void startTok)) closeBlock - pure $ (spanAnn, Term.apps' f [Term.nat (ann tm) 0, addDelay tm]) - "syntax.docEval" -> do - (spanAnn, tm) <- block' False "syntax.docEval" (pure (void startTok)) closeBlock - pure $ (spanAnn, Term.apps' f [addDelay tm]) - _ -> regular + cata :: (Functor f) => (f a -> a) -> Cofree f x -> a + cata fn (_ :< fx) = fn $ cata fn <$> fx + + gann :: (Annotated a) => a -> Ann + gann = Ann.GeneratedFrom . ann + + addDelay :: Term v Ann -> Term v Ann + addDelay tm = Term.delay (ann tm) tm + + f :: (Annotated a) => a -> String -> Term v Ann + f a = Term.var (gann a) . Var.nameds . ("syntax.doc" <>) + + docUntitledSection :: Ann -> L.DocUntitledSection (Term v Ann) -> Term v Ann + docUntitledSection ann (L.DocUntitledSection tops) = + Term.app ann (f ann "UntitledSection") $ Term.list (gann tops) tops + + docTop :: L.DocTop (Term v Ann) -> TermP v m + docTop d = case d of + L.DocSection title body -> pure $ Term.apps' (f d "Section") [title, Term.list (gann body) body] + L.DocEval code -> + Term.app (gann d) (f d "Eval") . addDelay . 
snd + <$> subParse (block' False False "syntax.docEval" (pure $ pure ()) $ Ann.External <$ P.eof) code + L.DocExampleBlock code -> + Term.apps' (f d "ExampleBlock") . (Term.nat (gann d) 0 :) . pure . addDelay . snd + <$> subParse (block' False True "syntax.docExampleBlock" (pure $ pure ()) $ Ann.External <$ P.eof) code + L.DocCodeBlock label body -> + pure $ + Term.apps' + (f d "CodeBlock") + [Term.text (ann label) . Text.pack $ L.payload label, Term.text (ann body) . Text.pack $ L.payload body] + L.DocBulletedList items -> + pure $ Term.app (gann d) (f d "BulletedList") . Term.list (gann items) . toList $ docColumn <$> items + L.DocNumberedList items@((n, _) :| _) -> + pure $ + Term.apps' + (f d "NumberedList") + [Term.nat (ann d) $ L.payload n, Term.list (gann $ snd <$> items) . toList $ docColumn . snd <$> items] + L.DocParagraph leaves -> + Term.app (gann d) (f d "Paragraph") . Term.list (ann leaves) . toList <$> traverse docLeaf leaves + + docColumn :: L.DocColumn (Term v Ann) -> Term v Ann + docColumn d@(L.DocColumn para sublist) = + Term.app (gann d) (f d "Column") . Term.list (gann d) $ para : toList sublist + + docLeaf :: L.DocLeaf (Term v Ann) -> TermP v m + docLeaf d = case d of + L.DocLink link -> Term.app (gann d) (f d "Link") <$> docEmbedLink link + L.DocNamedLink para target -> Term.apps' (f d "NamedLink") . (para :) . pure <$> docLeaf (vacuous target) + L.DocExample code -> do + trm <- subParse term code + pure . Term.apps' (f d "Example") $ case trm of + tm@(Term.Apps' _ xs) -> + let fvs = List.Extra.nubOrd $ concatMap (toList . Term.freeVars) xs + n = Term.nat (ann tm) (fromIntegral (length fvs)) + lam = addDelay $ Term.lam' (ann tm) ((mempty,) <$> fvs) tm + in [n, lam] + tm -> [Term.nat (ann tm) 0, addDelay tm] + L.DocTransclude code -> Term.app (gann d) (f d "Transclude") <$> subParse term code + L.DocBold para -> pure $ Term.app (gann d) (f d "Bold") para + L.DocItalic para -> pure $ Term.app (gann d) (f d "Italic") para + L.DocStrikethrough para -> pure $ Term.app (gann d) (f d "Strikethrough") para + L.DocVerbatim leaf -> Term.app (gann d) (f d "Verbatim") <$> docLeaf (vacuous leaf) + L.DocCode leaf -> Term.app (gann d) (f d "Code") <$> docLeaf (vacuous leaf) + L.DocSource elems -> + Term.app (gann d) (f d "Source") . Term.list (ann elems) . toList <$> traverse docSourceElement elems + L.DocFoldedSource elems -> + Term.app (gann d) (f d "FoldedSource") . Term.list (ann elems) . toList <$> traverse docSourceElement elems + L.DocEvalInline code -> Term.app (gann d) (f d "EvalInline") . addDelay <$> subParse term code + L.DocSignature links -> + Term.app (gann d) (f d "Signature") . Term.list (ann links) . toList <$> traverse docEmbedSignatureLink links + L.DocSignatureInline link -> Term.app (gann d) (f d "SignatureInline") <$> docEmbedSignatureLink link + L.DocWord txt -> pure . Term.app (gann d) (f d "Word") . Term.text (ann txt) . Text.pack $ L.payload txt + L.DocGroup (L.DocJoin leaves) -> + Term.app (gann d) (f d "Group") . Term.app (gann d) (f d "Join") . Term.list (ann leaves) . toList + <$> traverse docLeaf leaves + + docEmbedLink :: L.DocEmbedLink -> TermP v m + docEmbedLink d = case d of + L.DocEmbedTypeLink ident -> + Term.app (gann d) (f d "EmbedTypeLink") . Term.typeLink (ann d) . L.payload + <$> findUniqueType (HQ'.toHQ <$> ident) + L.DocEmbedTermLink ident -> + Term.app (gann d) (f d "EmbedTermLink") . 
addDelay <$> resolveHashQualified (HQ'.toHQ <$> ident) + + docSourceElement :: L.DocSourceElement -> TermP v m + docSourceElement d@(L.DocSourceElement link anns) = do + link' <- docEmbedLink link + anns' <- traverse docEmbedAnnotation anns + pure $ Term.apps' (f d "SourceElement") [link', Term.list (ann anns) anns'] + + docEmbedSignatureLink :: L.DocEmbedSignatureLink -> TermP v m + docEmbedSignatureLink d@(L.DocEmbedSignatureLink ident) = + Term.app (gann d) (f d "EmbedSignatureLink") . addDelay <$> resolveHashQualified (HQ'.toHQ <$> ident) + + docEmbedAnnotation :: L.DocEmbedAnnotation -> TermP v m + docEmbedAnnotation d@(L.DocEmbedAnnotation a) = + -- This is the only place I’m not sure we’re doing the right thing. In the lexer, this can be an identifier or a + -- DocLeaf, but here it could be either /text/ or a Doc element. And I don’t think there’s any way the lexemes + -- produced for an identifier and the lexemes consumed for text line up. So, I think this is a bugfix I can’t + -- avoid. + Term.app (gann d) (f d "EmbedAnnotation") <$> either (resolveHashQualified . fmap HQ'.toHQ) (docLeaf . vacuous) a docBlock :: (Monad m, Var v) => TermP v m docBlock = do @@ -1143,7 +1142,7 @@ customFailure :: (P.MonadParsec e s m) => e -> m a customFailure = P.customFailure block :: forall m v. (Monad m, Var v) => String -> P v m (Ann, Term v Ann) -block s = block' False s (openBlockWith s) closeBlock +block s = block' False False s (openBlockWith s) closeBlock -- example: use Foo.bar.Baz + ++ x -- + ++ and x are called the "suffixes" of the `use` statement, and @@ -1213,24 +1212,16 @@ substImports ns imports = ] block' :: - (Monad m, Var v) => - IsTop -> - String -> - P v m (L.Token ()) -> - P v m (L.Token ()) -> - P v m (Ann {- ann which spans the whole block -}, Term v Ann) -block' isTop = block'' isTop False - -block'' :: forall m v end. 
(Monad m, Var v, Annotated end) => IsTop -> - Bool -> -- `True` means insert `()` at end of block if it ends with a statement + -- | `True` means insert `()` at end of block if it ends with a statement + Bool -> String -> P v m (L.Token ()) -> P v m end -> P v m (Ann {- ann which spans the whole block -}, Term v Ann) -block'' isTop implicitUnitAtEnd s openBlock closeBlock = do +block' isTop implicitUnitAtEnd s openBlock closeBlock = do open <- openBlock (names, imports) <- imports _ <- optional semi diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index fd27118050..3c5041cf6f 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -42,6 +42,7 @@ import Control.Comonad.Cofree (Cofree ((:<))) import Control.Monad.State qualified as S import Data.Char (isAlphaNum, isControl, isDigit, isSpace, ord, toLower) import Data.Foldable qualified as Foldable +import Data.Functor.Classes import Data.List qualified as List import Data.List.Extra qualified as List import Data.List.NonEmpty (NonEmpty ((:|))) @@ -50,7 +51,6 @@ import Data.List.NonEmpty qualified as NonEmpty import Data.Map.Strict qualified as Map import Data.Set qualified as Set import Data.Text qualified as Text -import Data.Void (vacuous) import GHC.Exts (sortWith) import Text.Megaparsec qualified as P import Text.Megaparsec.Char (char) @@ -65,7 +65,7 @@ import Unison.Name qualified as Name import Unison.NameSegment (NameSegment) import Unison.NameSegment qualified as NameSegment (docSegment) import Unison.NameSegment.Internal qualified as NameSegment -import Unison.Parser.Ann (Ann (Ann, GeneratedFrom), Annotated (..)) +import Unison.Parser.Ann (Ann (Ann), Annotated (..)) import Unison.Prelude import Unison.ShortHash (ShortHash) import Unison.ShortHash qualified as SH @@ -158,6 +158,7 @@ data Lexeme | Bytes Bytes.Bytes -- bytes literals | Hash ShortHash -- hash literals | Err Err + | Doc (DocUntitledSection DocTree) deriving stock (Eq, Show, Ord) type IsVirtual = Bool -- is it a virtual semi or an actual semi? @@ -389,6 +390,7 @@ displayLexeme = \case Bytes _b -> "bytes literal" Hash h -> Text.unpack (SH.toText h) Err e -> show e + Doc _ -> "doc structure" infixl 2 <+> @@ -436,7 +438,7 @@ doc2 = do CP.space env0 <- S.get -- Disable layout while parsing the doc block and reset the section number - (docToks, closeTok) <- local + (docTok, closeTok) <- local ( \env -> env { inLayout = False, @@ -444,16 +446,18 @@ doc2 = do } ) do - bodyToks <- docBody (lit "}}") + body <- docBody (lit "}}") closeStart <- posP lit "}}" closeEnd <- posP - pure (docToLexemes (openStart, closeEnd) bodyToks, Token Close closeStart closeEnd) + pure (Token (Doc body) openStart closeEnd, Token Close closeStart closeEnd) -- Parse any layout tokens after the doc block, e.g. virtual semicolon endToks <- token' ignore (pure ()) -- Hack to allow anonymous doc blocks before type decls -- {{ Some docs }} Foo.doc = {{ Some docs }} -- ability Foo where => ability Foo where + -- + -- __FIXME__: This should be done _after_ parsing, not in lexing. tn <- subsequentTypeName pure $ beforeStartToks <> case (tn) of @@ -462,12 +466,13 @@ doc2 = do | isTopLevel -> Token (WordyId (HQ'.fromName (Name.snoc (HQ'.toName tname) NameSegment.docSegment))) openStart openEnd : Token (Open "=") openStart openEnd - : docToks - -- We need an extra 'Close' here because we added an extra Open above. 
- <> (closeTok : endToks) + : docTok + -- We need an extra 'Close' here because we added an extra Open above. + : closeTok + : endToks where isTopLevel = length (layout env0) + maybe 0 (const 1) (opening env0) == 1 - _ -> docToks <> endToks + _ -> docTok : endToks where -- DUPLICATED wordyKw kw = separated wordySep (lit kw) @@ -527,6 +532,15 @@ data DocTop a | DocParagraph (NonEmpty (DocLeaf a)) deriving (Eq, Ord, Show, Foldable, Functor, Traversable) +instance Eq1 DocTop where + liftEq _ _ _ = True + +instance Ord1 DocTop where + liftCompare _ _ _ = LT + +instance Show1 DocTop where + liftShowsPrec _ _ _ _ x = x + data DocColumn a = -- | The first is always a Paragraph, and the second a Bulleted or Numbered List DocColumn a (Maybe a) @@ -625,76 +639,6 @@ instance Annotated DocEmbedSignatureLink where instance Annotated DocEmbedAnnotation where ann (DocEmbedAnnotation a) = either ann ann a --- | This is a short-term hack to turn our parse tree back into the sequence of lexemes the current parser expects. --- --- The medium-term solution is to preserve @[`DocTree`]@ as its own lexeme type, and hand it to the parser without --- flattening it back to tokens. Longer-term, maybe we add a real lexer for @Doc@, and then whatever is left of this --- parser moves into the actual parser. -docToLexemes :: (Pos, Pos) -> DocUntitledSection DocTree -> [Token Lexeme] -docToLexemes (startDoc, endDoc) (DocUntitledSection tops) = - Token (Open "syntax.docUntitledSection") startDoc startDoc - : concatMap cata tops <> pure (Token Close endDoc endDoc) - where - wrap :: Ann -> String -> [Token Lexeme] -> [Token Lexeme] - wrap ann suffix lexemes = go (extractStart ann) lexemes - where - extractStart = \case - Ann start _ -> start - GeneratedFrom a -> extractStart a - a -> error $ "expected a good Pos! Got: " <> show a - o = "syntax.doc" <> suffix - go start [] = [Token (Open o) start start, Token Close start start] - go start ts@(Token _ x _ : _) = - Token (Open o) start x : (ts ++ [Token Close (end final) (end final)]) - where - final = last ts - cata :: DocTree -> [Token Lexeme] - cata (a :< top) = docTop a $ cata <$> top - docTop start = \case - DocSection title body -> wrap start "Section" $ title <> join body - DocEval code -> wrap start "Eval" code - DocExampleBlock code -> wrap start "ExampleBlock" code - DocCodeBlock label text -> wrap start "CodeBlock" [Textual <$> label, Textual <$> text] - DocBulletedList items -> wrap start "BulletedList" . concat $ (\col -> docColumn (ann col) col) <$> items - DocNumberedList items -> - wrap start "NumberedList" . concat $ - uncurry (:) . bimap (Numeric . show <$>) (\col -> docColumn (ann col) col) - <$> items - DocParagraph body -> wrap start "Paragraph" . concat $ (\l -> docLeaf (ann l) l) <$> body - docColumn start (DocColumn para mlist) = wrap start "Column" $ foldr (flip (<>)) para mlist - docLeaf start = \case - DocLink link -> wrap start "Link" $ docEmbedLink (ann link) link - DocNamedLink name target -> wrap start "NamedLink" $ name <> docLeaf (ann target) (vacuous target) - DocExample code -> wrap start "Example" code - DocTransclude code -> wrap start "Transclude" code - DocBold para -> wrap start "Bold" para - DocItalic para -> wrap start "Italic" para - DocStrikethrough para -> wrap start "Strikethrough" para - DocVerbatim word -> wrap start "Verbatim" . docLeaf (ann word) $ vacuous word - DocCode word -> wrap start "Code" . docLeaf (ann word) $ vacuous word - DocSource elems -> wrap start "Source" . 
concat $ (\e -> docSourceElement (ann e) e) <$> elems - DocFoldedSource elems -> wrap start "FoldedSource" . concat $ (\e -> docSourceElement (ann e) e) <$> elems - DocEvalInline code -> wrap start "EvalInline" code - DocSignature links -> wrap start "Signature" . concat $ (\l -> docEmbedSignatureLink (ann l) l) <$> links - DocSignatureInline link -> wrap start "SignatureInline" $ docEmbedSignatureLink (ann link) link - DocWord text -> wrap start "Word" . pure $ Textual <$> text - DocGroup (DocJoin leaves) -> - wrap start "Group" . wrap start "Join" . concat $ (\l -> docLeaf (ann l) l) <$> leaves - docEmbedLink start = \case - DocEmbedTypeLink ident -> wrap start "EmbedTypeLink" . pure $ identifierLexeme <$> ident - DocEmbedTermLink ident -> wrap start "EmbedTermLink" . pure $ identifierLexeme <$> ident - docSourceElement start (DocSourceElement link anns) = - wrap start "SourceElement" $ - docEmbedLink (ann link) link - <> maybe - [] - ((Token (Reserved "@") (Pos 0 0) (Pos 0 0) :) . concatMap (\a -> docEmbedAnnotation (ann a) a)) - (NonEmpty.nonEmpty anns) - docEmbedSignatureLink start (DocEmbedSignatureLink ident) = - wrap start "EmbedSignatureLink" . pure $ identifierLexeme <$> ident - docEmbedAnnotation start (DocEmbedAnnotation a) = - wrap start "EmbedAnnotation" $ either (pure . fmap identifierLexeme) (\l -> docLeaf (ann l) $ vacuous l) a - -- | This is the actual `Doc` lexer. Unlike `doc2`, it doesn’t do any Unison-side lexing (i.e., it doesn’t know that -- Unison wraps `Doc` literals in `}}`). docBody :: P end -> P (DocUntitledSection DocTree) @@ -1741,6 +1685,7 @@ instance P.VisualStream [Token Lexeme] where pretty Close = "" pretty (Semi True) = "" pretty (Semi False) = ";" + pretty (Doc d) = show d pad (Pos line1 col1) (Pos line2 col2) = if line1 == line2 then replicate (col2 - col1) ' ' diff --git a/unison-syntax/src/Unison/Syntax/Parser.hs b/unison-syntax/src/Unison/Syntax/Parser.hs index 4945f4347e..733ecc93cf 100644 --- a/unison-syntax/src/Unison/Syntax/Parser.hs +++ b/unison-syntax/src/Unison/Syntax/Parser.hs @@ -5,7 +5,8 @@ module Unison.Syntax.Parser ( Annotated (..), Err, Error (..), - Input, + -- FIXME: Don’t export the data constructor + Input (..), P, ParsingEnv (..), UniqueName, @@ -16,6 +17,7 @@ module Unison.Syntax.Parser chainr1, character, closeBlock, + doc, failCommitted, failureIf, hqInfixId, @@ -393,6 +395,11 @@ string = queryToken getString getString (L.Textual s) = Just (Text.pack s) getString _ = Nothing +doc :: (Ord v) => P v m (L.Token (L.DocUntitledSection L.DocTree)) +doc = queryToken \case + L.Doc d -> pure d + _ -> Nothing + -- | Parses a tuple of 'a's, or a single parenthesized 'a' -- -- returns the result of combining elements with 'pair', alongside the annotation containing From 159ea3a433ad18a30af64984673fede127866def Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Mon, 22 Jul 2024 15:03:00 -0600 Subject: [PATCH 06/22] Extract `preParse` from `lexer` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After running the core of the lexer, the `lexer` function then does some work to turn the stream into a tree, and reorder some lexemes. It then throws away the tree structure. This is the first step of preserving the tree structure for the parser. It extracts the “pre-parser” from `lexer` so that it can eventually be used _after_ the lexer, rather than internally. 
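For intuition, the reordering mentioned above is essentially a stable sort of stanzas by a small priority key: `use` statements first, then type/ability declarations, with everything else kept in source order. A toy sketch of just that idea (the `Stanza` type here is made up for illustration):

```haskell
import Data.List (sortOn)

-- Made-up stand-ins for the real token stanzas.
data Stanza = Use String | TypeDecl String | Term String

-- The same priorities the real `reorder` assigns: 0 for `use`,
-- 1 for type/ability declarations, 3 for everything else.
priority :: Stanza -> Int
priority (Use _) = 0
priority (TypeDecl _) = 1
priority (Term _) = 3

-- `sortOn` is stable, like the `sortWith` used by `reorder`, so stanzas
-- with equal priority keep their relative source order.
reorderStanzas :: [Stanza] -> [Stanza]
reorderStanzas = sortOn priority
```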
This also moves `fixup` to be applied on each block as we reorder it, rather than across the entire stream at the end (since the goal is to not _have_ an entire stream any more). --- unison-syntax/src/Unison/Syntax/Lexer.hs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index 3c5041cf6f..fd48d6abb3 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -1564,7 +1564,7 @@ stanzas = go [] -- Moves type and ability declarations to the front of the token stream -- and move `use` statements to the front of each block reorder :: [T (Token Lexeme)] -> [T (Token Lexeme)] -reorder = join . sortWith f . stanzas +reorder = foldr fixup [] . join . sortWith f . stanzas where f [] = 3 :: Int f (t0 : _) = case payload $ headToken t0 of @@ -1572,16 +1572,17 @@ reorder = join . sortWith f . stanzas Open typOrA | Set.member (Text.pack typOrA) typeOrAbility -> 1 Reserved "use" -> 0 _ -> 3 :: Int + -- after reordering can end up with trailing semicolon at the end of + -- a block, which we remove with this pass + fixup (payload . headToken -> Semi _) [] = [] + fixup tok tail = tok : tail + +-- | This turns the lexeme stream into a tree, reordering some lexeme subsequences. +preParse :: [Token Lexeme] -> T (Token Lexeme) +preParse = reorderTree reorder . tree lexer :: String -> String -> [Token Lexeme] -lexer scope rem = - let t = tree $ lexer0' scope rem - -- after reordering can end up with trailing semicolon at the end of - -- a block, which we remove with this pass - fixup ((payload -> Semi _) : t@(payload -> Close) : tl) = t : fixup tl - fixup [] = [] - fixup (h : t) = h : fixup t - in fixup . toList $ reorderTree reorder t +lexer scope = toList . preParse . lexer0' scope isDelayOrForce :: Char -> Bool isDelayOrForce op = op == '\'' || op == '!' From 32472bd9e01cb87ac310f375a199e44800c05b9a Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Wed, 24 Jul 2024 19:46:17 -0600 Subject: [PATCH 07/22] Allow EOF to close layout blocks This removes the need to pad the lexer stream with trailing `Close` lexemes. If EOF is reached, the parser will automatically close any layout blocks (but not context-free blocks). 
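The two halves of this fit together as follows (both pieces are excerpted, slightly abridged, from the diff below; nothing here is new API): the lexer's end-of-input parser synthesizes one `Close` token per open layout frame, and the term parser accepts either an explicit `Close` or end of input wherever a layout block may end.

```haskell
-- Lexer side: at EOF, emit a virtual Close for every layout frame still
-- open (plus one if a block is mid-open), at the EOF position.
eof :: P [Token Lexeme]
eof = P.try do
  p <- P.eof >> posP
  n <- maybe 0 (const 1) <$> S.gets opening
  l <- S.gets layout
  pure $ replicate (length l + n) (Token Close p p)

-- Parser side: a layout block ends with an explicit Close or at EOF.
optionalCloseBlock :: (Ord v) => P v m (L.Token ())
optionalCloseBlock = closeBlock <|> (\() -> L.Token () mempty mempty) <$> P.eof
```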
--- .../src/Unison/Syntax/TermParser.hs | 25 +++--- .../reparses-with-same-hash.u | 2 +- .../transcripts/error-messages.output.md | 2 + unison-syntax/src/Unison/Syntax/Lexer.hs | 87 ++++++++++--------- unison-syntax/src/Unison/Syntax/Parser.hs | 6 ++ 5 files changed, 71 insertions(+), 51 deletions(-) diff --git a/parser-typechecker/src/Unison/Syntax/TermParser.hs b/parser-typechecker/src/Unison/Syntax/TermParser.hs index 1f1dda24c1..999d5658ba 100644 --- a/parser-typechecker/src/Unison/Syntax/TermParser.hs +++ b/parser-typechecker/src/Unison/Syntax/TermParser.hs @@ -103,7 +103,7 @@ rewriteBlock = do rewriteTermlike kw mk = do kw <- quasikeyword kw lhs <- term - (_spanAnn, rhs) <- block "==>" + (_spanAnn, rhs) <- layoutBlock "==>" pure (mk (ann kw <> ann rhs) lhs rhs) rewriteTerm = rewriteTermlike "term" DD.rewriteTerm rewriteCase = rewriteTermlike "case" DD.rewriteCase @@ -164,13 +164,13 @@ match :: (Monad m, Var v) => TermP v m match = do start <- openBlockWith "match" scrutinee <- term - _ <- closeBlock + _ <- optionalCloseBlock _ <- P.try (openBlockWith "with") <|> do t <- anyToken P.customFailure (ExpectedBlockOpen "with" t) (_arities, cases) <- NonEmpty.unzip <$> matchCases1 start - _ <- closeBlock + _ <- optionalCloseBlock pure $ Term.match (ann start <> ann (NonEmpty.last cases)) @@ -212,10 +212,10 @@ matchCase = do [ Nothing <$ P.try (quasikeyword "otherwise"), Just <$> infixAppOrBooleanOp ] - (_spanAnn, t) <- block "->" + (_spanAnn, t) <- layoutBlock "->" pure (guard, t) let unguardedBlock = label "case match" do - (_spanAnn, t) <- block "->" + (_spanAnn, t) <- layoutBlock "->" pure (Nothing, t) -- a pattern's RHS is either one or more guards, or a single unguarded block. guardsAndBlocks <- guardedBlocks <|> (pure @[] <$> unguardedBlock) @@ -357,10 +357,10 @@ lam p = label "lambda" $ mkLam <$> P.try (some prefixDefinitionName <* reserved in Term.lam' (ann (head vs) <> ann b) annotatedArgs b letBlock, handle, ifthen :: (Monad m, Var v) => TermP v m -letBlock = label "let" $ (snd <$> block "let") +letBlock = label "let" $ (snd <$> layoutBlock "let") handle = label "handle" do (handleSpan, b) <- block "handle" - (_withSpan, handler) <- block "with" + (_withSpan, handler) <- layoutBlock "with" -- We don't use the annotation span from 'with' here because it will -- include a dedent if it's at the end of block. -- Meaning the newline gets overwritten when pretty-printing and it messes things up. 
@@ -377,7 +377,7 @@ lamCase = do start <- openBlockWith "cases" cases <- matchCases1 start (arity, cases) <- checkCasesArities cases - _ <- closeBlock + _ <- optionalCloseBlock lamvars <- replicateM arity (Parser.uniqueName 10) let vars = Var.named <$> [tweak v i | (v, i) <- lamvars `zip` [(1 :: Int) ..]] @@ -396,7 +396,7 @@ ifthen = label "if" do start <- peekAny (_spanAnn, c) <- block "if" (_spanAnn, t) <- block "then" - (_spanAnn, f) <- block "else" + (_spanAnn, f) <- layoutBlock "else" pure $ Term.iff (ann start <> ann f) c t f text :: (Var v) => TermP v m @@ -987,7 +987,7 @@ delayQuote = P.label "quote" do delayBlock :: (Monad m, Var v) => P v m (Ann {- Ann spanning the whole block -}, Term v Ann) delayBlock = P.label "do" do - (spanAnn, b) <- block "do" + (spanAnn, b) <- layoutBlock "do" let argSpan = (ann b {- would be nice to use the annotation for 'do' here, but it's not terribly important -}) pure $ (spanAnn, DD.delayTerm (ann b) argSpan b) @@ -1074,7 +1074,7 @@ destructuringBind = do let boundVars' = snd <$> boundVars _ <- P.lookAhead (openBlockWith "=") pure (p, boundVars') - (_spanAnn, scrute) <- block "=" -- Dwight K. Scrute ("The People's Scrutinee") + (_spanAnn, scrute) <- layoutBlock "=" -- Dwight K. Scrute ("The People's Scrutinee") let guard = Nothing let absChain vs t = foldr (\v t -> ABT.abs' (ann t) v t) t vs thecase t = Term.MatchCase p (fmap (absChain boundVars) guard) $ absChain boundVars t @@ -1144,6 +1144,9 @@ customFailure = P.customFailure block :: forall m v. (Monad m, Var v) => String -> P v m (Ann, Term v Ann) block s = block' False False s (openBlockWith s) closeBlock +layoutBlock :: forall m v. (Monad m, Var v) => String -> P v m (Ann, Term v Ann) +layoutBlock s = block' False False s (openBlockWith s) optionalCloseBlock + -- example: use Foo.bar.Baz + ++ x -- + ++ and x are called the "suffixes" of the `use` statement, and -- `Foo.bar.Baz` is called the prefix. A `use` statement has the effect diff --git a/unison-src/transcripts-round-trip/reparses-with-same-hash.u b/unison-src/transcripts-round-trip/reparses-with-same-hash.u index 98fbe28a57..5d75eff442 100644 --- a/unison-src/transcripts-round-trip/reparses-with-same-hash.u +++ b/unison-src/transcripts-round-trip/reparses-with-same-hash.u @@ -542,7 +542,7 @@ fix_4384d = {{ {{ docExampleBlock 0 '[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, fix_4384e = id : x -> x id x = x - {{ {{ docExampleBlock 0 (id id id id id id id id id id id id id id id id id id id id id (x -> 0) }} }} + {{ {{ docExampleBlock 0 (id id id id id id id id id id id id id id id id id id id id id (x -> 0)) }} }} fnApplicationSyntax = Environment.default = do 1 + 1 diff --git a/unison-src/transcripts/error-messages.output.md b/unison-src/transcripts/error-messages.output.md index 0b3e334aa6..03e7e652ac 100644 --- a/unison-src/transcripts/error-messages.output.md +++ b/unison-src/transcripts/error-messages.output.md @@ -290,6 +290,7 @@ x = match Some a with I was surprised to find a -> here. I was expecting one of these instead: + * end of input * newline or semicolon ``` @@ -312,6 +313,7 @@ x = match Some a with I was surprised to find a '|' here. 
I was expecting one of these instead: + * end of input * newline or semicolon ``` diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index fd48d6abb3..2ca3dc3738 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -288,9 +288,9 @@ showErrorFancy = \case GT -> "greater than " P.ErrorCustom a -> P.showErrorComponent a -lexer0' :: String -> String -> [Token Lexeme] -lexer0' scope rem = - case flip S.evalState env0 $ P.runParserT lexemes scope rem of +lexer :: String -> String -> [Token Lexeme] +lexer scope rem = + case flip S.evalState env0 $ P.runParserT (lexemes eof) scope rem of Left e -> let errsWithSourcePos = fst $ @@ -326,8 +326,14 @@ lexer0' scope rem = endPos = startPos & \(Pos l c) -> Pos l (c + errorLength) in [Token (Err err) startPos endPos] in errsWithSourcePos >>= errorToTokens - Right ts -> Token (Open scope) topLeftCorner topLeftCorner : tweak ts + Right ts -> postLex $ Token (Open scope) topLeftCorner topLeftCorner : ts where + eof :: P [Token Lexeme] + eof = P.try do + p <- P.eof >> posP + n <- maybe 0 (const 1) <$> S.gets opening + l <- S.gets layout + pure $ replicate (length l + n) (Token Close p p) errorItemToString :: EP.ErrorItem Char -> String errorItemToString = \case (P.Tokens ts) -> Foldable.toList ts @@ -336,28 +342,31 @@ lexer0' scope rem = customErrs es = [Err <$> e | P.ErrorCustom e <- toList es] toPos (P.SourcePos _ line col) = Pos (P.unPos line) (P.unPos col) env0 = ParsingEnv [] (Just scope) True [0] 0 - -- hacky postprocessing pass to do some cleanup of stuff that's annoying to - -- fix without adding more state to the lexer: - -- - 1+1 lexes as [1, +1], convert this to [1, +, 1] - -- - when a semi followed by a virtual semi, drop the virtual, lets you - -- write - -- foo x = action1; - -- 2 - -- - semi immediately after first Open is ignored - tweak [] = [] - tweak (h@(payload -> Semi False) : (payload -> Semi True) : t) = h : tweak t - tweak (h@(payload -> Reserved _) : t) = h : tweak t - tweak (t1 : t2@(payload -> Numeric num) : rem) - | notLayout t1 && touches t1 t2 && isSigned num = - t1 - : Token - (SymbolyId (HQ'.fromName (Name.unsafeParseText (Text.pack (take 1 num))))) - (start t2) - (inc $ start t2) - : Token (Numeric (drop 1 num)) (inc $ start t2) (end t2) - : tweak rem - tweak (h : t) = h : tweak t + +-- | hacky postprocessing pass to do some cleanup of stuff that's annoying to +-- fix without adding more state to the lexer: +-- - 1+1 lexes as [1, +1], convert this to [1, +, 1] +-- - when a semi is followed by a virtual semi, drop the virtual; this lets you +-- write +-- foo x = action1; +-- 2 +-- - semi immediately after first Open is ignored +tweak :: (Token Lexeme) -> [Token Lexeme] -> [Token Lexeme] +tweak h@(Token (Semi False) _ _) (Token (Semi True) _ _ : t) = h : t +-- __NB__: This case only exists to guard against the following one +tweak h@(Token (Reserved _) _ _) t = h : t +tweak t1 (t2@(Token (Numeric num) _ _) : rem) + | notLayout t1 && touches t1 t2 && isSigned num = + t1 + : Token + (SymbolyId (HQ'.fromName (Name.unsafeParseText (Text.pack (take 1 num))))) + (start t2) + (inc $ start t2) + : Token (Numeric (drop 1 num)) (inc $ start t2) (end t2) + : rem + where isSigned num = all (\ch -> ch == '-' || ch == '+') $ take 1 num +tweak h t = h : t formatTrivialError :: Set String -> Set String -> [Char] formatTrivialError unexpectedTokens expectedTokens = @@ -377,7 +386,7 @@ formatTrivialError unexpectedTokens expectedTokens = displayLexeme :: 
Lexeme -> String displayLexeme = \case Open o -> o - Semi True -> "end of section" + Semi True -> "end of stanza" Semi False -> "semicolon" Close -> "end of section" Reserved r -> "'" <> r <> "'" @@ -397,16 +406,6 @@ infixl 2 <+> (<+>) :: (Monoid a) => P a -> P a -> P a p1 <+> p2 = do a1 <- p1; a2 <- p2; pure (a1 <> a2) -lexemes :: P [Token Lexeme] -lexemes = lexemes' eof - where - eof :: P [Token Lexeme] - eof = P.try do - p <- P.eof >> posP - n <- maybe 0 (const 1) <$> S.gets opening - l <- S.gets layout - pure $ replicate (length l + n) (Token Close p p) - -- Runs the parser `p`, then: -- 1. resets the layout stack to be what it was before `p`. -- 2. emits enough closing tokens to reach `lbl` but not pop it. @@ -998,7 +997,14 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) wrapSimple2 fn a b = ann a <> ann b :< fn a b lexemes' :: P [Token Lexeme] -> P [Token Lexeme] -lexemes' eof = +lexemes' = + -- NB: `postLex` requires the token stream to start with an `Open`, otherwise it can’t create a `T`, so this adds one, + -- runs `postLex`, then removes it. + fmap (tail . postLex . (Token (Open "fake") mempty mempty :)) . lexemes + +-- | Consumes an entire Unison “module”. +lexemes :: P [Token Lexeme] -> P [Token Lexeme] +lexemes eof = P.optional space >> do hd <- join <$> P.manyTill toks (P.lookAhead eof) tl <- eof @@ -1581,8 +1587,11 @@ reorder = foldr fixup [] . join . sortWith f . stanzas preParse :: [Token Lexeme] -> T (Token Lexeme) preParse = reorderTree reorder . tree -lexer :: String -> String -> [Token Lexeme] -lexer scope = toList . preParse . lexer0' scope +-- | A few transformations that happen between lexing and parsing. +-- +-- All of these things should move out of the lexer, and be applied in the parse. +postLex :: [Token Lexeme] -> [Token Lexeme] +postLex = toList . preParse . foldr tweak [] isDelayOrForce :: Char -> Bool isDelayOrForce op = op == '\'' || op == '!' diff --git a/unison-syntax/src/Unison/Syntax/Parser.hs b/unison-syntax/src/Unison/Syntax/Parser.hs index 733ecc93cf..498e460f3f 100644 --- a/unison-syntax/src/Unison/Syntax/Parser.hs +++ b/unison-syntax/src/Unison/Syntax/Parser.hs @@ -17,6 +17,7 @@ module Unison.Syntax.Parser chainr1, character, closeBlock, + optionalCloseBlock, doc, failCommitted, failureIf, @@ -270,6 +271,11 @@ semi = label "newline or semicolon" $ queryToken go closeBlock :: (Ord v) => P v m (L.Token ()) closeBlock = void <$> matchToken L.Close +-- | With layout, blocks might “close” without an explicit outdent (e.g., not even a newline at the end of a +-- `DocTransclude`). This allows those blocks to be closed by EOF. 
+optionalCloseBlock :: (Ord v) => P v m (L.Token ()) +optionalCloseBlock = closeBlock <|> (\() -> L.Token () mempty mempty) <$> P.eof + wordyPatternName :: (Var v) => P v m (L.Token v) wordyPatternName = queryToken \case L.WordyId (HQ'.NameOnly n) -> Just $ Name.toVar n From 94065e06104b155a66fd8268e6498e0540ba5108 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Wed, 24 Jul 2024 19:54:06 -0600 Subject: [PATCH 08/22] Make comments into Haddock --- unison-syntax/src/Unison/Syntax/Lexer.hs | 13 +++--- unison-syntax/src/Unison/Syntax/Parser.hs | 55 ++++++++++++----------- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index 2ca3dc3738..ce2d63b564 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -90,17 +90,16 @@ type BlockName = String type Layout = [(BlockName, Column)] data ParsingEnv = ParsingEnv - { -- layout stack + { -- | layout stack layout :: !Layout, - -- `Just b` if a block of type `b` is being opened + -- | `Just b` if a block of type `b` is being opened opening :: Maybe BlockName, - -- are we inside a construct that uses layout? + -- | are we inside a construct that uses layout? inLayout :: Bool, - -- Use a stack to remember the parent section and - -- allow docSections within docSections. - -- 1 means we are inside a # Heading 1 + -- | Use a stack to remember the parent section and allow docSections within docSections. + -- - 1 means we are inside a # Heading 1 parentSections :: [Int], - -- 4 means we are inside a list starting at the fourth column + -- | 4 means we are inside a list starting at the fourth column parentListColumn :: Int } deriving (Show) diff --git a/unison-syntax/src/Unison/Syntax/Parser.hs b/unison-syntax/src/Unison/Syntax/Parser.hs index 498e460f3f..e12a2a94c4 100644 --- a/unison-syntax/src/Unison/Syntax/Parser.hs +++ b/unison-syntax/src/Unison/Syntax/Parser.hs @@ -158,19 +158,20 @@ data Error v | UnknownType (L.Token (HQ.HashQualified Name)) (Set Reference) | UnknownId (L.Token (HQ.HashQualified Name)) (Set Referent) (Set Reference) | ExpectedBlockOpen String (L.Token L.Lexeme) - | -- Indicates a cases or match/with which doesn't have any patterns + | -- | Indicates a cases or match/with which doesn't have any patterns EmptyMatch (L.Token ()) | EmptyWatch Ann | UseInvalidPrefixSuffix (Either (L.Token Name) (L.Token Name)) (Maybe [L.Token Name]) | UseEmpty (L.Token String) -- an empty `use` statement | DidntExpectExpression (L.Token L.Lexeme) (Maybe (L.Token L.Lexeme)) | TypeDeclarationErrors [UF.Error v Ann] - | -- MissingTypeModifier (type|ability) name + | -- | MissingTypeModifier (type|ability) name MissingTypeModifier (L.Token String) (L.Token v) | ResolutionFailures [Names.ResolutionFailure v Ann] | DuplicateTypeNames [(v, [Ann])] | DuplicateTermNames [(v, [Ann])] - | PatternArityMismatch Int Int Ann -- PatternArityMismatch expectedArity actualArity location + | -- | PatternArityMismatch expectedArity actualArity location + PatternArityMismatch Int Int Ann | FloatPattern Ann deriving (Show, Eq, Ord) @@ -242,11 +243,11 @@ run' p s name env = run :: (Monad m, Ord v) => P v m a -> String -> ParsingEnv m -> m (Either (Err v) a) run p s = run' p s "" --- Virtual pattern match on a lexeme. +-- | Virtual pattern match on a lexeme. 
queryToken :: (Ord v) => (L.Lexeme -> Maybe a) -> P v m (L.Token a) queryToken f = P.token (traverse f) Set.empty --- Consume a block opening and return the string that opens the block. +-- | Consume a block opening and return the string that opens the block. openBlock :: (Ord v) => P v m (L.Token String) openBlock = queryToken getOpen where @@ -256,23 +257,23 @@ openBlock = queryToken getOpen openBlockWith :: (Ord v) => String -> P v m (L.Token ()) openBlockWith s = void <$> P.satisfy ((L.Open s ==) . L.payload) --- Match a particular lexeme exactly, and consume it. +-- | Match a particular lexeme exactly, and consume it. matchToken :: (Ord v) => L.Lexeme -> P v m (L.Token L.Lexeme) matchToken x = P.satisfy ((==) x . L.payload) --- Consume a virtual semicolon +-- | Consume a virtual semicolon semi :: (Ord v) => P v m (L.Token ()) semi = label "newline or semicolon" $ queryToken go where go (L.Semi _) = Just () go _ = Nothing --- Consume the end of a block +-- | Consume the end of a block closeBlock :: (Ord v) => P v m (L.Token ()) closeBlock = void <$> matchToken L.Close -- | With layout, blocks might “close” without an explicit outdent (e.g., not even a newline at the end of a --- `DocTransclude`). This allows those blocks to be closed by EOF. +-- `DocTransclude`). This allows those blocks to be closed by EOF. optionalCloseBlock :: (Ord v) => P v m (L.Token ()) optionalCloseBlock = closeBlock <|> (\() -> L.Token () mempty mempty) <$> P.eof @@ -281,13 +282,13 @@ wordyPatternName = queryToken \case L.WordyId (HQ'.NameOnly n) -> Just $ Name.toVar n _ -> Nothing --- Parse an prefix identifier e.g. Foo or (+), discarding any hash +-- | Parse a prefix identifier e.g. Foo or (+), discarding any hash prefixDefinitionName :: (Var v) => P v m (L.Token v) prefixDefinitionName = wordyDefinitionName <|> parenthesize symbolyDefinitionName --- Parse a prefix identifier e.g. Foo or (+), rejecting any hash --- This is useful for term declarations, where type signatures and term names should not have hashes. +-- | Parse a prefix identifier e.g. Foo or (+), rejecting any hash +-- This is useful for term declarations, where type signatures and term names should not have hashes. prefixTermName :: (Var v) => P v m (L.Token v) prefixTermName = wordyTermName <|> parenthesize symbolyTermName where @@ -299,34 +300,34 @@ prefixTermName = wordyTermName <|> parenthesize symbolyTermName L.SymbolyId (HQ'.NameOnly n) -> Just $ Name.toVar n _ -> Nothing --- Parse a wordy identifier e.g. Foo, discarding any hash +-- | Parse a wordy identifier e.g. 
Foo, discarding any hash wordyDefinitionName :: (Var v) => P v m (L.Token v) wordyDefinitionName = queryToken $ \case L.WordyId n -> Just $ Name.toVar (HQ'.toName n) L.Blank s -> Just $ Var.nameds ("_" <> s) _ -> Nothing --- Parse a wordyId as a Name, rejecting any hash +-- | Parse a wordyId as a Name, rejecting any hash importWordyId :: (Ord v) => P v m (L.Token Name) importWordyId = queryToken \case L.WordyId (HQ'.NameOnly n) -> Just n L.Blank s | not (null s) -> Just $ Name.unsafeParseText (Text.pack ("_" <> s)) _ -> Nothing --- The `+` in: use Foo.bar + as a Name +-- | The `+` in: use Foo.bar + as a Name importSymbolyId :: (Ord v) => P v m (L.Token Name) importSymbolyId = queryToken \case L.SymbolyId (HQ'.NameOnly n) -> Just n _ -> Nothing --- Parse a symboly ID like >>= or &&, discarding any hash +-- | Parse a symboly ID like >>= or &&, discarding any hash symbolyDefinitionName :: (Var v) => P v m (L.Token v) symbolyDefinitionName = queryToken $ \case L.SymbolyId n -> Just $ Name.toVar (HQ'.toName n) _ -> Nothing -- | Expect parentheses around a token, includes the parentheses within the start/end --- annotations of the resulting token. +-- annotations of the resulting token. parenthesize :: (Ord v) => P v m (L.Token a) -> P v m (L.Token a) parenthesize p = do (start, a) <- P.try do @@ -340,7 +341,7 @@ hqPrefixId, hqInfixId :: (Ord v) => P v m (L.Token (HQ.HashQualified Name)) hqPrefixId = hqWordyId_ <|> parenthesize hqSymbolyId_ hqInfixId = hqSymbolyId_ --- Parse a hash-qualified alphanumeric identifier +-- | Parse a hash-qualified alphanumeric identifier hqWordyId_ :: (Ord v) => P v m (L.Token (HQ.HashQualified Name)) hqWordyId_ = queryToken \case L.WordyId n -> Just $ HQ'.toHQ n L.Hash h -> Just $ HQ.HashOnly h L.Blank s | not (null s) -> Just $ HQ.NameOnly (Name.unsafeParseText (Text.pack ("_" <> s))) _ -> Nothing --- Parse a hash-qualified symboly ID like >>=#foo or && +-- | Parse a hash-qualified symboly ID like >>=#foo or && hqSymbolyId_ :: (Ord v) => P v m (L.Token (HQ.HashQualified Name)) hqSymbolyId_ = queryToken \case L.SymbolyId n -> Just (HQ'.toHQ n) _ -> Nothing --- Parse a reserved word +-- | Parse a reserved word reserved :: (Ord v) => String -> P v m (L.Token String) reserved w = label w $ queryToken getReserved where getReserved (L.Reserved w') | w == w' = Just w getReserved _ = Nothing --- Parse a placeholder or typed hole +-- | Parse a placeholder or typed hole blank :: (Ord v) => P v m (L.Token String) blank = label "blank" $ queryToken getBlank where @@ -436,12 +437,12 @@ chainr1 p op = go1 go1 = p >>= go2 go2 hd = do { op <- op; op hd <$> go1 } <|> pure hd --- Parse `p` 1+ times, combining with `op` +-- | Parse `p` 1+ times, combining with `op` chainl1 :: (Ord v) => P v m a -> P v m (a -> a -> a) -> P v m a chainl1 p op = foldl (flip ($)) <$> p <*> P.many (flip <$> op <*> p) --- If `p` would succeed, this fails uncommitted. --- Otherwise, `failIfOk` used to produce the output +-- | If `p` would succeed, this fails uncommitted. +-- Otherwise, `failIfOk` is used to produce the output failureIf :: (Ord v) => P v m (P v m b) -> P v m a -> P v m b failureIf failIfOk p = do dontwant <- P.try . 
P.lookAhead $ failIfOk @@ -449,9 +450,9 @@ failureIf failIfOk p = do when (isJust p) $ fail "failureIf" dontwant --- Gives this var an id based on its position - a useful trick to --- obtain a variable whose id won't match any other id in the file --- `positionalVar a Var.missingResult` +-- | Gives this var an id based on its position - a useful trick to +-- obtain a variable whose id won't match any other id in the file +-- `positionalVar a Var.missingResult` positionalVar :: (Annotated a, Var v) => a -> v -> v positionalVar a v = let s = start (ann a) From 567238fae8dccfe399e935c919e93a363a30a03a Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Mon, 22 Jul 2024 16:44:17 -0600 Subject: [PATCH 09/22] Expose `preParse` to the parser --- parser-typechecker/src/Unison/PrintError.hs | 2 +- unison-syntax/src/Unison/Syntax/Lexer.hs | 28 ++++++++------------- unison-syntax/src/Unison/Syntax/Parser.hs | 11 +++----- unison-syntax/test/Main.hs | 4 +-- 4 files changed, 18 insertions(+), 27 deletions(-) diff --git a/parser-typechecker/src/Unison/PrintError.hs b/parser-typechecker/src/Unison/PrintError.hs index 5647ccde63..8b73b179f1 100644 --- a/parser-typechecker/src/Unison/PrintError.hs +++ b/parser-typechecker/src/Unison/PrintError.hs @@ -1336,7 +1336,7 @@ prettyParseError s e = lexerOutput :: Pretty (AnnotatedText a) lexerOutput = if showLexerOutput - then "\nLexer output:\n" <> fromString (L.debugLex' s) + then "\nLexer output:\n" <> fromString (L.debugPreParse' s) else mempty renderParseErrors :: diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index ce2d63b564..dc755e7c79 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -19,11 +19,11 @@ module Unison.Syntax.Lexer DocJoin (..), DocEmbedAnnotation (..), lexer, + preParse, escapeChars, - debugFileLex, - debugLex', - debugLex'', - debugLex''', + debugFilePreParse, + debugPreParse, + debugPreParse', showEscapeChar, touches, @@ -1628,14 +1628,11 @@ typeModifiersAlt f = inc :: Pos -> Pos inc (Pos line col) = Pos line (col + 1) -debugFileLex :: String -> IO () -debugFileLex file = do - contents <- readUtf8 file - let s = debugLex'' (lexer file (Text.unpack contents)) - putStrLn s +debugFilePreParse :: FilePath -> IO () +debugFilePreParse file = putStrLn . debugPreParse . preParse . lexer file . Text.unpack =<< readUtf8 file -debugLex'' :: [Token Lexeme] -> String -debugLex'' [Token (Err (UnexpectedTokens msg)) start end] = +debugPreParse :: T (Token Lexeme) -> String +debugPreParse (L (Token (Err (UnexpectedTokens msg)) start end)) = (if start == end then msg1 else msg2) <> ":\n" <> msg where msg1 = "Error on line " <> show (line start) <> ", column " <> show (column start) @@ -1648,13 +1645,10 @@ debugLex'' [Token (Err (UnexpectedTokens msg)) start end] = <> show (line end) <> ", column " <> show (column end) -debugLex'' ts = show . fmap payload . tree $ ts +debugPreParse ts = show $ payload <$> ts -debugLex' :: String -> String -debugLex' = debugLex'' . lexer "debugLex" - -debugLex''' :: String -> String -> String -debugLex''' s = debugLex'' . lexer s +debugPreParse' :: String -> String +debugPreParse' = debugPreParse . preParse . 
lexer "debugPreParse" instance EP.ShowErrorComponent (Token Err) where showErrorComponent (Token err _ _) = go err diff --git a/unison-syntax/src/Unison/Syntax/Parser.hs b/unison-syntax/src/Unison/Syntax/Parser.hs index e12a2a94c4..344de0fd1b 100644 --- a/unison-syntax/src/Unison/Syntax/Parser.hs +++ b/unison-syntax/src/Unison/Syntax/Parser.hs @@ -61,6 +61,7 @@ where import Control.Monad.Reader (ReaderT (..)) import Control.Monad.Reader.Class (asks) import Crypto.Random qualified as Random +import Data.Bool (bool) import Data.Bytes.Put (runPutS) import Data.Bytes.Serial (serialize) import Data.Bytes.VarInt (VarInt (..)) @@ -199,8 +200,7 @@ label = P.label traceRemainingTokens :: (Ord v) => String -> P v m () traceRemainingTokens label = do remainingTokens <- lookAhead $ many anyToken - let _ = - trace ("REMAINDER " ++ label ++ ":\n" ++ L.debugLex'' remainingTokens) () + let _ = trace ("REMAINDER " ++ label ++ ":\n" ++ L.debugPreParse (L.preParse remainingTokens)) () pure () mkAnn :: (Annotated a, Annotated b) => a -> b -> Ann @@ -231,12 +231,9 @@ rootFile p = p <* P.eof run' :: (Monad m, Ord v) => P v m a -> String -> String -> ParsingEnv m -> m (Either (Err v) a) run' p s name env = - let lex = - if debug - then L.lexer name (trace (L.debugLex''' "lexer receives" s) s) - else L.lexer name s + let lex = bool id (traceWith L.debugPreParse) debug . L.preParse $ L.lexer name s pTraced = traceRemainingTokens "parser receives" *> p - in runReaderT (runParserT pTraced name (Input lex)) env <&> \case + in runReaderT (runParserT pTraced name . Input $ toList lex) env <&> \case Left err -> Left (Nel.head (P.bundleErrors err)) Right x -> Right x diff --git a/unison-syntax/test/Main.hs b/unison-syntax/test/Main.hs index bd40c7ded8..5c13940b0a 100644 --- a/unison-syntax/test/Main.hs +++ b/unison-syntax/test/Main.hs @@ -221,8 +221,8 @@ test = t :: String -> [Lexeme] -> Test () t s expected = - let actual0 = payload <$> lexer "ignored filename" s - actual = take (length actual0 - 2) . drop 1 $ actual0 + let actual0 = payload <$> preParse (lexer "ignored filename" s) + actual = take (length actual0 - 2) . 
drop 1 $ toList actual0 in scope s $ if actual == expected then ok From 6c561f314628c1e8b6e77fa6c7e0118f67265a71 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Mon, 22 Jul 2024 16:45:21 -0600 Subject: [PATCH 10/22] Rename `T` to `BlockTree` --- unison-syntax/src/Unison/Syntax/Lexer.hs | 49 +++++++++++++----------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index dc755e7c79..a83d4da38b 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -1520,15 +1520,18 @@ pop = drop 1 topLeftCorner :: Pos topLeftCorner = Pos 1 1 -data T a = T a [T a] [a] | L a deriving (Functor, Foldable, Traversable) - -headToken :: T a -> a -headToken (T a _ _) = a -headToken (L a) = a - -instance (Show a) => Show (T a) where - show (L a) = show a - show (T open mid close) = +data BlockTree a + = Block a [BlockTree a] [a] + | Leaf a + deriving (Functor, Foldable, Traversable) + +headToken :: BlockTree a -> a +headToken (Block a _ _) = a +headToken (Leaf a) = a + +instance (Show a) => Show (BlockTree a) where + show (Leaf a) = show a + show (Block open mid close) = show open ++ "\n" ++ indent " " (intercalateMap "\n" show mid) @@ -1539,26 +1542,26 @@ instance (Show a) => Show (T a) where go by '\n' = '\n' : by go _ c = [c] -reorderTree :: ([T a] -> [T a]) -> T a -> T a -reorderTree _ l@(L _) = l -reorderTree f (T open mid close) = T open (f (reorderTree f <$> mid)) close +reorderTree :: ([BlockTree a] -> [BlockTree a]) -> BlockTree a -> BlockTree a +reorderTree f (Block open mid close) = Block open (f (reorderTree f <$> mid)) close +reorderTree _ l = l -tree :: [Token Lexeme] -> T (Token Lexeme) +tree :: [Token Lexeme] -> BlockTree (Token Lexeme) tree toks = one toks const where - one (open@(payload -> Open _) : ts) k = many (T open) [] ts k - one (t : ts) k = k (L t) ts + one (open@(payload -> Open _) : ts) k = many (Block open) [] ts k + one (t : ts) k = k (Leaf t) ts one [] k = k lastErr [] where - lastErr = case drop (length toks - 1) toks of - [] -> L (Token (Err LayoutError) topLeftCorner topLeftCorner) - (t : _) -> L $ t {payload = Err LayoutError} + lastErr = Leaf case drop (length toks - 1) toks of + [] -> Token (Err LayoutError) topLeftCorner topLeftCorner + (t : _) -> t {payload = Err LayoutError} many open acc [] k = k (open (reverse acc) []) [] many open acc (t@(payload -> Close) : ts) k = k (open (reverse acc) [t]) ts many open acc ts k = one ts $ \t ts -> many open (t : acc) ts k -stanzas :: [T (Token Lexeme)] -> [[T (Token Lexeme)]] +stanzas :: [BlockTree (Token Lexeme)] -> [[BlockTree (Token Lexeme)]] stanzas = go [] where go acc [] = [reverse acc] @@ -1568,7 +1571,7 @@ stanzas = go [] -- Moves type and ability declarations to the front of the token stream -- and move `use` statements to the front of each block -reorder :: [T (Token Lexeme)] -> [T (Token Lexeme)] +reorder :: [BlockTree (Token Lexeme)] -> [BlockTree (Token Lexeme)] reorder = foldr fixup [] . join . sortWith f . stanzas where f [] = 3 :: Int @@ -1583,7 +1586,7 @@ reorder = foldr fixup [] . join . sortWith f . stanzas fixup tok tail = tok : tail -- | This turns the lexeme stream into a tree, reordering some lexeme subsequences. -preParse :: [Token Lexeme] -> T (Token Lexeme) +preParse :: [Token Lexeme] -> BlockTree (Token Lexeme) preParse = reorderTree reorder . tree -- | A few transformations that happen between lexing and parsing. 
@@ -1631,8 +1634,8 @@ inc (Pos line col) = Pos line (col + 1) debugFilePreParse :: FilePath -> IO () debugFilePreParse file = putStrLn . debugPreParse . preParse . lexer file . Text.unpack =<< readUtf8 file -debugPreParse :: T (Token Lexeme) -> String -debugPreParse (L (Token (Err (UnexpectedTokens msg)) start end)) = +debugPreParse :: BlockTree (Token Lexeme) -> String +debugPreParse (Leaf (Token (Err (UnexpectedTokens msg)) start end)) = (if start == end then msg1 else msg2) <> ":\n" <> msg where msg1 = "Error on line " <> show (line start) <> ", column " <> show (column start) From 3158e666033b124c17ec20ff7f96549548533703 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Tue, 23 Jul 2024 13:59:12 -0600 Subject: [PATCH 11/22] Restructure `BlockTree` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now build the stanzas at the same time as the tree, and don’t discard them after reordering. This also changes the closing element of `Block` to be `Maybe` instead of `[]`. --- unison-syntax/src/Unison/Syntax/Lexer.hs | 50 +++++++++++++++--------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index a83d4da38b..a356757bf7 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -39,6 +39,7 @@ module Unison.Syntax.Lexer where import Control.Comonad.Cofree (Cofree ((:<))) +import Control.Lens qualified as Lens import Control.Monad.State qualified as S import Data.Char (isAlphaNum, isControl, isDigit, isSpace, ord, toLower) import Data.Foldable qualified as Foldable @@ -1521,7 +1522,13 @@ topLeftCorner :: Pos topLeftCorner = Pos 1 1 data BlockTree a - = Block a [BlockTree a] [a] + = Block + -- | The token that opens the block + a + -- | “Stanzas” of nested tokens + [[BlockTree a]] + -- | The closing token, if any + (Maybe a) | Leaf a deriving (Functor, Foldable, Traversable) @@ -1534,22 +1541,22 @@ instance (Show a) => Show (BlockTree a) where show (Block open mid close) = show open ++ "\n" - ++ indent " " (intercalateMap "\n" show mid) + ++ indent " " (intercalateMap "\n" (intercalateMap " " show) mid) ++ "\n" - ++ intercalateMap "" show close + ++ maybe "" show close where indent by s = by ++ (s >>= go by) go by '\n' = '\n' : by go _ c = [c] -reorderTree :: ([BlockTree a] -> [BlockTree a]) -> BlockTree a -> BlockTree a -reorderTree f (Block open mid close) = Block open (f (reorderTree f <$> mid)) close +reorderTree :: ([[BlockTree a]] -> [[BlockTree a]]) -> BlockTree a -> BlockTree a +reorderTree f (Block open mid close) = Block open (f (fmap (reorderTree f) <$> mid)) close reorderTree _ l = l tree :: [Token Lexeme] -> BlockTree (Token Lexeme) tree toks = one toks const where - one (open@(payload -> Open _) : ts) k = many (Block open) [] ts k + one (open@(payload -> Open _) : ts) k = many (Block open . 
stanzas) [] ts k one (t : ts) k = k (Leaf t) ts one [] k = k lastErr [] where @@ -1557,22 +1564,24 @@ tree toks = one toks const [] -> Token (Err LayoutError) topLeftCorner topLeftCorner (t : _) -> t {payload = Err LayoutError} - many open acc [] k = k (open (reverse acc) []) [] - many open acc (t@(payload -> Close) : ts) k = k (open (reverse acc) [t]) ts + many open acc [] k = k (open (reverse acc) Nothing) [] + many open acc (t@(payload -> Close) : ts) k = k (open (reverse acc) $ pure t) ts many open acc ts k = one ts $ \t ts -> many open (t : acc) ts k stanzas :: [BlockTree (Token Lexeme)] -> [[BlockTree (Token Lexeme)]] -stanzas = go [] - where - go acc [] = [reverse acc] - go acc (t : ts) = case payload $ headToken t of - Semi _ -> reverse (t : acc) : go [] ts - _ -> go (t : acc) ts +stanzas = + toList + . foldr + ( \tok (curr :| stanzas) -> case tok of + Leaf (Token (Semi _) _ _) -> [tok] :| curr : stanzas + _ -> (tok : curr) :| stanzas + ) + ([] :| []) -- Moves type and ability declarations to the front of the token stream -- and move `use` statements to the front of each block -reorder :: [BlockTree (Token Lexeme)] -> [BlockTree (Token Lexeme)] -reorder = foldr fixup [] . join . sortWith f . stanzas +reorder :: [[BlockTree (Token Lexeme)]] -> [[BlockTree (Token Lexeme)]] +reorder = foldr fixup [] . sortWith f where f [] = 3 :: Int f (t0 : _) = case payload $ headToken t0 of @@ -1582,8 +1591,13 @@ reorder = foldr fixup [] . join . sortWith f . stanzas _ -> 3 :: Int -- after reordering can end up with trailing semicolon at the end of -- a block, which we remove with this pass - fixup (payload . headToken -> Semi _) [] = [] - fixup tok tail = tok : tail + fixup stanza [] = case Lens.unsnoc stanza of + Nothing -> [] + -- remove any trailing `Semi` from the last non-empty stanza + Just (init, Leaf (Token (Semi _) _ _)) -> [init] + -- don’t touch other stanzas + Just (_, _) -> [stanza] + fixup stanza tail = stanza : tail -- | This turns the lexeme stream into a tree, reordering some lexeme subsequences. 
preParse :: [Token Lexeme] -> BlockTree (Token Lexeme) From a6f6d9c8dc35adabb6052eccb5e341b80083d317 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Wed, 24 Jul 2024 22:59:18 -0600 Subject: [PATCH 12/22] Remove unnecessary `docOpen` in Doc parser --- unison-syntax/src/Unison/Syntax/Lexer.hs | 2 -- 1 file changed, 2 deletions(-) diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index a356757bf7..9f2119011a 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -653,7 +653,6 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) let end = P.lookAhead $ void docClose - <|> void docOpen <|> void (P.satisfy isSpace) <|> void closing word <- P.manyTill (P.satisfy (\ch -> not (isSpace ch))) end @@ -768,7 +767,6 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) pure ex docClose = [] <$ docClose' - docOpen = [] <$ lit "{{" link = P.label "link (examples: {type List}, {Nat.+})" $ From c53cb088e1262a2f06a81fc1dc66f685d86cc707 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Thu, 25 Jul 2024 10:28:33 -0600 Subject: [PATCH 13/22] Split `Doc` into its own module --- .../src/Unison/Syntax/TermParser.hs | 79 +++--- unison-syntax/src/Unison/Syntax/Lexer.hs | 232 ++++-------------- .../src/Unison/Syntax/Lexer/Token.hs | 4 + unison-syntax/src/Unison/Syntax/Parser.hs | 5 +- .../src/Unison/Syntax/Parser/Doc/Data.hs | 166 +++++++++++++ unison-syntax/unison-syntax.cabal | 1 + 6 files changed, 260 insertions(+), 227 deletions(-) create mode 100644 unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs diff --git a/parser-typechecker/src/Unison/Syntax/TermParser.hs b/parser-typechecker/src/Unison/Syntax/TermParser.hs index 999d5658ba..89d5504079 100644 --- a/parser-typechecker/src/Unison/Syntax/TermParser.hs +++ b/parser-typechecker/src/Unison/Syntax/TermParser.hs @@ -25,7 +25,7 @@ import Data.Sequence qualified as Sequence import Data.Set qualified as Set import Data.Text qualified as Text import Data.Tuple.Extra qualified as TupleE -import Data.Void (vacuous) +import Data.Void (absurd, vacuous) import Text.Megaparsec qualified as P import U.Core.ABT qualified as ABT import Unison.ABT qualified as ABT @@ -52,6 +52,7 @@ import Unison.Syntax.Name qualified as Name (toText, toVar, unsafeParseVar) import Unison.Syntax.NameSegment qualified as NameSegment import Unison.Syntax.Parser hiding (seq) import Unison.Syntax.Parser qualified as Parser (seq, uniqueName) +import Unison.Syntax.Parser.Doc.Data qualified as Doc import Unison.Syntax.TypeParser qualified as TypeParser import Unison.Term (IsTop, Term) import Unison.Term qualified as Term @@ -525,43 +526,43 @@ doc2Block = do f :: (Annotated a) => a -> String -> Term v Ann f a = Term.var (gann a) . Var.nameds . 
("syntax.doc" <>) - docUntitledSection :: Ann -> L.DocUntitledSection (Term v Ann) -> Term v Ann - docUntitledSection ann (L.DocUntitledSection tops) = + docUntitledSection :: Ann -> Doc.UntitledSection (Term v Ann) -> Term v Ann + docUntitledSection ann (Doc.UntitledSection tops) = Term.app ann (f ann "UntitledSection") $ Term.list (gann tops) tops - docTop :: L.DocTop (Term v Ann) -> TermP v m + docTop :: Doc.Top [L.Token L.Lexeme] (Term v Ann) -> TermP v m docTop d = case d of - L.DocSection title body -> pure $ Term.apps' (f d "Section") [title, Term.list (gann body) body] - L.DocEval code -> + Doc.Section title body -> pure $ Term.apps' (f d "Section") [title, Term.list (gann body) body] + Doc.Eval code -> Term.app (gann d) (f d "Eval") . addDelay . snd <$> subParse (block' False False "syntax.docEval" (pure $ pure ()) $ Ann.External <$ P.eof) code - L.DocExampleBlock code -> + Doc.ExampleBlock code -> Term.apps' (f d "ExampleBlock") . (Term.nat (gann d) 0 :) . pure . addDelay . snd <$> subParse (block' False True "syntax.docExampleBlock" (pure $ pure ()) $ Ann.External <$ P.eof) code - L.DocCodeBlock label body -> + Doc.CodeBlock label body -> pure $ Term.apps' (f d "CodeBlock") [Term.text (ann label) . Text.pack $ L.payload label, Term.text (ann body) . Text.pack $ L.payload body] - L.DocBulletedList items -> + Doc.BulletedList items -> pure $ Term.app (gann d) (f d "BulletedList") . Term.list (gann items) . toList $ docColumn <$> items - L.DocNumberedList items@((n, _) :| _) -> + Doc.NumberedList items@((n, _) :| _) -> pure $ Term.apps' (f d "NumberedList") [Term.nat (ann d) $ L.payload n, Term.list (gann $ snd <$> items) . toList $ docColumn . snd <$> items] - L.DocParagraph leaves -> + Doc.Paragraph leaves -> Term.app (gann d) (f d "Paragraph") . Term.list (ann leaves) . toList <$> traverse docLeaf leaves - docColumn :: L.DocColumn (Term v Ann) -> Term v Ann - docColumn d@(L.DocColumn para sublist) = + docColumn :: Doc.Column (Term v Ann) -> Term v Ann + docColumn d@(Doc.Column para sublist) = Term.app (gann d) (f d "Column") . Term.list (gann d) $ para : toList sublist - docLeaf :: L.DocLeaf (Term v Ann) -> TermP v m + docLeaf :: Doc.Leaf [L.Token L.Lexeme] (Term v Ann) -> TermP v m docLeaf d = case d of - L.DocLink link -> Term.app (gann d) (f d "Link") <$> docEmbedLink link - L.DocNamedLink para target -> Term.apps' (f d "NamedLink") . (para :) . pure <$> docLeaf (vacuous target) - L.DocExample code -> do + Doc.Link link -> Term.app (gann d) (f d "Link") <$> docEmbedLink link + Doc.NamedLink para target -> Term.apps' (f d "NamedLink") . (para :) . pure <$> docLeaf (vacuous target) + Doc.Example code -> do trm <- subParse term code pure . 
Term.apps' (f d "Example") $ case trm of tm@(Term.Apps' _ xs) -> @@ -570,45 +571,45 @@ doc2Block = do lam = addDelay $ Term.lam' (ann tm) ((mempty,) <$> fvs) tm in [n, lam] tm -> [Term.nat (ann tm) 0, addDelay tm] - L.DocTransclude code -> Term.app (gann d) (f d "Transclude") <$> subParse term code - L.DocBold para -> pure $ Term.app (gann d) (f d "Bold") para - L.DocItalic para -> pure $ Term.app (gann d) (f d "Italic") para - L.DocStrikethrough para -> pure $ Term.app (gann d) (f d "Strikethrough") para - L.DocVerbatim leaf -> Term.app (gann d) (f d "Verbatim") <$> docLeaf (vacuous leaf) - L.DocCode leaf -> Term.app (gann d) (f d "Code") <$> docLeaf (vacuous leaf) - L.DocSource elems -> + Doc.Transclude code -> Term.app (gann d) (f d "Transclude") <$> subParse term code + Doc.Bold para -> pure $ Term.app (gann d) (f d "Bold") para + Doc.Italic para -> pure $ Term.app (gann d) (f d "Italic") para + Doc.Strikethrough para -> pure $ Term.app (gann d) (f d "Strikethrough") para + Doc.Verbatim leaf -> Term.app (gann d) (f d "Verbatim") <$> docLeaf (bimap absurd absurd leaf) + Doc.Code leaf -> Term.app (gann d) (f d "Code") <$> docLeaf (bimap absurd absurd leaf) + Doc.Source elems -> Term.app (gann d) (f d "Source") . Term.list (ann elems) . toList <$> traverse docSourceElement elems - L.DocFoldedSource elems -> + Doc.FoldedSource elems -> Term.app (gann d) (f d "FoldedSource") . Term.list (ann elems) . toList <$> traverse docSourceElement elems - L.DocEvalInline code -> Term.app (gann d) (f d "EvalInline") . addDelay <$> subParse term code - L.DocSignature links -> + Doc.EvalInline code -> Term.app (gann d) (f d "EvalInline") . addDelay <$> subParse term code + Doc.Signature links -> Term.app (gann d) (f d "Signature") . Term.list (ann links) . toList <$> traverse docEmbedSignatureLink links - L.DocSignatureInline link -> Term.app (gann d) (f d "SignatureInline") <$> docEmbedSignatureLink link - L.DocWord txt -> pure . Term.app (gann d) (f d "Word") . Term.text (ann txt) . Text.pack $ L.payload txt - L.DocGroup (L.DocJoin leaves) -> + Doc.SignatureInline link -> Term.app (gann d) (f d "SignatureInline") <$> docEmbedSignatureLink link + Doc.Word txt -> pure . Term.app (gann d) (f d "Word") . Term.text (ann txt) . Text.pack $ L.payload txt + Doc.Group (Doc.Join leaves) -> Term.app (gann d) (f d "Group") . Term.app (gann d) (f d "Join") . Term.list (ann leaves) . toList <$> traverse docLeaf leaves - docEmbedLink :: L.DocEmbedLink -> TermP v m + docEmbedLink :: Doc.EmbedLink -> TermP v m docEmbedLink d = case d of - L.DocEmbedTypeLink ident -> + Doc.EmbedTypeLink ident -> Term.app (gann d) (f d "EmbedTypeLink") . Term.typeLink (ann d) . L.payload <$> findUniqueType (HQ'.toHQ <$> ident) - L.DocEmbedTermLink ident -> + Doc.EmbedTermLink ident -> Term.app (gann d) (f d "EmbedTermLink") . 
addDelay <$> resolveHashQualified (HQ'.toHQ <$> ident) - docSourceElement :: L.DocSourceElement -> TermP v m - docSourceElement d@(L.DocSourceElement link anns) = do + docSourceElement :: Doc.SourceElement [L.Token L.Lexeme] -> TermP v m + docSourceElement d@(Doc.SourceElement link anns) = do link' <- docEmbedLink link anns' <- traverse docEmbedAnnotation anns pure $ Term.apps' (f d "SourceElement") [link', Term.list (ann anns) anns'] - docEmbedSignatureLink :: L.DocEmbedSignatureLink -> TermP v m - docEmbedSignatureLink d@(L.DocEmbedSignatureLink ident) = + docEmbedSignatureLink :: Doc.EmbedSignatureLink -> TermP v m + docEmbedSignatureLink d@(Doc.EmbedSignatureLink ident) = Term.app (gann d) (f d "EmbedSignatureLink") . addDelay <$> resolveHashQualified (HQ'.toHQ <$> ident) - docEmbedAnnotation :: L.DocEmbedAnnotation -> TermP v m - docEmbedAnnotation d@(L.DocEmbedAnnotation a) = + docEmbedAnnotation :: Doc.EmbedAnnotation [L.Token L.Lexeme] -> TermP v m + docEmbedAnnotation d@(Doc.EmbedAnnotation a) = -- This is the only place I’m not sure we’re doing the right thing. In the lexer, this can be an identifier or a -- DocLeaf, but here it could be either /text/ or a Doc element. And I don’t think there’s any way the lexemes -- produced for an identifier and the lexemes consumed for text line up. So, I think this is a bugfix I can’t diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index 9f2119011a..c0d1c3c04c 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -9,15 +9,6 @@ module Unison.Syntax.Lexer Pos (..), Lexeme (..), DocTree, - DocUntitledSection (..), - DocTop (..), - DocColumn (..), - DocLeaf (..), - DocEmbedLink (..), - DocSourceElement (..), - DocEmbedSignatureLink (..), - DocJoin (..), - DocEmbedAnnotation (..), lexer, preParse, escapeChars, @@ -43,7 +34,6 @@ import Control.Lens qualified as Lens import Control.Monad.State qualified as S import Data.Char (isAlphaNum, isControl, isDigit, isSpace, ord, toLower) import Data.Foldable qualified as Foldable -import Data.Functor.Classes import Data.List qualified as List import Data.List.Extra qualified as List import Data.List.NonEmpty (NonEmpty ((:|))) @@ -66,7 +56,7 @@ import Unison.Name qualified as Name import Unison.NameSegment (NameSegment) import Unison.NameSegment qualified as NameSegment (docSegment) import Unison.NameSegment.Internal qualified as NameSegment -import Unison.Parser.Ann (Ann (Ann), Annotated (..)) +import Unison.Parser.Ann (Ann, Annotated (..)) import Unison.Prelude import Unison.ShortHash (ShortHash) import Unison.ShortHash qualified as SH @@ -75,6 +65,7 @@ import Unison.Syntax.Lexer.Token (Token (..), posP, tokenP) import Unison.Syntax.Name qualified as Name (isSymboly, nameP, toText, unsafeParseText) import Unison.Syntax.NameSegment (symbolyIdChar, wordyIdChar, wordyIdStartChar) import Unison.Syntax.NameSegment qualified as NameSegment (ParseErr (..), wordyP) +import Unison.Syntax.Parser.Doc.Data qualified as Doc import Unison.Syntax.ReservedWords (delimiters, typeModifiers, typeOrAbility) import Unison.Syntax.ShortHash qualified as ShortHash (shortHashP) import Unison.Util.Bytes qualified as Bytes @@ -83,9 +74,6 @@ import Unison.Util.Monoid (intercalateMap) instance (Annotated a) => Annotated (Cofree f a) where ann (a :< _) = ann a -instance Annotated (Token a) where - ann (Token _ s e) = Ann s e - type BlockName = String type Layout = [(BlockName, Column)] @@ -158,7 +146,7 @@ data Lexeme | Bytes Bytes.Bytes 
-- bytes literals | Hash ShortHash -- hash literals | Err Err - | Doc (DocUntitledSection DocTree) + | Doc (Doc.UntitledSection DocTree) deriving stock (Eq, Show, Ord) type IsVirtual = Bool -- is it a virtual semi or an actual semi? @@ -422,6 +410,8 @@ restoreStack lbl p = do S.put (s2 {layout = layout1}) pure $ p <> closes +type DocTree = Cofree (Doc.Top [Token Lexeme]) Ann + -- | The `Doc` lexer as documented on unison-lang.org doc2 :: P [Token Lexeme] doc2 = do @@ -508,148 +498,18 @@ someTill' p end = liftA2 (:|) p $ P.manyTill p end sepBy1' :: P a -> P sep -> P (NonEmpty a) sepBy1' p sep = liftA2 (:|) p . many $ sep *> p -newtype DocUntitledSection a = DocUntitledSection [a] - deriving (Eq, Ord, Show, Foldable, Functor, Traversable) - --- | Haskell parallel to @unison/base.Doc@. --- --- This is much more restricted than @unison/base.Doc@, but it covers everything we can parse from Haskell. The --- mismatch with Unison is a problem, as someone can create a Unison Doc with explicit constructors or function calls, --- have it rendered to a scratch file, and then we can’t parse it. Changing the types here to match Unison wouldn’t --- fix the issue. We have to modify the types and parser in concert (in both Haskell and Unison) to bring them in --- line. --- --- __NB__: Uses of @[`Token` `Lexeme`]@ here indicate a nested transition to the Unison lexer. -data DocTop a - = -- | The first argument is always a Paragraph - DocSection a [a] - | DocEval [Token Lexeme] - | DocExampleBlock [Token Lexeme] - | DocCodeBlock (Token String) (Token String) - | DocBulletedList (NonEmpty (DocColumn a)) - | DocNumberedList (NonEmpty (Token Word64, DocColumn a)) - | DocParagraph (NonEmpty (DocLeaf a)) - deriving (Eq, Ord, Show, Foldable, Functor, Traversable) - -instance Eq1 DocTop where - liftEq _ _ _ = True - -instance Ord1 DocTop where - liftCompare _ _ _ = LT - -instance Show1 DocTop where - liftShowsPrec _ _ _ _ x = x - -data DocColumn a - = -- | The first is always a Paragraph, and the second a Bulleted or Numbered List - DocColumn a (Maybe a) - deriving (Eq, Ord, Show, Foldable, Functor, Traversable) - -data DocLeaf a - = DocLink DocEmbedLink - | -- | first is a Paragraph, second is always a Group (which contains either a single Term/Type link or list of - -- Transcludes & Words) - DocNamedLink a (DocLeaf Void) - | DocExample [Token Lexeme] - | DocTransclude [Token Lexeme] - | -- | Always a Paragraph - DocBold a - | -- | Always a Paragraph - DocItalic a - | -- | Always a Paragraph - DocStrikethrough a - | -- | Always a Word - DocVerbatim (DocLeaf Void) - | -- | Always a Word - DocCode (DocLeaf Void) - | DocSource (NonEmpty DocSourceElement) - | DocFoldedSource (NonEmpty DocSourceElement) - | DocEvalInline [Token Lexeme] - | DocSignature (NonEmpty DocEmbedSignatureLink) - | DocSignatureInline DocEmbedSignatureLink - | DocWord (Token String) - | DocGroup (DocJoin a) - deriving (Eq, Ord, Show, Foldable, Functor, Traversable) - -data DocEmbedLink - = DocEmbedTypeLink (Token (HQ'.HashQualified Name)) - | DocEmbedTermLink (Token (HQ'.HashQualified Name)) - deriving (Eq, Ord, Show) - -data DocSourceElement = DocSourceElement DocEmbedLink [DocEmbedAnnotation] - deriving (Eq, Ord, Show) - -newtype DocEmbedSignatureLink = DocEmbedSignatureLink (Token (HQ'.HashQualified Name)) - deriving (Eq, Ord, Show) - -newtype DocJoin a = DocJoin (NonEmpty (DocLeaf a)) - deriving (Eq, Ord, Show, Foldable, Functor, Traversable) - -newtype DocEmbedAnnotation - = -- | Always a DocTransclude - DocEmbedAnnotation (Either (Token 
(HQ'.HashQualified Name)) (DocLeaf Void)) - deriving (Eq, Ord, Show) - -type DocTree = Cofree DocTop Ann - -instance (Annotated a) => Annotated (DocTop a) where - ann = \case - DocSection title body -> ann title <> ann body - DocEval code -> ann code - DocExampleBlock code -> ann code - DocCodeBlock label body -> ann label <> ann body - DocBulletedList items -> ann items - DocNumberedList items -> ann $ snd <$> items - DocParagraph leaves -> ann leaves - -instance (Annotated a) => Annotated (DocColumn a) where - ann (DocColumn para list) = ann para <> ann list - -instance (Annotated a) => Annotated (DocLeaf a) where - ann = \case - DocLink link -> ann link - DocNamedLink label target -> ann label <> ann target - DocExample code -> ann code - DocTransclude code -> ann code - DocBold para -> ann para - DocItalic para -> ann para - DocStrikethrough para -> ann para - DocVerbatim word -> ann word - DocCode word -> ann word - DocSource elems -> ann elems - DocFoldedSource elems -> ann elems - DocEvalInline code -> ann code - DocSignature links -> ann links - DocSignatureInline link -> ann link - DocWord text -> ann text - DocGroup (DocJoin leaves) -> ann leaves - -instance Annotated DocEmbedLink where - ann = \case - DocEmbedTypeLink name -> ann name - DocEmbedTermLink name -> ann name - -instance Annotated DocSourceElement where - ann (DocSourceElement link target) = ann link <> ann target - -instance Annotated DocEmbedSignatureLink where - ann (DocEmbedSignatureLink name) = ann name - -instance Annotated DocEmbedAnnotation where - ann (DocEmbedAnnotation a) = either ann ann a - -- | This is the actual `Doc` lexer. Unlike `doc2`, it doesn’t do any Unison-side lexing (i.e., it doesn’t know that -- Unison wraps `Doc` literals in `}}`). -docBody :: P end -> P (DocUntitledSection DocTree) -docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) +docBody :: P end -> P (Doc.UntitledSection DocTree) +docBody docClose' = Doc.UntitledSection <$> P.many (sectionElem <* CP.space) where wordyKw kw = separated wordySep (lit kw) sectionElem = section <|> fencedBlock <|> list <|> paragraph - paragraph = wrap' . DocParagraph <$> spaced leaf + paragraph = wrap' . Doc.Paragraph <$> spaced leaf reserved word = List.isPrefixOf "}}" word || all (== '#') word - wordy :: P end -> P (DocLeaf void) - wordy closing = fmap DocWord . tokenP . P.try $ do + wordy :: P end -> P (Doc.Leaf [Token Lexeme] void) + wordy closing = fmap Doc.Word . tokenP . P.try $ do let end = P.lookAhead $ void docClose @@ -677,10 +537,10 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) where comma = lit "," <* CP.space src = - src' DocSource "@source" - <|> src' DocFoldedSource "@foldedSource" + src' Doc.Source "@source" + <|> src' Doc.FoldedSource "@foldedSource" srcElem = - DocSourceElement + Doc.SourceElement <$> (typeLink <|> termLink) <*> ( fmap (fromMaybe []) . 
P.optional $ (lit "@") *> (CP.space *> annotations) @@ -688,38 +548,38 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) where annotation = fmap Left (tokenP identifierP) <|> fmap Right expr <* CP.space annotations = - P.some (DocEmbedAnnotation <$> annotation) + P.some (Doc.EmbedAnnotation <$> annotation) src' name atName = fmap name $ do _ <- lit atName *> (lit " {" <|> lit "{") *> CP.space s <- sepBy1' srcElem comma _ <- lit "}" pure s - signature = fmap DocSignature $ do + signature = fmap Doc.Signature $ do _ <- (lit "@signatures" <|> lit "@signature") *> (lit " {" <|> lit "{") *> CP.space s <- sepBy1' signatureLink comma _ <- lit "}" pure s - signatureInline = fmap DocSignatureInline $ do + signatureInline = fmap Doc.SignatureInline $ do _ <- lit "@inlineSignature" *> (lit " {" <|> lit "{") *> CP.space s <- signatureLink _ <- lit "}" pure s - evalInline = fmap DocEvalInline $ do + evalInline = fmap Doc.EvalInline $ do _ <- lit "@eval" *> (lit " {" <|> lit "{") *> CP.space let inlineEvalClose = [] <$ lit "}" s <- lexemes' inlineEvalClose pure s - typeLink = fmap DocEmbedTypeLink $ do + typeLink = fmap Doc.EmbedTypeLink $ do _ <- typeOrAbilityAlt (wordyKw . Text.unpack) <* CP.space tokenP identifierP <* CP.space termLink = - fmap DocEmbedTermLink $ + fmap Doc.EmbedTermLink $ tokenP identifierP <* CP.space signatureLink = - fmap DocEmbedSignatureLink $ + fmap Doc.EmbedSignatureLink $ tokenP identifierP <* CP.space groupy closing p = do @@ -728,8 +588,8 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) pure $ case after of Nothing -> p Just after -> - DocGroup - . DocJoin + Doc.Group + . Doc.Join $ p :| pure after @@ -748,17 +608,17 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) let txt = trimIndentFromVerbatimBlock (column start - 1) trimmed -- If it's a multi-line verbatim block we trim any whitespace representing -- indentation from the pretty-printer. See 'trimIndentFromVerbatimBlock' - pure . DocVerbatim $ - DocWord $ + pure . Doc.Verbatim $ + Doc.Word $ Token txt start stop else - pure . DocCode $ - DocWord $ + pure . Doc.Code $ + Doc.Word $ Token originalText start stop exampleInline = P.label "inline code (examples: ``List.map f xs``, ``[1] :+ 2``)" $ - fmap DocExample $ do + fmap Doc.Example $ do n <- P.try $ do _ <- lit "`" length <$> P.takeWhile1P (Just "backticks") (== '`') @@ -770,12 +630,12 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) link = P.label "link (examples: {type List}, {Nat.+})" $ - fmap DocLink $ + fmap Doc.Link $ P.try $ lit "{" *> (typeLink <|> termLink) <* lit "}" expr = - fmap DocTransclude . P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $ + fmap Doc.Transclude . P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $ openAs "{{" "syntax.docTransclude" *> do env0 <- S.get @@ -806,7 +666,7 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) P.label "block eval (syntax: a fenced code block)" $ evalUnison <|> exampleBlock <|> other where - evalUnison = fmap (wrap' . DocEval) $ do + evalUnison = fmap (wrap' . 
Doc.Eval) $ do -- commit after seeing that ``` is on its own line fence <- P.try $ do fence <- lit "```" <+> P.takeWhileP Nothing (== '`') @@ -817,7 +677,7 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) (\env -> env {inLayout = True, opening = Just "docEval"}) (restoreStack "docEval" $ lexemes' ([] <$ lit fence)) - exampleBlock = fmap (wrap' . DocExampleBlock) $ do + exampleBlock = fmap (wrap' . Doc.ExampleBlock) $ do void $ lit "@typecheck" <* CP.space fence <- lit "```" <+> P.takeWhileP Nothing (== '`') local @@ -834,7 +694,7 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) skip _ s = s in List.intercalate "\n" $ skip column <$> lines s - other = fmap (uncurry $ wrapSimple2 DocCodeBlock) $ do + other = fmap (uncurry $ wrapSimple2 Doc.CodeBlock) $ do column <- (\x -> x - 1) . toInteger . P.unPos <$> LP.indentLevel let tabWidth = toInteger . P.unPos $ P.defaultTabWidth fence <- lit "```" <+> P.takeWhileP Nothing (== '`') @@ -857,26 +717,26 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) (P.satisfy (== '~')) name s = if take 1 s == "~" - then DocStrikethrough - else if take 1 s == "*" then DocBold else DocItalic + then Doc.Strikethrough + else if take 1 s == "*" then Doc.Bold else Doc.Italic end <- P.try $ do end <- start P.lookAhead (P.satisfy (not . isSpace)) pure end - name end . wrap' . DocParagraph + name end . wrap' . Doc.Paragraph <$> someTill' (leafy (closing <|> (void $ lit end)) <* whitespaceWithoutParagraphBreak) (lit end) externalLink = P.label "hyperlink (example: [link name](https://destination.com))" $ - fmap (uncurry DocNamedLink) $ do + fmap (uncurry Doc.NamedLink) $ do _ <- lit "[" p <- leafies (void $ char ']') _ <- lit "]" _ <- lit "(" target <- - fmap (DocGroup . DocJoin) $ + fmap (Doc.Group . Doc.Join) $ fmap pure link <|> some' (expr <|> wordy (char ')')) _ <- lit ")" pure (p, target) @@ -894,12 +754,12 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) ok s = length [() | '\n' <- s] < 2 spaced p = some' (p <* P.optional sp) - leafies close = wrap' . DocParagraph <$> spaced (leafy close) + leafies close = wrap' . Doc.Paragraph <$> spaced (leafy close) list = bulletedList <|> numberedList - bulletedList = wrap' . DocBulletedList <$> sepBy1' bullet listSep - numberedList = wrap' . DocNumberedList <$> sepBy1' numberedItem listSep + bulletedList = wrap' . Doc.BulletedList <$> sepBy1' bullet listSep + numberedList = wrap' . Doc.NumberedList <$> sepBy1' numberedItem listSep listSep = P.try $ newline *> nonNewlineSpaces *> P.lookAhead (void bulletedStart <|> void numberedStart) @@ -921,7 +781,7 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) numberedStart = listItemStart' $ P.try (tokenP $ LP.decimal <* lit ".") - listItemParagraph = fmap (wrap' . DocParagraph) $ do + listItemParagraph = fmap (wrap' . Doc.Paragraph) $ do col <- column <$> posP some' (leaf <* sep col) where @@ -947,7 +807,7 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) numberedItem = P.label msg $ do (col, s) <- numberedStart (s,) - <$> ( fmap (uncurry DocColumn) $ do + <$> ( fmap (uncurry Doc.Column) $ do p <- nonNewlineSpaces *> listItemParagraph subList <- local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> list) @@ -956,7 +816,7 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) where msg = "numbered list (examples: 1. item1, 8. 
start numbering at '8')" - bullet = fmap (uncurry DocColumn) . P.label "bullet (examples: * item1, - item2)" $ do + bullet = fmap (uncurry Doc.Column) . P.label "bullet (examples: * item1, - item2)" $ do (col, _) <- bulletedStart p <- nonNewlineSpaces *> listItemParagraph subList <- @@ -978,7 +838,7 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) -- # A section title (not a subsection) section :: P DocTree - section = fmap (wrap' . uncurry DocSection) $ do + section = fmap (wrap' . uncurry Doc.Section) $ do ns <- S.gets parentSections hashes <- P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp title <- paragraph <* CP.space @@ -988,10 +848,10 @@ docBody docClose' = DocUntitledSection <$> P.many (sectionElem <* CP.space) P.many (sectionElem <* CP.space) pure $ (title, body) - wrap' :: DocTop DocTree -> DocTree + wrap' :: Doc.Top [Token Lexeme] DocTree -> DocTree wrap' doc = ann doc :< doc - wrapSimple2 :: (Annotated a, Annotated b) => (a -> b -> DocTop DocTree) -> a -> b -> DocTree + wrapSimple2 :: (Annotated a, Annotated b) => (a -> b -> Doc.Top [Token Lexeme] DocTree) -> a -> b -> DocTree wrapSimple2 fn a b = ann a <> ann b :< fn a b lexemes' :: P [Token Lexeme] -> P [Token Lexeme] diff --git a/unison-syntax/src/Unison/Syntax/Lexer/Token.hs b/unison-syntax/src/Unison/Syntax/Lexer/Token.hs index 81842c409e..e29f276c5e 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer/Token.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer/Token.hs @@ -9,6 +9,7 @@ import Data.Text qualified as Text import Text.Megaparsec (ParsecT, TraversableStream) import Text.Megaparsec qualified as P import Unison.Lexer.Pos (Pos (Pos)) +import Unison.Parser.Ann (Ann (Ann), Annotated (..)) import Unison.Prelude data Token a = Token @@ -18,6 +19,9 @@ data Token a = Token } deriving stock (Eq, Ord, Show, Functor, Foldable, Traversable) +instance Annotated (Token a) where + ann (Token _ s e) = Ann s e + instance Applicative Token where pure a = Token a (Pos 0 0) (Pos 0 0) Token f start _ <*> Token a _ end = Token (f a) start end diff --git a/unison-syntax/src/Unison/Syntax/Parser.hs b/unison-syntax/src/Unison/Syntax/Parser.hs index 344de0fd1b..1bee4d08f4 100644 --- a/unison-syntax/src/Unison/Syntax/Parser.hs +++ b/unison-syntax/src/Unison/Syntax/Parser.hs @@ -90,6 +90,7 @@ import Unison.Reference (Reference) import Unison.Referent (Referent) import Unison.Syntax.Lexer qualified as L import Unison.Syntax.Name qualified as Name (toVar, unsafeParseText) +import Unison.Syntax.Parser.Doc.Data qualified as Doc import Unison.Term (MatchCase (..)) import Unison.UnisonFile.Error qualified as UF import Unison.Util.Bytes (Bytes) @@ -270,7 +271,7 @@ closeBlock :: (Ord v) => P v m (L.Token ()) closeBlock = void <$> matchToken L.Close -- | With layout, blocks might “close” without an explicit outdent (e.g., not even a newline at the end of a --- `DocTransclude`). This allows those blocks to be closed by EOF. +-- `Doc.Transclude`). This allows those blocks to be closed by EOF. 
optionalCloseBlock :: (Ord v) => P v m (L.Token ()) optionalCloseBlock = closeBlock <|> (\() -> L.Token () mempty mempty) <$> P.eof @@ -399,7 +400,7 @@ string = queryToken getString getString (L.Textual s) = Just (Text.pack s) getString _ = Nothing -doc :: (Ord v) => P v m (L.Token (L.DocUntitledSection L.DocTree)) +doc :: (Ord v) => P v m (L.Token (Doc.UntitledSection L.DocTree)) doc = queryToken \case L.Doc d -> pure d _ -> Nothing diff --git a/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs b/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs new file mode 100644 index 0000000000..4a88200b8b --- /dev/null +++ b/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs @@ -0,0 +1,166 @@ +-- | Haskell parallel to @unison/base.Doc@. +-- +-- This is much more restricted than @unison/base.Doc@, but it covers everything we can parse from Haskell. The +-- mismatch with Unison is a problem, as someone can create a Unison Doc with explicit constructors or function calls, +-- have it rendered to a scratch file, and then we can’t parse it. Changing the types here to match Unison wouldn’t +-- fix the issue. We have to modify the types and parser in concert (in both Haskell and Unison) to bring them in +-- line. +module Unison.Syntax.Parser.Doc.Data where + +import Data.Functor.Classes +import Data.List.NonEmpty (NonEmpty) +import Unison.HashQualifiedPrime qualified as HQ' +import Unison.Name (Name) +import Unison.Parser.Ann (Annotated (..)) +import Unison.Prelude +import Unison.Syntax.Lexer.Token (Token (..)) + +newtype UntitledSection a = UntitledSection [a] + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +data Top code a + = -- | The first argument is always a Paragraph + Section a [a] + | Eval code + | ExampleBlock code + | CodeBlock (Token String) (Token String) + | BulletedList (NonEmpty (Column a)) + | NumberedList (NonEmpty (Token Word64, Column a)) + | Paragraph (NonEmpty (Leaf code a)) + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +instance Eq2 Top where + liftEq2 _ _ _ _ = True + +instance (Eq code) => Eq1 (Top code) + +instance Ord2 Top where + liftCompare2 _ _ _ _ = LT + +instance (Ord code) => Ord1 (Top code) + +instance Show2 Top where + liftShowsPrec2 _ _ _ _ _ _ x = x + +instance (Show code) => Show1 (Top code) + +data Column a + = -- | The first is always a Paragraph, and the second a Bulleted or Numbered List + Column a (Maybe a) + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +data Leaf code a + = Link EmbedLink + | -- | first is a Paragraph, second is always a Group (which contains either a single Term/Type link or list of + -- Transcludes & Words) + NamedLink a (Leaf code Void) + | Example code + | Transclude code + | -- | Always a Paragraph + Bold a + | -- | Always a Paragraph + Italic a + | -- | Always a Paragraph + Strikethrough a + | -- | Always a Word + Verbatim (Leaf Void Void) + | -- | Always a Word + Code (Leaf Void Void) + | Source (NonEmpty (SourceElement code)) + | FoldedSource (NonEmpty (SourceElement code)) + | EvalInline code + | Signature (NonEmpty EmbedSignatureLink) + | SignatureInline EmbedSignatureLink + | Word (Token String) + | Group (Join code a) + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +instance Bifunctor Leaf where + bimap f g = \case + Link x -> Link x + NamedLink a leaf -> NamedLink (g a) $ first f leaf + Example code -> Example $ f code + Transclude code -> Transclude $ f code + Bold a -> Bold $ g a + Italic a -> Italic $ g a + Strikethrough a -> Strikethrough $ g a + Verbatim leaf -> Verbatim 
leaf + Code leaf -> Code leaf + Source elems -> Source $ fmap f <$> elems + FoldedSource elems -> FoldedSource $ fmap f <$> elems + EvalInline code -> EvalInline $ f code + Signature x -> Signature x + SignatureInline x -> SignatureInline x + Word x -> Word x + Group join -> Group $ bimap f g join + +data EmbedLink + = EmbedTypeLink (Token (HQ'.HashQualified Name)) + | EmbedTermLink (Token (HQ'.HashQualified Name)) + deriving (Eq, Ord, Show) + +data SourceElement code = SourceElement EmbedLink [EmbedAnnotation code] + deriving (Eq, Ord, Show, Functor) + +newtype EmbedSignatureLink = EmbedSignatureLink (Token (HQ'.HashQualified Name)) + deriving (Eq, Ord, Show) + +newtype Join code a = Join (NonEmpty (Leaf code a)) + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +instance Bifunctor Join where + bimap f g (Join leaves) = Join $ bimap f g <$> leaves + +newtype EmbedAnnotation code + = -- | Always a Transclude + EmbedAnnotation (Either (Token (HQ'.HashQualified Name)) (Leaf code Void)) + deriving (Eq, Ord, Show) + +instance Functor EmbedAnnotation where + fmap f (EmbedAnnotation ann) = EmbedAnnotation $ first f <$> ann + +instance (Annotated code, Annotated a) => Annotated (Top code a) where + ann = \case + Section title body -> ann title <> ann body + Eval code -> ann code + ExampleBlock code -> ann code + CodeBlock label body -> ann label <> ann body + BulletedList items -> ann items + NumberedList items -> ann $ snd <$> items + Paragraph leaves -> ann leaves + +instance (Annotated a) => Annotated (Column a) where + ann (Column para list) = ann para <> ann list + +instance (Annotated code, Annotated a) => Annotated (Leaf code a) where + ann = \case + Link link -> ann link + NamedLink label target -> ann label <> ann target + Example code -> ann code + Transclude code -> ann code + Bold para -> ann para + Italic para -> ann para + Strikethrough para -> ann para + Verbatim word -> ann word + Code word -> ann word + Source elems -> ann elems + FoldedSource elems -> ann elems + EvalInline code -> ann code + Signature links -> ann links + SignatureInline link -> ann link + Word text -> ann text + Group (Join leaves) -> ann leaves + +instance Annotated EmbedLink where + ann = \case + EmbedTypeLink name -> ann name + EmbedTermLink name -> ann name + +instance (Annotated code) => Annotated (SourceElement code) where + ann (SourceElement link target) = ann link <> ann target + +instance Annotated EmbedSignatureLink where + ann (EmbedSignatureLink name) = ann name + +instance (Annotated code) => Annotated (EmbedAnnotation code) where + ann (EmbedAnnotation a) = either ann ann a diff --git a/unison-syntax/unison-syntax.cabal b/unison-syntax/unison-syntax.cabal index 4b097e6021..31ee026b7c 100644 --- a/unison-syntax/unison-syntax.cabal +++ b/unison-syntax/unison-syntax.cabal @@ -26,6 +26,7 @@ library Unison.Syntax.Name Unison.Syntax.NameSegment Unison.Syntax.Parser + Unison.Syntax.Parser.Doc.Data Unison.Syntax.ReservedWords Unison.Syntax.ShortHash Unison.Syntax.Var From 70fe615570ed57026fb58b8939a643d9155e58b0 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Thu, 25 Jul 2024 14:33:40 -0600 Subject: [PATCH 14/22] Add `Data.Functor.Classes` instances These are needed for the new Doc types, but had been stubbed out. Moving the Doc types to their own module forced the changes that got in the way of generating these with Template Haskell. 
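For reference, a minimal sketch of the `deriving-compat` pattern used
below (the `Pair` type here is hypothetical, standing in for
one-parameter types like `Column` and `Join`):

    {-# LANGUAGE TemplateHaskell #-}

    import Data.Eq.Deriving (deriveEq1)
    import Data.Ord.Deriving (deriveOrd1)
    import Text.Show.Deriving (deriveShow1)

    -- A hypothetical one-parameter container.
    data Pair a = Pair a a
      deriving (Eq, Ord, Show)

    -- Each splice generates the corresponding lifted instance, e.g.
    -- `instance Eq1 Pair`. Wrappers such as `Cofree` need these
    -- lifted instances in order to get `Eq`/`Ord`/`Show` themselves.
    $(deriveEq1 ''Pair)
    $(deriveOrd1 ''Pair)
    $(deriveShow1 ''Pair)

The `deriveEq2`/`deriveOrd2`/`deriveShow2` splices work the same way
for the two-parameter types `Top` and `Leaf`.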
--- .../src/Unison/Syntax/TermParser.hs | 4 +- unison-syntax/package.yaml | 1 + .../src/Unison/Syntax/Parser/Doc/Data.hs | 90 +++++++++++-------- unison-syntax/unison-syntax.cabal | 2 + 4 files changed, 57 insertions(+), 40 deletions(-) diff --git a/parser-typechecker/src/Unison/Syntax/TermParser.hs b/parser-typechecker/src/Unison/Syntax/TermParser.hs index 89d5504079..6433bf220c 100644 --- a/parser-typechecker/src/Unison/Syntax/TermParser.hs +++ b/parser-typechecker/src/Unison/Syntax/TermParser.hs @@ -598,7 +598,7 @@ doc2Block = do Doc.EmbedTermLink ident -> Term.app (gann d) (f d "EmbedTermLink") . addDelay <$> resolveHashQualified (HQ'.toHQ <$> ident) - docSourceElement :: Doc.SourceElement [L.Token L.Lexeme] -> TermP v m + docSourceElement :: Doc.SourceElement (Doc.Leaf [L.Token L.Lexeme] Void) -> TermP v m docSourceElement d@(Doc.SourceElement link anns) = do link' <- docEmbedLink link anns' <- traverse docEmbedAnnotation anns @@ -608,7 +608,7 @@ doc2Block = do docEmbedSignatureLink d@(Doc.EmbedSignatureLink ident) = Term.app (gann d) (f d "EmbedSignatureLink") . addDelay <$> resolveHashQualified (HQ'.toHQ <$> ident) - docEmbedAnnotation :: Doc.EmbedAnnotation [L.Token L.Lexeme] -> TermP v m + docEmbedAnnotation :: Doc.EmbedAnnotation (Doc.Leaf [L.Token L.Lexeme] Void) -> TermP v m docEmbedAnnotation d@(Doc.EmbedAnnotation a) = -- This is the only place I’m not sure we’re doing the right thing. In the lexer, this can be an identifier or a -- DocLeaf, but here it could be either /text/ or a Doc element. And I don’t think there’s any way the lexemes diff --git a/unison-syntax/package.yaml b/unison-syntax/package.yaml index ccb1a057d7..b093dc182f 100644 --- a/unison-syntax/package.yaml +++ b/unison-syntax/package.yaml @@ -9,6 +9,7 @@ dependencies: - bytes - containers - cryptonite + - deriving-compat - extra - free - lens diff --git a/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs b/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs index 4a88200b8b..5167b2bcf6 100644 --- a/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs +++ b/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE TemplateHaskell #-} + -- | Haskell parallel to @unison/base.Doc@. -- -- This is much more restricted than @unison/base.Doc@, but it covers everything we can parse from Haskell. The @@ -7,8 +9,10 @@ -- line. 
module Unison.Syntax.Parser.Doc.Data where -import Data.Functor.Classes +import Data.Eq.Deriving (deriveEq1, deriveEq2) import Data.List.NonEmpty (NonEmpty) +import Data.Ord.Deriving (deriveOrd1, deriveOrd2) +import Text.Show.Deriving (deriveShow1, deriveShow2) import Unison.HashQualifiedPrime qualified as HQ' import Unison.Name (Name) import Unison.Parser.Ann (Annotated (..)) @@ -19,7 +23,7 @@ newtype UntitledSection a = UntitledSection [a] deriving (Eq, Ord, Show, Foldable, Functor, Traversable) data Top code a - = -- | The first argument is always a Paragraph + = -- | The first argument is always a `Paragraph` Section a [a] | Eval code | ExampleBlock code @@ -29,30 +33,15 @@ data Top code a | Paragraph (NonEmpty (Leaf code a)) deriving (Eq, Ord, Show, Foldable, Functor, Traversable) -instance Eq2 Top where - liftEq2 _ _ _ _ = True - -instance (Eq code) => Eq1 (Top code) - -instance Ord2 Top where - liftCompare2 _ _ _ _ = LT - -instance (Ord code) => Ord1 (Top code) - -instance Show2 Top where - liftShowsPrec2 _ _ _ _ _ _ x = x - -instance (Show code) => Show1 (Top code) - data Column a - = -- | The first is always a Paragraph, and the second a Bulleted or Numbered List + = -- | The first is always a `Paragraph`, and the second a `BulletedList` or `NumberedList` Column a (Maybe a) deriving (Eq, Ord, Show, Foldable, Functor, Traversable) data Leaf code a = Link EmbedLink | -- | first is a Paragraph, second is always a Group (which contains either a single Term/Type link or list of - -- Transcludes & Words) + -- `Transclude`s & `Word`s) NamedLink a (Leaf code Void) | Example code | Transclude code @@ -66,13 +55,15 @@ data Leaf code a Verbatim (Leaf Void Void) | -- | Always a Word Code (Leaf Void Void) - | Source (NonEmpty (SourceElement code)) - | FoldedSource (NonEmpty (SourceElement code)) + | -- | Always a Transclude + Source (NonEmpty (SourceElement (Leaf code Void))) + | -- | Always a Transclude + FoldedSource (NonEmpty (SourceElement (Leaf code Void))) | EvalInline code | Signature (NonEmpty EmbedSignatureLink) | SignatureInline EmbedSignatureLink | Word (Token String) - | Group (Join code a) + | Group (Join (Leaf code a)) deriving (Eq, Ord, Show, Foldable, Functor, Traversable) instance Bifunctor Leaf where @@ -86,38 +77,31 @@ instance Bifunctor Leaf where Strikethrough a -> Strikethrough $ g a Verbatim leaf -> Verbatim leaf Code leaf -> Code leaf - Source elems -> Source $ fmap f <$> elems - FoldedSource elems -> FoldedSource $ fmap f <$> elems + Source elems -> Source $ fmap (first f) <$> elems + FoldedSource elems -> FoldedSource $ fmap (first f) <$> elems EvalInline code -> EvalInline $ f code Signature x -> Signature x SignatureInline x -> SignatureInline x Word x -> Word x - Group join -> Group $ bimap f g join + Group join -> Group $ bimap f g <$> join data EmbedLink = EmbedTypeLink (Token (HQ'.HashQualified Name)) | EmbedTermLink (Token (HQ'.HashQualified Name)) deriving (Eq, Ord, Show) -data SourceElement code = SourceElement EmbedLink [EmbedAnnotation code] - deriving (Eq, Ord, Show, Functor) +data SourceElement a = SourceElement EmbedLink [EmbedAnnotation a] + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) newtype EmbedSignatureLink = EmbedSignatureLink (Token (HQ'.HashQualified Name)) deriving (Eq, Ord, Show) -newtype Join code a = Join (NonEmpty (Leaf code a)) +newtype Join a = Join (NonEmpty a) deriving (Eq, Ord, Show, Foldable, Functor, Traversable) -instance Bifunctor Join where - bimap f g (Join leaves) = Join $ bimap f g <$> leaves - -newtype 
EmbedAnnotation code - = -- | Always a Transclude - EmbedAnnotation (Either (Token (HQ'.HashQualified Name)) (Leaf code Void)) - deriving (Eq, Ord, Show) - -instance Functor EmbedAnnotation where - fmap f (EmbedAnnotation ann) = EmbedAnnotation $ first f <$> ann +newtype EmbedAnnotation a + = EmbedAnnotation (Either (Token (HQ'.HashQualified Name)) a) + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) instance (Annotated code, Annotated a) => Annotated (Top code a) where ann = \case @@ -164,3 +148,33 @@ instance Annotated EmbedSignatureLink where instance (Annotated code) => Annotated (EmbedAnnotation code) where ann (EmbedAnnotation a) = either ann ann a + +$(deriveEq1 ''Column) +$(deriveOrd1 ''Column) +$(deriveShow1 ''Column) + +$(deriveEq1 ''EmbedAnnotation) +$(deriveOrd1 ''EmbedAnnotation) +$(deriveShow1 ''EmbedAnnotation) + +$(deriveEq1 ''SourceElement) +$(deriveOrd1 ''SourceElement) +$(deriveShow1 ''SourceElement) + +$(deriveEq1 ''Join) +$(deriveOrd1 ''Join) +$(deriveShow1 ''Join) + +$(deriveEq1 ''Leaf) +$(deriveOrd1 ''Leaf) +$(deriveShow1 ''Leaf) +$(deriveEq2 ''Leaf) +$(deriveOrd2 ''Leaf) +$(deriveShow2 ''Leaf) + +$(deriveEq1 ''Top) +$(deriveOrd1 ''Top) +$(deriveShow1 ''Top) +$(deriveEq2 ''Top) +$(deriveOrd2 ''Top) +$(deriveShow2 ''Top) diff --git a/unison-syntax/unison-syntax.cabal b/unison-syntax/unison-syntax.cabal index 31ee026b7c..853da4c817 100644 --- a/unison-syntax/unison-syntax.cabal +++ b/unison-syntax/unison-syntax.cabal @@ -69,6 +69,7 @@ library , bytes , containers , cryptonite + , deriving-compat , extra , free , lens @@ -127,6 +128,7 @@ test-suite syntax-tests , code-page , containers , cryptonite + , deriving-compat , easytest , extra , free From 31f952201c5a1448af0eb78b5a9dbff63f12c05f Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Thu, 25 Jul 2024 15:59:51 -0600 Subject: [PATCH 15/22] Simplify `restoreStack` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It’s only used inside `local`, so its attempts to restore the layout are for naught. --- unison-syntax/src/Unison/Syntax/Lexer.hs | 62 ++++++------------------ 1 file changed, 15 insertions(+), 47 deletions(-) diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index c0d1c3c04c..e2cba29dc4 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -257,7 +257,7 @@ token'' tok p = do topHasClosePair :: Layout -> Bool topHasClosePair [] = False topHasClosePair ((name, _) : _) = - name `elem` ["syntax.docTransclude", "{", "(", "[", "handle", "match", "if", "then"] + name `elem` ["DUMMY", "{", "(", "[", "handle", "match", "if", "then"] showErrorFancy :: (P.ShowErrorComponent e) => P.ErrorFancy e -> String showErrorFancy = \case @@ -394,22 +394,6 @@ infixl 2 <+> (<+>) :: (Monoid a) => P a -> P a -> P a p1 <+> p2 = do a1 <- p1; a2 <- p2; pure (a1 <> a2) --- Runs the parser `p`, then: --- 1. resets the layout stack to be what it was before `p`. --- 2. emits enough closing tokens to reach `lbl` but not pop it. 
--- (you can think of this as just dealing with a final "unclosed" --- block at the end of `p`) -restoreStack :: String -> P [Token Lexeme] -> P [Token Lexeme] -restoreStack lbl p = do - layout1 <- S.gets layout - p <- p - s2 <- S.get - let (pos1, pos2) = foldl' (\_ b -> (start b, end b)) mempty p - unclosed = takeWhile (\(lbl', _) -> lbl' /= lbl) (layout s2) - closes = replicate (length unclosed) (Token Close pos1 pos2) - S.put (s2 {layout = layout1}) - pure $ p <> closes - type DocTree = Cofree (Doc.Top [Token Lexeme]) Ann -- | The `Doc` lexer as documented on unison-lang.org @@ -501,7 +485,7 @@ sepBy1' p sep = liftA2 (:|) p . many $ sep *> p -- | This is the actual `Doc` lexer. Unlike `doc2`, it doesn’t do any Unison-side lexing (i.e., it doesn’t know that -- Unison wraps `Doc` literals in `}}`). docBody :: P end -> P (Doc.UntitledSection DocTree) -docBody docClose' = Doc.UntitledSection <$> P.many (sectionElem <* CP.space) +docBody docClose = Doc.UntitledSection <$> P.many (sectionElem <* CP.space) where wordyKw kw = separated wordySep (lit kw) sectionElem = section <|> fencedBlock <|> list <|> paragraph @@ -626,8 +610,6 @@ docBody docClose' = Doc.UntitledSection <$> P.many (sectionElem <* CP.space) ex <- CP.space *> lexemes' end pure ex - docClose = [] <$ docClose' - link = P.label "link (examples: {type List}, {Nat.+})" $ fmap Doc.Link $ @@ -636,20 +618,7 @@ docBody docClose' = Doc.UntitledSection <$> P.many (sectionElem <* CP.space) expr = fmap Doc.Transclude . P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $ - openAs "{{" "syntax.docTransclude" - *> do - env0 <- S.get - -- we re-allow layout within a transclusion, then restore it to its - -- previous state after - S.put (env0 {inLayout = True}) - -- Note: this P.lookAhead ensures the }} isn't consumed, - -- so it can be consumed below by the `close` which will - -- pop items off the layout stack up to the nearest enclosing - -- syntax.docTransclude. - ts <- lexemes' (P.lookAhead ([] <$ lit "}}")) - S.modify (\env -> env {inLayout = inLayout env0}) - pure ts - <* close ["syntax.docTransclude"] (lit "}}") + lit "{{" *> lexemes' ([] <$ lit "}}") nonNewlineSpace ch = isSpace ch && ch /= '\n' && ch /= '\r' nonNewlineSpaces = P.takeWhileP Nothing nonNewlineSpace @@ -673,16 +642,12 @@ docBody docClose' = Doc.UntitledSection <$> P.many (sectionElem <* CP.space) b <- all isSpace <$> P.lookAhead (P.takeWhileP Nothing (/= '\n')) fence <$ guard b CP.space - *> local - (\env -> env {inLayout = True, opening = Just "docEval"}) - (restoreStack "docEval" $ lexemes' ([] <$ lit fence)) + *> lexemes' ([] <$ lit fence) exampleBlock = fmap (wrap' . Doc.ExampleBlock) $ do void $ lit "@typecheck" <* CP.space fence <- lit "```" <+> P.takeWhileP Nothing (== '`') - local - (\env -> env {inLayout = True, opening = Just "docExampleBlock"}) - (restoreStack "docExampleBlock" $ lexemes' ([] <$ lit fence)) + lexemes' $ [] <$ lit fence uncolumn column tabWidth s = let skip col r | col < 1 = r @@ -855,10 +820,16 @@ docBody docClose' = Doc.UntitledSection <$> P.many (sectionElem <* CP.space) wrapSimple2 fn a b = ann a <> ann b :< fn a b lexemes' :: P [Token Lexeme] -> P [Token Lexeme] -lexemes' = +lexemes' eof = -- NB: `postLex` requires the token stream to start with an `Open`, otherwise it can’t create a `T`, so this adds one, -- runs `postLex`, then removes it. - fmap (tail . postLex . (Token (Open "fake") mempty mempty :)) . lexemes + fmap (tail . postLex . 
(Token (Open "fake") mempty mempty :)) $ + local (\env -> env {inLayout = True, opening = Just "DUMMY"}) do + p <- lexemes eof + -- deals with a final "unclosed" block at the end of `p`) + unclosed <- takeWhile (("DUMMY" /=) . fst) . layout <$> S.get + let pos = end $ last p + pure $ p <> replicate (length unclosed) (Token Close pos pos) -- | Consumes an entire Unison “module”. lexemes :: P [Token Lexeme] -> P [Token Lexeme] @@ -1245,11 +1216,8 @@ separated :: (Char -> Bool) -> P a -> P a separated ok p = P.try $ p <* P.lookAhead (void (P.satisfy ok) <|> P.eof) open :: String -> P [Token Lexeme] -open b = openAs b b - -openAs :: String -> String -> P [Token Lexeme] -openAs syntax b = do - token <- tokenP $ lit syntax +open b = do + token <- tokenP $ lit b env <- S.get S.put (env {opening = Just b}) pure [Open b <$ token] From 6f2d188e5c8a0edae756046c7adaa0fbd9581407 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Thu, 25 Jul 2024 14:35:26 -0600 Subject: [PATCH 16/22] Split Doc parser from Unison lexer --- parser-typechecker/src/Unison/PrintError.hs | 2 +- .../src/Unison/Syntax/TermParser.hs | 2 +- .../src/Unison/Syntax/TermPrinter.hs | 2 +- .../src/Unison/Codebase/Editor/HandleInput.hs | 5 +- unison-cli/src/Unison/LSP/FileAnalysis.hs | 2 +- unison-cli/src/Unison/LSP/Types.hs | 2 +- unison-syntax/src/Unison/Syntax/Lexer.hs | 1354 +---------------- .../src/Unison/Syntax/Lexer/Unison.hs | 910 +++++++++++ unison-syntax/src/Unison/Syntax/Parser.hs | 5 +- unison-syntax/src/Unison/Syntax/Parser/Doc.hs | 476 ++++++ unison-syntax/test/Main.hs | 2 +- unison-syntax/unison-syntax.cabal | 2 + 12 files changed, 1430 insertions(+), 1334 deletions(-) create mode 100644 unison-syntax/src/Unison/Syntax/Lexer/Unison.hs create mode 100644 unison-syntax/src/Unison/Syntax/Parser/Doc.hs diff --git a/parser-typechecker/src/Unison/PrintError.hs b/parser-typechecker/src/Unison/PrintError.hs index 8b73b179f1..dd796c0159 100644 --- a/parser-typechecker/src/Unison/PrintError.hs +++ b/parser-typechecker/src/Unison/PrintError.hs @@ -54,7 +54,7 @@ import Unison.Result qualified as Result import Unison.Settings qualified as Settings import Unison.Symbol (Symbol) import Unison.Syntax.HashQualified qualified as HQ (toText) -import Unison.Syntax.Lexer qualified as L +import Unison.Syntax.Lexer.Unison qualified as L import Unison.Syntax.Name qualified as Name (toText) import Unison.Syntax.NamePrinter (prettyHashQualified0) import Unison.Syntax.Parser (Annotated, ann) diff --git a/parser-typechecker/src/Unison/Syntax/TermParser.hs b/parser-typechecker/src/Unison/Syntax/TermParser.hs index 6433bf220c..4c3069b9ff 100644 --- a/parser-typechecker/src/Unison/Syntax/TermParser.hs +++ b/parser-typechecker/src/Unison/Syntax/TermParser.hs @@ -47,7 +47,7 @@ import Unison.Pattern qualified as Pattern import Unison.Prelude import Unison.Reference (Reference) import Unison.Referent (Referent) -import Unison.Syntax.Lexer qualified as L +import Unison.Syntax.Lexer.Unison qualified as L import Unison.Syntax.Name qualified as Name (toText, toVar, unsafeParseVar) import Unison.Syntax.NameSegment qualified as NameSegment import Unison.Syntax.Parser hiding (seq) diff --git a/parser-typechecker/src/Unison/Syntax/TermPrinter.hs b/parser-typechecker/src/Unison/Syntax/TermPrinter.hs index faeda76020..5c41701bf8 100644 --- a/parser-typechecker/src/Unison/Syntax/TermPrinter.hs +++ b/parser-typechecker/src/Unison/Syntax/TermPrinter.hs @@ -51,7 +51,7 @@ import Unison.Reference qualified as Reference import Unison.Referent (Referent) import 
Unison.Referent qualified as Referent import Unison.Syntax.HashQualified qualified as HQ (unsafeFromVar) -import Unison.Syntax.Lexer (showEscapeChar) +import Unison.Syntax.Lexer.Unison (showEscapeChar) import Unison.Syntax.Name qualified as Name (isSymboly, parseText, parseTextEither, toText, unsafeParseText) import Unison.Syntax.NamePrinter (styleHashQualified'') import Unison.Syntax.NameSegment qualified as NameSegment (toEscapedText) diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs index e85879cc4a..e17d3fdd9e 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs @@ -151,8 +151,7 @@ import Unison.ShortHash qualified as SH import Unison.Sqlite qualified as Sqlite import Unison.Symbol (Symbol) import Unison.Syntax.HashQualified qualified as HQ (parseTextWith, toText) -import Unison.Syntax.Lexer qualified as L -import Unison.Syntax.Lexer qualified as Lexer +import Unison.Syntax.Lexer.Unison qualified as L import Unison.Syntax.Name qualified as Name (toText, toVar, unsafeParseVar) import Unison.Syntax.NameSegment qualified as NameSegment import Unison.Syntax.Parser qualified as Parser @@ -1137,7 +1136,7 @@ handleFindI isVerbose fscope ws input = do -- name query qs -> do - let anythingBeforeHash :: Megaparsec.Parsec (Lexer.Token Text) [Char] Text + let anythingBeforeHash :: Megaparsec.Parsec (L.Token Text) [Char] Text anythingBeforeHash = Text.pack <$> Megaparsec.takeWhileP Nothing (/= '#') let srs = searchBranchScored diff --git a/unison-cli/src/Unison/LSP/FileAnalysis.hs b/unison-cli/src/Unison/LSP/FileAnalysis.hs index 7a7ae006cf..bec9f8bf9f 100644 --- a/unison-cli/src/Unison/LSP/FileAnalysis.hs +++ b/unison-cli/src/Unison/LSP/FileAnalysis.hs @@ -57,7 +57,7 @@ import Unison.Result (Note) import Unison.Result qualified as Result import Unison.Symbol (Symbol) import Unison.Syntax.HashQualifiedPrime qualified as HQ' (toText) -import Unison.Syntax.Lexer qualified as L +import Unison.Syntax.Lexer.Unison qualified as L import Unison.Syntax.Name qualified as Name import Unison.Syntax.Parser qualified as Parser import Unison.Syntax.TypePrinter qualified as TypePrinter diff --git a/unison-cli/src/Unison/LSP/Types.hs b/unison-cli/src/Unison/LSP/Types.hs index b368e915ef..268034ea5a 100644 --- a/unison-cli/src/Unison/LSP/Types.hs +++ b/unison-cli/src/Unison/LSP/Types.hs @@ -41,7 +41,7 @@ import Unison.Server.Backend qualified as Backend import Unison.Server.NameSearch (NameSearch) import Unison.Sqlite qualified as Sqlite import Unison.Symbol -import Unison.Syntax.Lexer qualified as Lexer +import Unison.Syntax.Lexer.Unison qualified as Lexer import Unison.Type (Type) import Unison.UnisonFile qualified as UF import Unison.UnisonFile.Summary (FileSummary (..)) diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index e2cba29dc4..cfd932cd7e 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -1,21 +1,13 @@ {-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -Wno-orphans #-} +-- | This currently contains a mix of general lexing utilities and identifier-y lexers. 
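+--
+-- After this split, the Unison-specific pieces (the `Lexeme` type, layout
+-- handling, and the `lexer` entry point) live in "Unison.Syntax.Lexer.Unison",
+-- and the `Doc` parser lives in "Unison.Syntax.Parser.Doc"; this module keeps
+-- only the shared primitives that both build on.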
module Unison.Syntax.Lexer ( Token (..), Line, Column, Err (..), Pos (..), - Lexeme (..), - DocTree, - lexer, - preParse, - escapeChars, - debugFilePreParse, - debugPreParse, - debugPreParse', - showEscapeChar, touches, -- * Character classifiers @@ -23,28 +15,40 @@ module Unison.Syntax.Lexer wordyIdStartChar, symbolyIdChar, - -- * Error formatting - formatTrivialError, - displayLexeme, + -- * new exports + BlockName, + Layout, + ParsingEnv (..), + P, + local, + parseFailure, + space, + lit, + err, + commitAfter2, + (<+>), + some', + someTill', + sepBy1', + separated, + wordySep, + identifierP, + wordyIdSegP, + shortHashP, + topBlockName, + pop, + typeOrAbilityAlt, + typeModifiersAlt, + inc, ) where import Control.Comonad.Cofree (Cofree ((:<))) -import Control.Lens qualified as Lens import Control.Monad.State qualified as S -import Data.Char (isAlphaNum, isControl, isDigit, isSpace, ord, toLower) -import Data.Foldable qualified as Foldable -import Data.List qualified as List -import Data.List.Extra qualified as List +import Data.Char (isSpace) import Data.List.NonEmpty (NonEmpty ((:|))) -import Data.List.NonEmpty qualified as Nel -import Data.List.NonEmpty qualified as NonEmpty -import Data.Map.Strict qualified as Map -import Data.Set qualified as Set import Data.Text qualified as Text -import GHC.Exts (sortWith) import Text.Megaparsec qualified as P -import Text.Megaparsec.Char (char) import Text.Megaparsec.Char qualified as CP import Text.Megaparsec.Char.Lexer qualified as LP import Text.Megaparsec.Error qualified as EP @@ -52,24 +56,16 @@ import Text.Megaparsec.Internal qualified as PI import Unison.HashQualifiedPrime qualified as HQ' import Unison.Lexer.Pos (Column, Line, Pos (Pos), column, line) import Unison.Name (Name) -import Unison.Name qualified as Name import Unison.NameSegment (NameSegment) -import Unison.NameSegment qualified as NameSegment (docSegment) -import Unison.NameSegment.Internal qualified as NameSegment -import Unison.Parser.Ann (Ann, Annotated (..)) +import Unison.Parser.Ann (Annotated (..)) import Unison.Prelude import Unison.ShortHash (ShortHash) -import Unison.ShortHash qualified as SH -import Unison.Syntax.HashQualifiedPrime qualified as HQ' (toText) -import Unison.Syntax.Lexer.Token (Token (..), posP, tokenP) -import Unison.Syntax.Name qualified as Name (isSymboly, nameP, toText, unsafeParseText) +import Unison.Syntax.Lexer.Token (Token (..), posP) +import Unison.Syntax.Name qualified as Name (nameP) import Unison.Syntax.NameSegment (symbolyIdChar, wordyIdChar, wordyIdStartChar) import Unison.Syntax.NameSegment qualified as NameSegment (ParseErr (..), wordyP) -import Unison.Syntax.Parser.Doc.Data qualified as Doc -import Unison.Syntax.ReservedWords (delimiters, typeModifiers, typeOrAbility) +import Unison.Syntax.ReservedWords (typeModifiers, typeOrAbility) import Unison.Syntax.ShortHash qualified as ShortHash (shortHashP) -import Unison.Util.Bytes qualified as Bytes -import Unison.Util.Monoid (intercalateMap) instance (Annotated a) => Annotated (Cofree f a) where ann (a :< _) = ann a @@ -128,29 +124,6 @@ data Err | UnexpectedTokens String -- Catch-all for all other lexer errors, representing some unexpected tokens. 
deriving stock (Eq, Ord, Show) -- richer algebra --- Design principle: --- `[Lexeme]` should be sufficient information for parsing without --- further knowledge of spacing or indentation levels --- any knowledge of comments -data Lexeme - = Open String -- start of a block - | Semi IsVirtual -- separator between elements of a block - | Close -- end of a block - | Reserved String -- reserved tokens such as `{`, `(`, `type`, `of`, etc - | Textual String -- text literals, `"foo bar"` - | Character Char -- character literals, `?X` - | WordyId (HQ'.HashQualified Name) -- a (non-infix) identifier. invariant: last segment is wordy - | SymbolyId (HQ'.HashQualified Name) -- an infix identifier. invariant: last segment is symboly - | Blank String -- a typed hole or placeholder - | Numeric String -- numeric literals, left unparsed - | Bytes Bytes.Bytes -- bytes literals - | Hash ShortHash -- hash literals - | Err Err - | Doc (Doc.UntitledSection DocTree) - deriving stock (Eq, Show, Ord) - -type IsVirtual = Bool -- is it a virtual semi or an actual semi? - space :: P () space = LP.space @@ -163,15 +136,6 @@ space = lit :: String -> P String lit = P.try . LP.symbol (pure ()) -token :: P Lexeme -> P [Token Lexeme] -token = token' (\a start end -> [Token a start end]) - --- Token parser: strips trailing whitespace and comments after a --- successful parse, and also takes care of emitting layout tokens --- (such as virtual semicolons and closing tokens). -token' :: (a -> Pos -> Pos -> [Token Lexeme]) -> P a -> P [Token Lexeme] -token' tok p = LP.lexeme space (token'' tok p) - -- Committed failure err :: Pos -> Err -> P x err start t = do @@ -193,283 +157,11 @@ commitAfter2 a b f = do (a, b) <- P.try $ liftA2 (,) a b f a b --- Token parser implementation which leaves trailing whitespace and comments --- but does emit layout tokens such as virtual semicolons and closing tokens. -token'' :: (a -> Pos -> Pos -> [Token Lexeme]) -> P a -> P [Token Lexeme] -token'' tok p = do - start <- posP - -- We save the current state so we can backtrack the state if `p` fails. - env <- S.get - layoutToks <- case opening env of - -- If we're opening a block named b, we push (b, currentColumn) onto - -- the layout stack. Example: - -- - -- blah = cases - -- {- A comment -} - -- -- A one-line comment - -- 0 -> "hi" - -- 1 -> "bye" - -- - -- After the `cases` token, the state will be opening = Just "cases", - -- meaning the parser is searching for the next non-whitespace/comment - -- character to determine the leftmost column of the `cases` block. - -- That will be the column of the `0`. - Just blockname -> - -- special case - handling of empty blocks, as in: - -- foo = - -- bar = 42 - if blockname == "=" && column start <= top l && not (null l) - then do - S.put (env {layout = (blockname, column start + 1) : l, opening = Nothing}) - pops start - else [] <$ S.put (env {layout = layout', opening = Nothing}) - where - layout' = (blockname, column start) : l - l = layout env - -- If we're not opening a block, we potentially pop from - -- the layout stack and/or emit virtual semicolons. 
- Nothing -> if inLayout env then pops start else pure [] - beforeTokenPos <- posP - a <- p <|> (S.put env >> fail "resetting state") - endPos <- posP - pure $ layoutToks ++ tok a beforeTokenPos endPos - where - pops :: Pos -> P [Token Lexeme] - pops p = do - env <- S.get - let l = layout env - if top l == column p && topContainsVirtualSemis l - then pure [Token (Semi True) p p] - else - if column p > top l || topHasClosePair l - then pure [] - else - if column p < top l - then S.put (env {layout = pop l}) >> ((Token Close p p :) <$> pops p) - else error "impossible" - - -- don't emit virtual semis in (, {, or [ blocks - topContainsVirtualSemis :: Layout -> Bool - topContainsVirtualSemis = \case - [] -> False - ((name, _) : _) -> name /= "(" && name /= "{" && name /= "[" - - topHasClosePair :: Layout -> Bool - topHasClosePair [] = False - topHasClosePair ((name, _) : _) = - name `elem` ["DUMMY", "{", "(", "[", "handle", "match", "if", "then"] - -showErrorFancy :: (P.ShowErrorComponent e) => P.ErrorFancy e -> String -showErrorFancy = \case - P.ErrorFail msg -> msg - P.ErrorIndentation ord ref actual -> - "incorrect indentation (got " - <> show (P.unPos actual) - <> ", should be " - <> p - <> show (P.unPos ref) - <> ")" - where - p = case ord of - LT -> "less than " - EQ -> "equal to " - GT -> "greater than " - P.ErrorCustom a -> P.showErrorComponent a - -lexer :: String -> String -> [Token Lexeme] -lexer scope rem = - case flip S.evalState env0 $ P.runParserT (lexemes eof) scope rem of - Left e -> - let errsWithSourcePos = - fst $ - P.attachSourcePos - P.errorOffset - (toList (P.bundleErrors e)) - (P.bundlePosState e) - errorToTokens :: (EP.ParseError String (Token Err), P.SourcePos) -> [Token Lexeme] - errorToTokens (err, top) = case err of - P.FancyError _ (customErrs -> es) | not (null es) -> es - P.FancyError _errOffset es -> - let msg = intercalateMap "\n" showErrorFancy es - in [Token (Err (UnexpectedTokens msg)) (toPos top) (toPos top)] - P.TrivialError _errOffset mayUnexpectedTokens expectedTokens -> - let unexpectedStr :: Set String - unexpectedStr = - mayUnexpectedTokens - & fmap errorItemToString - & maybeToList - & Set.fromList - errorLength :: Int - errorLength = case Set.toList unexpectedStr of - [] -> 0 - (x : _) -> length x - expectedStr :: Set String - expectedStr = - expectedTokens - & Set.map errorItemToString - err = UnexpectedTokens $ formatTrivialError unexpectedStr expectedStr - startPos = toPos top - -- This is just an attempt to highlight errors better in source excerpts. - -- It may not work in all cases, but should generally provide a better experience. 
- endPos = startPos & \(Pos l c) -> Pos l (c + errorLength) - in [Token (Err err) startPos endPos] - in errsWithSourcePos >>= errorToTokens - Right ts -> postLex $ Token (Open scope) topLeftCorner topLeftCorner : ts - where - eof :: P [Token Lexeme] - eof = P.try do - p <- P.eof >> posP - n <- maybe 0 (const 1) <$> S.gets opening - l <- S.gets layout - pure $ replicate (length l + n) (Token Close p p) - errorItemToString :: EP.ErrorItem Char -> String - errorItemToString = \case - (P.Tokens ts) -> Foldable.toList ts - (P.Label ts) -> Foldable.toList ts - (P.EndOfInput) -> "end of input" - customErrs es = [Err <$> e | P.ErrorCustom e <- toList es] - toPos (P.SourcePos _ line col) = Pos (P.unPos line) (P.unPos col) - env0 = ParsingEnv [] (Just scope) True [0] 0 - --- | hacky postprocessing pass to do some cleanup of stuff that's annoying to --- fix without adding more state to the lexer: --- - 1+1 lexes as [1, +1], convert this to [1, +, 1] --- - when a semi followed by a virtual semi, drop the virtual, lets you --- write --- foo x = action1; --- 2 --- - semi immediately after first Open is ignored -tweak :: (Token Lexeme) -> [Token Lexeme] -> [Token Lexeme] -tweak h@(Token (Semi False) _ _) (Token (Semi True) _ _ : t) = h : t --- __NB__: This case only exists to guard against the following one -tweak h@(Token (Reserved _) _ _) t = h : t -tweak t1 (t2@(Token (Numeric num) _ _) : rem) - | notLayout t1 && touches t1 t2 && isSigned num = - t1 - : Token - (SymbolyId (HQ'.fromName (Name.unsafeParseText (Text.pack (take 1 num))))) - (start t2) - (inc $ start t2) - : Token (Numeric (drop 1 num)) (inc $ start t2) (end t2) - : rem - where - isSigned num = all (\ch -> ch == '-' || ch == '+') $ take 1 num -tweak h t = h : t - -formatTrivialError :: Set String -> Set String -> [Char] -formatTrivialError unexpectedTokens expectedTokens = - let unexpectedMsg = case Set.toList unexpectedTokens of - [] -> "I found something I didn't expect." - [x] -> - let article = case x of - (c : _) | c `elem` ("aeiou" :: String) -> "an" - _ -> "a" - in "I was surprised to find " <> article <> " " <> x <> " here." - xs -> "I was surprised to find these:\n\n* " <> List.intercalate "\n* " xs - expectedMsg = case Set.toList expectedTokens of - [] -> Nothing - xs -> Just $ "\nI was expecting one of these instead:\n\n* " <> List.intercalate "\n* " xs - in concat $ catMaybes [Just unexpectedMsg, expectedMsg] - -displayLexeme :: Lexeme -> String -displayLexeme = \case - Open o -> o - Semi True -> "end of stanza" - Semi False -> "semicolon" - Close -> "end of section" - Reserved r -> "'" <> r <> "'" - Textual t -> "\"" <> t <> "\"" - Character c -> "?" <> [c] - WordyId hq -> Text.unpack (HQ'.toTextWith Name.toText hq) - SymbolyId hq -> Text.unpack (HQ'.toTextWith Name.toText hq) - Blank b -> b - Numeric n -> n - Bytes _b -> "bytes literal" - Hash h -> Text.unpack (SH.toText h) - Err e -> show e - Doc _ -> "doc structure" - infixl 2 <+> (<+>) :: (Monoid a) => P a -> P a -> P a p1 <+> p2 = do a1 <- p1; a2 <- p2; pure (a1 <> a2) -type DocTree = Cofree (Doc.Top [Token Lexeme]) Ann - --- | The `Doc` lexer as documented on unison-lang.org -doc2 :: P [Token Lexeme] -doc2 = do - -- Ensure we're at a doc before we start consuming tokens - P.lookAhead (lit "{{") - openStart <- posP - -- Produce any layout tokens, such as closing the last open block or virtual semicolons - -- We don't use 'token' on "{{" directly because we don't want to duplicate layout - -- tokens if we do the rewrite hack for type-docs below. 
- beforeStartToks <- token' ignore (pure ()) - void $ lit "{{" - openEnd <- posP - CP.space - env0 <- S.get - -- Disable layout while parsing the doc block and reset the section number - (docTok, closeTok) <- local - ( \env -> - env - { inLayout = False, - parentSections = 0 : (parentSections env0) - } - ) - do - body <- docBody (lit "}}") - closeStart <- posP - lit "}}" - closeEnd <- posP - pure (Token (Doc body) openStart closeEnd, Token Close closeStart closeEnd) - -- Parse any layout tokens after the doc block, e.g. virtual semicolon - endToks <- token' ignore (pure ()) - -- Hack to allow anonymous doc blocks before type decls - -- {{ Some docs }} Foo.doc = {{ Some docs }} - -- ability Foo where => ability Foo where - -- - -- __FIXME__: This should be done _after_ parsing, not in lexing. - tn <- subsequentTypeName - pure $ - beforeStartToks <> case (tn) of - -- If we're followed by a type, we rewrite the doc block to be a named doc block. - Just (WordyId tname) - | isTopLevel -> - Token (WordyId (HQ'.fromName (Name.snoc (HQ'.toName tname) NameSegment.docSegment))) openStart openEnd - : Token (Open "=") openStart openEnd - : docTok - -- We need an extra 'Close' here because we added an extra Open above. - : closeTok - : endToks - where - isTopLevel = length (layout env0) + maybe 0 (const 1) (opening env0) == 1 - _ -> docTok : endToks - where - -- DUPLICATED - wordyKw kw = separated wordySep (lit kw) - subsequentTypeName = P.lookAhead . P.optional $ do - let lit' s = lit s <* sp - let modifier = typeModifiersAlt (lit' . Text.unpack) - let typeOrAbility' = typeOrAbilityAlt (wordyKw . Text.unpack) - _ <- optional modifier *> typeOrAbility' *> sp - Token name start stop <- tokenP identifierP - if Name.isSymboly (HQ'.toName name) - then P.customFailure (Token (InvalidSymbolyId (Text.unpack (HQ'.toTextWith Name.toText name))) start stop) - else pure (WordyId name) - ignore _ _ _ = [] - -- DUPLICATED - sp = P.try $ do - spaces <- P.takeWhile1P (Just "space") isSpace - close <- P.optional (P.lookAhead (lit "}}")) - case close of - Nothing -> guard $ ok spaces - Just _ -> pure () - pure spaces - where - ok s = length [() | '\n' <- s] < 2 - -- | Like `P.some`, but returns an actual `NonEmpty`. some' :: P a -> P (NonEmpty a) some' p = liftA2 (:|) p $ many p @@ -482,761 +174,12 @@ someTill' p end = liftA2 (:|) p $ P.manyTill p end sepBy1' :: P a -> P sep -> P (NonEmpty a) sepBy1' p sep = liftA2 (:|) p . many $ sep *> p --- | This is the actual `Doc` lexer. Unlike `doc2`, it doesn’t do any Unison-side lexing (i.e., it doesn’t know that --- Unison wraps `Doc` literals in `}}`). -docBody :: P end -> P (Doc.UntitledSection DocTree) -docBody docClose = Doc.UntitledSection <$> P.many (sectionElem <* CP.space) - where - wordyKw kw = separated wordySep (lit kw) - sectionElem = section <|> fencedBlock <|> list <|> paragraph - paragraph = wrap' . Doc.Paragraph <$> spaced leaf - reserved word = List.isPrefixOf "}}" word || all (== '#') word - - wordy :: P end -> P (Doc.Leaf [Token Lexeme] void) - wordy closing = fmap Doc.Word . tokenP . 
P.try $ do - let end = - P.lookAhead $ - void docClose - <|> void (P.satisfy isSpace) - <|> void closing - word <- P.manyTill (P.satisfy (\ch -> not (isSpace ch))) end - guard (not $ reserved word || null word) - pure word - - leafy closing = groupy closing gs - where - gs = - link - <|> externalLink - <|> exampleInline - <|> expr - <|> boldOrItalicOrStrikethrough closing - <|> verbatim - <|> atDoc - <|> wordy closing - - leaf = leafy mzero - - atDoc = src <|> evalInline <|> signature <|> signatureInline - where - comma = lit "," <* CP.space - src = - src' Doc.Source "@source" - <|> src' Doc.FoldedSource "@foldedSource" - srcElem = - Doc.SourceElement - <$> (typeLink <|> termLink) - <*> ( fmap (fromMaybe []) . P.optional $ - (lit "@") *> (CP.space *> annotations) - ) - where - annotation = fmap Left (tokenP identifierP) <|> fmap Right expr <* CP.space - annotations = - P.some (Doc.EmbedAnnotation <$> annotation) - src' name atName = fmap name $ do - _ <- lit atName *> (lit " {" <|> lit "{") *> CP.space - s <- sepBy1' srcElem comma - _ <- lit "}" - pure s - signature = fmap Doc.Signature $ do - _ <- (lit "@signatures" <|> lit "@signature") *> (lit " {" <|> lit "{") *> CP.space - s <- sepBy1' signatureLink comma - _ <- lit "}" - pure s - signatureInline = fmap Doc.SignatureInline $ do - _ <- lit "@inlineSignature" *> (lit " {" <|> lit "{") *> CP.space - s <- signatureLink - _ <- lit "}" - pure s - evalInline = fmap Doc.EvalInline $ do - _ <- lit "@eval" *> (lit " {" <|> lit "{") *> CP.space - let inlineEvalClose = [] <$ lit "}" - s <- lexemes' inlineEvalClose - pure s - - typeLink = fmap Doc.EmbedTypeLink $ do - _ <- typeOrAbilityAlt (wordyKw . Text.unpack) <* CP.space - tokenP identifierP <* CP.space - - termLink = - fmap Doc.EmbedTermLink $ - tokenP identifierP <* CP.space - - signatureLink = - fmap Doc.EmbedSignatureLink $ - tokenP identifierP <* CP.space - - groupy closing p = do - Token p _ _ <- tokenP p - after <- P.optional . P.try $ leafy closing - pure $ case after of - Nothing -> p - Just after -> - Doc.Group - . Doc.Join - $ p - :| pure after - - verbatim = - P.label "code (examples: ''**unformatted**'', `words` or '''_words_''')" $ do - Token originalText start stop <- tokenP do - -- a single backtick followed by a non-backtick is treated as monospaced - let tick = P.try (lit "`" <* P.lookAhead (P.satisfy (/= '`'))) - -- also two or more ' followed by that number of closing ' - quotes <- tick <|> (lit "''" <+> P.takeWhileP Nothing (== '\'')) - P.someTill P.anySingle (lit quotes) - let isMultiLine = line start /= line stop - if isMultiLine - then do - let trimmed = (trimAroundDelimiters originalText) - let txt = trimIndentFromVerbatimBlock (column start - 1) trimmed - -- If it's a multi-line verbatim block we trim any whitespace representing - -- indentation from the pretty-printer. See 'trimIndentFromVerbatimBlock' - pure . Doc.Verbatim $ - Doc.Word $ - Token txt start stop - else - pure . Doc.Code $ - Doc.Word $ - Token originalText start stop - - exampleInline = - P.label "inline code (examples: ``List.map f xs``, ``[1] :+ 2``)" $ - fmap Doc.Example $ do - n <- P.try $ do - _ <- lit "`" - length <$> P.takeWhile1P (Just "backticks") (== '`') - let end :: P [Token Lexeme] = [] <$ lit (replicate (n + 1) '`') - ex <- CP.space *> lexemes' end - pure ex - - link = - P.label "link (examples: {type List}, {Nat.+})" $ - fmap Doc.Link $ - P.try $ - lit "{" *> (typeLink <|> termLink) <* lit "}" - - expr = - fmap Doc.Transclude . 
P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $ - lit "{{" *> lexemes' ([] <$ lit "}}") - - nonNewlineSpace ch = isSpace ch && ch /= '\n' && ch /= '\r' - nonNewlineSpaces = P.takeWhileP Nothing nonNewlineSpace - - -- Allows whitespace or a newline, but not more than two newlines in a row. - whitespaceWithoutParagraphBreak :: P () - whitespaceWithoutParagraphBreak = void do - void nonNewlineSpaces - optional newline >>= \case - Just _ -> void nonNewlineSpaces - Nothing -> pure () - - fencedBlock = - P.label "block eval (syntax: a fenced code block)" $ - evalUnison <|> exampleBlock <|> other - where - evalUnison = fmap (wrap' . Doc.Eval) $ do - -- commit after seeing that ``` is on its own line - fence <- P.try $ do - fence <- lit "```" <+> P.takeWhileP Nothing (== '`') - b <- all isSpace <$> P.lookAhead (P.takeWhileP Nothing (/= '\n')) - fence <$ guard b - CP.space - *> lexemes' ([] <$ lit fence) - - exampleBlock = fmap (wrap' . Doc.ExampleBlock) $ do - void $ lit "@typecheck" <* CP.space - fence <- lit "```" <+> P.takeWhileP Nothing (== '`') - lexemes' $ [] <$ lit fence - - uncolumn column tabWidth s = - let skip col r | col < 1 = r - skip col s@('\t' : _) | col < tabWidth = s - skip col ('\t' : r) = skip (col - tabWidth) r - skip col (c : r) - | isSpace c && (not $ isControl c) = - skip (col - 1) r - skip _ s = s - in List.intercalate "\n" $ skip column <$> lines s - - other = fmap (uncurry $ wrapSimple2 Doc.CodeBlock) $ do - column <- (\x -> x - 1) . toInteger . P.unPos <$> LP.indentLevel - let tabWidth = toInteger . P.unPos $ P.defaultTabWidth - fence <- lit "```" <+> P.takeWhileP Nothing (== '`') - name <- - P.takeWhileP Nothing nonNewlineSpace - *> tokenP (P.takeWhile1P Nothing (not . isSpace)) - <* P.takeWhileP Nothing nonNewlineSpace - _ <- void CP.eol - verbatim <- - tokenP $ - uncolumn column tabWidth . trimAroundDelimiters - <$> P.someTill P.anySingle ([] <$ lit fence) - pure (name, verbatim) - - boldOrItalicOrStrikethrough closing = do - let start = - some (P.satisfy (== '*')) - <|> some (P.satisfy (== '_')) - <|> some - (P.satisfy (== '~')) - name s = - if take 1 s == "~" - then Doc.Strikethrough - else if take 1 s == "*" then Doc.Bold else Doc.Italic - end <- P.try $ do - end <- start - P.lookAhead (P.satisfy (not . isSpace)) - pure end - name end . wrap' . Doc.Paragraph - <$> someTill' - (leafy (closing <|> (void $ lit end)) <* whitespaceWithoutParagraphBreak) - (lit end) - - externalLink = - P.label "hyperlink (example: [link name](https://destination.com))" $ - fmap (uncurry Doc.NamedLink) $ do - _ <- lit "[" - p <- leafies (void $ char ']') - _ <- lit "]" - _ <- lit "(" - target <- - fmap (Doc.Group . Doc.Join) $ - fmap pure link <|> some' (expr <|> wordy (char ')')) - _ <- lit ")" - pure (p, target) - - -- newline = P.optional (lit "\r") *> lit "\n" - - sp = P.try $ do - spaces <- P.takeWhile1P (Just "space") isSpace - close <- P.optional (P.lookAhead (lit "}}")) - case close of - Nothing -> guard $ ok spaces - Just _ -> pure () - pure spaces - where - ok s = length [() | '\n' <- s] < 2 - - spaced p = some' (p <* P.optional sp) - leafies close = wrap' . Doc.Paragraph <$> spaced (leafy close) - - list = bulletedList <|> numberedList - - bulletedList = wrap' . Doc.BulletedList <$> sepBy1' bullet listSep - numberedList = wrap' . 
Doc.NumberedList <$> sepBy1' numberedItem listSep - - listSep = P.try $ newline *> nonNewlineSpaces *> P.lookAhead (void bulletedStart <|> void numberedStart) - - bulletedStart = P.try $ do - r <- listItemStart' $ [] <$ P.satisfy bulletChar - P.lookAhead (P.satisfy isSpace) - pure r - where - bulletChar ch = ch == '*' || ch == '-' || ch == '+' - - listItemStart' :: P a -> P (Int, a) - listItemStart' gutter = P.try $ do - nonNewlineSpaces - col <- column <$> posP - parentCol <- S.gets parentListColumn - guard (col > parentCol) - (col,) <$> gutter - - numberedStart = - listItemStart' $ P.try (tokenP $ LP.decimal <* lit ".") - - listItemParagraph = fmap (wrap' . Doc.Paragraph) $ do - col <- column <$> posP - some' (leaf <* sep col) - where - -- Trickiness here to support hard line breaks inside of - -- a bulleted list, so for instance this parses as expected: - -- - -- * uno dos - -- tres quatro - -- * alice bob - -- carol dave eve - sep col = do - _ <- nonNewlineSpaces - _ <- - P.optional . P.try $ - newline - *> nonNewlineSpaces - *> do - col2 <- column <$> posP - guard $ col2 >= col - (P.notFollowedBy $ void numberedStart <|> void bulletedStart) - pure () - - numberedItem = P.label msg $ do - (col, s) <- numberedStart - (s,) - <$> ( fmap (uncurry Doc.Column) $ do - p <- nonNewlineSpaces *> listItemParagraph - subList <- - local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> list) - pure (p, subList) - ) - where - msg = "numbered list (examples: 1. item1, 8. start numbering at '8')" - - bullet = fmap (uncurry Doc.Column) . P.label "bullet (examples: * item1, - item2)" $ do - (col, _) <- bulletedStart - p <- nonNewlineSpaces *> listItemParagraph - subList <- - local - (\e -> e {parentListColumn = col}) - (P.optional $ listSep *> list) - pure (p, subList) - - newline = P.label "newline" $ lit "\n" <|> lit "\r\n" - - -- ## Section title - -- - -- A paragraph under this section. - -- Part of the same paragraph. Blanklines separate paragraphs. - -- - -- ### A subsection title - -- - -- A paragraph under this subsection. - - -- # A section title (not a subsection) - section :: P DocTree - section = fmap (wrap' . uncurry Doc.Section) $ do - ns <- S.gets parentSections - hashes <- P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp - title <- paragraph <* CP.space - let m = length hashes + head ns - body <- - local (\env -> env {parentSections = (m : (tail ns))}) $ - P.many (sectionElem <* CP.space) - pure $ (title, body) - - wrap' :: Doc.Top [Token Lexeme] DocTree -> DocTree - wrap' doc = ann doc :< doc - - wrapSimple2 :: (Annotated a, Annotated b) => (a -> b -> Doc.Top [Token Lexeme] DocTree) -> a -> b -> DocTree - wrapSimple2 fn a b = ann a <> ann b :< fn a b - -lexemes' :: P [Token Lexeme] -> P [Token Lexeme] -lexemes' eof = - -- NB: `postLex` requires the token stream to start with an `Open`, otherwise it can’t create a `T`, so this adds one, - -- runs `postLex`, then removes it. - fmap (tail . postLex . (Token (Open "fake") mempty mempty :)) $ - local (\env -> env {inLayout = True, opening = Just "DUMMY"}) do - p <- lexemes eof - -- deals with a final "unclosed" block at the end of `p`) - unclosed <- takeWhile (("DUMMY" /=) . fst) . layout <$> S.get - let pos = end $ last p - pure $ p <> replicate (length unclosed) (Token Close pos pos) - --- | Consumes an entire Unison “module”. 
-lexemes :: P [Token Lexeme] -> P [Token Lexeme] -lexemes eof = - P.optional space >> do - hd <- join <$> P.manyTill toks (P.lookAhead eof) - tl <- eof - pure $ hd <> tl - where - toks :: P [Token Lexeme] - toks = - doc2 - <|> doc - <|> token numeric - <|> token character - <|> reserved - <|> token blank - <|> token identifierLexemeP - <|> (asum . map token) [semi, textual, hash] - - doc :: P [Token Lexeme] - doc = open <+> (CP.space *> fmap fixup body) <+> (close <* space) - where - open = token'' (\t _ _ -> t) $ tok (Open <$> lit "[:") - close = tok (Close <$ lit ":]") - at = lit "@" - -- this removes some trailing whitespace from final textual segment - fixup [] = [] - fixup (Token (Textual (reverse -> txt)) start stop : []) = - [Token (Textual txt') start stop] - where - txt' = reverse (dropWhile (\c -> isSpace c && not (c == '\n')) txt) - fixup (h : t) = h : fixup t - - body :: P [Token Lexeme] - body = txt <+> (atk <|> pure []) - where - ch = (":]" <$ lit "\\:]") <|> ("@" <$ lit "\\@") <|> (pure <$> P.anySingle) - txt = tok (Textual . join <$> P.manyTill ch (P.lookAhead sep)) - sep = void at <|> void close - ref = at *> (tok identifierLexemeP <|> docTyp) - atk = (ref <|> docTyp) <+> body - docTyp = do - _ <- lit "[" - typ <- tok (P.manyTill P.anySingle (P.lookAhead (lit "]"))) - _ <- lit "]" *> CP.space - t <- tok identifierLexemeP - pure $ (fmap Reserved <$> typ) <> t - - blank = - separated wordySep do - _ <- char '_' - seg <- P.optional wordyIdSegP - pure (Blank (maybe "" (Text.unpack . NameSegment.toUnescapedText) seg)) - - semi = char ';' $> Semi False - textual = Textual <$> quoted - quoted = quotedRaw <|> quotedSingleLine - quotedRaw = do - _ <- lit "\"\"\"" - n <- many (char '"') - _ <- optional (char '\n') -- initial newline is skipped - s <- P.manyTill P.anySingle (lit (replicate (length n + 3) '"')) - col0 <- column <$> posP - let col = col0 - (length n) - 3 -- this gets us first col of closing quotes - let leading = replicate (max 0 (col - 1)) ' ' - -- a last line that's equal to `leading` is ignored, since leading - -- spaces up to `col` are not considered part of the string - let tweak l = case reverse l of - last : rest - | col > 1 && last == leading -> reverse rest - | otherwise -> l - [] -> [] - pure $ case tweak (lines s) of - [] -> s - ls - | all (\l -> List.isPrefixOf leading l || all isSpace l) ls -> List.intercalate "\n" (drop (length leading) <$> ls) - | otherwise -> s - quotedSingleLine = char '"' *> P.manyTill (LP.charLiteral <|> sp) (char '"') - where - sp = lit "\\s" $> ' ' - character = Character <$> (char '?' *> (spEsc <|> LP.charLiteral)) - where - spEsc = P.try (char '\\' *> char 's' $> ' ') - - numeric = bytes <|> otherbase <|> float <|> intOrNat - where - intOrNat = P.try $ num <$> sign <*> LP.decimal - float = do - _ <- P.try (P.lookAhead (sign >> (LP.decimal :: P Int) >> (char '.' 
<|> char 'e' <|> char 'E'))) -- commit after this - start <- posP - sign <- fromMaybe "" <$> sign - base <- P.takeWhile1P (Just "base") isDigit - decimals <- - P.optional $ - let missingFractional = err start (MissingFractional $ base <> ".") - in liftA2 (<>) (lit ".") (P.takeWhile1P (Just "decimals") isDigit <|> missingFractional) - exp <- P.optional $ do - e <- map toLower <$> (lit "e" <|> lit "E") - sign <- fromMaybe "" <$> optional (lit "+" <|> lit "-") - let missingExp = err start (MissingExponent $ base <> fromMaybe "" decimals <> e <> sign) - exp <- P.takeWhile1P (Just "exponent") isDigit <|> missingExp - pure $ e <> sign <> exp - pure $ Numeric (sign <> base <> fromMaybe "" decimals <> fromMaybe "" exp) - - bytes = do - start <- posP - _ <- lit "0xs" - s <- map toLower <$> P.takeWhileP (Just "hexidecimal character") isAlphaNum - case Bytes.fromBase16 $ Bytes.fromWord8s (fromIntegral . ord <$> s) of - Left _ -> err start (InvalidBytesLiteral $ "0xs" <> s) - Right bs -> pure (Bytes bs) - otherbase = octal <|> hex - octal = do - start <- posP - commitAfter2 sign (lit "0o") $ \sign _ -> - fmap (num sign) LP.octal <|> err start InvalidOctalLiteral - hex = do - start <- posP - commitAfter2 sign (lit "0x") $ \sign _ -> - fmap (num sign) LP.hexadecimal <|> err start InvalidHexLiteral - - num :: Maybe String -> Integer -> Lexeme - num sign n = Numeric (fromMaybe "" sign <> show n) - sign = P.optional (lit "+" <|> lit "-") - - hash = Hash <$> P.try shortHashP - - reserved :: P [Token Lexeme] - reserved = - token' (\ts _ _ -> ts) $ - braces - <|> parens - <|> brackets - <|> commaSeparator - <|> delim - <|> delayOrForce - <|> keywords - <|> layoutKeywords - where - keywords = - -- yes "wordy" - just like a wordy keyword like "true", the literal "." (as in the dot in - -- "forall a. a -> a") is considered the keyword "." so long as it is either followed by EOF, a space, or some - -- non-wordy character (because ".foo" is a single identifier lexeme) - wordyKw "." - <|> symbolyKw ":" - <|> openKw "@rewrite" - <|> symbolyKw "@" - <|> symbolyKw "||" - <|> symbolyKw "|" - <|> symbolyKw "&&" - <|> wordyKw "true" - <|> wordyKw "false" - <|> wordyKw "use" - <|> wordyKw "forall" - <|> wordyKw "∀" - <|> wordyKw "termLink" - <|> wordyKw "typeLink" - - wordyKw s = separated wordySep (kw s) - symbolyKw s = separated (not . symbolyIdChar) (kw s) - - kw :: String -> P [Token Lexeme] - kw s = tokenP (lit s) <&> \token -> [Reserved <$> token] - - layoutKeywords :: P [Token Lexeme] - layoutKeywords = - ifElse - <|> withKw - <|> openKw "match" - <|> openKw "handle" - <|> typ - <|> arr - <|> rewriteArr - <|> eq - <|> openKw "cases" - <|> openKw "where" - <|> openKw "let" - <|> openKw "do" - where - ifElse = - openKw "if" - <|> closeKw' (Just "then") ["if"] (lit "then") - <|> closeKw' (Just "else") ["then"] (lit "else") - modKw = typeModifiersAlt (openKw1 wordySep . Text.unpack) - typeOrAbilityKw = typeOrAbilityAlt (openTypeKw1 . 
Text.unpack) - typ = modKw <|> typeOrAbilityKw - - withKw = do - [Token _ pos1 pos2] <- wordyKw "with" - env <- S.get - let l = layout env - case findClose ["handle", "match"] l of - Nothing -> err pos1 (CloseWithoutMatchingOpen msgOpen "'with'") - where - msgOpen = "'handle' or 'match'" - Just (withBlock, n) -> do - let b = withBlock <> "-with" - S.put (env {layout = drop n l, opening = Just b}) - let opens = [Token (Open "with") pos1 pos2] - pure $ replicate n (Token Close pos1 pos2) ++ opens - - -- In `structural/unique type` and `structural/unique ability`, - -- only the `structural` or `unique` opens a layout block, - -- and `ability` and `type` are just keywords. - openTypeKw1 t = do - b <- S.gets (topBlockName . layout) - case b of - Just mod | Set.member (Text.pack mod) typeModifiers -> wordyKw t - _ -> openKw1 wordySep t - - -- layout keyword which bumps the layout column by 1, rather than looking ahead - -- to the next token to determine the layout column - openKw1 :: (Char -> Bool) -> String -> P [Token Lexeme] - openKw1 sep kw = do - Token kw pos0 pos1 <- tokenP $ separated sep (lit kw) - S.modify (\env -> env {layout = (kw, column $ inc pos0) : layout env}) - pure [Token (Open kw) pos0 pos1] - - eq = do - [Token _ start end] <- symbolyKw "=" - env <- S.get - case topBlockName (layout env) of - -- '=' does not open a layout block if within a type declaration - Just t | t == "type" || Set.member (Text.pack t) typeModifiers -> pure [Token (Reserved "=") start end] - Just _ -> S.put (env {opening = Just "="}) >> pure [Token (Open "=") start end] - _ -> err start LayoutError - - rewriteArr = do - [Token _ start end] <- symbolyKw "==>" - env <- S.get - S.put (env {opening = Just "==>"}) >> pure [Token (Open "==>") start end] - - arr = do - [Token _ start end] <- symbolyKw "->" - env <- S.get - -- -> introduces a layout block if we're inside a `match with` or `cases` - case topBlockName (layout env) of - Just match | match `elem` matchWithBlocks -> do - S.put (env {opening = Just "->"}) - pure [Token (Open "->") start end] - _ -> pure [Token (Reserved "->") start end] - - -- a bit of lookahead here to reserve }} for closing a documentation block - braces = open "{" <|> close ["{"] p - where - p = do - l <- lit "}" - -- if we're within an existing {{ }} block, inLayout will be false - -- so we can actually allow }} to appear in normal code - inLayout <- S.gets inLayout - when (not inLayout) $ void $ P.lookAhead (P.satisfy (/= '}')) - pure l - matchWithBlocks = ["match-with", "cases"] - parens = open "(" <|> close ["("] (lit ")") - brackets = open "[" <|> close ["["] (lit "]") - -- `allowCommaToClose` determines if a comma should close inner blocks. - -- Currently there is a set of blocks where `,` is not treated specially - -- and it just emits a Reserved ",". 
There are currently only three: - -- `cases`, `match-with`, and `{` - allowCommaToClose match = not $ match `elem` ("{" : matchWithBlocks) - commaSeparator = do - env <- S.get - case topBlockName (layout env) of - Just match - | allowCommaToClose match -> - blockDelimiter ["[", "("] (lit ",") - _ -> fail "this comma is a pattern separator" - - delim = P.try $ do - ch <- P.satisfy (\ch -> ch /= ';' && Set.member ch delimiters) - pos <- posP - pure [Token (Reserved [ch]) pos (inc pos)] - - delayOrForce = separated ok $ do - token <- tokenP $ P.satisfy isDelayOrForce - pure [token <&> \op -> Reserved [op]] - where - ok c = isDelayOrForce c || isSpace c || isAlphaNum c || Set.member c delimiters || c == '\"' - --- | If it's a multi-line verbatim block we trim any whitespace representing --- indentation from the pretty-printer. --- --- E.g. --- --- @@ --- {{ --- # Heading --- ''' --- code --- indented --- ''' --- }} --- @@ --- --- Should lex to the text literal "code\n indented". --- --- If there's text in the literal that has LESS trailing whitespace than the --- opening delimiters, we don't trim it at all. E.g. --- --- @@ --- {{ --- # Heading --- ''' --- code --- ''' --- }} --- @@ --- --- Is parsed as " code". --- --- Trim the expected amount of whitespace from a text literal: --- >>> trimIndentFromVerbatimBlock 2 " code\n indented" --- "code\n indented" --- --- If the text literal has less leading whitespace than the opening delimiters, --- leave it as-is --- >>> trimIndentFromVerbatimBlock 2 "code\n indented" --- "code\n indented" -trimIndentFromVerbatimBlock :: Int -> String -> String -trimIndentFromVerbatimBlock leadingSpaces txt = fromMaybe txt $ do - List.intercalate "\n" <$> for (lines txt) \line -> do - -- If any 'stripPrefix' fails, we fail and return the unaltered text - case List.stripPrefix (replicate leadingSpaces ' ') line of - Just stripped -> Just stripped - Nothing -> - -- If it was a line with all white-space, just use an empty line, - -- this can happen easily in editors which trim trailing whitespace. - if all isSpace line - then Just "" - else Nothing - --- Trim leading/trailing whitespace from around delimiters, e.g. 
--- --- {{ --- '''___ <- whitespace here including newline --- text block --- 👇 or here --- __''' --- }} --- >>> trimAroundDelimiters " \n text block \n " --- " text block " --- --- Should leave leading and trailing line untouched if it contains non-whitespace, e.g.: --- --- ''' leading whitespace --- text block --- trailing whitespace: ''' --- >>> trimAroundDelimiters " leading whitespace\n text block \ntrailing whitespace: " --- " leading whitespace\n text block \ntrailing whitespace: " --- --- Should keep trailing newline if it's the only thing on the line, e.g.: --- --- ''' --- newline below --- --- ''' --- >>> trimAroundDelimiters "\nnewline below\n\n" --- "newline below\n\n" -trimAroundDelimiters :: String -> String -trimAroundDelimiters txt = - txt - & ( \s -> - List.breakOn "\n" s - & \case - (prefix, suffix) - | all isSpace prefix -> drop 1 suffix - | otherwise -> prefix <> suffix - ) - & ( \s -> - List.breakOnEnd "\n" s - & \case - (_prefix, "") -> s - (prefix, suffix) - | all isSpace suffix -> dropTrailingNewline prefix - | otherwise -> prefix <> suffix - ) - where - dropTrailingNewline = \case - [] -> [] - (x : xs) -> NonEmpty.init (x NonEmpty.:| xs) - separated :: (Char -> Bool) -> P a -> P a separated ok p = P.try $ p <* P.lookAhead (void (P.satisfy ok) <|> P.eof) -open :: String -> P [Token Lexeme] -open b = do - token <- tokenP $ lit b - env <- S.get - S.put (env {opening = Just b}) - pure [Open b <$ token] - -openKw :: String -> P [Token Lexeme] -openKw s = separated wordySep $ do - token <- tokenP $ lit s - env <- S.get - S.put (env {opening = Just s}) - pure [Open <$> token] - wordySep :: Char -> Bool wordySep c = isSpace c || not (wordyIdChar c) -tok :: P a -> P [Token a] -tok p = do - token <- tokenP p - pure [token] - -- An identifier is a non-empty dot-delimited list of segments, with an optional leading dot, where each segment is -- symboly (comprised of only symbols) or wordy (comprised of only alphanums). -- @@ -1258,23 +201,6 @@ identifierP = do NameSegment.ReservedOperator s -> ReservedSymbolyId (Text.unpack s) NameSegment.ReservedWord s -> ReservedWordyId (Text.unpack s) --- An identifier is a non-empty dot-delimited list of segments, with an optional leading dot, where each segment is --- symboly (comprised of only symbols) or wordy (comprised of only alphanums). --- --- Examples: --- --- foo --- .foo.++.doc --- `.`.`..` (This is a two-segment identifier without a leading dot: "." then "..") -identifierLexemeP :: P Lexeme -identifierLexemeP = identifierLexeme <$> identifierP - -identifierLexeme :: HQ'.HashQualified Name -> Lexeme -identifierLexeme name = - if Name.isSymboly (HQ'.toName name) - then SymbolyId name - else WordyId name - wordyIdSegP :: P NameSegment wordyIdSegP = PI.withParsecT (fmap (ReservedWordyId . Text.unpack)) NameSegment.wordyP @@ -1283,59 +209,11 @@ shortHashP :: P ShortHash shortHashP = PI.withParsecT (fmap (InvalidShortHash . 
Text.unpack)) ShortHash.shortHashP -blockDelimiter :: [String] -> P String -> P [Token Lexeme] -blockDelimiter open closeP = do - Token close pos1 pos2 <- tokenP closeP - env <- S.get - case findClose open (layout env) of - Nothing -> err pos1 (UnexpectedDelimiter (quote close)) - where - quote s = "'" <> s <> "'" - Just (_, n) -> do - S.put (env {layout = drop (n - 1) (layout env)}) - let delims = [Token (Reserved close) pos1 pos2] - pure $ replicate (n - 1) (Token Close pos1 pos2) ++ delims - -close :: [String] -> P String -> P [Token Lexeme] -close = close' Nothing - -closeKw' :: Maybe String -> [String] -> P String -> P [Token Lexeme] -closeKw' reopenBlockname open closeP = close' reopenBlockname open (separated wordySep closeP) - -close' :: Maybe String -> [String] -> P String -> P [Token Lexeme] -close' reopenBlockname open closeP = do - Token close pos1 pos2 <- tokenP closeP - env <- S.get - case findClose open (layout env) of - Nothing -> err pos1 (CloseWithoutMatchingOpen msgOpen (quote close)) - where - msgOpen = List.intercalate " or " (quote <$> open) - quote s = "'" <> s <> "'" - Just (_, n) -> do - S.put (env {layout = drop n (layout env), opening = reopenBlockname}) - let opens = maybe [] (const $ [Token (Open close) pos1 pos2]) reopenBlockname - pure $ replicate n (Token Close pos1 pos2) ++ opens - -findClose :: [String] -> Layout -> Maybe (String, Int) -findClose _ [] = Nothing -findClose s ((h, _) : tl) = if h `elem` s then Just (h, 1) else fmap (1 +) <$> findClose s tl - -notLayout :: Token Lexeme -> Bool -notLayout t = case payload t of - Close -> False - Semi _ -> False - Open _ -> False - _ -> True - -- `True` if the tokens are adjacent, with no space separating the two touches :: Token a -> Token b -> Bool touches (end -> t) (start -> t2) = line t == line t2 && column t == column t2 -top :: Layout -> Column -top [] = 1 -top ((_, h) : _) = h - -- todo: make Layout a NonEmpty topBlockName :: Layout -> Maybe BlockName topBlockName [] = Nothing @@ -1344,122 +222,6 @@ topBlockName ((name, _) : _) = Just name pop :: [a] -> [a] pop = drop 1 -topLeftCorner :: Pos -topLeftCorner = Pos 1 1 - -data BlockTree a - = Block - -- | The token that opens the block - a - -- | “Stanzas” of nested tokens - [[BlockTree a]] - -- | The closing token, if any - (Maybe a) - | Leaf a - deriving (Functor, Foldable, Traversable) - -headToken :: BlockTree a -> a -headToken (Block a _ _) = a -headToken (Leaf a) = a - -instance (Show a) => Show (BlockTree a) where - show (Leaf a) = show a - show (Block open mid close) = - show open - ++ "\n" - ++ indent " " (intercalateMap "\n" (intercalateMap " " show) mid) - ++ "\n" - ++ maybe "" show close - where - indent by s = by ++ (s >>= go by) - go by '\n' = '\n' : by - go _ c = [c] - -reorderTree :: ([[BlockTree a]] -> [[BlockTree a]]) -> BlockTree a -> BlockTree a -reorderTree f (Block open mid close) = Block open (f (fmap (reorderTree f) <$> mid)) close -reorderTree _ l = l - -tree :: [Token Lexeme] -> BlockTree (Token Lexeme) -tree toks = one toks const - where - one (open@(payload -> Open _) : ts) k = many (Block open . 
stanzas) [] ts k - one (t : ts) k = k (Leaf t) ts - one [] k = k lastErr [] - where - lastErr = Leaf case drop (length toks - 1) toks of - [] -> Token (Err LayoutError) topLeftCorner topLeftCorner - (t : _) -> t {payload = Err LayoutError} - - many open acc [] k = k (open (reverse acc) Nothing) [] - many open acc (t@(payload -> Close) : ts) k = k (open (reverse acc) $ pure t) ts - many open acc ts k = one ts $ \t ts -> many open (t : acc) ts k - -stanzas :: [BlockTree (Token Lexeme)] -> [[BlockTree (Token Lexeme)]] -stanzas = - toList - . foldr - ( \tok (curr :| stanzas) -> case tok of - Leaf (Token (Semi _) _ _) -> [tok] :| curr : stanzas - _ -> (tok : curr) :| stanzas - ) - ([] :| []) - --- Moves type and ability declarations to the front of the token stream --- and move `use` statements to the front of each block -reorder :: [[BlockTree (Token Lexeme)]] -> [[BlockTree (Token Lexeme)]] -reorder = foldr fixup [] . sortWith f - where - f [] = 3 :: Int - f (t0 : _) = case payload $ headToken t0 of - Open mod | Set.member (Text.pack mod) typeModifiers -> 1 - Open typOrA | Set.member (Text.pack typOrA) typeOrAbility -> 1 - Reserved "use" -> 0 - _ -> 3 :: Int - -- after reordering can end up with trailing semicolon at the end of - -- a block, which we remove with this pass - fixup stanza [] = case Lens.unsnoc stanza of - Nothing -> [] - -- remove any trailing `Semi` from the last non-empty stanza - Just (init, Leaf (Token (Semi _) _ _)) -> [init] - -- don’t touch other stanzas - Just (_, _) -> [stanza] - fixup stanza tail = stanza : tail - --- | This turns the lexeme stream into a tree, reordering some lexeme subsequences. -preParse :: [Token Lexeme] -> BlockTree (Token Lexeme) -preParse = reorderTree reorder . tree - --- | A few transformations that happen between lexing and parsing. --- --- All of these things should move out of the lexer, and be applied in the parse. -postLex :: [Token Lexeme] -> [Token Lexeme] -postLex = toList . preParse . foldr tweak [] - -isDelayOrForce :: Char -> Bool -isDelayOrForce op = op == '\'' || op == '!' - --- Mapping between characters and their escape codes. Use parse/showEscapeChar to convert. -escapeChars :: [(Char, Char)] -escapeChars = - [ ('0', '\0'), - ('a', '\a'), - ('b', '\b'), - ('f', '\f'), - ('n', '\n'), - ('r', '\r'), - ('t', '\t'), - ('v', '\v'), - ('s', ' '), - ('\'', '\''), - ('"', '"'), - ('\\', '\\') - ] - --- Inverse of parseEscapeChar; map a character to its escaped version: -showEscapeChar :: Char -> Maybe Char -showEscapeChar c = - Map.lookup c (Map.fromList [(x, y) | (y, x) <- escapeChars]) - typeOrAbilityAlt :: (Alternative f) => (Text -> f a) -> f a typeOrAbilityAlt f = asum $ map f (toList typeOrAbility) @@ -1471,28 +233,6 @@ typeModifiersAlt f = inc :: Pos -> Pos inc (Pos line col) = Pos line (col + 1) -debugFilePreParse :: FilePath -> IO () -debugFilePreParse file = putStrLn . debugPreParse . preParse . lexer file . Text.unpack =<< readUtf8 file - -debugPreParse :: BlockTree (Token Lexeme) -> String -debugPreParse (Leaf (Token (Err (UnexpectedTokens msg)) start end)) = - (if start == end then msg1 else msg2) <> ":\n" <> msg - where - msg1 = "Error on line " <> show (line start) <> ", column " <> show (column start) - msg2 = - "Error on line " - <> show (line start) - <> ", column " - <> show (column start) - <> " - line " - <> show (line end) - <> ", column " - <> show (column end) -debugPreParse ts = show $ payload <$> ts - -debugPreParse' :: String -> String -debugPreParse' = debugPreParse . preParse . 
lexer "debugPreParse" - instance EP.ShowErrorComponent (Token Err) where showErrorComponent (Token err _ _) = go err where @@ -1504,35 +244,3 @@ instance EP.ShowErrorComponent (Token Err) where TextLiteralMissingClosingQuote s -> "This text literal missing a closing quote: " <> excerpt s e -> show e excerpt s = if length s < 15 then s else take 15 s <> "..." - -instance P.VisualStream [Token Lexeme] where - showTokens _ xs = - join . Nel.toList . S.evalState (traverse go xs) . end $ Nel.head xs - where - go :: Token Lexeme -> S.State Pos String - go tok = do - prev <- S.get - S.put $ end tok - pure $ pad prev (start tok) ++ pretty (payload tok) - pretty (Open s) = s - pretty (Reserved w) = w - pretty (Textual t) = '"' : t ++ ['"'] - pretty (Character c) = - case showEscapeChar c of - Just c -> "?\\" ++ [c] - Nothing -> '?' : [c] - pretty (WordyId n) = Text.unpack (HQ'.toText n) - pretty (SymbolyId n) = Text.unpack (HQ'.toText n) - pretty (Blank s) = "_" ++ s - pretty (Numeric n) = n - pretty (Hash sh) = show sh - pretty (Err e) = show e - pretty (Bytes bs) = "0xs" <> show bs - pretty Close = "" - pretty (Semi True) = "" - pretty (Semi False) = ";" - pretty (Doc d) = show d - pad (Pos line1 col1) (Pos line2 col2) = - if line1 == line2 - then replicate (col2 - col1) ' ' - else replicate (line2 - line1) '\n' ++ replicate col2 ' ' diff --git a/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs b/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs new file mode 100644 index 0000000000..dcaf9ca6d3 --- /dev/null +++ b/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs @@ -0,0 +1,910 @@ +{-# LANGUAGE TemplateHaskell #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Unison.Syntax.Lexer.Unison + ( Token (..), + Line, + Column, + Err (..), + Pos (..), + Lexeme (..), + lexer, + preParse, + escapeChars, + debugFilePreParse, + debugPreParse, + debugPreParse', + showEscapeChar, + touches, + + -- * Character classifiers + wordyIdChar, + wordyIdStartChar, + symbolyIdChar, + + -- * Error formatting + formatTrivialError, + displayLexeme, + ) +where + +import Control.Lens qualified as Lens +import Control.Monad.State qualified as S +import Data.Char (isAlphaNum, isDigit, isSpace, ord, toLower) +import Data.Foldable qualified as Foldable +import Data.List qualified as List +import Data.List.NonEmpty (NonEmpty ((:|))) +import Data.List.NonEmpty qualified as Nel +import Data.Map.Strict qualified as Map +import Data.Set qualified as Set +import Data.Text qualified as Text +import GHC.Exts (sortWith) +import Text.Megaparsec qualified as P +import Text.Megaparsec.Char (char) +import Text.Megaparsec.Char qualified as CP +import Text.Megaparsec.Char.Lexer qualified as LP +import Text.Megaparsec.Error qualified as EP +import Unison.HashQualifiedPrime qualified as HQ' +import Unison.Name (Name) +import Unison.Name qualified as Name +import Unison.NameSegment qualified as NameSegment (docSegment) +import Unison.NameSegment.Internal qualified as NameSegment +import Unison.Prelude +import Unison.ShortHash (ShortHash) +import Unison.ShortHash qualified as SH +import Unison.Syntax.HashQualifiedPrime qualified as HQ' (toText) +import Unison.Syntax.Lexer +import Unison.Syntax.Lexer.Token (posP, tokenP) +import Unison.Syntax.Name qualified as Name (isSymboly, toText, unsafeParseText) +import Unison.Syntax.Parser.Doc qualified as Doc +import Unison.Syntax.Parser.Doc.Data qualified as Doc +import Unison.Syntax.ReservedWords (delimiters, typeModifiers, typeOrAbility) +import Unison.Util.Bytes qualified as Bytes +import Unison.Util.Monoid 
(intercalateMap) + +-- Design principle: +-- `[Lexeme]` should be sufficient information for parsing without +-- further knowledge of spacing or indentation levels +-- any knowledge of comments +data Lexeme + = Open String -- start of a block + | Semi IsVirtual -- separator between elements of a block + | Close -- end of a block + | Reserved String -- reserved tokens such as `{`, `(`, `type`, `of`, etc + | Textual String -- text literals, `"foo bar"` + | Character Char -- character literals, `?X` + | WordyId (HQ'.HashQualified Name) -- a (non-infix) identifier. invariant: last segment is wordy + | SymbolyId (HQ'.HashQualified Name) -- an infix identifier. invariant: last segment is symboly + | Blank String -- a typed hole or placeholder + | Numeric String -- numeric literals, left unparsed + | Bytes Bytes.Bytes -- bytes literals + | Hash ShortHash -- hash literals + | Err Err + | Doc (Doc.UntitledSection (Doc.Tree [Token Lexeme])) + deriving stock (Eq, Show, Ord) + +type IsVirtual = Bool -- is it a virtual semi or an actual semi? + +token :: P Lexeme -> P [Token Lexeme] +token = token' (\a start end -> [Token a start end]) + +-- Token parser: strips trailing whitespace and comments after a +-- successful parse, and also takes care of emitting layout tokens +-- (such as virtual semicolons and closing tokens). +token' :: (a -> Pos -> Pos -> [Token Lexeme]) -> P a -> P [Token Lexeme] +token' tok p = LP.lexeme space (token'' tok p) + +-- Token parser implementation which leaves trailing whitespace and comments +-- but does emit layout tokens such as virtual semicolons and closing tokens. +token'' :: (a -> Pos -> Pos -> [Token Lexeme]) -> P a -> P [Token Lexeme] +token'' tok p = do + start <- posP + -- We save the current state so we can backtrack the state if `p` fails. + env <- S.get + layoutToks <- case opening env of + -- If we're opening a block named b, we push (b, currentColumn) onto + -- the layout stack. Example: + -- + -- blah = cases + -- {- A comment -} + -- -- A one-line comment + -- 0 -> "hi" + -- 1 -> "bye" + -- + -- After the `cases` token, the state will be opening = Just "cases", + -- meaning the parser is searching for the next non-whitespace/comment + -- character to determine the leftmost column of the `cases` block. + -- That will be the column of the `0`. + Just blockname -> + -- special case - handling of empty blocks, as in: + -- foo = + -- bar = 42 + if blockname == "=" && column start <= top l && not (null l) + then do + S.put (env {layout = (blockname, column start + 1) : l, opening = Nothing}) + pops start + else [] <$ S.put (env {layout = layout', opening = Nothing}) + where + layout' = (blockname, column start) : l + l = layout env + -- If we're not opening a block, we potentially pop from + -- the layout stack and/or emit virtual semicolons. 
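+    -- For example (roughly), in
+    --   foo = bar
+    --   baz = qux
+    -- `baz` starts at the same column as `foo`, so a virtual semicolon is
+    -- emitted between the two bindings.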
+ Nothing -> if inLayout env then pops start else pure [] + beforeTokenPos <- posP + a <- p <|> (S.put env >> fail "resetting state") + endPos <- posP + pure $ layoutToks ++ tok a beforeTokenPos endPos + where + pops :: Pos -> P [Token Lexeme] + pops p = do + env <- S.get + let l = layout env + if top l == column p && topContainsVirtualSemis l + then pure [Token (Semi True) p p] + else + if column p > top l || topHasClosePair l + then pure [] + else + if column p < top l + then S.put (env {layout = pop l}) >> ((Token Close p p :) <$> pops p) + else error "impossible" + + -- don't emit virtual semis in (, {, or [ blocks + topContainsVirtualSemis :: Layout -> Bool + topContainsVirtualSemis = \case + [] -> False + ((name, _) : _) -> name /= "(" && name /= "{" && name /= "[" + + topHasClosePair :: Layout -> Bool + topHasClosePair [] = False + topHasClosePair ((name, _) : _) = + name `elem` ["DUMMY", "{", "(", "[", "handle", "match", "if", "then"] + +showErrorFancy :: (P.ShowErrorComponent e) => P.ErrorFancy e -> String +showErrorFancy = \case + P.ErrorFail msg -> msg + P.ErrorIndentation ord ref actual -> + "incorrect indentation (got " + <> show (P.unPos actual) + <> ", should be " + <> p + <> show (P.unPos ref) + <> ")" + where + p = case ord of + LT -> "less than " + EQ -> "equal to " + GT -> "greater than " + P.ErrorCustom a -> P.showErrorComponent a + +lexer :: String -> String -> [Token Lexeme] +lexer scope rem = + case flip S.evalState env0 $ P.runParserT (lexemes eof) scope rem of + Left e -> + let errsWithSourcePos = + fst $ + P.attachSourcePos + P.errorOffset + (toList (P.bundleErrors e)) + (P.bundlePosState e) + errorToTokens :: (EP.ParseError String (Token Err), P.SourcePos) -> [Token Lexeme] + errorToTokens (err, top) = case err of + P.FancyError _ (customErrs -> es) | not (null es) -> es + P.FancyError _errOffset es -> + let msg = intercalateMap "\n" showErrorFancy es + in [Token (Err (UnexpectedTokens msg)) (toPos top) (toPos top)] + P.TrivialError _errOffset mayUnexpectedTokens expectedTokens -> + let unexpectedStr :: Set String + unexpectedStr = + mayUnexpectedTokens + & fmap errorItemToString + & maybeToList + & Set.fromList + errorLength :: Int + errorLength = case Set.toList unexpectedStr of + [] -> 0 + (x : _) -> length x + expectedStr :: Set String + expectedStr = + expectedTokens + & Set.map errorItemToString + err = UnexpectedTokens $ formatTrivialError unexpectedStr expectedStr + startPos = toPos top + -- This is just an attempt to highlight errors better in source excerpts. + -- It may not work in all cases, but should generally provide a better experience. 
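+              -- For example (illustrative, assuming an unexpected token "foo"
+              -- at line 3, column 7): errorLength is 3, so the reported span
+              -- becomes 3:7-3:10 rather than the zero-width span 3:7-3:7.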
+              endPos = startPos & \(Pos l c) -> Pos l (c + errorLength)
+           in [Token (Err err) startPos endPos]
+    in errsWithSourcePos >>= errorToTokens
+  Right ts -> postLex $ Token (Open scope) topLeftCorner topLeftCorner : ts
+  where
+    eof :: P [Token Lexeme]
+    eof = P.try do
+      p <- P.eof >> posP
+      n <- maybe 0 (const 1) <$> S.gets opening
+      l <- S.gets layout
+      pure $ replicate (length l + n) (Token Close p p)
+    errorItemToString :: EP.ErrorItem Char -> String
+    errorItemToString = \case
+      (P.Tokens ts) -> Foldable.toList ts
+      (P.Label ts) -> Foldable.toList ts
+      (P.EndOfInput) -> "end of input"
+    customErrs es = [Err <$> e | P.ErrorCustom e <- toList es]
+    toPos (P.SourcePos _ line col) = Pos (P.unPos line) (P.unPos col)
+    env0 = ParsingEnv [] (Just scope) True [0] 0
+
+-- | A hacky postprocessing pass to do some cleanup of stuff that's annoying to
+--   fix without adding more state to the lexer:
+--   - 1+1 lexes as [1, +1]; convert this to [1, +, 1]
+--   - when a semi is followed by a virtual semi, drop the virtual, letting you
+--     write
+--       foo x = action1;
+--       2
+--   - a semi immediately after the first Open is ignored
+tweak :: (Token Lexeme) -> [Token Lexeme] -> [Token Lexeme]
+tweak h@(Token (Semi False) _ _) (Token (Semi True) _ _ : t) = h : t
+-- __NB__: This case only exists to guard against the following one
+tweak h@(Token (Reserved _) _ _) t = h : t
+tweak t1 (t2@(Token (Numeric num) _ _) : rem)
+  | notLayout t1 && touches t1 t2 && isSigned num =
+      t1
+        : Token
+          (SymbolyId (HQ'.fromName (Name.unsafeParseText (Text.pack (take 1 num)))))
+          (start t2)
+          (inc $ start t2)
+        : Token (Numeric (drop 1 num)) (inc $ start t2) (end t2)
+        : rem
+  where
+    isSigned num = all (\ch -> ch == '-' || ch == '+') $ take 1 num
+tweak h t = h : t
+
+formatTrivialError :: Set String -> Set String -> [Char]
+formatTrivialError unexpectedTokens expectedTokens =
+  let unexpectedMsg = case Set.toList unexpectedTokens of
+        [] -> "I found something I didn't expect."
+        [x] ->
+          let article = case x of
+                (c : _) | c `elem` ("aeiou" :: String) -> "an"
+                _ -> "a"
+           in "I was surprised to find " <> article <> " " <> x <> " here."
+        xs -> "I was surprised to find these:\n\n* " <> List.intercalate "\n* " xs
+      expectedMsg = case Set.toList expectedTokens of
+        [] -> Nothing
+        xs -> Just $ "\nI was expecting one of these instead:\n\n* " <> List.intercalate "\n* " xs
+   in concat $ catMaybes [Just unexpectedMsg, expectedMsg]
+
+displayLexeme :: Lexeme -> String
+displayLexeme = \case
+  Open o -> o
+  Semi True -> "end of stanza"
+  Semi False -> "semicolon"
+  Close -> "end of section"
+  Reserved r -> "'" <> r <> "'"
+  Textual t -> "\"" <> t <> "\""
+  Character c -> "?" <> [c]
+  WordyId hq -> Text.unpack (HQ'.toTextWith Name.toText hq)
+  SymbolyId hq -> Text.unpack (HQ'.toTextWith Name.toText hq)
+  Blank b -> b
+  Numeric n -> n
+  Bytes _b -> "bytes literal"
+  Hash h -> Text.unpack (SH.toText h)
+  Err e -> show e
+  Doc _ -> "doc structure"
+
+-- | The `Doc` lexer as documented on unison-lang.org
+doc2 :: P [Token Lexeme]
+doc2 = do
+  -- Ensure we're at a doc before we start consuming tokens
+  P.lookAhead (lit "{{")
+  openStart <- posP
+  -- Produce any layout tokens, such as closing the last open block or virtual semicolons
+  -- We don't use 'token' on "{{" directly because we don't want to duplicate layout
+  -- tokens if we do the rewrite hack for type-docs below.
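+  -- (Illustrative note, inferred from the code below: 'beforeStartToks' is
+  -- spliced back in front of whichever token sequence we end up producing,
+  -- so the layout tokens are emitted exactly once whether or not the
+  -- type-doc rewrite fires.)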
+  beforeStartToks <- token' ignore (pure ())
+  void $ lit "{{"
+  openEnd <- posP
+  CP.space
+  env0 <- S.get
+  -- Disable layout while parsing the doc block and reset the section number
+  (docTok, closeTok) <- local
+    ( \env ->
+        env
+          { inLayout = False,
+            parentSections = 0 : (parentSections env0)
+          }
+    )
+    do
+      body <- Doc.untitledSection lexemes' . P.lookAhead $ () <$ lit "}}"
+      closeStart <- posP
+      lit "}}"
+      closeEnd <- posP
+      pure (Token (Doc body) openStart closeEnd, Token Close closeStart closeEnd)
+  -- Parse any layout tokens after the doc block, e.g. virtual semicolon
+  endToks <- token' ignore (pure ())
+  -- Hack to allow anonymous doc blocks before type decls
+  -- {{ Some docs }}          Foo.doc = {{ Some docs }}
+  -- ability Foo where   =>   ability Foo where
+  --
+  -- __FIXME__: This should be done _after_ parsing, not in lexing.
+  tn <- subsequentTypeName
+  pure $
+    beforeStartToks <> case (tn) of
+      -- If we're followed by a type, we rewrite the doc block to be a named doc block.
+      Just (WordyId tname)
+        | isTopLevel ->
+            Token (WordyId (HQ'.fromName (Name.snoc (HQ'.toName tname) NameSegment.docSegment))) openStart openEnd
+              : Token (Open "=") openStart openEnd
+              : docTok
+              -- We need an extra 'Close' here because we added an extra Open above.
+              : closeTok
+              : endToks
+        where
+          isTopLevel = length (layout env0) + maybe 0 (const 1) (opening env0) == 1
+      _ -> docTok : endToks
+  where
+    -- DUPLICATED
+    wordyKw kw = separated wordySep (lit kw)
+    subsequentTypeName = P.lookAhead . P.optional $ do
+      let lit' s = lit s <* sp
+      let modifier = typeModifiersAlt (lit' . Text.unpack)
+      let typeOrAbility' = typeOrAbilityAlt (wordyKw . Text.unpack)
+      _ <- optional modifier *> typeOrAbility' *> sp
+      Token name start stop <- tokenP identifierP
+      if Name.isSymboly (HQ'.toName name)
+        then P.customFailure (Token (InvalidSymbolyId (Text.unpack (HQ'.toTextWith Name.toText name))) start stop)
+        else pure (WordyId name)
+    ignore _ _ _ = []
+    -- DUPLICATED
+    sp = P.try $ do
+      spaces <- P.takeWhile1P (Just "space") isSpace
+      close <- P.optional (P.lookAhead (lit "}}"))
+      case close of
+        Nothing -> guard $ ok spaces
+        Just _ -> pure ()
+      pure spaces
+      where
+        ok s = length [() | '\n' <- s] < 2
+
+lexemes' :: P () -> P [Token Lexeme]
+lexemes' eof =
+  -- NB: `postLex` requires the token stream to start with an `Open`, otherwise it can’t create a `T`, so this adds one,
+  -- runs `postLex`, then removes it.
+  fmap (tail . postLex . (Token (Open "fake") mempty mempty :)) $
+    local (\env -> env {inLayout = True, opening = Just "DUMMY"}) do
+      p <- lexemes $ [] <$ eof
+      -- deals with a final "unclosed" block at the end of `p`
+      unclosed <- takeWhile (("DUMMY" /=) . fst) . layout <$> S.get
+      let pos = end $ last p
+      pure $ p <> replicate (length unclosed) (Token Close pos pos)
+
+-- | Consumes an entire Unison “module”.
+lexemes :: P [Token Lexeme] -> P [Token Lexeme]
+lexemes eof =
+  P.optional space >> do
+    hd <- join <$> P.manyTill toks (P.lookAhead eof)
+    tl <- eof
+    pure $ hd <> tl
+  where
+    toks :: P [Token Lexeme]
+    toks =
+      doc2
+        <|> doc
+        <|> token numeric
+        <|> token character
+        <|> reserved
+        <|> token blank
+        <|> token identifierLexemeP
+        <|> (asum . map token) [semi, textual, hash]
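+    -- (Illustrative note, an observation rather than original commentary:
+    -- the order of alternatives matters here, e.g. `numeric` is tried before
+    -- `identifierLexemeP` so that "+1" lexes as a signed numeric literal
+    -- rather than as the operator `+` applied to `1`.)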
+
+    doc :: P [Token Lexeme]
+    doc = open <+> (CP.space *> fmap fixup body) <+> (close <* space)
+      where
+        open = token'' (\t _ _ -> t) $ tok (Open <$> lit "[:")
+        close = tok (Close <$ lit ":]")
+        at = lit "@"
+        -- this removes some trailing whitespace from the final textual segment
+        fixup [] = []
+        fixup (Token (Textual (reverse -> txt)) start stop : []) =
+          [Token (Textual txt') start stop]
+          where
+            txt' = reverse (dropWhile (\c -> isSpace c && not (c == '\n')) txt)
+        fixup (h : t) = h : fixup t
+
+        body :: P [Token Lexeme]
+        body = txt <+> (atk <|> pure [])
+          where
+            ch = (":]" <$ lit "\\:]") <|> ("@" <$ lit "\\@") <|> (pure <$> P.anySingle)
+            txt = tok (Textual . join <$> P.manyTill ch (P.lookAhead sep))
+            sep = void at <|> void close
+            ref = at *> (tok identifierLexemeP <|> docTyp)
+            atk = (ref <|> docTyp) <+> body
+            docTyp = do
+              _ <- lit "["
+              typ <- tok (P.manyTill P.anySingle (P.lookAhead (lit "]")))
+              _ <- lit "]" *> CP.space
+              t <- tok identifierLexemeP
+              pure $ (fmap Reserved <$> typ) <> t
+
+    blank =
+      separated wordySep do
+        _ <- char '_'
+        seg <- P.optional wordyIdSegP
+        pure (Blank (maybe "" (Text.unpack . NameSegment.toUnescapedText) seg))
+
+    semi = char ';' $> Semi False
+    textual = Textual <$> quoted
+    quoted = quotedRaw <|> quotedSingleLine
+    quotedRaw = do
+      _ <- lit "\"\"\""
+      n <- many (char '"')
+      _ <- optional (char '\n') -- initial newline is skipped
+      s <- P.manyTill P.anySingle (lit (replicate (length n + 3) '"'))
+      col0 <- column <$> posP
+      let col = col0 - (length n) - 3 -- this gets us the first col of the closing quotes
+      let leading = replicate (max 0 (col - 1)) ' '
+      -- a last line that's equal to `leading` is ignored, since leading
+      -- spaces up to `col` are not considered part of the string
+      let tweak l = case reverse l of
+            last : rest
+              | col > 1 && last == leading -> reverse rest
+              | otherwise -> l
+            [] -> []
+      pure $ case tweak (lines s) of
+        [] -> s
+        ls
+          | all (\l -> List.isPrefixOf leading l || all isSpace l) ls -> List.intercalate "\n" (drop (length leading) <$> ls)
+          | otherwise -> s
+    quotedSingleLine = char '"' *> P.manyTill (LP.charLiteral <|> sp) (char '"')
+      where
+        sp = lit "\\s" $> ' '
+    character = Character <$> (char '?' *> (spEsc <|> LP.charLiteral))
+      where
+        spEsc = P.try (char '\\' *> char 's' $> ' ')
+
+    numeric = bytes <|> otherbase <|> float <|> intOrNat
+      where
+        intOrNat = P.try $ num <$> sign <*> LP.decimal
+        float = do
+          _ <- P.try (P.lookAhead (sign >> (LP.decimal :: P Int) >> (char '.' <|> char 'e' <|> char 'E'))) -- commit after this
+          start <- posP
+          sign <- fromMaybe "" <$> sign
+          base <- P.takeWhile1P (Just "base") isDigit
+          decimals <-
+            P.optional $
+              let missingFractional = err start (MissingFractional $ base <> ".")
+               in liftA2 (<>) (lit ".") (P.takeWhile1P (Just "decimals") isDigit <|> missingFractional)
+          exp <- P.optional $ do
+            e <- map toLower <$> (lit "e" <|> lit "E")
+            sign <- fromMaybe "" <$> optional (lit "+" <|> lit "-")
+            let missingExp = err start (MissingExponent $ base <> fromMaybe "" decimals <> e <> sign)
+            exp <- P.takeWhile1P (Just "exponent") isDigit <|> missingExp
+            pure $ e <> sign <> exp
+          pure $ Numeric (sign <> base <> fromMaybe "" decimals <> fromMaybe "" exp)
+
+        bytes = do
+          start <- posP
+          _ <- lit "0xs"
+          s <- map toLower <$> P.takeWhileP (Just "hexadecimal character") isAlphaNum
+          case Bytes.fromBase16 $ Bytes.fromWord8s (fromIntegral .
ord <$> s) of + Left _ -> err start (InvalidBytesLiteral $ "0xs" <> s) + Right bs -> pure (Bytes bs) + otherbase = octal <|> hex + octal = do + start <- posP + commitAfter2 sign (lit "0o") $ \sign _ -> + fmap (num sign) LP.octal <|> err start InvalidOctalLiteral + hex = do + start <- posP + commitAfter2 sign (lit "0x") $ \sign _ -> + fmap (num sign) LP.hexadecimal <|> err start InvalidHexLiteral + + num :: Maybe String -> Integer -> Lexeme + num sign n = Numeric (fromMaybe "" sign <> show n) + sign = P.optional (lit "+" <|> lit "-") + + hash = Hash <$> P.try shortHashP + + reserved :: P [Token Lexeme] + reserved = + token' (\ts _ _ -> ts) $ + braces + <|> parens + <|> brackets + <|> commaSeparator + <|> delim + <|> delayOrForce + <|> keywords + <|> layoutKeywords + where + keywords = + -- yes "wordy" - just like a wordy keyword like "true", the literal "." (as in the dot in + -- "forall a. a -> a") is considered the keyword "." so long as it is either followed by EOF, a space, or some + -- non-wordy character (because ".foo" is a single identifier lexeme) + wordyKw "." + <|> symbolyKw ":" + <|> openKw "@rewrite" + <|> symbolyKw "@" + <|> symbolyKw "||" + <|> symbolyKw "|" + <|> symbolyKw "&&" + <|> wordyKw "true" + <|> wordyKw "false" + <|> wordyKw "use" + <|> wordyKw "forall" + <|> wordyKw "∀" + <|> wordyKw "termLink" + <|> wordyKw "typeLink" + + wordyKw s = separated wordySep (kw s) + symbolyKw s = separated (not . symbolyIdChar) (kw s) + + kw :: String -> P [Token Lexeme] + kw s = tokenP (lit s) <&> \token -> [Reserved <$> token] + + layoutKeywords :: P [Token Lexeme] + layoutKeywords = + ifElse + <|> withKw + <|> openKw "match" + <|> openKw "handle" + <|> typ + <|> arr + <|> rewriteArr + <|> eq + <|> openKw "cases" + <|> openKw "where" + <|> openKw "let" + <|> openKw "do" + where + ifElse = + openKw "if" + <|> closeKw' (Just "then") ["if"] (lit "then") + <|> closeKw' (Just "else") ["then"] (lit "else") + modKw = typeModifiersAlt (openKw1 wordySep . Text.unpack) + typeOrAbilityKw = typeOrAbilityAlt (openTypeKw1 . Text.unpack) + typ = modKw <|> typeOrAbilityKw + + withKw = do + [Token _ pos1 pos2] <- wordyKw "with" + env <- S.get + let l = layout env + case findClose ["handle", "match"] l of + Nothing -> err pos1 (CloseWithoutMatchingOpen msgOpen "'with'") + where + msgOpen = "'handle' or 'match'" + Just (withBlock, n) -> do + let b = withBlock <> "-with" + S.put (env {layout = drop n l, opening = Just b}) + let opens = [Token (Open "with") pos1 pos2] + pure $ replicate n (Token Close pos1 pos2) ++ opens + + -- In `structural/unique type` and `structural/unique ability`, + -- only the `structural` or `unique` opens a layout block, + -- and `ability` and `type` are just keywords. + openTypeKw1 t = do + b <- S.gets (topBlockName . 
layout) + case b of + Just mod | Set.member (Text.pack mod) typeModifiers -> wordyKw t + _ -> openKw1 wordySep t + + -- layout keyword which bumps the layout column by 1, rather than looking ahead + -- to the next token to determine the layout column + openKw1 :: (Char -> Bool) -> String -> P [Token Lexeme] + openKw1 sep kw = do + Token kw pos0 pos1 <- tokenP $ separated sep (lit kw) + S.modify (\env -> env {layout = (kw, column $ inc pos0) : layout env}) + pure [Token (Open kw) pos0 pos1] + + eq = do + [Token _ start end] <- symbolyKw "=" + env <- S.get + case topBlockName (layout env) of + -- '=' does not open a layout block if within a type declaration + Just t | t == "type" || Set.member (Text.pack t) typeModifiers -> pure [Token (Reserved "=") start end] + Just _ -> S.put (env {opening = Just "="}) >> pure [Token (Open "=") start end] + _ -> err start LayoutError + + rewriteArr = do + [Token _ start end] <- symbolyKw "==>" + env <- S.get + S.put (env {opening = Just "==>"}) >> pure [Token (Open "==>") start end] + + arr = do + [Token _ start end] <- symbolyKw "->" + env <- S.get + -- -> introduces a layout block if we're inside a `match with` or `cases` + case topBlockName (layout env) of + Just match | match `elem` matchWithBlocks -> do + S.put (env {opening = Just "->"}) + pure [Token (Open "->") start end] + _ -> pure [Token (Reserved "->") start end] + + -- a bit of lookahead here to reserve }} for closing a documentation block + braces = open "{" <|> close ["{"] p + where + p = do + l <- lit "}" + -- if we're within an existing {{ }} block, inLayout will be false + -- so we can actually allow }} to appear in normal code + inLayout <- S.gets inLayout + when (not inLayout) $ void $ P.lookAhead (P.satisfy (/= '}')) + pure l + matchWithBlocks = ["match-with", "cases"] + parens = open "(" <|> close ["("] (lit ")") + brackets = open "[" <|> close ["["] (lit "]") + -- `allowCommaToClose` determines if a comma should close inner blocks. + -- Currently there is a set of blocks where `,` is not treated specially + -- and it just emits a Reserved ",". There are currently only three: + -- `cases`, `match-with`, and `{` + allowCommaToClose match = not $ match `elem` ("{" : matchWithBlocks) + commaSeparator = do + env <- S.get + case topBlockName (layout env) of + Just match + | allowCommaToClose match -> + blockDelimiter ["[", "("] (lit ",") + _ -> fail "this comma is a pattern separator" + + delim = P.try $ do + ch <- P.satisfy (\ch -> ch /= ';' && Set.member ch delimiters) + pos <- posP + pure [Token (Reserved [ch]) pos (inc pos)] + + delayOrForce = separated ok $ do + token <- tokenP $ P.satisfy isDelayOrForce + pure [token <&> \op -> Reserved [op]] + where + ok c = isDelayOrForce c || isSpace c || isAlphaNum c || Set.member c delimiters || c == '\"' + +open :: String -> P [Token Lexeme] +open b = do + token <- tokenP $ lit b + env <- S.get + S.put (env {opening = Just b}) + pure [Open b <$ token] + +openKw :: String -> P [Token Lexeme] +openKw s = separated wordySep $ do + token <- tokenP $ lit s + env <- S.get + S.put (env {opening = Just s}) + pure [Open <$> token] + +tok :: P a -> P [Token a] +tok p = do + token <- tokenP p + pure [token] + +-- An identifier is a non-empty dot-delimited list of segments, with an optional leading dot, where each segment is +-- symboly (comprised of only symbols) or wordy (comprised of only alphanums). +-- +-- Examples: +-- +-- foo +-- .foo.++.doc +-- `.`.`..` (This is a two-segment identifier without a leading dot: "." 
then "..") +identifierLexemeP :: P Lexeme +identifierLexemeP = identifierLexeme <$> identifierP + +identifierLexeme :: HQ'.HashQualified Name -> Lexeme +identifierLexeme name = + if Name.isSymboly (HQ'.toName name) + then SymbolyId name + else WordyId name + +blockDelimiter :: [String] -> P String -> P [Token Lexeme] +blockDelimiter open closeP = do + Token close pos1 pos2 <- tokenP closeP + env <- S.get + case findClose open (layout env) of + Nothing -> err pos1 (UnexpectedDelimiter (quote close)) + where + quote s = "'" <> s <> "'" + Just (_, n) -> do + S.put (env {layout = drop (n - 1) (layout env)}) + let delims = [Token (Reserved close) pos1 pos2] + pure $ replicate (n - 1) (Token Close pos1 pos2) ++ delims + +close :: [String] -> P String -> P [Token Lexeme] +close = close' Nothing + +closeKw' :: Maybe String -> [String] -> P String -> P [Token Lexeme] +closeKw' reopenBlockname open closeP = close' reopenBlockname open (separated wordySep closeP) + +close' :: Maybe String -> [String] -> P String -> P [Token Lexeme] +close' reopenBlockname open closeP = do + Token close pos1 pos2 <- tokenP closeP + env <- S.get + case findClose open (layout env) of + Nothing -> err pos1 (CloseWithoutMatchingOpen msgOpen (quote close)) + where + msgOpen = List.intercalate " or " (quote <$> open) + quote s = "'" <> s <> "'" + Just (_, n) -> do + S.put (env {layout = drop n (layout env), opening = reopenBlockname}) + let opens = maybe [] (const $ [Token (Open close) pos1 pos2]) reopenBlockname + pure $ replicate n (Token Close pos1 pos2) ++ opens + +findClose :: [String] -> Layout -> Maybe (String, Int) +findClose _ [] = Nothing +findClose s ((h, _) : tl) = if h `elem` s then Just (h, 1) else fmap (1 +) <$> findClose s tl + +notLayout :: Token Lexeme -> Bool +notLayout t = case payload t of + Close -> False + Semi _ -> False + Open _ -> False + _ -> True + +top :: Layout -> Column +top [] = 1 +top ((_, h) : _) = h + +topLeftCorner :: Pos +topLeftCorner = Pos 1 1 + +data BlockTree a + = Block + -- | The token that opens the block + a + -- | “Stanzas” of nested tokens + [[BlockTree a]] + -- | The closing token, if any + (Maybe a) + | Leaf a + deriving (Functor, Foldable, Traversable) + +headToken :: BlockTree a -> a +headToken (Block a _ _) = a +headToken (Leaf a) = a + +instance (Show a) => Show (BlockTree a) where + show (Leaf a) = show a + show (Block open mid close) = + show open + ++ "\n" + ++ indent " " (intercalateMap "\n" (intercalateMap " " show) mid) + ++ "\n" + ++ maybe "" show close + where + indent by s = by ++ (s >>= go by) + go by '\n' = '\n' : by + go _ c = [c] + +reorderTree :: ([[BlockTree a]] -> [[BlockTree a]]) -> BlockTree a -> BlockTree a +reorderTree f (Block open mid close) = Block open (f (fmap (reorderTree f) <$> mid)) close +reorderTree _ l = l + +tree :: [Token Lexeme] -> BlockTree (Token Lexeme) +tree toks = one toks const + where + one (open@(payload -> Open _) : ts) k = many (Block open . stanzas) [] ts k + one (t : ts) k = k (Leaf t) ts + one [] k = k lastErr [] + where + lastErr = Leaf case drop (length toks - 1) toks of + [] -> Token (Err LayoutError) topLeftCorner topLeftCorner + (t : _) -> t {payload = Err LayoutError} + + many open acc [] k = k (open (reverse acc) Nothing) [] + many open acc (t@(payload -> Close) : ts) k = k (open (reverse acc) $ pure t) ts + many open acc ts k = one ts $ \t ts -> many open (t : acc) ts k + +stanzas :: [BlockTree (Token Lexeme)] -> [[BlockTree (Token Lexeme)]] +stanzas = + toList + . 
foldr
+      ( \tok (curr :| stanzas) -> case tok of
+          Leaf (Token (Semi _) _ _) -> [tok] :| curr : stanzas
+          _ -> (tok : curr) :| stanzas
+      )
+      ([] :| [])
+
+-- Moves type and ability declarations to the front of the token stream
+-- and moves `use` statements to the front of each block
+reorder :: [[BlockTree (Token Lexeme)]] -> [[BlockTree (Token Lexeme)]]
+reorder = foldr fixup [] . sortWith f
+  where
+    f [] = 3 :: Int
+    f (t0 : _) = case payload $ headToken t0 of
+      Open mod | Set.member (Text.pack mod) typeModifiers -> 1
+      Open typOrA | Set.member (Text.pack typOrA) typeOrAbility -> 1
+      Reserved "use" -> 0
+      _ -> 3 :: Int
+    -- after reordering we can end up with a trailing semicolon at the end of
+    -- a block, which we remove with this pass
+    fixup stanza [] = case Lens.unsnoc stanza of
+      Nothing -> []
+      -- remove any trailing `Semi` from the last non-empty stanza
+      Just (init, Leaf (Token (Semi _) _ _)) -> [init]
+      -- don’t touch other stanzas
+      Just (_, _) -> [stanza]
+    fixup stanza tail = stanza : tail
+
+-- | This turns the lexeme stream into a tree, reordering some lexeme subsequences.
+preParse :: [Token Lexeme] -> BlockTree (Token Lexeme)
+preParse = reorderTree reorder . tree
+
+-- | A few transformations that happen between lexing and parsing.
+--
+-- All of these things should move out of the lexer, and be applied in the parse.
+postLex :: [Token Lexeme] -> [Token Lexeme]
+postLex = toList . preParse . foldr tweak []
+
+isDelayOrForce :: Char -> Bool
+isDelayOrForce op = op == '\'' || op == '!'
+
+-- Mapping between characters and their escape codes. Use parse/showEscapeChar to convert.
+escapeChars :: [(Char, Char)]
+escapeChars =
+  [ ('0', '\0'),
+    ('a', '\a'),
+    ('b', '\b'),
+    ('f', '\f'),
+    ('n', '\n'),
+    ('r', '\r'),
+    ('t', '\t'),
+    ('v', '\v'),
+    ('s', ' '),
+    ('\'', '\''),
+    ('"', '"'),
+    ('\\', '\\')
+  ]
+
+-- Inverse of parseEscapeChar; map a character to its escaped version:
+showEscapeChar :: Char -> Maybe Char
+showEscapeChar c =
+  Map.lookup c (Map.fromList [(x, y) | (y, x) <- escapeChars])
+
+debugFilePreParse :: FilePath -> IO ()
+debugFilePreParse file = putStrLn . debugPreParse . preParse . lexer file . Text.unpack =<< readUtf8 file
+
+debugPreParse :: BlockTree (Token Lexeme) -> String
+debugPreParse (Leaf (Token (Err (UnexpectedTokens msg)) start end)) =
+  (if start == end then msg1 else msg2) <> ":\n" <> msg
+  where
+    msg1 = "Error on line " <> show (line start) <> ", column " <> show (column start)
+    msg2 =
+      "Error on line "
+        <> show (line start)
+        <> ", column "
+        <> show (column start)
+        <> " - line "
+        <> show (line end)
+        <> ", column "
+        <> show (column end)
+debugPreParse ts = show $ payload <$> ts
+
+debugPreParse' :: String -> String
+debugPreParse' = debugPreParse . preParse . lexer "debugPreParse"
+
+instance P.VisualStream [Token Lexeme] where
+  showTokens _ xs =
+    join . Nel.toList . S.evalState (traverse go xs) . end $ Nel.head xs
+    where
+      go :: Token Lexeme -> S.State Pos String
+      go tok = do
+        prev <- S.get
+        S.put $ end tok
+        pure $ pad prev (start tok) ++ pretty (payload tok)
+      pretty (Open s) = s
+      pretty (Reserved w) = w
+      pretty (Textual t) = '"' : t ++ ['"']
+      pretty (Character c) =
+        case showEscapeChar c of
+          Just c -> "?\\" ++ [c]
+          Nothing -> '?'
: [c] + pretty (WordyId n) = Text.unpack (HQ'.toText n) + pretty (SymbolyId n) = Text.unpack (HQ'.toText n) + pretty (Blank s) = "_" ++ s + pretty (Numeric n) = n + pretty (Hash sh) = show sh + pretty (Err e) = show e + pretty (Bytes bs) = "0xs" <> show bs + pretty Close = "" + pretty (Semi True) = "" + pretty (Semi False) = ";" + pretty (Doc d) = show d + pad (Pos line1 col1) (Pos line2 col2) = + if line1 == line2 + then replicate (col2 - col1) ' ' + else replicate (line2 - line1) '\n' ++ replicate col2 ' ' diff --git a/unison-syntax/src/Unison/Syntax/Parser.hs b/unison-syntax/src/Unison/Syntax/Parser.hs index 1bee4d08f4..098caab1b6 100644 --- a/unison-syntax/src/Unison/Syntax/Parser.hs +++ b/unison-syntax/src/Unison/Syntax/Parser.hs @@ -88,8 +88,9 @@ import Unison.Pattern qualified as Pattern import Unison.Prelude import Unison.Reference (Reference) import Unison.Referent (Referent) -import Unison.Syntax.Lexer qualified as L +import Unison.Syntax.Lexer.Unison qualified as L import Unison.Syntax.Name qualified as Name (toVar, unsafeParseText) +import Unison.Syntax.Parser.Doc qualified as Doc import Unison.Syntax.Parser.Doc.Data qualified as Doc import Unison.Term (MatchCase (..)) import Unison.UnisonFile.Error qualified as UF @@ -400,7 +401,7 @@ string = queryToken getString getString (L.Textual s) = Just (Text.pack s) getString _ = Nothing -doc :: (Ord v) => P v m (L.Token (Doc.UntitledSection L.DocTree)) +doc :: (Ord v) => P v m (L.Token (Doc.UntitledSection (Doc.Tree [L.Token L.Lexeme]))) doc = queryToken \case L.Doc d -> pure d _ -> Nothing diff --git a/unison-syntax/src/Unison/Syntax/Parser/Doc.hs b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs new file mode 100644 index 0000000000..5ca747f204 --- /dev/null +++ b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs @@ -0,0 +1,476 @@ +module Unison.Syntax.Parser.Doc where + +import Control.Comonad.Cofree (Cofree ((:<))) +import Control.Monad.State qualified as S +import Data.Char (isControl, isSpace) +import Data.List qualified as List +import Data.List.Extra qualified as List +import Data.List.NonEmpty (NonEmpty ((:|))) +import Data.List.NonEmpty qualified as NonEmpty +import Data.Text qualified as Text +import Text.Megaparsec qualified as P +import Text.Megaparsec.Char (char) +import Text.Megaparsec.Char qualified as CP +import Text.Megaparsec.Char.Lexer qualified as LP +import Unison.Parser.Ann (Ann, Annotated (..)) +import Unison.Prelude +import Unison.Syntax.Lexer + ( P, + ParsingEnv (..), + column, + identifierP, + line, + lit, + local, + sepBy1', + separated, + some', + someTill', + typeOrAbilityAlt, + wordySep, + (<+>), + ) +import Unison.Syntax.Lexer.Token (Token (Token), posP, tokenP) +import Unison.Syntax.Parser.Doc.Data + +type Tree code = Cofree (Top code) Ann + +-- | This is the actual `Doc` lexer. Unlike `doc2`, it doesn’t do any Unison-side lexing (i.e., it doesn’t know that +-- Unison wraps `Doc` literals in `}}`). +untitledSection :: forall code. (Annotated code) => (P () -> P code) -> P () -> P (UntitledSection (Tree code)) +untitledSection code docClose = UntitledSection <$> P.many (sectionElem <* CP.space) + where + wordyKw kw = separated wordySep (lit kw) + sectionElem = section <|> fencedBlock <|> list <|> paragraph + paragraph = wrap' . Paragraph <$> spaced leaf + reserved word = List.isPrefixOf "}}" word || all (== '#') word + + wordy :: P end -> P (Leaf code void) + wordy closing = fmap Word . tokenP . 
P.try $ do + let end = + P.lookAhead $ + docClose + <|> void (P.satisfy isSpace) + <|> void closing + word <- P.manyTill (P.satisfy (\ch -> not (isSpace ch))) end + guard (not $ reserved word || null word) + pure word + + leafy closing = groupy closing gs + where + gs = + link + <|> externalLink + <|> exampleInline + <|> expr + <|> boldOrItalicOrStrikethrough closing + <|> verbatim + <|> atDoc + <|> wordy closing + + leaf = leafy mzero + + atDoc = src <|> evalInline <|> signature <|> signatureInline + where + comma = lit "," <* CP.space + src = + src' Source "@source" + <|> src' FoldedSource "@foldedSource" + srcElem = + SourceElement + <$> (typeLink <|> termLink) + <*> ( fmap (fromMaybe []) . P.optional $ + (lit "@") *> (CP.space *> annotations) + ) + where + annotation = fmap Left (tokenP identifierP) <|> fmap Right expr <* CP.space + annotations = + P.some (EmbedAnnotation <$> annotation) + src' name atName = fmap name $ do + _ <- lit atName *> (lit " {" <|> lit "{") *> CP.space + s <- sepBy1' srcElem comma + _ <- lit "}" + pure s + signature = fmap Signature $ do + _ <- (lit "@signatures" <|> lit "@signature") *> (lit " {" <|> lit "{") *> CP.space + s <- sepBy1' signatureLink comma + _ <- lit "}" + pure s + signatureInline = fmap SignatureInline $ do + _ <- lit "@inlineSignature" *> (lit " {" <|> lit "{") *> CP.space + s <- signatureLink + _ <- lit "}" + pure s + evalInline = fmap EvalInline $ do + _ <- lit "@eval" *> (lit " {" <|> lit "{") *> CP.space + let inlineEvalClose = () <$ lit "}" + s <- code inlineEvalClose + pure s + + typeLink = fmap EmbedTypeLink $ do + _ <- typeOrAbilityAlt (wordyKw . Text.unpack) <* CP.space + tokenP identifierP <* CP.space + + termLink = + fmap EmbedTermLink $ + tokenP identifierP <* CP.space + + signatureLink = + fmap EmbedSignatureLink $ + tokenP identifierP <* CP.space + + groupy closing p = do + Token p _ _ <- tokenP p + after <- P.optional . P.try $ leafy closing + pure $ case after of + Nothing -> p + Just after -> + Group + . Join + $ p + :| pure after + + verbatim = + P.label "code (examples: ''**unformatted**'', `words` or '''_words_''')" $ do + Token originalText start stop <- tokenP do + -- a single backtick followed by a non-backtick is treated as monospaced + let tick = P.try (lit "`" <* P.lookAhead (P.satisfy (/= '`'))) + -- also two or more ' followed by that number of closing ' + quotes <- tick <|> (lit "''" <+> P.takeWhileP Nothing (== '\'')) + P.someTill P.anySingle (lit quotes) + let isMultiLine = line start /= line stop + if isMultiLine + then do + let trimmed = (trimAroundDelimiters originalText) + let txt = trimIndentFromVerbatimBlock (column start - 1) trimmed + -- If it's a multi-line verbatim block we trim any whitespace representing + -- indentation from the pretty-printer. See 'trimIndentFromVerbatimBlock' + pure . Verbatim $ + Word $ + Token txt start stop + else + pure . Code $ + Word $ + Token originalText start stop + + exampleInline = + P.label "inline code (examples: ``List.map f xs``, ``[1] :+ 2``)" $ + fmap Example $ do + n <- P.try $ do + _ <- lit "`" + length <$> P.takeWhile1P (Just "backticks") (== '`') + let end = () <$ lit (replicate (n + 1) '`') + ex <- CP.space *> code end + pure ex + + link = + P.label "link (examples: {type List}, {Nat.+})" $ + fmap Link $ + P.try $ + lit "{" *> (typeLink <|> termLink) <* lit "}" + + expr :: P (Leaf code x) + expr = + fmap Transclude . 
P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $ + lit "{{" *> code (() <$ lit "}}") + + nonNewlineSpace ch = isSpace ch && ch /= '\n' && ch /= '\r' + nonNewlineSpaces = P.takeWhileP Nothing nonNewlineSpace + + -- Allows whitespace or a newline, but not more than two newlines in a row. + whitespaceWithoutParagraphBreak :: P () + whitespaceWithoutParagraphBreak = void do + void nonNewlineSpaces + optional newline >>= \case + Just _ -> void nonNewlineSpaces + Nothing -> pure () + + fencedBlock = + P.label "block eval (syntax: a fenced code block)" $ + evalUnison <|> exampleBlock <|> other + where + evalUnison = fmap (wrap' . Eval) $ do + -- commit after seeing that ``` is on its own line + fence <- P.try $ do + fence <- lit "```" <+> P.takeWhileP Nothing (== '`') + b <- all isSpace <$> P.lookAhead (P.takeWhileP Nothing (/= '\n')) + fence <$ guard b + CP.space + *> code (() <$ lit fence) + + exampleBlock = fmap (wrap' . ExampleBlock) $ do + void $ lit "@typecheck" <* CP.space + fence <- lit "```" <+> P.takeWhileP Nothing (== '`') + code (() <$ lit fence) + + uncolumn column tabWidth s = + let skip col r | col < 1 = r + skip col s@('\t' : _) | col < tabWidth = s + skip col ('\t' : r) = skip (col - tabWidth) r + skip col (c : r) + | isSpace c && (not $ isControl c) = + skip (col - 1) r + skip _ s = s + in List.intercalate "\n" $ skip column <$> lines s + + other = fmap (uncurry $ wrapSimple2 CodeBlock) $ do + column <- (\x -> x - 1) . toInteger . P.unPos <$> LP.indentLevel + let tabWidth = toInteger . P.unPos $ P.defaultTabWidth + fence <- lit "```" <+> P.takeWhileP Nothing (== '`') + name <- + P.takeWhileP Nothing nonNewlineSpace + *> tokenP (P.takeWhile1P Nothing (not . isSpace)) + <* P.takeWhileP Nothing nonNewlineSpace + _ <- void CP.eol + verbatim <- + tokenP $ + uncolumn column tabWidth . trimAroundDelimiters + <$> P.someTill P.anySingle ([] <$ lit fence) + pure (name, verbatim) + + boldOrItalicOrStrikethrough closing = do + let start = + some (P.satisfy (== '*')) + <|> some (P.satisfy (== '_')) + <|> some + (P.satisfy (== '~')) + name s = + if take 1 s == "~" + then Strikethrough + else if take 1 s == "*" then Bold else Italic + end <- P.try $ do + end <- start + P.lookAhead (P.satisfy (not . isSpace)) + pure end + name end . wrap' . Paragraph + <$> someTill' + (leafy (closing <|> (void $ lit end)) <* whitespaceWithoutParagraphBreak) + (lit end) + + externalLink = + P.label "hyperlink (example: [link name](https://destination.com))" $ + fmap (uncurry NamedLink) $ do + _ <- lit "[" + p <- leafies (void $ char ']') + _ <- lit "]" + _ <- lit "(" + target <- + fmap (Group . Join) $ + fmap pure link <|> some' (expr <|> wordy (char ')')) + _ <- lit ")" + pure (p, target) + + -- newline = P.optional (lit "\r") *> lit "\n" + + sp = P.try $ do + spaces <- P.takeWhile1P (Just "space") isSpace + close <- P.optional (P.lookAhead (lit "}}")) + case close of + Nothing -> guard $ ok spaces + Just _ -> pure () + pure spaces + where + ok s = length [() | '\n' <- s] < 2 + + spaced p = some' (p <* P.optional sp) + leafies close = wrap' . Paragraph <$> spaced (leafy close) + + list = bulletedList <|> numberedList + + bulletedList = wrap' . BulletedList <$> sepBy1' bullet listSep + numberedList = wrap' . 
NumberedList <$> sepBy1' numberedItem listSep
+
+    listSep = P.try $ newline *> nonNewlineSpaces *> P.lookAhead (void bulletedStart <|> void numberedStart)
+
+    bulletedStart = P.try $ do
+      r <- listItemStart' $ [] <$ P.satisfy bulletChar
+      P.lookAhead (P.satisfy isSpace)
+      pure r
+      where
+        bulletChar ch = ch == '*' || ch == '-' || ch == '+'
+
+    listItemStart' :: P a -> P (Int, a)
+    listItemStart' gutter = P.try $ do
+      nonNewlineSpaces
+      col <- column <$> posP
+      parentCol <- S.gets parentListColumn
+      guard (col > parentCol)
+      (col,) <$> gutter
+
+    numberedStart =
+      listItemStart' $ P.try (tokenP $ LP.decimal <* lit ".")
+
+    listItemParagraph = fmap (wrap' . Paragraph) $ do
+      col <- column <$> posP
+      some' (leaf <* sep col)
+      where
+        -- Trickiness here to support hard line breaks inside of
+        -- a bulleted list, so for instance this parses as expected:
+        --
+        --   * uno dos
+        --     tres quatro
+        --   * alice bob
+        --     carol dave eve
+        sep col = do
+          _ <- nonNewlineSpaces
+          _ <-
+            P.optional . P.try $
+              newline
+                *> nonNewlineSpaces
+                *> do
+                  col2 <- column <$> posP
+                  guard $ col2 >= col
+                  (P.notFollowedBy $ void numberedStart <|> void bulletedStart)
+          pure ()
+
+    numberedItem = P.label msg $ do
+      (col, s) <- numberedStart
+      (s,)
+        <$> ( fmap (uncurry Column) $ do
+                p <- nonNewlineSpaces *> listItemParagraph
+                subList <-
+                  local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> list)
+                pure (p, subList)
+            )
+      where
+        msg = "numbered list (examples: 1. item1, 8. start numbering at '8')"
+
+    bullet = fmap (uncurry Column) . P.label "bullet (examples: * item1, - item2)" $ do
+      (col, _) <- bulletedStart
+      p <- nonNewlineSpaces *> listItemParagraph
+      subList <-
+        local
+          (\e -> e {parentListColumn = col})
+          (P.optional $ listSep *> list)
+      pure (p, subList)
+
+    newline = P.label "newline" $ lit "\n" <|> lit "\r\n"
+
+    -- ## Section title
+    --
+    -- A paragraph under this section.
+    -- Part of the same paragraph. Blanklines separate paragraphs.
+    --
+    -- ### A subsection title
+    --
+    -- A paragraph under this subsection.
+
+    -- # A section title (not a subsection)
+    section :: P (Tree code)
+    section = fmap (wrap' . uncurry Section) $ do
+      ns <- S.gets parentSections
+      hashes <- P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp
+      title <- paragraph <* CP.space
+      let m = length hashes + head ns
+      body <-
+        local (\env -> env {parentSections = (m : (tail ns))}) $
+          P.many (sectionElem <* CP.space)
+      pure $ (title, body)
+
+    wrap' :: Top code (Tree code) -> Tree code
+    wrap' doc = ann doc :< doc
+
+    wrapSimple2 :: (Annotated a, Annotated b) => (a -> b -> Top code (Tree code)) -> a -> b -> Tree code
+    wrapSimple2 fn a b = ann a <> ann b :< fn a b
+
+-- | If it's a multi-line verbatim block we trim any whitespace representing
+-- indentation from the pretty-printer.
+--
+-- E.g.
+--
+-- @@
+-- {{
+-- # Heading
+--   '''
+--   code
+--    indented
+--   '''
+-- }}
+-- @@
+--
+-- Should lex to the text literal "code\n indented".
+--
+-- If there's text in the literal that has LESS leading whitespace than the
+-- opening delimiters, we don't trim it at all. E.g.
+--
+-- @@
+-- {{
+-- # Heading
+--    '''
+--  code
+--    '''
+-- }}
+-- @@
+--
+-- Is parsed as " code".
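+--
+-- (The prefix check is per-line: each line must either start with the
+-- expected indentation or consist entirely of whitespace; otherwise the
+-- whole literal is left untouched.)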
+-- +-- Trim the expected amount of whitespace from a text literal: +-- >>> trimIndentFromVerbatimBlock 2 " code\n indented" +-- "code\n indented" +-- +-- If the text literal has less leading whitespace than the opening delimiters, +-- leave it as-is +-- >>> trimIndentFromVerbatimBlock 2 "code\n indented" +-- "code\n indented" +trimIndentFromVerbatimBlock :: Int -> String -> String +trimIndentFromVerbatimBlock leadingSpaces txt = fromMaybe txt $ do + List.intercalate "\n" <$> for (lines txt) \line -> do + -- If any 'stripPrefix' fails, we fail and return the unaltered text + case List.stripPrefix (replicate leadingSpaces ' ') line of + Just stripped -> Just stripped + Nothing -> + -- If it was a line with all white-space, just use an empty line, + -- this can happen easily in editors which trim trailing whitespace. + if all isSpace line + then Just "" + else Nothing + +-- Trim leading/trailing whitespace from around delimiters, e.g. +-- +-- {{ +-- '''___ <- whitespace here including newline +-- text block +-- 👇 or here +-- __''' +-- }} +-- >>> trimAroundDelimiters " \n text block \n " +-- " text block " +-- +-- Should leave leading and trailing line untouched if it contains non-whitespace, e.g.: +-- +-- ''' leading whitespace +-- text block +-- trailing whitespace: ''' +-- >>> trimAroundDelimiters " leading whitespace\n text block \ntrailing whitespace: " +-- " leading whitespace\n text block \ntrailing whitespace: " +-- +-- Should keep trailing newline if it's the only thing on the line, e.g.: +-- +-- ''' +-- newline below +-- +-- ''' +-- >>> trimAroundDelimiters "\nnewline below\n\n" +-- "newline below\n\n" +trimAroundDelimiters :: String -> String +trimAroundDelimiters txt = + txt + & ( \s -> + List.breakOn "\n" s + & \case + (prefix, suffix) + | all isSpace prefix -> drop 1 suffix + | otherwise -> prefix <> suffix + ) + & ( \s -> + List.breakOnEnd "\n" s + & \case + (_prefix, "") -> s + (prefix, suffix) + | all isSpace suffix -> dropTrailingNewline prefix + | otherwise -> prefix <> suffix + ) + where + dropTrailingNewline = \case + [] -> [] + (x : xs) -> NonEmpty.init (x NonEmpty.:| xs) diff --git a/unison-syntax/test/Main.hs b/unison-syntax/test/Main.hs index 5c13940b0a..b7235f299b 100644 --- a/unison-syntax/test/Main.hs +++ b/unison-syntax/test/Main.hs @@ -10,7 +10,7 @@ import Unison.Prelude import Unison.ShortHash (ShortHash) import Unison.ShortHash qualified as ShortHash import Unison.Syntax.HashQualifiedPrime qualified as HQ' (unsafeParseText) -import Unison.Syntax.Lexer +import Unison.Syntax.Lexer.Unison main :: IO () main = diff --git a/unison-syntax/unison-syntax.cabal b/unison-syntax/unison-syntax.cabal index 853da4c817..0da37d0036 100644 --- a/unison-syntax/unison-syntax.cabal +++ b/unison-syntax/unison-syntax.cabal @@ -23,9 +23,11 @@ library Unison.Syntax.HashQualifiedPrime Unison.Syntax.Lexer Unison.Syntax.Lexer.Token + Unison.Syntax.Lexer.Unison Unison.Syntax.Name Unison.Syntax.NameSegment Unison.Syntax.Parser + Unison.Syntax.Parser.Doc Unison.Syntax.Parser.Doc.Data Unison.Syntax.ReservedWords Unison.Syntax.ShortHash From e9512a69ce03e137758f8adc81dae04c422260d9 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Fri, 26 Jul 2024 12:07:05 -0600 Subject: [PATCH 17/22] Split the Doc parser into multiple functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In general, they map to the constructors of the Doc types, with some wiggle room for now. It’s probably beneficial to review this commit by ignoring whitespace. 
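For reviewers, the rough shape of the split (signatures copied from the diff
below): each extracted parser now takes the code parser, and where needed the
doc-closing parser, as explicit arguments instead of closing over them:

    sectionElem :: (Annotated code) => (P () -> P code) -> P () -> P (Tree code)
    paragraph :: (Annotated code) => (P () -> P code) -> P () -> P (Top code (Tree code))
    word :: P end -> P (Leaf code void)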
--- .../src/Unison/Syntax/Lexer/Unison.hs | 2 +- unison-syntax/src/Unison/Syntax/Parser/Doc.hs | 672 ++++++++++-------- 2 files changed, 375 insertions(+), 299 deletions(-) diff --git a/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs b/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs index dcaf9ca6d3..86e75b1afe 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs @@ -313,7 +313,7 @@ doc2 = do } ) do - body <- Doc.untitledSection lexemes' . P.lookAhead $ () <$ lit "}}" + body <- Doc.untitledSection . Doc.sectionElem lexemes' . P.lookAhead $ () <$ lit "}}" closeStart <- posP lit "}}" closeEnd <- posP diff --git a/unison-syntax/src/Unison/Syntax/Parser/Doc.hs b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs index 5ca747f204..99122bd5ff 100644 --- a/unison-syntax/src/Unison/Syntax/Parser/Doc.hs +++ b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs @@ -1,4 +1,44 @@ -module Unison.Syntax.Parser.Doc where +module Unison.Syntax.Parser.Doc + ( Tree, + untitledSection, + sectionElem, + leaf, + + -- * section elements + section, + eval, + exampleBlock, + codeBlock, + list, + bulletedList, + numberedList, + paragraph, + + -- * leaves + link, + namedLink, + example, + transclude, + bold, + italic, + strikethrough, + verbatim, + source, + foldedSource, + evalInline, + signatures, + signatureInline, + group, + word, + + -- * other components + column', + embedTypeLink, + embedTermLink, + embedSignatureLink, + join, + ) +where import Control.Comonad.Cofree (Cofree ((:<))) import Control.Monad.State qualified as S @@ -13,7 +53,7 @@ import Text.Megaparsec.Char (char) import Text.Megaparsec.Char qualified as CP import Text.Megaparsec.Char.Lexer qualified as LP import Unison.Parser.Ann (Ann, Annotated (..)) -import Unison.Prelude +import Unison.Prelude hiding (join) import Unison.Syntax.Lexer ( P, ParsingEnv (..), @@ -37,146 +77,221 @@ type Tree code = Cofree (Top code) Ann -- | This is the actual `Doc` lexer. Unlike `doc2`, it doesn’t do any Unison-side lexing (i.e., it doesn’t know that -- Unison wraps `Doc` literals in `}}`). -untitledSection :: forall code. (Annotated code) => (P () -> P code) -> P () -> P (UntitledSection (Tree code)) -untitledSection code docClose = UntitledSection <$> P.many (sectionElem <* CP.space) +untitledSection :: P a -> P (UntitledSection a) +untitledSection a = UntitledSection <$> P.many (a <* CP.space) + +wordyKw :: String -> P String +wordyKw kw = separated wordySep (lit kw) + +sectionElem :: (Annotated code) => (P () -> P code) -> P () -> P (Tree code) +sectionElem code docClose = + fmap wrap' $ + section code docClose + <|> P.label "block eval (syntax: a fenced code block)" (eval code <|> exampleBlock code <|> codeBlock) + <|> list code docClose + <|> paragraph code docClose + +paragraph :: (Annotated code) => (P () -> P code) -> P () -> P (Top code (Tree code)) +paragraph code = fmap Paragraph . spaced . leafy code + +word :: P end -> P (Leaf code void) +word closing = fmap Word . tokenP . P.try $ do + let end = P.lookAhead $ void (P.satisfy isSpace) <|> void closing + word <- P.manyTill (P.satisfy (\ch -> not (isSpace ch))) end + guard (not $ reserved word || null word) + pure word where - wordyKw kw = separated wordySep (lit kw) - sectionElem = section <|> fencedBlock <|> list <|> paragraph - paragraph = wrap' . Paragraph <$> spaced leaf reserved word = List.isPrefixOf "}}" word || all (== '#') word - wordy :: P end -> P (Leaf code void) - wordy closing = fmap Word . tokenP . 
P.try $ do - let end = - P.lookAhead $ - docClose - <|> void (P.satisfy isSpace) - <|> void closing - word <- P.manyTill (P.satisfy (\ch -> not (isSpace ch))) end - guard (not $ reserved word || null word) - pure word - - leafy closing = groupy closing gs - where - gs = - link - <|> externalLink - <|> exampleInline - <|> expr - <|> boldOrItalicOrStrikethrough closing - <|> verbatim - <|> atDoc - <|> wordy closing - - leaf = leafy mzero - - atDoc = src <|> evalInline <|> signature <|> signatureInline +leaf :: (Annotated code) => (P () -> P code) -> P () -> P (Leaf code (Tree code)) +leaf code closing = + do + link + <|> namedLink code closing + <|> example code + <|> transclude code + <|> bold code closing + <|> italic code closing + <|> strikethrough code closing + <|> verbatim + <|> source code + <|> foldedSource code + <|> evalInline code + <|> signatures + <|> signatureInline + <|> word closing + +leafy :: (Annotated code) => (P () -> P code) -> P () -> P (Leaf code (Tree code)) +leafy code closing = do + p <- leaf code closing + after <- P.optional . P.try $ leafy code closing + case after of + Nothing -> pure p + Just after -> group . pure $ p :| pure after + +comma :: P String +comma = lit "," <* CP.space + +source :: (P () -> P code) -> P (Leaf code a) +source = fmap Source . (lit "@source" *>) . sourceElements + +foldedSource :: (P () -> P code) -> P (Leaf code a) +foldedSource = fmap FoldedSource . (lit "@foldedSource" *>) . sourceElements + +sourceElements :: (P () -> P code) -> P (NonEmpty (SourceElement (Leaf code Void))) +sourceElements code = do + _ <- (lit " {" <|> lit "{") *> CP.space + s <- sepBy1' srcElem comma + _ <- lit "}" + pure s + where + srcElem = + SourceElement + <$> embedLink + <*> ( fmap (fromMaybe []) . P.optional $ + (lit "@") *> (CP.space *> annotations) + ) where - comma = lit "," <* CP.space - src = - src' Source "@source" - <|> src' FoldedSource "@foldedSource" - srcElem = - SourceElement - <$> (typeLink <|> termLink) - <*> ( fmap (fromMaybe []) . P.optional $ - (lit "@") *> (CP.space *> annotations) - ) - where - annotation = fmap Left (tokenP identifierP) <|> fmap Right expr <* CP.space - annotations = - P.some (EmbedAnnotation <$> annotation) - src' name atName = fmap name $ do - _ <- lit atName *> (lit " {" <|> lit "{") *> CP.space - s <- sepBy1' srcElem comma - _ <- lit "}" - pure s - signature = fmap Signature $ do - _ <- (lit "@signatures" <|> lit "@signature") *> (lit " {" <|> lit "{") *> CP.space - s <- sepBy1' signatureLink comma - _ <- lit "}" - pure s - signatureInline = fmap SignatureInline $ do - _ <- lit "@inlineSignature" *> (lit " {" <|> lit "{") *> CP.space - s <- signatureLink - _ <- lit "}" - pure s - evalInline = fmap EvalInline $ do - _ <- lit "@eval" *> (lit " {" <|> lit "{") *> CP.space - let inlineEvalClose = () <$ lit "}" - s <- code inlineEvalClose - pure s - - typeLink = fmap EmbedTypeLink $ do - _ <- typeOrAbilityAlt (wordyKw . Text.unpack) <* CP.space - tokenP identifierP <* CP.space - - termLink = - fmap EmbedTermLink $ - tokenP identifierP <* CP.space - - signatureLink = - fmap EmbedSignatureLink $ - tokenP identifierP <* CP.space - - groupy closing p = do - Token p _ _ <- tokenP p - after <- P.optional . P.try $ leafy closing - pure $ case after of - Nothing -> p - Just after -> - Group - . 
Join - $ p - :| pure after - - verbatim = - P.label "code (examples: ''**unformatted**'', `words` or '''_words_''')" $ do - Token originalText start stop <- tokenP do - -- a single backtick followed by a non-backtick is treated as monospaced - let tick = P.try (lit "`" <* P.lookAhead (P.satisfy (/= '`'))) - -- also two or more ' followed by that number of closing ' - quotes <- tick <|> (lit "''" <+> P.takeWhileP Nothing (== '\'')) - P.someTill P.anySingle (lit quotes) - let isMultiLine = line start /= line stop - if isMultiLine - then do - let trimmed = (trimAroundDelimiters originalText) - let txt = trimIndentFromVerbatimBlock (column start - 1) trimmed - -- If it's a multi-line verbatim block we trim any whitespace representing - -- indentation from the pretty-printer. See 'trimIndentFromVerbatimBlock' - pure . Verbatim $ - Word $ - Token txt start stop - else - pure . Code $ - Word $ - Token originalText start stop - - exampleInline = - P.label "inline code (examples: ``List.map f xs``, ``[1] :+ 2``)" $ - fmap Example $ do - n <- P.try $ do - _ <- lit "`" - length <$> P.takeWhile1P (Just "backticks") (== '`') - let end = () <$ lit (replicate (n + 1) '`') - ex <- CP.space *> code end - pure ex - - link = - P.label "link (examples: {type List}, {Nat.+})" $ - fmap Link $ - P.try $ - lit "{" *> (typeLink <|> termLink) <* lit "}" - - expr :: P (Leaf code x) - expr = - fmap Transclude . P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $ - lit "{{" *> code (() <$ lit "}}") - + annotation = fmap Left (tokenP identifierP) <|> fmap Right (transclude code) <* CP.space + annotations = + P.some (EmbedAnnotation <$> annotation) + +signatures :: P (Leaf code a) +signatures = fmap Signature $ do + _ <- (lit "@signatures" <|> lit "@signature") *> (lit " {" <|> lit "{") *> CP.space + s <- sepBy1' embedSignatureLink comma + _ <- lit "}" + pure s + +signatureInline :: P (Leaf code a) +signatureInline = fmap SignatureInline $ do + _ <- lit "@inlineSignature" *> (lit " {" <|> lit "{") *> CP.space + s <- embedSignatureLink + _ <- lit "}" + pure s + +evalInline :: (P () -> P a1) -> P (Leaf a1 a2) +evalInline code = fmap EvalInline $ do + _ <- lit "@eval" *> (lit " {" <|> lit "{") *> CP.space + let inlineEvalClose = void $ lit "}" + s <- code inlineEvalClose + pure s + +embedTypeLink :: P EmbedLink +embedTypeLink = + EmbedTypeLink <$> do + _ <- typeOrAbilityAlt (wordyKw . Text.unpack) <* CP.space + tokenP identifierP <* CP.space + +embedTermLink :: P EmbedLink +embedTermLink = EmbedTermLink <$> tokenP identifierP <* CP.space + +embedSignatureLink :: P EmbedSignatureLink +embedSignatureLink = EmbedSignatureLink <$> tokenP identifierP <* CP.space + +verbatim :: P (Leaf code a) +verbatim = + P.label "code (examples: ''**unformatted**'', `words` or '''_words_''')" $ do + Token originalText start stop <- tokenP do + -- a single backtick followed by a non-backtick is treated as monospaced + let tick = P.try (lit "`" <* P.lookAhead (P.satisfy (/= '`'))) + -- also two or more ' followed by that number of closing ' + quotes <- tick <|> (lit "''" <+> P.takeWhileP Nothing (== '\'')) + P.someTill P.anySingle (lit quotes) + let isMultiLine = line start /= line stop + if isMultiLine + then do + let trimmed = (trimAroundDelimiters originalText) + let txt = trimIndentFromVerbatimBlock (column start - 1) trimmed + -- If it's a multi-line verbatim block we trim any whitespace representing + -- indentation from the pretty-printer. See 'trimIndentFromVerbatimBlock' + pure . 
Verbatim $ + Word $ + Token txt start stop + else + pure . Code $ + Word $ + Token originalText start stop + +example :: (P () -> P code) -> P (Leaf code void) +example code = + P.label "inline code (examples: ``List.map f xs``, ``[1] :+ 2``)" $ + fmap Example $ do + n <- P.try $ do + _ <- lit "`" + length <$> P.takeWhile1P (Just "backticks") (== '`') + let end = void . lit $ replicate (n + 1) '`' + CP.space *> code end + +link :: P (Leaf a b) +link = P.label "link (examples: {type List}, {Nat.+})" $ Link <$> P.try (lit "{" *> embedLink <* lit "}") + +transclude :: (P () -> P code) -> P (Leaf code x) +transclude code = + fmap Transclude . P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $ + lit "{{" *> code (void $ lit "}}") + +nonNewlineSpaces :: P String +nonNewlineSpaces = P.takeWhileP Nothing nonNewlineSpace + where nonNewlineSpace ch = isSpace ch && ch /= '\n' && ch /= '\r' - nonNewlineSpaces = P.takeWhileP Nothing nonNewlineSpace +eval :: (Annotated code) => (P () -> P code) -> P (Top code (Tree code)) +eval code = + Eval <$> do + -- commit after seeing that ``` is on its own line + fence <- P.try $ do + fence <- lit "```" <+> P.takeWhileP Nothing (== '`') + b <- all isSpace <$> P.lookAhead (P.takeWhileP Nothing (/= '\n')) + fence <$ guard b + CP.space *> code (void $ lit fence) + +exampleBlock :: (Annotated code) => (P () -> P code) -> P (Top code (Tree code)) +exampleBlock code = + ExampleBlock + <$> do + void $ lit "@typecheck" <* CP.space + fence <- lit "```" <+> P.takeWhileP Nothing (== '`') + code . void $ lit fence + +codeBlock :: P (Top code (Tree code)) +codeBlock = do + column <- (\x -> x - 1) . toInteger . P.unPos <$> LP.indentLevel + let tabWidth = toInteger . P.unPos $ P.defaultTabWidth + fence <- lit "```" <+> P.takeWhileP Nothing (== '`') + name <- + nonNewlineSpaces + *> tokenP (P.takeWhile1P Nothing (not . isSpace)) + <* nonNewlineSpaces + _ <- void CP.eol + verbatim <- + tokenP $ + uncolumn column tabWidth . trimAroundDelimiters + <$> P.someTill P.anySingle ([] <$ lit fence) + pure $ CodeBlock name verbatim + where + uncolumn column tabWidth s = + let skip col r | col < 1 = r + skip col s@('\t' : _) | col < tabWidth = s + skip col ('\t' : r) = skip (col - tabWidth) r + skip col (c : r) + | isSpace c && (not $ isControl c) = + skip (col - 1) r + skip _ s = s + in List.intercalate "\n" $ skip column <$> lines s + +emphasis :: (Annotated code) => Char -> (P () -> P code) -> P () -> P (Tree code) +emphasis delimiter code closing = do + let start = some (P.satisfy (== delimiter)) + end <- P.try $ do + end <- start + P.lookAhead (P.satisfy (not . isSpace)) + pure end + wrap' . Paragraph + <$> someTill' + (leafy code (closing <|> (void $ lit end)) <* void whitespaceWithoutParagraphBreak) + (lit end) + where -- Allows whitespace or a newline, but not more than two newlines in a row. whitespaceWithoutParagraphBreak :: P () whitespaceWithoutParagraphBreak = void do @@ -185,124 +300,92 @@ untitledSection code docClose = UntitledSection <$> P.many (sectionElem <* CP.sp Just _ -> void nonNewlineSpaces Nothing -> pure () - fencedBlock = - P.label "block eval (syntax: a fenced code block)" $ - evalUnison <|> exampleBlock <|> other - where - evalUnison = fmap (wrap' . 
Eval) $ do - -- commit after seeing that ``` is on its own line - fence <- P.try $ do - fence <- lit "```" <+> P.takeWhileP Nothing (== '`') - b <- all isSpace <$> P.lookAhead (P.takeWhileP Nothing (/= '\n')) - fence <$ guard b - CP.space - *> code (() <$ lit fence) - - exampleBlock = fmap (wrap' . ExampleBlock) $ do - void $ lit "@typecheck" <* CP.space - fence <- lit "```" <+> P.takeWhileP Nothing (== '`') - code (() <$ lit fence) - - uncolumn column tabWidth s = - let skip col r | col < 1 = r - skip col s@('\t' : _) | col < tabWidth = s - skip col ('\t' : r) = skip (col - tabWidth) r - skip col (c : r) - | isSpace c && (not $ isControl c) = - skip (col - 1) r - skip _ s = s - in List.intercalate "\n" $ skip column <$> lines s - - other = fmap (uncurry $ wrapSimple2 CodeBlock) $ do - column <- (\x -> x - 1) . toInteger . P.unPos <$> LP.indentLevel - let tabWidth = toInteger . P.unPos $ P.defaultTabWidth - fence <- lit "```" <+> P.takeWhileP Nothing (== '`') - name <- - P.takeWhileP Nothing nonNewlineSpace - *> tokenP (P.takeWhile1P Nothing (not . isSpace)) - <* P.takeWhileP Nothing nonNewlineSpace - _ <- void CP.eol - verbatim <- - tokenP $ - uncolumn column tabWidth . trimAroundDelimiters - <$> P.someTill P.anySingle ([] <$ lit fence) - pure (name, verbatim) - - boldOrItalicOrStrikethrough closing = do - let start = - some (P.satisfy (== '*')) - <|> some (P.satisfy (== '_')) - <|> some - (P.satisfy (== '~')) - name s = - if take 1 s == "~" - then Strikethrough - else if take 1 s == "*" then Bold else Italic - end <- P.try $ do - end <- start - P.lookAhead (P.satisfy (not . isSpace)) - pure end - name end . wrap' . Paragraph - <$> someTill' - (leafy (closing <|> (void $ lit end)) <* whitespaceWithoutParagraphBreak) - (lit end) - - externalLink = - P.label "hyperlink (example: [link name](https://destination.com))" $ - fmap (uncurry NamedLink) $ do - _ <- lit "[" - p <- leafies (void $ char ']') - _ <- lit "]" - _ <- lit "(" - target <- - fmap (Group . Join) $ - fmap pure link <|> some' (expr <|> wordy (char ')')) - _ <- lit ")" - pure (p, target) - - -- newline = P.optional (lit "\r") *> lit "\n" - - sp = P.try $ do - spaces <- P.takeWhile1P (Just "space") isSpace - close <- P.optional (P.lookAhead (lit "}}")) - case close of - Nothing -> guard $ ok spaces - Just _ -> pure () - pure spaces - where - ok s = length [() | '\n' <- s] < 2 +bold :: (Annotated code) => (P () -> P code) -> P () -> P (Leaf code (Tree code)) +bold code = fmap Bold . emphasis '*' code + +italic :: (Annotated code) => (P () -> P code) -> P () -> P (Leaf code (Tree code)) +italic code = fmap Italic . emphasis '_' code + +strikethrough :: (Annotated code) => (P () -> P code) -> P () -> P (Leaf code (Tree code)) +strikethrough code = fmap Strikethrough . emphasis '~' code + +namedLink :: (Annotated code) => (P () -> P code) -> P () -> P (Leaf code (Tree code)) +namedLink code docClose = + P.label "hyperlink (example: [link name](https://destination.com))" do + _ <- lit "[" + p <- spaced . leafy code . 
void $ char ']' + _ <- lit "]" + _ <- lit "(" + target <- group $ fmap pure link <|> some' (transclude code <|> word (docClose <|> void (char ')'))) + _ <- lit ")" + pure $ NamedLink (wrap' $ Paragraph p) target + +sp :: P String +sp = P.try $ do + spaces <- P.takeWhile1P (Just "space") isSpace + close <- P.optional (P.lookAhead (lit "}}")) + case close of + Nothing -> guard $ ok spaces + Just _ -> pure () + pure spaces + where + ok s = length [() | '\n' <- s] < 2 + +spaced :: P a -> P (NonEmpty a) +spaced p = some' (p <* P.optional sp) - spaced p = some' (p <* P.optional sp) - leafies close = wrap' . Paragraph <$> spaced (leafy close) +-- | Not an actual node, but this pattern is referenced in multiple places +list :: (Annotated code) => (P () -> P code) -> P () -> P (Top code (Tree code)) +list code docClose = bulletedList code docClose <|> numberedList code docClose - list = bulletedList <|> numberedList +listSep :: P () +listSep = P.try $ newline *> nonNewlineSpaces *> P.lookAhead (void bulletedStart <|> void numberedStart) - bulletedList = wrap' . BulletedList <$> sepBy1' bullet listSep - numberedList = wrap' . NumberedList <$> sepBy1' numberedItem listSep +bulletedStart :: P (Int, [a]) +bulletedStart = P.try $ do + r <- listItemStart $ [] <$ P.satisfy bulletChar + P.lookAhead (P.satisfy isSpace) + pure r + where + bulletChar ch = ch == '*' || ch == '-' || ch == '+' + +listItemStart :: P a -> P (Int, a) +listItemStart gutter = P.try $ do + nonNewlineSpaces + col <- column <$> posP + parentCol <- S.gets parentListColumn + guard (col > parentCol) + (col,) <$> gutter + +numberedStart :: P (Int, Token Word64) +numberedStart = listItemStart $ P.try (tokenP $ LP.decimal <* lit ".") + +-- | FIXME: This should take a @`P` a@ +numberedList :: (Annotated code) => (P () -> P code) -> P () -> P (Top code (Tree code)) +numberedList code docClose = NumberedList <$> sepBy1' numberedItem listSep + where + numberedItem = P.label "numbered list (examples: 1. item1, 8. start numbering at '8')" do + (col, s) <- numberedStart + (s,) <$> column' code docClose col - listSep = P.try $ newline *> nonNewlineSpaces *> P.lookAhead (void bulletedStart <|> void numberedStart) +-- | FIXME: This should take a @`P` a@ +bulletedList :: (Annotated code) => (P () -> P code) -> P () -> P (Top code (Tree code)) +bulletedList code docClose = BulletedList <$> sepBy1' bullet listSep + where + bullet = P.label "bullet (examples: * item1, - item2)" do + (col, _) <- bulletedStart + column' code docClose col - bulletedStart = P.try $ do - r <- listItemStart' $ [] <$ P.satisfy bulletChar - P.lookAhead (P.satisfy isSpace) - pure r - where - bulletChar ch = ch == '*' || ch == '-' || ch == '+' - - listItemStart' :: P a -> P (Int, a) - listItemStart' gutter = P.try $ do - nonNewlineSpaces - col <- column <$> posP - parentCol <- S.gets parentListColumn - guard (col > parentCol) - (col,) <$> gutter - - numberedStart = - listItemStart' $ P.try (tokenP $ LP.decimal <* lit ".") - - listItemParagraph = fmap (wrap' . Paragraph) $ do - col <- column <$> posP - some' (leaf <* sep col) +column' :: (Annotated code) => (P () -> P code) -> P () -> Int -> P (Column (Tree code)) +column' code docClose col = + Column . 
wrap' + <$> (nonNewlineSpaces *> listItemParagraph) + <*> local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> fmap wrap' (list code docClose)) + where + listItemParagraph = + Paragraph <$> do + col <- column <$> posP + some' (leafy code docClose <* sep col) where -- Trickiness here to support hard line breaks inside of -- a bulleted list, so for instance this parses as expected: @@ -323,55 +406,48 @@ untitledSection code docClose = UntitledSection <$> P.many (sectionElem <* CP.sp (P.notFollowedBy $ void numberedStart <|> void bulletedStart) pure () - numberedItem = P.label msg $ do - (col, s) <- numberedStart - (s,) - <$> ( fmap (uncurry Column) $ do - p <- nonNewlineSpaces *> listItemParagraph - subList <- - local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> list) - pure (p, subList) - ) - where - msg = "numbered list (examples: 1. item1, 8. start numbering at '8')" +newline :: P String +newline = P.label "newline" $ lit "\n" <|> lit "\r\n" - bullet = fmap (uncurry Column) . P.label "bullet (examples: * item1, - item2)" $ do - (col, _) <- bulletedStart - p <- nonNewlineSpaces *> listItemParagraph - subList <- - local - (\e -> e {parentListColumn = col}) - (P.optional $ listSep *> list) - pure (p, subList) - - newline = P.label "newline" $ lit "\n" <|> lit "\r\n" - - -- ## Section title - -- - -- A paragraph under this section. - -- Part of the same paragraph. Blanklines separate paragraphs. - -- - -- ### A subsection title - -- - -- A paragraph under this subsection. - - -- # A section title (not a subsection) - section :: P (Tree code) - section = fmap (wrap' . uncurry Section) $ do - ns <- S.gets parentSections - hashes <- P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp - title <- paragraph <* CP.space - let m = length hashes + head ns - body <- - local (\env -> env {parentSections = (m : (tail ns))}) $ - P.many (sectionElem <* CP.space) - pure $ (title, body) - - wrap' :: Top code (Tree code) -> Tree code - wrap' doc = ann doc :< doc - - wrapSimple2 :: (Annotated a, Annotated b) => (a -> b -> Top code (Tree code)) -> a -> b -> Tree code - wrapSimple2 fn a b = ann a <> ann b :< fn a b +-- | +-- +-- > ## Section title +-- > +-- > A paragraph under this section. +-- > Part of the same paragraph. Blanklines separate paragraphs. +-- > +-- > ### A subsection title +-- > +-- > A paragraph under this subsection. +-- > +-- > # A section title (not a subsection) +section :: (Annotated code) => (P () -> P code) -> P () -> P (Top code (Tree code)) +section code docClose = do + ns <- S.gets parentSections + hashes <- P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp + title <- paragraph code docClose <* CP.space + let m = length hashes + head ns + body <- + local (\env -> env {parentSections = (m : (tail ns))}) $ + P.many (sectionElem code docClose <* CP.space) + pure $ Section (wrap' title) body + +-- | Not an actual node, but this pattern is referenced in multiple places +embedLink :: P EmbedLink +embedLink = embedTypeLink <|> embedTermLink + +-- | FIXME: This should just take a @`P` code@ and @`P` a@. +group :: P (NonEmpty (Leaf code a)) -> P (Leaf code a) +group = fmap Group . 
join + +-- | FIXME: This should just take a @`P` a@ +join :: P (NonEmpty a) -> P (Join a) +join = fmap Join + +-- * utility functions + +wrap' :: (Annotated code) => Top code (Tree code) -> Tree code +wrap' doc = ann doc :< doc -- | If it's a multi-line verbatim block we trim any whitespace representing -- indentation from the pretty-printer. @@ -425,7 +501,7 @@ trimIndentFromVerbatimBlock leadingSpaces txt = fromMaybe txt $ do then Just "" else Nothing --- Trim leading/trailing whitespace from around delimiters, e.g. +-- | Trim leading/trailing whitespace from around delimiters, e.g. -- -- {{ -- '''___ <- whitespace here including newline From a6528ac351c1b8f7e51363040d2f710e75a4a1ee Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Fri, 26 Jul 2024 22:56:22 -0600 Subject: [PATCH 18/22] Generalize the Doc parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is now completely[^1] independent of the Unison language. The parser takes a few parsers as arguments: one for identifiers, one for code, and one to indicate the end of the Doc block. [^1]: There is one last bit to be removed in the next commit – Doc still looks for `type` or `ability` to identify type links. --- .../src/Unison/Syntax/TermParser.hs | 16 +- unison-syntax/src/Unison/Parser/Ann.hs | 4 + unison-syntax/src/Unison/Syntax/Lexer.hs | 170 +------- .../src/Unison/Syntax/Lexer/Token.hs | 6 +- .../src/Unison/Syntax/Lexer/Unison.hs | 117 +++++- unison-syntax/src/Unison/Syntax/Parser.hs | 2 +- unison-syntax/src/Unison/Syntax/Parser/Doc.hs | 364 +++++++++++------- .../src/Unison/Syntax/Parser/Doc/Data.hs | 72 ++-- 8 files changed, 412 insertions(+), 339 deletions(-) diff --git a/parser-typechecker/src/Unison/Syntax/TermParser.hs b/parser-typechecker/src/Unison/Syntax/TermParser.hs index 4c3069b9ff..8d0195410a 100644 --- a/parser-typechecker/src/Unison/Syntax/TermParser.hs +++ b/parser-typechecker/src/Unison/Syntax/TermParser.hs @@ -530,7 +530,7 @@ doc2Block = do docUntitledSection ann (Doc.UntitledSection tops) = Term.app ann (f ann "UntitledSection") $ Term.list (gann tops) tops - docTop :: Doc.Top [L.Token L.Lexeme] (Term v Ann) -> TermP v m + docTop :: Doc.Top (HQ'.HashQualified Name) [L.Token L.Lexeme] (Term v Ann) -> TermP v m docTop d = case d of Doc.Section title body -> pure $ Term.apps' (f d "Section") [title, Term.list (gann body) body] Doc.Eval code -> @@ -558,7 +558,7 @@ doc2Block = do docColumn d@(Doc.Column para sublist) = Term.app (gann d) (f d "Column") . Term.list (gann d) $ para : toList sublist - docLeaf :: Doc.Leaf [L.Token L.Lexeme] (Term v Ann) -> TermP v m + docLeaf :: Doc.Leaf (HQ'.HashQualified Name) [L.Token L.Lexeme] (Term v Ann) -> TermP v m docLeaf d = case d of Doc.Link link -> Term.app (gann d) (f d "Link") <$> docEmbedLink link Doc.NamedLink para target -> Term.apps' (f d "NamedLink") . (para :) . pure <$> docLeaf (vacuous target) @@ -590,7 +590,7 @@ doc2Block = do Term.app (gann d) (f d "Group") . Term.app (gann d) (f d "Join") . Term.list (ann leaves) . toList <$> traverse docLeaf leaves - docEmbedLink :: Doc.EmbedLink -> TermP v m + docEmbedLink :: Doc.EmbedLink (HQ'.HashQualified Name) -> TermP v m docEmbedLink d = case d of Doc.EmbedTypeLink ident -> Term.app (gann d) (f d "EmbedTypeLink") . Term.typeLink (ann d) . L.payload @@ -598,17 +598,21 @@ doc2Block = do Doc.EmbedTermLink ident -> Term.app (gann d) (f d "EmbedTermLink") . 
addDelay <$> resolveHashQualified (HQ'.toHQ <$> ident) - docSourceElement :: Doc.SourceElement (Doc.Leaf [L.Token L.Lexeme] Void) -> TermP v m + docSourceElement :: + Doc.SourceElement (HQ'.HashQualified Name) (Doc.Leaf (HQ'.HashQualified Name) [L.Token L.Lexeme] Void) -> + TermP v m docSourceElement d@(Doc.SourceElement link anns) = do link' <- docEmbedLink link anns' <- traverse docEmbedAnnotation anns pure $ Term.apps' (f d "SourceElement") [link', Term.list (ann anns) anns'] - docEmbedSignatureLink :: Doc.EmbedSignatureLink -> TermP v m + docEmbedSignatureLink :: Doc.EmbedSignatureLink (HQ'.HashQualified Name) -> TermP v m docEmbedSignatureLink d@(Doc.EmbedSignatureLink ident) = Term.app (gann d) (f d "EmbedSignatureLink") . addDelay <$> resolveHashQualified (HQ'.toHQ <$> ident) - docEmbedAnnotation :: Doc.EmbedAnnotation (Doc.Leaf [L.Token L.Lexeme] Void) -> TermP v m + docEmbedAnnotation :: + Doc.EmbedAnnotation (HQ'.HashQualified Name) (Doc.Leaf (HQ'.HashQualified Name) [L.Token L.Lexeme] Void) -> + TermP v m docEmbedAnnotation d@(Doc.EmbedAnnotation a) = -- This is the only place I’m not sure we’re doing the right thing. In the lexer, this can be an identifier or a -- DocLeaf, but here it could be either /text/ or a Doc element. And I don’t think there’s any way the lexemes diff --git a/unison-syntax/src/Unison/Parser/Ann.hs b/unison-syntax/src/Unison/Parser/Ann.hs index 961bbcb30c..e4b361d148 100644 --- a/unison-syntax/src/Unison/Parser/Ann.hs +++ b/unison-syntax/src/Unison/Parser/Ann.hs @@ -4,6 +4,7 @@ module Unison.Parser.Ann where +import Control.Comonad.Cofree (Cofree ((:<))) import Data.List.NonEmpty (NonEmpty) import Data.Void (absurd) import Unison.Lexer.Pos qualified as L @@ -100,3 +101,6 @@ instance (Annotated a) => Annotated (Maybe a) where instance Annotated Void where ann = absurd + +instance (Annotated a) => Annotated (Cofree f a) where + ann (a :< _) = ann a diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index cfd932cd7e..5e6d18293f 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -1,12 +1,8 @@ -{-# LANGUAGE TemplateHaskell #-} -{-# OPTIONS_GHC -Wno-orphans #-} - -- | This currently contains a mix of general lexing utilities and identifier-y lexers. 
module Unison.Syntax.Lexer ( Token (..), Line, Column, - Err (..), Pos (..), touches, @@ -15,16 +11,10 @@ module Unison.Syntax.Lexer wordyIdStartChar, symbolyIdChar, - -- * new exports - BlockName, - Layout, - ParsingEnv (..), - P, + -- * other utils local, - parseFailure, space, lit, - err, commitAfter2, (<+>), some', @@ -32,99 +22,35 @@ module Unison.Syntax.Lexer sepBy1', separated, wordySep, - identifierP, - wordyIdSegP, - shortHashP, - topBlockName, pop, typeOrAbilityAlt, - typeModifiersAlt, inc, ) where -import Control.Comonad.Cofree (Cofree ((:<))) import Control.Monad.State qualified as S import Data.Char (isSpace) import Data.List.NonEmpty (NonEmpty ((:|))) -import Data.Text qualified as Text import Text.Megaparsec qualified as P import Text.Megaparsec.Char qualified as CP import Text.Megaparsec.Char.Lexer qualified as LP -import Text.Megaparsec.Error qualified as EP -import Text.Megaparsec.Internal qualified as PI -import Unison.HashQualifiedPrime qualified as HQ' import Unison.Lexer.Pos (Column, Line, Pos (Pos), column, line) -import Unison.Name (Name) -import Unison.NameSegment (NameSegment) -import Unison.Parser.Ann (Annotated (..)) import Unison.Prelude -import Unison.ShortHash (ShortHash) -import Unison.Syntax.Lexer.Token (Token (..), posP) -import Unison.Syntax.Name qualified as Name (nameP) +import Unison.Syntax.Lexer.Token (Token (..)) import Unison.Syntax.NameSegment (symbolyIdChar, wordyIdChar, wordyIdStartChar) -import Unison.Syntax.NameSegment qualified as NameSegment (ParseErr (..), wordyP) -import Unison.Syntax.ReservedWords (typeModifiers, typeOrAbility) -import Unison.Syntax.ShortHash qualified as ShortHash (shortHashP) - -instance (Annotated a) => Annotated (Cofree f a) where - ann (a :< _) = ann a - -type BlockName = String - -type Layout = [(BlockName, Column)] - -data ParsingEnv = ParsingEnv - { -- | layout stack - layout :: !Layout, - -- | `Just b` if a block of type `b` is being opened - opening :: Maybe BlockName, - -- | are we inside a construct that uses layout? - inLayout :: Bool, - -- | Use a stack to remember the parent section and allow docSections within docSections. - -- - 1 means we are inside a # Heading 1 - parentSections :: [Int], - -- | 4 means we are inside a list starting at the fourth column - parentListColumn :: Int - } - deriving (Show) - -type P = P.ParsecT (Token Err) String (S.State ParsingEnv) - -local :: (ParsingEnv -> ParsingEnv) -> P a -> P a +import Unison.Syntax.ReservedWords (typeOrAbility) + +local :: (P.MonadParsec e s' m, S.MonadState s m) => (s -> s) -> m a -> m a local f p = do env0 <- S.get S.put (f env0) e <- P.observing p S.put env0 case e of - Left e -> parseFailure e + Left e -> P.parseError e Right a -> pure a -parseFailure :: EP.ParseError [Char] (Token Err) -> P a -parseFailure e = PI.ParsecT $ \s _ _ _ eerr -> eerr e s - -data Err - = ReservedWordyId String - | InvalidSymbolyId String - | ReservedSymbolyId String - | InvalidShortHash String - | InvalidBytesLiteral String - | InvalidHexLiteral - | InvalidOctalLiteral - | Both Err Err - | MissingFractional String -- ex `1.` rather than `1.04` - | MissingExponent String -- ex `1e` rather than `1e3` - | UnknownLexeme - | TextLiteralMissingClosingQuote String - | InvalidEscapeCharacter Char - | LayoutError - | CloseWithoutMatchingOpen String String -- open, close - | UnexpectedDelimiter String - | UnexpectedTokens String -- Catch-all for all other lexer errors, representing some unexpected tokens. 
- deriving stock (Eq, Ord, Show) -- richer algebra - -space :: P () +space :: (P.MonadParsec e String m) => m () space = LP.space CP.space1 @@ -133,92 +59,42 @@ space = where fold = P.try $ lit "---" *> P.takeRest *> pure () -lit :: String -> P String +lit :: (P.MonadParsec e String m) => String -> m String lit = P.try . LP.symbol (pure ()) --- Committed failure -err :: Pos -> Err -> P x -err start t = do - stop <- posP - -- This consumes a character and therefore produces committed failure, - -- so `err s t <|> p2` won't try `p2` - _ <- void P.anySingle <|> P.eof - P.customFailure (Token t start stop) - -{- -commitAfter :: P a -> (a -> P b) -> P b -commitAfter a f = do - a <- P.try a - f a --} - -commitAfter2 :: P a -> P b -> (a -> b -> P c) -> P c +commitAfter2 :: (P.MonadParsec e s m) => m a -> m b -> (a -> b -> m c) -> m c commitAfter2 a b f = do (a, b) <- P.try $ liftA2 (,) a b f a b infixl 2 <+> -(<+>) :: (Monoid a) => P a -> P a -> P a -p1 <+> p2 = do a1 <- p1; a2 <- p2; pure (a1 <> a2) +(<+>) :: (Applicative f, Monoid a) => f a -> f a -> f a +(<+>) = liftA2 (<>) -- | Like `P.some`, but returns an actual `NonEmpty`. -some' :: P a -> P (NonEmpty a) +some' :: (P.MonadParsec e s m) => m a -> m (NonEmpty a) some' p = liftA2 (:|) p $ many p -- | Like `P.someTill`, but returns an actual `NonEmpty`. -someTill' :: P a -> P end -> P (NonEmpty a) +someTill' :: (P.MonadParsec e s m) => m a -> m end -> m (NonEmpty a) someTill' p end = liftA2 (:|) p $ P.manyTill p end -- | Like `P.sepBy1`, but returns an actual `NonEmpty`. -sepBy1' :: P a -> P sep -> P (NonEmpty a) +sepBy1' :: (P.MonadParsec e s m) => m a -> m sep -> m (NonEmpty a) sepBy1' p sep = liftA2 (:|) p . many $ sep *> p -separated :: (Char -> Bool) -> P a -> P a +separated :: (P.MonadParsec e s m) => (P.Token s -> Bool) -> m a -> m a separated ok p = P.try $ p <* P.lookAhead (void (P.satisfy ok) <|> P.eof) wordySep :: Char -> Bool wordySep c = isSpace c || not (wordyIdChar c) --- An identifier is a non-empty dot-delimited list of segments, with an optional leading dot, where each segment is --- symboly (comprised of only symbols) or wordy (comprised of only alphanums). --- --- Examples: --- --- foo --- .foo.++.doc --- `.`.`..` (This is a two-segment identifier without a leading dot: "." then "..") -identifierP :: P (HQ'.HashQualified Name) -identifierP = do - P.label "identifier (ex: abba1, snake_case, .foo.bar#xyz, .foo.++#xyz, or 🌻)" do - name <- PI.withParsecT (fmap nameSegmentParseErrToErr) Name.nameP - P.optional shortHashP <&> \case - Nothing -> HQ'.fromName name - Just shorthash -> HQ'.HashQualified name shorthash - where - nameSegmentParseErrToErr :: NameSegment.ParseErr -> Err - nameSegmentParseErrToErr = \case - NameSegment.ReservedOperator s -> ReservedSymbolyId (Text.unpack s) - NameSegment.ReservedWord s -> ReservedWordyId (Text.unpack s) - -wordyIdSegP :: P NameSegment -wordyIdSegP = - PI.withParsecT (fmap (ReservedWordyId . Text.unpack)) NameSegment.wordyP - -shortHashP :: P ShortHash -shortHashP = - PI.withParsecT (fmap (InvalidShortHash . 
Text.unpack)) ShortHash.shortHashP - -- `True` if the tokens are adjacent, with no space separating the two touches :: Token a -> Token b -> Bool touches (end -> t) (start -> t2) = line t == line t2 && column t == column t2 --- todo: make Layout a NonEmpty -topBlockName :: Layout -> Maybe BlockName -topBlockName [] = Nothing -topBlockName ((name, _) : _) = Just name - pop :: [a] -> [a] pop = drop 1 @@ -226,21 +102,5 @@ typeOrAbilityAlt :: (Alternative f) => (Text -> f a) -> f a typeOrAbilityAlt f = asum $ map f (toList typeOrAbility) -typeModifiersAlt :: (Alternative f) => (Text -> f a) -> f a -typeModifiersAlt f = - asum $ map f (toList typeModifiers) - inc :: Pos -> Pos inc (Pos line col) = Pos line (col + 1) - -instance EP.ShowErrorComponent (Token Err) where - showErrorComponent (Token err _ _) = go err - where - go = \case - UnexpectedTokens msg -> msg - CloseWithoutMatchingOpen open close -> "I found a closing " <> close <> " but no matching " <> open <> "." - Both e1 e2 -> go e1 <> "\n" <> go e2 - LayoutError -> "Indentation error" - TextLiteralMissingClosingQuote s -> "This text literal missing a closing quote: " <> excerpt s - e -> show e - excerpt s = if length s < 15 then s else take 15 s <> "..." diff --git a/unison-syntax/src/Unison/Syntax/Lexer/Token.hs b/unison-syntax/src/Unison/Syntax/Lexer/Token.hs index e29f276c5e..f778dd66c0 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer/Token.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer/Token.hs @@ -6,7 +6,7 @@ module Unison.Syntax.Lexer.Token where import Data.Text qualified as Text -import Text.Megaparsec (ParsecT, TraversableStream) +import Text.Megaparsec (MonadParsec, TraversableStream) import Text.Megaparsec qualified as P import Unison.Lexer.Pos (Pos (Pos)) import Unison.Parser.Ann (Ann (Ann), Annotated (..)) @@ -43,14 +43,14 @@ instance Applicative Token where instance P.ShowErrorComponent (Token Text) where showErrorComponent = Text.unpack . 
payload -tokenP :: (Ord e, TraversableStream s) => ParsecT e s m a -> ParsecT e s m (Token a) +tokenP :: (Ord e, TraversableStream s, MonadParsec e s m) => m a -> m (Token a) tokenP p = do start <- posP payload <- p end <- posP pure Token {payload, start, end} -posP :: (Ord e, TraversableStream s) => ParsecT e s m Pos +posP :: (Ord e, TraversableStream s, MonadParsec e s m) => m Pos posP = do p <- P.getSourcePos pure (Pos (P.unPos (P.sourceLine p)) (P.unPos (P.sourceColumn p))) diff --git a/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs b/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs index 86e75b1afe..98112c2124 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs @@ -1,6 +1,3 @@ -{-# LANGUAGE TemplateHaskell #-} -{-# OPTIONS_GHC -Wno-orphans #-} - module Unison.Syntax.Lexer.Unison ( Token (..), Line, @@ -44,9 +41,11 @@ import Text.Megaparsec.Char (char) import Text.Megaparsec.Char qualified as CP import Text.Megaparsec.Char.Lexer qualified as LP import Text.Megaparsec.Error qualified as EP +import Text.Megaparsec.Internal qualified as PI import Unison.HashQualifiedPrime qualified as HQ' import Unison.Name (Name) import Unison.Name qualified as Name +import Unison.NameSegment (NameSegment) import Unison.NameSegment qualified as NameSegment (docSegment) import Unison.NameSegment.Internal qualified as NameSegment import Unison.Prelude @@ -55,13 +54,51 @@ import Unison.ShortHash qualified as SH import Unison.Syntax.HashQualifiedPrime qualified as HQ' (toText) import Unison.Syntax.Lexer import Unison.Syntax.Lexer.Token (posP, tokenP) -import Unison.Syntax.Name qualified as Name (isSymboly, toText, unsafeParseText) +import Unison.Syntax.Name qualified as Name (isSymboly, nameP, toText, unsafeParseText) +import Unison.Syntax.NameSegment qualified as NameSegment (ParseErr (..), wordyP) import Unison.Syntax.Parser.Doc qualified as Doc import Unison.Syntax.Parser.Doc.Data qualified as Doc import Unison.Syntax.ReservedWords (delimiters, typeModifiers, typeOrAbility) +import Unison.Syntax.ShortHash qualified as ShortHash (shortHashP) import Unison.Util.Bytes qualified as Bytes import Unison.Util.Monoid (intercalateMap) +type BlockName = String + +type Layout = [(BlockName, Column)] + +data ParsingEnv = ParsingEnv + { -- | layout stack + layout :: !Layout, + -- | `Just b` if a block of type `b` is being opened + opening :: Maybe BlockName, + -- | are we inside a construct that uses layout? + inLayout :: Bool + } + deriving (Show) + +type P = P.ParsecT (Token Err) String (S.State ParsingEnv) + +data Err + = ReservedWordyId String + | InvalidSymbolyId String + | ReservedSymbolyId String + | InvalidShortHash String + | InvalidBytesLiteral String + | InvalidHexLiteral + | InvalidOctalLiteral + | Both Err Err + | MissingFractional String -- ex `1.` rather than `1.04` + | MissingExponent String -- ex `1e` rather than `1e3` + | UnknownLexeme + | TextLiteralMissingClosingQuote String + | InvalidEscapeCharacter Char + | LayoutError + | CloseWithoutMatchingOpen String String -- open, close + | UnexpectedDelimiter String + | UnexpectedTokens String -- Catch-all for all other lexer errors, representing some unexpected tokens. 
+ deriving stock (Eq, Ord, Show) -- richer algebra + -- Design principle: -- `[Lexeme]` should be sufficient information for parsing without -- further knowledge of spacing or indentation levels @@ -80,11 +117,20 @@ data Lexeme | Bytes Bytes.Bytes -- bytes literals | Hash ShortHash -- hash literals | Err Err - | Doc (Doc.UntitledSection (Doc.Tree [Token Lexeme])) + | Doc (Doc.UntitledSection (Doc.Tree (HQ'.HashQualified Name) [Token Lexeme])) deriving stock (Eq, Show, Ord) type IsVirtual = Bool -- is it a virtual semi or an actual semi? +-- Committed failure +err :: (P.TraversableStream s, P.MonadParsec (Token Err) s m) => Pos -> Err -> m x +err start t = do + stop <- posP + -- This consumes a character and therefore produces committed failure, + -- so `err s t <|> p2` won't try `p2` + _ <- void P.anySingle <|> P.eof + P.customFailure (Token t start stop) + token :: P Lexeme -> P [Token Lexeme] token = token' (\a start end -> [Token a start end]) @@ -230,7 +276,7 @@ lexer scope rem = (P.EndOfInput) -> "end of input" customErrs es = [Err <$> e | P.ErrorCustom e <- toList es] toPos (P.SourcePos _ line col) = Pos (P.unPos line) (P.unPos col) - env0 = ParsingEnv [] (Just scope) True [0] 0 + env0 = ParsingEnv [] (Just scope) True -- | hacky postprocessing pass to do some cleanup of stuff that's annoying to -- fix without adding more state to the lexer: @@ -306,14 +352,9 @@ doc2 = do env0 <- S.get -- Disable layout while parsing the doc block and reset the section number (docTok, closeTok) <- local - ( \env -> - env - { inLayout = False, - parentSections = 0 : (parentSections env0) - } - ) + (\env -> env {inLayout = False}) do - body <- Doc.untitledSection . Doc.sectionElem lexemes' . P.lookAhead $ () <$ lit "}}" + body <- Doc.doc identifierP lexemes' . P.lookAhead $ () <$ lit "}}" closeStart <- posP lit "}}" closeEnd <- posP @@ -674,6 +715,27 @@ tok p = do token <- tokenP p pure [token] +-- An identifier is a non-empty dot-delimited list of segments, with an optional leading dot, where each segment is +-- symboly (comprised of only symbols) or wordy (comprised of only alphanums). +-- +-- Examples: +-- +-- foo +-- .foo.++.doc +-- `.`.`..` (This is a two-segment identifier without a leading dot: "." then "..") +identifierP :: (Monad m) => P.ParsecT (Token Err) String m (HQ'.HashQualified Name) +identifierP = do + P.label "identifier (ex: abba1, snake_case, .foo.bar#xyz, .foo.++#xyz, or 🌻)" do + name <- PI.withParsecT (fmap nameSegmentParseErrToErr) Name.nameP + P.optional shortHashP <&> \case + Nothing -> HQ'.fromName name + Just shorthash -> HQ'.HashQualified name shorthash + where + nameSegmentParseErrToErr :: NameSegment.ParseErr -> Err + nameSegmentParseErrToErr = \case + NameSegment.ReservedOperator s -> ReservedSymbolyId (Text.unpack s) + NameSegment.ReservedWord s -> ReservedWordyId (Text.unpack s) + -- An identifier is a non-empty dot-delimited list of segments, with an optional leading dot, where each segment is -- symboly (comprised of only symbols) or wordy (comprised of only alphanums). -- @@ -691,6 +753,14 @@ identifierLexeme name = then SymbolyId name else WordyId name +wordyIdSegP :: P.ParsecT (Token Err) String m NameSegment +wordyIdSegP = + PI.withParsecT (fmap (ReservedWordyId . Text.unpack)) NameSegment.wordyP + +shortHashP :: P.ParsecT (Token Err) String m ShortHash +shortHashP = + PI.withParsecT (fmap (InvalidShortHash . 
Text.unpack)) ShortHash.shortHashP
+
 blockDelimiter :: [String] -> P String -> P [Token Lexeme]
 blockDelimiter open closeP = do
   Token close pos1 pos2 <- tokenP closeP
@@ -739,6 +809,11 @@ top :: Layout -> Column
 top [] = 1
 top ((_, h) : _) = h
 
+-- todo: make Layout a NonEmpty
+topBlockName :: Layout -> Maybe BlockName
+topBlockName [] = Nothing
+topBlockName ((name, _) : _) = Just name
+
 topLeftCorner :: Pos
 topLeftCorner = Pos 1 1
 
@@ -855,6 +930,10 @@ showEscapeChar :: Char -> Maybe Char
 showEscapeChar c =
   Map.lookup c (Map.fromList [(x, y) | (y, x) <- escapeChars])
 
+typeModifiersAlt :: (Alternative f) => (Text -> f a) -> f a
+typeModifiersAlt f =
+  asum $ map f (toList typeModifiers)
+
 debugFilePreParse :: FilePath -> IO ()
 debugFilePreParse file = putStrLn . debugPreParse . preParse . lexer file . Text.unpack =<< readUtf8 file
 
@@ -877,6 +956,18 @@ debugPreParse ts = show $ payload <$> ts
 debugPreParse' :: String -> String
 debugPreParse' = debugPreParse . preParse . lexer "debugPreParse"
 
+instance EP.ShowErrorComponent (Token Err) where
+  showErrorComponent (Token err _ _) = go err
+    where
+      go = \case
+        UnexpectedTokens msg -> msg
+        CloseWithoutMatchingOpen open close -> "I found a closing " <> close <> " but no matching " <> open <> "."
+        Both e1 e2 -> go e1 <> "\n" <> go e2
+        LayoutError -> "Indentation error"
+        TextLiteralMissingClosingQuote s -> "This text literal is missing a closing quote: " <> excerpt s
+        e -> show e
+      excerpt s = if length s < 15 then s else take 15 s <> "..."
+
 instance P.VisualStream [Token Lexeme] where
   showTokens _ xs =
     join . Nel.toList . S.evalState (traverse go xs) . end $ Nel.head xs
diff --git a/unison-syntax/src/Unison/Syntax/Parser.hs b/unison-syntax/src/Unison/Syntax/Parser.hs
index 098caab1b6..fac55142de 100644
--- a/unison-syntax/src/Unison/Syntax/Parser.hs
+++ b/unison-syntax/src/Unison/Syntax/Parser.hs
@@ -401,7 +401,7 @@ string = queryToken getString
     getString (L.Textual s) = Just (Text.pack s)
     getString _ = Nothing
 
-doc :: (Ord v) => P v m (L.Token (Doc.UntitledSection (Doc.Tree [L.Token L.Lexeme])))
+doc :: (Ord v) => P v m (L.Token (Doc.UntitledSection (Doc.Tree (HQ'.HashQualified Name) [L.Token L.Lexeme])))
 doc =
   queryToken \case
     L.Doc d -> pure d
     _ -> Nothing
diff --git a/unison-syntax/src/Unison/Syntax/Parser/Doc.hs b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs
index 99122bd5ff..4009c30dec 100644
--- a/unison-syntax/src/Unison/Syntax/Parser/Doc.hs
+++ b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs
@@ -1,5 +1,17 @@
+-- | The parser for Unison’s @Doc@ syntax.
+--
+-- This is completely independent of the Unison language, and requires a few parsers to be passed in to provide a
+-- parser for @Doc@ applied to any host language:
+--
+-- - an identifier parser
+-- - a code parser (that accepts a termination parser)
+-- - a termination parser (only used for lookahead), for this parser to know when to give up
+--
+-- Each of those parsers is expected to satisfy @(`Ord` e, `P.MonadParsec` e `String` m)@. 
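+--
+-- A rough sketch of the wiring (hedged: @hostDoc@ is an illustrative name, not part of this patch), mirroring the
+-- call made from "Unison.Syntax.Lexer.Unison" above, where Unison terminates @Doc@ blocks with @}}@:
+--
+-- > -- a Doc parser for a host language whose identifier and code parsers are
+-- > -- identifierP and lexemes', and whose Doc blocks close with "}}"
+-- > hostDoc = doc identifierP lexemes' (P.lookAhead . void $ lit "}}")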
module Unison.Syntax.Parser.Doc ( Tree, + initialState, + doc, untitledSection, sectionElem, leaf, @@ -55,10 +67,7 @@ import Text.Megaparsec.Char.Lexer qualified as LP import Unison.Parser.Ann (Ann, Annotated (..)) import Unison.Prelude hiding (join) import Unison.Syntax.Lexer - ( P, - ParsingEnv (..), - column, - identifierP, + ( column, line, lit, local, @@ -73,28 +82,58 @@ import Unison.Syntax.Lexer import Unison.Syntax.Lexer.Token (Token (Token), posP, tokenP) import Unison.Syntax.Parser.Doc.Data -type Tree code = Cofree (Top code) Ann +type Tree ident code = Cofree (Top ident code) Ann + +data ParsingEnv = ParsingEnv + { -- | Use a stack to remember the parent section and allow docSections within docSections. + -- - 1 means we are inside a # Heading 1 + parentSections :: [Int], + -- | 4 means we are inside a list starting at the fourth column + parentListColumn :: Int + } + deriving (Show) + +initialState :: ParsingEnv +initialState = ParsingEnv [0] 0 + +doc :: + (Ord e, P.MonadParsec e String m, Annotated code) => + m ident -> + (m () -> m code) -> + m () -> + m (UntitledSection (Tree ident code)) +doc ident code = flip S.evalStateT initialState . untitledSection . sectionElem ident code -- | This is the actual `Doc` lexer. Unlike `doc2`, it doesn’t do any Unison-side lexing (i.e., it doesn’t know that -- Unison wraps `Doc` literals in `}}`). -untitledSection :: P a -> P (UntitledSection a) +untitledSection :: (P.MonadParsec e String m) => m a -> m (UntitledSection a) untitledSection a = UntitledSection <$> P.many (a <* CP.space) -wordyKw :: String -> P String +wordyKw :: (P.MonadParsec e String m) => String -> m String wordyKw kw = separated wordySep (lit kw) -sectionElem :: (Annotated code) => (P () -> P code) -> P () -> P (Tree code) -sectionElem code docClose = +sectionElem :: + (Ord e, P.MonadParsec e String m, Annotated code) => + m ident -> + (m () -> m code) -> + m () -> + S.StateT ParsingEnv m (Tree ident code) +sectionElem ident code docClose = fmap wrap' $ - section code docClose - <|> P.label "block eval (syntax: a fenced code block)" (eval code <|> exampleBlock code <|> codeBlock) - <|> list code docClose - <|> paragraph code docClose - -paragraph :: (Annotated code) => (P () -> P code) -> P () -> P (Top code (Tree code)) -paragraph code = fmap Paragraph . spaced . leafy code - -word :: P end -> P (Leaf code void) + section ident code docClose + <|> lift (P.label "block eval (syntax: a fenced code block)" (eval code <|> exampleBlock code <|> codeBlock)) + <|> list ident code docClose + <|> lift (paragraph ident code docClose) + +paragraph :: + (Ord e, P.MonadParsec e String m, Annotated code) => + m ident -> + (m () -> m code) -> + m () -> + m (Top ident code (Tree ident code)) +paragraph ident code = fmap Paragraph . spaced . leafy ident code + +word :: (Ord e, P.MonadParsec e String m) => m end -> m (Leaf ident code void) word closing = fmap Word . tokenP . P.try $ do let end = P.lookAhead $ void (P.satisfy isSpace) <|> void closing word <- P.manyTill (P.satisfy (\ch -> not (isSpace ch))) end @@ -103,43 +142,56 @@ word closing = fmap Word . tokenP . 
P.try $ do where reserved word = List.isPrefixOf "}}" word || all (== '#') word -leaf :: (Annotated code) => (P () -> P code) -> P () -> P (Leaf code (Tree code)) -leaf code closing = - do - link - <|> namedLink code closing - <|> example code - <|> transclude code - <|> bold code closing - <|> italic code closing - <|> strikethrough code closing +leaf :: + (Ord e, P.MonadParsec e String m, Annotated code) => + m ident -> + (m () -> m code) -> + m () -> + m (Leaf ident code (Tree ident code)) +leaf ident code closing = + link ident + <|> namedLink ident code closing + <|> example code + <|> transclude code + <|> bold ident code closing + <|> italic ident code closing + <|> strikethrough ident code closing <|> verbatim - <|> source code - <|> foldedSource code + <|> source ident code + <|> foldedSource ident code <|> evalInline code - <|> signatures - <|> signatureInline + <|> signatures ident + <|> signatureInline ident <|> word closing -leafy :: (Annotated code) => (P () -> P code) -> P () -> P (Leaf code (Tree code)) -leafy code closing = do - p <- leaf code closing - after <- P.optional . P.try $ leafy code closing +leafy :: + (Ord e, P.MonadParsec e String m, Annotated code) => + m ident -> + (m () -> m code) -> + m () -> + m (Leaf ident code (Tree ident code)) +leafy ident code closing = do + p <- leaf ident code closing + after <- P.optional . P.try $ leafy ident code closing case after of Nothing -> pure p Just after -> group . pure $ p :| pure after -comma :: P String +comma :: (P.MonadParsec e String m) => m String comma = lit "," <* CP.space -source :: (P () -> P code) -> P (Leaf code a) -source = fmap Source . (lit "@source" *>) . sourceElements +source :: (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m (Leaf ident code a) +source ident = fmap Source . (lit "@source" *>) . sourceElements ident -foldedSource :: (P () -> P code) -> P (Leaf code a) -foldedSource = fmap FoldedSource . (lit "@foldedSource" *>) . sourceElements +foldedSource :: (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m (Leaf ident code a) +foldedSource ident = fmap FoldedSource . (lit "@foldedSource" *>) . sourceElements ident -sourceElements :: (P () -> P code) -> P (NonEmpty (SourceElement (Leaf code Void))) -sourceElements code = do +sourceElements :: + (Ord e, P.MonadParsec e String m) => + m ident -> + (m () -> m code) -> + m (NonEmpty (SourceElement ident (Leaf ident code Void))) +sourceElements ident code = do _ <- (lit " {" <|> lit "{") *> CP.space s <- sepBy1' srcElem comma _ <- lit "}" @@ -147,49 +199,48 @@ sourceElements code = do where srcElem = SourceElement - <$> embedLink + <$> embedLink ident <*> ( fmap (fromMaybe []) . 
P.optional $ (lit "@") *> (CP.space *> annotations) ) where - annotation = fmap Left (tokenP identifierP) <|> fmap Right (transclude code) <* CP.space - annotations = - P.some (EmbedAnnotation <$> annotation) + annotation = fmap Left (tokenP ident) <|> fmap Right (transclude code) <* CP.space + annotations = P.some (EmbedAnnotation <$> annotation) -signatures :: P (Leaf code a) -signatures = fmap Signature $ do +signatures :: (Ord e, P.MonadParsec e String m) => m ident -> m (Leaf ident code a) +signatures ident = fmap Signature $ do _ <- (lit "@signatures" <|> lit "@signature") *> (lit " {" <|> lit "{") *> CP.space - s <- sepBy1' embedSignatureLink comma + s <- sepBy1' (embedSignatureLink ident) comma _ <- lit "}" pure s -signatureInline :: P (Leaf code a) -signatureInline = fmap SignatureInline $ do +signatureInline :: (Ord e, P.MonadParsec e String m) => m ident -> m (Leaf ident code a) +signatureInline ident = fmap SignatureInline $ do _ <- lit "@inlineSignature" *> (lit " {" <|> lit "{") *> CP.space - s <- embedSignatureLink + s <- embedSignatureLink ident _ <- lit "}" pure s -evalInline :: (P () -> P a1) -> P (Leaf a1 a2) +evalInline :: (P.MonadParsec e String m) => (m () -> m code) -> m (Leaf ident code a) evalInline code = fmap EvalInline $ do _ <- lit "@eval" *> (lit " {" <|> lit "{") *> CP.space let inlineEvalClose = void $ lit "}" s <- code inlineEvalClose pure s -embedTypeLink :: P EmbedLink -embedTypeLink = +embedTypeLink :: (Ord e, P.MonadParsec e String m) => m ident -> m (EmbedLink ident) +embedTypeLink ident = EmbedTypeLink <$> do _ <- typeOrAbilityAlt (wordyKw . Text.unpack) <* CP.space - tokenP identifierP <* CP.space + tokenP ident <* CP.space -embedTermLink :: P EmbedLink -embedTermLink = EmbedTermLink <$> tokenP identifierP <* CP.space +embedTermLink :: (Ord e, P.MonadParsec e String m) => m ident -> m (EmbedLink ident) +embedTermLink ident = EmbedTermLink <$> tokenP ident <* CP.space -embedSignatureLink :: P EmbedSignatureLink -embedSignatureLink = EmbedSignatureLink <$> tokenP identifierP <* CP.space +embedSignatureLink :: (Ord e, P.MonadParsec e String m) => m ident -> m (EmbedSignatureLink ident) +embedSignatureLink ident = EmbedSignatureLink <$> tokenP ident <* CP.space -verbatim :: P (Leaf code a) +verbatim :: (Ord e, P.MonadParsec e String m) => m (Leaf ident code a) verbatim = P.label "code (examples: ''**unformatted**'', `words` or '''_words_''')" $ do Token originalText start stop <- tokenP do @@ -199,21 +250,17 @@ verbatim = quotes <- tick <|> (lit "''" <+> P.takeWhileP Nothing (== '\'')) P.someTill P.anySingle (lit quotes) let isMultiLine = line start /= line stop - if isMultiLine - then do - let trimmed = (trimAroundDelimiters originalText) - let txt = trimIndentFromVerbatimBlock (column start - 1) trimmed - -- If it's a multi-line verbatim block we trim any whitespace representing - -- indentation from the pretty-printer. See 'trimIndentFromVerbatimBlock' - pure . Verbatim $ - Word $ - Token txt start stop - else - pure . Code $ - Word $ - Token originalText start stop - -example :: (P () -> P code) -> P (Leaf code void) + pure + if isMultiLine + then + let trimmed = (trimAroundDelimiters originalText) + txt = trimIndentFromVerbatimBlock (column start - 1) trimmed + in -- If it's a multi-line verbatim block we trim any whitespace representing + -- indentation from the pretty-printer. See 'trimIndentFromVerbatimBlock' + Verbatim . Word $ Token txt start stop + else Code . 
Word $ Token originalText start stop + +example :: (P.MonadParsec e String m) => (m () -> m code) -> m (Leaf ident code void) example code = P.label "inline code (examples: ``List.map f xs``, ``[1] :+ 2``)" $ fmap Example $ do @@ -223,20 +270,20 @@ example code = let end = void . lit $ replicate (n + 1) '`' CP.space *> code end -link :: P (Leaf a b) -link = P.label "link (examples: {type List}, {Nat.+})" $ Link <$> P.try (lit "{" *> embedLink <* lit "}") +link :: (Ord e, P.MonadParsec e String m) => m ident -> m (Leaf ident code a) +link ident = P.label "link (examples: {type List}, {Nat.+})" $ Link <$> P.try (lit "{" *> embedLink ident <* lit "}") -transclude :: (P () -> P code) -> P (Leaf code x) +transclude :: (P.MonadParsec e String m) => (m () -> m code) -> m (Leaf ident code a) transclude code = fmap Transclude . P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $ lit "{{" *> code (void $ lit "}}") -nonNewlineSpaces :: P String +nonNewlineSpaces :: (P.MonadParsec e String m) => m String nonNewlineSpaces = P.takeWhileP Nothing nonNewlineSpace where nonNewlineSpace ch = isSpace ch && ch /= '\n' && ch /= '\r' -eval :: (Annotated code) => (P () -> P code) -> P (Top code (Tree code)) +eval :: (P.MonadParsec e String m, Annotated code) => (m () -> m code) -> m (Top ident code (Tree ident code)) eval code = Eval <$> do -- commit after seeing that ``` is on its own line @@ -246,7 +293,7 @@ eval code = fence <$ guard b CP.space *> code (void $ lit fence) -exampleBlock :: (Annotated code) => (P () -> P code) -> P (Top code (Tree code)) +exampleBlock :: (P.MonadParsec e String m, Annotated code) => (m () -> m code) -> m (Top ident code (Tree ident code)) exampleBlock code = ExampleBlock <$> do @@ -254,7 +301,7 @@ exampleBlock code = fence <- lit "```" <+> P.takeWhileP Nothing (== '`') code . void $ lit fence -codeBlock :: P (Top code (Tree code)) +codeBlock :: (Ord e, P.MonadParsec e String m) => m (Top ident code (Tree ident code)) codeBlock = do column <- (\x -> x - 1) . toInteger . P.unPos <$> LP.indentLevel let tabWidth = toInteger . P.unPos $ P.defaultTabWidth @@ -280,8 +327,14 @@ codeBlock = do skip _ s = s in List.intercalate "\n" $ skip column <$> lines s -emphasis :: (Annotated code) => Char -> (P () -> P code) -> P () -> P (Tree code) -emphasis delimiter code closing = do +emphasis :: + (Ord e, P.MonadParsec e String m, Annotated code) => + Char -> + m ident -> + (m () -> m code) -> + m () -> + m (Tree ident code) +emphasis delimiter ident code closing = do let start = some (P.satisfy (== delimiter)) end <- P.try $ do end <- start @@ -289,38 +342,57 @@ emphasis delimiter code closing = do pure end wrap' . Paragraph <$> someTill' - (leafy code (closing <|> (void $ lit end)) <* void whitespaceWithoutParagraphBreak) + (leafy ident code (closing <|> (void $ lit end)) <* void whitespaceWithoutParagraphBreak) (lit end) where - -- Allows whitespace or a newline, but not more than two newlines in a row. - whitespaceWithoutParagraphBreak :: P () + -- Allows whitespace including up to one newline whitespaceWithoutParagraphBreak = void do void nonNewlineSpaces optional newline >>= \case Just _ -> void nonNewlineSpaces Nothing -> pure () -bold :: (Annotated code) => (P () -> P code) -> P () -> P (Leaf code (Tree code)) -bold code = fmap Bold . emphasis '*' code - -italic :: (Annotated code) => (P () -> P code) -> P () -> P (Leaf code (Tree code)) -italic code = fmap Italic . 
emphasis '_' code - -strikethrough :: (Annotated code) => (P () -> P code) -> P () -> P (Leaf code (Tree code)) -strikethrough code = fmap Strikethrough . emphasis '~' code - -namedLink :: (Annotated code) => (P () -> P code) -> P () -> P (Leaf code (Tree code)) -namedLink code docClose = +bold :: + (Ord e, P.MonadParsec e String m, Annotated code) => + m ident -> + (m () -> m code) -> + m () -> + m (Leaf ident code (Tree ident code)) +bold ident code = fmap Bold . emphasis '*' ident code + +italic :: + (Ord e, P.MonadParsec e String m, Annotated code) => + m ident -> + (m () -> m code) -> + m () -> + m (Leaf ident code (Tree ident code)) +italic ident code = fmap Italic . emphasis '_' ident code + +strikethrough :: + (Ord e, P.MonadParsec e String m, Annotated code) => + m ident -> + (m () -> m code) -> + m () -> + m (Leaf ident code (Tree ident code)) +strikethrough ident code = fmap Strikethrough . emphasis '~' ident code + +namedLink :: + (Ord e, P.MonadParsec e String m, Annotated code) => + m ident -> + (m () -> m code) -> + m () -> + m (Leaf ident code (Tree ident code)) +namedLink ident code docClose = P.label "hyperlink (example: [link name](https://destination.com))" do _ <- lit "[" - p <- spaced . leafy code . void $ char ']' + p <- spaced . leafy ident code . void $ char ']' _ <- lit "]" _ <- lit "(" - target <- group $ fmap pure link <|> some' (transclude code <|> word (docClose <|> void (char ')'))) + target <- group $ fmap pure (link ident) <|> some' (transclude code <|> word (docClose <|> void (char ')'))) _ <- lit ")" pure $ NamedLink (wrap' $ Paragraph p) target -sp :: P String +sp :: (P.MonadParsec e String m) => m String sp = P.try $ do spaces <- P.takeWhile1P (Just "space") isSpace close <- P.optional (P.lookAhead (lit "}}")) @@ -331,17 +403,22 @@ sp = P.try $ do where ok s = length [() | '\n' <- s] < 2 -spaced :: P a -> P (NonEmpty a) +spaced :: (P.MonadParsec e String m) => m a -> m (NonEmpty a) spaced p = some' (p <* P.optional sp) -- | Not an actual node, but this pattern is referenced in multiple places -list :: (Annotated code) => (P () -> P code) -> P () -> P (Top code (Tree code)) -list code docClose = bulletedList code docClose <|> numberedList code docClose - -listSep :: P () +list :: + (Ord e, P.MonadParsec e String m, Annotated code) => + m ident -> + (m () -> m code) -> + m () -> + S.StateT ParsingEnv m (Top ident code (Tree ident code)) +list ident code docClose = bulletedList ident code docClose <|> numberedList ident code docClose + +listSep :: (Ord e, S.MonadState ParsingEnv m, P.MonadParsec e String m) => m () listSep = P.try $ newline *> nonNewlineSpaces *> P.lookAhead (void bulletedStart <|> void numberedStart) -bulletedStart :: P (Int, [a]) +bulletedStart :: (Ord e, S.MonadState ParsingEnv m, P.MonadParsec e String m) => m (Int, [a]) bulletedStart = P.try $ do r <- listItemStart $ [] <$ P.satisfy bulletChar P.lookAhead (P.satisfy isSpace) @@ -349,43 +426,59 @@ bulletedStart = P.try $ do where bulletChar ch = ch == '*' || ch == '-' || ch == '+' -listItemStart :: P a -> P (Int, a) -listItemStart gutter = P.try $ do +listItemStart :: (Ord e, S.MonadState ParsingEnv m, P.MonadParsec e String m) => m a -> m (Int, a) +listItemStart gutter = P.try do nonNewlineSpaces col <- column <$> posP parentCol <- S.gets parentListColumn guard (col > parentCol) (col,) <$> gutter -numberedStart :: P (Int, Token Word64) +numberedStart :: (Ord e, S.MonadState ParsingEnv m, P.MonadParsec e String m) => m (Int, Token Word64) numberedStart = listItemStart $ P.try (tokenP $ 
LP.decimal <* lit ".") -- | FIXME: This should take a @`P` a@ -numberedList :: (Annotated code) => (P () -> P code) -> P () -> P (Top code (Tree code)) -numberedList code docClose = NumberedList <$> sepBy1' numberedItem listSep +numberedList :: + (Ord e, P.MonadParsec e String m, Annotated code) => + m ident -> + (m () -> m code) -> + m () -> + S.StateT ParsingEnv m (Top ident code (Tree ident code)) +numberedList ident code docClose = NumberedList <$> sepBy1' numberedItem listSep where numberedItem = P.label "numbered list (examples: 1. item1, 8. start numbering at '8')" do (col, s) <- numberedStart - (s,) <$> column' code docClose col + (s,) <$> column' ident code docClose col -- | FIXME: This should take a @`P` a@ -bulletedList :: (Annotated code) => (P () -> P code) -> P () -> P (Top code (Tree code)) -bulletedList code docClose = BulletedList <$> sepBy1' bullet listSep +bulletedList :: + (Ord e, P.MonadParsec e String m, Annotated code) => + m ident -> + (m () -> m code) -> + m () -> + S.StateT ParsingEnv m (Top ident code (Tree ident code)) +bulletedList ident code docClose = BulletedList <$> sepBy1' bullet listSep where bullet = P.label "bullet (examples: * item1, - item2)" do (col, _) <- bulletedStart - column' code docClose col - -column' :: (Annotated code) => (P () -> P code) -> P () -> Int -> P (Column (Tree code)) -column' code docClose col = + column' ident code docClose col + +column' :: + (Ord e, P.MonadParsec e String m, Annotated code) => + m ident -> + (m () -> m code) -> + m () -> + Int -> + S.StateT ParsingEnv m (Column (Tree ident code)) +column' ident code docClose col = Column . wrap' <$> (nonNewlineSpaces *> listItemParagraph) - <*> local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> fmap wrap' (list code docClose)) + <*> local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> fmap wrap' (list ident code docClose)) where listItemParagraph = Paragraph <$> do col <- column <$> posP - some' (leafy code docClose <* sep col) + some' (lift (leafy ident code docClose) <* sep col) where -- Trickiness here to support hard line breaks inside of -- a bulleted list, so for instance this parses as expected: @@ -406,7 +499,7 @@ column' code docClose col = (P.notFollowedBy $ void numberedStart <|> void bulletedStart) pure () -newline :: P String +newline :: (P.MonadParsec e String m) => m String newline = P.label "newline" $ lit "\n" <|> lit "\r\n" -- | @@ -421,32 +514,37 @@ newline = P.label "newline" $ lit "\n" <|> lit "\r\n" -- > A paragraph under this subsection. 
-- >
-- > # A section title (not a subsection)
-section :: (Annotated code) => (P () -> P code) -> P () -> P (Top code (Tree code))
-section code docClose = do
+section ::
+  (Ord e, P.MonadParsec e String m, Annotated code) =>
+  m ident ->
+  (m () -> m code) ->
+  m () ->
+  S.StateT ParsingEnv m (Top ident code (Tree ident code))
+section ident code docClose = do
   ns <- S.gets parentSections
   hashes <- P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp
-  title <- paragraph code docClose <* CP.space
+  title <- lift $ paragraph ident code docClose <* CP.space
   let m = length hashes + head ns
   body <-
     local (\env -> env {parentSections = (m : (tail ns))}) $
-      P.many (sectionElem code docClose <* CP.space)
+      P.many (sectionElem ident code docClose <* CP.space)
   pure $ Section (wrap' title) body
 
 -- | Not an actual node, but this pattern is referenced in multiple places
-embedLink :: P EmbedLink
-embedLink = embedTypeLink <|> embedTermLink
+embedLink :: (Ord e, P.MonadParsec e String m) => m ident -> m (EmbedLink ident)
+embedLink ident = embedTypeLink ident <|> embedTermLink ident
 
 -- | FIXME: This should just take a @`P` code@ and @`P` a@.
-group :: P (NonEmpty (Leaf code a)) -> P (Leaf code a)
+group :: (P.MonadParsec e s m) => m (NonEmpty (Leaf ident code a)) -> m (Leaf ident code a)
 group = fmap Group . join
 
 -- | FIXME: This should just take a @`P` a@
-join :: P (NonEmpty a) -> P (Join a)
+join :: (P.MonadParsec e s m) => m (NonEmpty a) -> m (Join a)
 join = fmap Join
 
 -- * utility functions
 
-wrap' :: (Annotated code) => Top code (Tree code) -> Tree code
+wrap' :: (Annotated code) => Top ident code (Tree ident code) -> Tree ident code
 wrap' doc = ann doc :< doc
 
 -- | If it's a multi-line verbatim block we trim any whitespace representing
diff --git a/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs b/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs
index 5167b2bcf6..56a14939b6 100644
--- a/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs
+++ b/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs
@@ -1,7 +1,11 @@
 {-# LANGUAGE TemplateHaskell #-}
+{-# OPTIONS_GHC -Wno-orphans #-}
 
 -- | Haskell parallel to @unison/base.Doc@.
 --
+-- These types have two significant parameters, @ident@ and @code@, which are expected to be instantiated with some
+-- representation of identifiers and of source code in the host language.
+--
 -- This is much more restricted than @unison/base.Doc@, but it covers everything we can parse from Haskell. The
 -- mismatch with Unison is a problem, as someone can create a Unison Doc with explicit constructors or function calls,
 -- have it rendered to a scratch file, and then we can’t parse it. 
Changing the types here to match Unison wouldn’t @@ -13,8 +17,6 @@ import Data.Eq.Deriving (deriveEq1, deriveEq2) import Data.List.NonEmpty (NonEmpty) import Data.Ord.Deriving (deriveOrd1, deriveOrd2) import Text.Show.Deriving (deriveShow1, deriveShow2) -import Unison.HashQualifiedPrime qualified as HQ' -import Unison.Name (Name) import Unison.Parser.Ann (Annotated (..)) import Unison.Prelude import Unison.Syntax.Lexer.Token (Token (..)) @@ -22,7 +24,7 @@ import Unison.Syntax.Lexer.Token (Token (..)) newtype UntitledSection a = UntitledSection [a] deriving (Eq, Ord, Show, Foldable, Functor, Traversable) -data Top code a +data Top ident code a = -- | The first argument is always a `Paragraph` Section a [a] | Eval code @@ -30,7 +32,7 @@ data Top code a | CodeBlock (Token String) (Token String) | BulletedList (NonEmpty (Column a)) | NumberedList (NonEmpty (Token Word64, Column a)) - | Paragraph (NonEmpty (Leaf code a)) + | Paragraph (NonEmpty (Leaf ident code a)) deriving (Eq, Ord, Show, Foldable, Functor, Traversable) data Column a @@ -38,11 +40,11 @@ data Column a Column a (Maybe a) deriving (Eq, Ord, Show, Foldable, Functor, Traversable) -data Leaf code a - = Link EmbedLink +data Leaf ident code a + = Link (EmbedLink ident) | -- | first is a Paragraph, second is always a Group (which contains either a single Term/Type link or list of -- `Transclude`s & `Word`s) - NamedLink a (Leaf code Void) + NamedLink a (Leaf ident code Void) | Example code | Transclude code | -- | Always a Paragraph @@ -52,21 +54,21 @@ data Leaf code a | -- | Always a Paragraph Strikethrough a | -- | Always a Word - Verbatim (Leaf Void Void) + Verbatim (Leaf ident Void Void) | -- | Always a Word - Code (Leaf Void Void) + Code (Leaf ident Void Void) | -- | Always a Transclude - Source (NonEmpty (SourceElement (Leaf code Void))) + Source (NonEmpty (SourceElement ident (Leaf ident code Void))) | -- | Always a Transclude - FoldedSource (NonEmpty (SourceElement (Leaf code Void))) + FoldedSource (NonEmpty (SourceElement ident (Leaf ident code Void))) | EvalInline code - | Signature (NonEmpty EmbedSignatureLink) - | SignatureInline EmbedSignatureLink + | Signature (NonEmpty (EmbedSignatureLink ident)) + | SignatureInline (EmbedSignatureLink ident) | Word (Token String) - | Group (Join (Leaf code a)) + | Group (Join (Leaf ident code a)) deriving (Eq, Ord, Show, Foldable, Functor, Traversable) -instance Bifunctor Leaf where +instance Bifunctor (Leaf ident) where bimap f g = \case Link x -> Link x NamedLink a leaf -> NamedLink (g a) $ first f leaf @@ -85,25 +87,25 @@ instance Bifunctor Leaf where Word x -> Word x Group join -> Group $ bimap f g <$> join -data EmbedLink - = EmbedTypeLink (Token (HQ'.HashQualified Name)) - | EmbedTermLink (Token (HQ'.HashQualified Name)) +data EmbedLink ident + = EmbedTypeLink (Token ident) + | EmbedTermLink (Token ident) deriving (Eq, Ord, Show) -data SourceElement a = SourceElement EmbedLink [EmbedAnnotation a] +data SourceElement ident a = SourceElement (EmbedLink ident) [EmbedAnnotation ident a] deriving (Eq, Ord, Show, Foldable, Functor, Traversable) -newtype EmbedSignatureLink = EmbedSignatureLink (Token (HQ'.HashQualified Name)) +newtype EmbedSignatureLink ident = EmbedSignatureLink (Token ident) deriving (Eq, Ord, Show) newtype Join a = Join (NonEmpty a) deriving (Eq, Ord, Show, Foldable, Functor, Traversable) -newtype EmbedAnnotation a - = EmbedAnnotation (Either (Token (HQ'.HashQualified Name)) a) +newtype EmbedAnnotation ident a + = EmbedAnnotation (Either (Token ident) a) deriving (Eq, 
Ord, Show, Foldable, Functor, Traversable) -instance (Annotated code, Annotated a) => Annotated (Top code a) where +instance (Annotated code, Annotated a) => Annotated (Top ident code a) where ann = \case Section title body -> ann title <> ann body Eval code -> ann code @@ -116,7 +118,7 @@ instance (Annotated code, Annotated a) => Annotated (Top code a) where instance (Annotated a) => Annotated (Column a) where ann (Column para list) = ann para <> ann list -instance (Annotated code, Annotated a) => Annotated (Leaf code a) where +instance (Annotated code, Annotated a) => Annotated (Leaf ident code a) where ann = \case Link link -> ann link NamedLink label target -> ann label <> ann target @@ -135,31 +137,45 @@ instance (Annotated code, Annotated a) => Annotated (Leaf code a) where Word text -> ann text Group (Join leaves) -> ann leaves -instance Annotated EmbedLink where +instance Annotated (EmbedLink ident) where ann = \case EmbedTypeLink name -> ann name EmbedTermLink name -> ann name -instance (Annotated code) => Annotated (SourceElement code) where +instance (Annotated code) => Annotated (SourceElement ident code) where ann (SourceElement link target) = ann link <> ann target -instance Annotated EmbedSignatureLink where +instance Annotated (EmbedSignatureLink ident) where ann (EmbedSignatureLink name) = ann name -instance (Annotated code) => Annotated (EmbedAnnotation code) where +instance (Annotated code) => Annotated (EmbedAnnotation ident code) where ann (EmbedAnnotation a) = either ann ann a $(deriveEq1 ''Column) $(deriveOrd1 ''Column) $(deriveShow1 ''Column) +$(deriveEq1 ''Token) +$(deriveOrd1 ''Token) +$(deriveShow1 ''Token) + $(deriveEq1 ''EmbedAnnotation) $(deriveOrd1 ''EmbedAnnotation) $(deriveShow1 ''EmbedAnnotation) +$(deriveEq2 ''EmbedAnnotation) +$(deriveOrd2 ''EmbedAnnotation) +$(deriveShow2 ''EmbedAnnotation) + +$(deriveEq1 ''EmbedLink) +$(deriveOrd1 ''EmbedLink) +$(deriveShow1 ''EmbedLink) $(deriveEq1 ''SourceElement) $(deriveOrd1 ''SourceElement) $(deriveShow1 ''SourceElement) +$(deriveEq2 ''SourceElement) +$(deriveOrd2 ''SourceElement) +$(deriveShow2 ''SourceElement) $(deriveEq1 ''Join) $(deriveOrd1 ''Join) From 9a941a389079373465ecdd6b6baad8936f3561b1 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Fri, 26 Jul 2024 23:53:49 -0600 Subject: [PATCH 19/22] Caught a hardcoded `}}` in the Doc parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Doc parser shouldn’t know how Unison terminates Doc blocks. --- unison-syntax/src/Unison/Syntax/Parser/Doc.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/unison-syntax/src/Unison/Syntax/Parser/Doc.hs b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs index 4009c30dec..cecf2ca6a2 100644 --- a/unison-syntax/src/Unison/Syntax/Parser/Doc.hs +++ b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs @@ -131,7 +131,7 @@ paragraph :: (m () -> m code) -> m () -> m (Top ident code (Tree ident code)) -paragraph ident code = fmap Paragraph . spaced . leafy ident code +paragraph ident code docClose = fmap Paragraph . spaced docClose $ leafy ident code docClose word :: (Ord e, P.MonadParsec e String m) => m end -> m (Leaf ident code void) word closing = fmap Word . tokenP . P.try $ do @@ -385,17 +385,17 @@ namedLink :: namedLink ident code docClose = P.label "hyperlink (example: [link name](https://destination.com))" do _ <- lit "[" - p <- spaced . leafy ident code . void $ char ']' + p <- spaced docClose . leafy ident code . 
void $ char ']' _ <- lit "]" _ <- lit "(" target <- group $ fmap pure (link ident) <|> some' (transclude code <|> word (docClose <|> void (char ')'))) _ <- lit ")" pure $ NamedLink (wrap' $ Paragraph p) target -sp :: (P.MonadParsec e String m) => m String -sp = P.try $ do +sp :: (P.MonadParsec e String m) => m () -> m String +sp docClose = P.try $ do spaces <- P.takeWhile1P (Just "space") isSpace - close <- P.optional (P.lookAhead (lit "}}")) + close <- P.optional (P.lookAhead docClose) case close of Nothing -> guard $ ok spaces Just _ -> pure () @@ -403,8 +403,8 @@ sp = P.try $ do where ok s = length [() | '\n' <- s] < 2 -spaced :: (P.MonadParsec e String m) => m a -> m (NonEmpty a) -spaced p = some' (p <* P.optional sp) +spaced :: (P.MonadParsec e String m) => m () -> m a -> m (NonEmpty a) +spaced docClose p = some' $ p <* P.optional (sp docClose) -- | Not an actual node, but this pattern is referenced in multiple places list :: @@ -522,7 +522,7 @@ section :: S.StateT ParsingEnv m (Top ident code (Tree ident code)) section ident code docClose = do ns <- S.gets parentSections - hashes <- P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp + hashes <- lift $ P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp docClose title <- lift $ paragraph ident code docClose <* CP.space let m = length hashes + head ns body <- From beecaa9be715f2090b01577e12b2e541a60404b6 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Fri, 26 Jul 2024 23:54:54 -0600 Subject: [PATCH 20/22] Make Doc parser ignorant of type/term distinctions This was the last thing tying Doc to Unison. --- parser-typechecker/src/Unison/PrintError.hs | 8 ++++ .../src/Unison/Syntax/TermParser.hs | 43 +++++++++++++------ .../src/Unison/Syntax/Lexer/Unison.hs | 12 ++++-- unison-syntax/src/Unison/Syntax/Parser.hs | 6 ++- unison-syntax/src/Unison/Syntax/Parser/Doc.hs | 36 +++------------- .../src/Unison/Syntax/Parser/Doc/Data.hs | 10 ++--- 6 files changed, 59 insertions(+), 56 deletions(-) diff --git a/parser-typechecker/src/Unison/PrintError.hs b/parser-typechecker/src/Unison/PrintError.hs index dd796c0159..691d7cd3ef 100644 --- a/parser-typechecker/src/Unison/PrintError.hs +++ b/parser-typechecker/src/Unison/PrintError.hs @@ -1861,6 +1861,14 @@ renderParseErrors s = \case <> structuralVsUniqueDocsLink ] in (msg, rangeForToken <$> [void keyword, void name]) + go (Parser.TypeNotAllowed tok) = + let msg = + Pr.lines + [ Pr.wrap "I expected to see a term here, but instead it’s a type:", + "", + tokenAsErrorSite s $ HQ.toText <$> tok + ] + in (msg, [rangeForToken tok]) unknownConstructor :: String -> L.Token (HashQualified Name) -> Pretty ColorText diff --git a/parser-typechecker/src/Unison/Syntax/TermParser.hs b/parser-typechecker/src/Unison/Syntax/TermParser.hs index 8d0195410a..642ed0e339 100644 --- a/parser-typechecker/src/Unison/Syntax/TermParser.hs +++ b/parser-typechecker/src/Unison/Syntax/TermParser.hs @@ -27,6 +27,7 @@ import Data.Text qualified as Text import Data.Tuple.Extra qualified as TupleE import Data.Void (absurd, vacuous) import Text.Megaparsec qualified as P +import U.Codebase.Reference (ReferenceType (..)) import U.Core.ABT qualified as ABT import Unison.ABT qualified as ABT import Unison.Builtin.Decls qualified as DD @@ -530,7 +531,7 @@ doc2Block = do docUntitledSection ann (Doc.UntitledSection tops) = Term.app ann (f ann "UntitledSection") $ Term.list (gann tops) tops - docTop :: Doc.Top (HQ'.HashQualified Name) [L.Token L.Lexeme] (Term v Ann) -> TermP v m + docTop :: 
Doc.Top (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme] (Term v Ann) -> TermP v m docTop d = case d of Doc.Section title body -> pure $ Term.apps' (f d "Section") [title, Term.list (gann body) body] Doc.Eval code -> @@ -558,7 +559,7 @@ doc2Block = do docColumn d@(Doc.Column para sublist) = Term.app (gann d) (f d "Column") . Term.list (gann d) $ para : toList sublist - docLeaf :: Doc.Leaf (HQ'.HashQualified Name) [L.Token L.Lexeme] (Term v Ann) -> TermP v m + docLeaf :: Doc.Leaf (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme] (Term v Ann) -> TermP v m docLeaf d = case d of Doc.Link link -> Term.app (gann d) (f d "Link") <$> docEmbedLink link Doc.NamedLink para target -> Term.apps' (f d "NamedLink") . (para :) . pure <$> docLeaf (vacuous target) @@ -590,35 +591,49 @@ doc2Block = do Term.app (gann d) (f d "Group") . Term.app (gann d) (f d "Join") . Term.list (ann leaves) . toList <$> traverse docLeaf leaves - docEmbedLink :: Doc.EmbedLink (HQ'.HashQualified Name) -> TermP v m - docEmbedLink d = case d of - Doc.EmbedTypeLink ident -> + docEmbedLink :: Doc.EmbedLink (ReferenceType, HQ'.HashQualified Name) -> TermP v m + docEmbedLink d@(Doc.EmbedLink (L.Token (level, ident) start end)) = case level of + RtType -> Term.app (gann d) (f d "EmbedTypeLink") . Term.typeLink (ann d) . L.payload - <$> findUniqueType (HQ'.toHQ <$> ident) - Doc.EmbedTermLink ident -> - Term.app (gann d) (f d "EmbedTermLink") . addDelay <$> resolveHashQualified (HQ'.toHQ <$> ident) + <$> findUniqueType (L.Token (HQ'.toHQ ident) start end) + RtTerm -> + Term.app (gann d) (f d "EmbedTermLink") . addDelay <$> resolveHashQualified (L.Token (HQ'.toHQ ident) start end) docSourceElement :: - Doc.SourceElement (HQ'.HashQualified Name) (Doc.Leaf (HQ'.HashQualified Name) [L.Token L.Lexeme] Void) -> + Doc.SourceElement + (ReferenceType, HQ'.HashQualified Name) + (Doc.Leaf (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme] Void) -> TermP v m docSourceElement d@(Doc.SourceElement link anns) = do link' <- docEmbedLink link anns' <- traverse docEmbedAnnotation anns pure $ Term.apps' (f d "SourceElement") [link', Term.list (ann anns) anns'] - docEmbedSignatureLink :: Doc.EmbedSignatureLink (HQ'.HashQualified Name) -> TermP v m - docEmbedSignatureLink d@(Doc.EmbedSignatureLink ident) = - Term.app (gann d) (f d "EmbedSignatureLink") . addDelay <$> resolveHashQualified (HQ'.toHQ <$> ident) + docEmbedSignatureLink :: Doc.EmbedSignatureLink (ReferenceType, HQ'.HashQualified Name) -> TermP v m + docEmbedSignatureLink d@(Doc.EmbedSignatureLink (L.Token (level, ident) start end)) = case level of + RtType -> P.customFailure . TypeNotAllowed $ L.Token (HQ'.toHQ ident) start end + RtTerm -> + Term.app (gann d) (f d "EmbedSignatureLink") . addDelay + <$> resolveHashQualified (L.Token (HQ'.toHQ ident) start end) docEmbedAnnotation :: - Doc.EmbedAnnotation (HQ'.HashQualified Name) (Doc.Leaf (HQ'.HashQualified Name) [L.Token L.Lexeme] Void) -> + Doc.EmbedAnnotation + (ReferenceType, HQ'.HashQualified Name) + (Doc.Leaf (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme] Void) -> TermP v m docEmbedAnnotation d@(Doc.EmbedAnnotation a) = -- This is the only place I’m not sure we’re doing the right thing. In the lexer, this can be an identifier or a -- DocLeaf, but here it could be either /text/ or a Doc element. And I don’t think there’s any way the lexemes -- produced for an identifier and the lexemes consumed for text line up. So, I think this is a bugfix I can’t -- avoid. 
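-- A sketch of the dispatch this commit introduces (illustrative only; `resolveTermIdent` is a hypothetical
-- name, not part of this patch): identifiers now carry a `ReferenceType`, so a term-only position can reject
-- type links with the new `TypeNotAllowed` error instead of silently accepting them:
--
--   resolveTermIdent :: L.Token (ReferenceType, HQ'.HashQualified Name) -> TermP v m
--   resolveTermIdent (L.Token (level, ident) start end) = case level of
--     RtType -> P.customFailure . TypeNotAllowed $ L.Token (HQ'.toHQ ident) start end
--     RtTerm -> resolveHashQualified $ L.Token (HQ'.toHQ ident) start end
--
-- `docEmbedSignatureLink` above and `docEmbedAnnotation` below both follow this shape.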
- Term.app (gann d) (f d "EmbedAnnotation") <$> either (resolveHashQualified . fmap HQ'.toHQ) (docLeaf . vacuous) a + Term.app (gann d) (f d "EmbedAnnotation") + <$> either + ( \(L.Token (level, ident) start end) -> case level of + RtType -> P.customFailure . TypeNotAllowed $ L.Token (HQ'.toHQ ident) start end + RtTerm -> resolveHashQualified $ L.Token (HQ'.toHQ ident) start end + ) + (docLeaf . vacuous) + a docBlock :: (Monad m, Var v) => TermP v m docBlock = do diff --git a/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs b/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs index 98112c2124..8a6c20d1a8 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs @@ -42,6 +42,7 @@ import Text.Megaparsec.Char qualified as CP import Text.Megaparsec.Char.Lexer qualified as LP import Text.Megaparsec.Error qualified as EP import Text.Megaparsec.Internal qualified as PI +import U.Codebase.Reference (ReferenceType (..)) import Unison.HashQualifiedPrime qualified as HQ' import Unison.Name (Name) import Unison.Name qualified as Name @@ -117,7 +118,7 @@ data Lexeme | Bytes Bytes.Bytes -- bytes literals | Hash ShortHash -- hash literals | Err Err - | Doc (Doc.UntitledSection (Doc.Tree (HQ'.HashQualified Name) [Token Lexeme])) + | Doc (Doc.UntitledSection (Doc.Tree (ReferenceType, HQ'.HashQualified Name) [Token Lexeme])) deriving stock (Eq, Show, Ord) type IsVirtual = Bool -- is it a virtual semi or an actual semi? @@ -354,7 +355,7 @@ doc2 = do (docTok, closeTok) <- local (\env -> env {inLayout = False}) do - body <- Doc.doc identifierP lexemes' . P.lookAhead $ () <$ lit "}}" + body <- Doc.doc typeOrTerm lexemes' . P.lookAhead $ () <$ lit "}}" closeStart <- posP lit "}}" closeEnd <- posP @@ -382,12 +383,15 @@ doc2 = do isTopLevel = length (layout env0) + maybe 0 (const 1) (opening env0) == 1 _ -> docTok : endToks where - -- DUPLICATED wordyKw kw = separated wordySep (lit kw) + typeOrAbility' = typeOrAbilityAlt (wordyKw . Text.unpack) + typeOrTerm = do + mtype <- P.optional $ typeOrAbility' <* CP.space + ident <- identifierP <* CP.space + pure (maybe RtTerm (const RtType) mtype, ident) subsequentTypeName = P.lookAhead . P.optional $ do let lit' s = lit s <* sp let modifier = typeModifiersAlt (lit' . Text.unpack) - let typeOrAbility' = typeOrAbilityAlt (wordyKw . 
Text.unpack) _ <- optional modifier *> typeOrAbility' *> sp Token name start stop <- tokenP identifierP if Name.isSymboly (HQ'.toName name) diff --git a/unison-syntax/src/Unison/Syntax/Parser.hs b/unison-syntax/src/Unison/Syntax/Parser.hs index fac55142de..6c4aa74b95 100644 --- a/unison-syntax/src/Unison/Syntax/Parser.hs +++ b/unison-syntax/src/Unison/Syntax/Parser.hs @@ -72,6 +72,7 @@ import Data.Set qualified as Set import Data.Text qualified as Text import Text.Megaparsec (runParserT) import Text.Megaparsec qualified as P +import U.Codebase.Reference (ReferenceType (..)) import U.Util.Base32Hex qualified as Base32Hex import Unison.ABT qualified as ABT import Unison.ConstructorReference (ConstructorReference) @@ -170,6 +171,8 @@ data Error v | TypeDeclarationErrors [UF.Error v Ann] | -- | MissingTypeModifier (type|ability) name MissingTypeModifier (L.Token String) (L.Token v) + | -- | A type was found in a position that requires a term + TypeNotAllowed (L.Token (HQ.HashQualified Name)) | ResolutionFailures [Names.ResolutionFailure v Ann] | DuplicateTypeNames [(v, [Ann])] | DuplicateTermNames [(v, [Ann])] @@ -401,7 +404,8 @@ string = queryToken getString getString (L.Textual s) = Just (Text.pack s) getString _ = Nothing -doc :: (Ord v) => P v m (L.Token (Doc.UntitledSection (Doc.Tree (HQ'.HashQualified Name) [L.Token L.Lexeme]))) +doc :: + (Ord v) => P v m (L.Token (Doc.UntitledSection (Doc.Tree (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme]))) doc = queryToken \case L.Doc d -> pure d _ -> Nothing diff --git a/unison-syntax/src/Unison/Syntax/Parser/Doc.hs b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs index cecf2ca6a2..8ba6840dd2 100644 --- a/unison-syntax/src/Unison/Syntax/Parser/Doc.hs +++ b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs @@ -45,8 +45,7 @@ module Unison.Syntax.Parser.Doc -- * other components column', - embedTypeLink, - embedTermLink, + embedLink, embedSignatureLink, join, ) @@ -59,26 +58,13 @@ import Data.List qualified as List import Data.List.Extra qualified as List import Data.List.NonEmpty (NonEmpty ((:|))) import Data.List.NonEmpty qualified as NonEmpty -import Data.Text qualified as Text import Text.Megaparsec qualified as P import Text.Megaparsec.Char (char) import Text.Megaparsec.Char qualified as CP import Text.Megaparsec.Char.Lexer qualified as LP import Unison.Parser.Ann (Ann, Annotated (..)) import Unison.Prelude hiding (join) -import Unison.Syntax.Lexer - ( column, - line, - lit, - local, - sepBy1', - separated, - some', - someTill', - typeOrAbilityAlt, - wordySep, - (<+>), - ) +import Unison.Syntax.Lexer (column, line, lit, local, sepBy1', some', someTill', (<+>)) import Unison.Syntax.Lexer.Token (Token (Token), posP, tokenP) import Unison.Syntax.Parser.Doc.Data @@ -109,9 +95,6 @@ doc ident code = flip S.evalStateT initialState . untitledSection . sectionElem untitledSection :: (P.MonadParsec e String m) => m a -> m (UntitledSection a) untitledSection a = UntitledSection <$> P.many (a <* CP.space) -wordyKw :: (P.MonadParsec e String m) => String -> m String -wordyKw kw = separated wordySep (lit kw) - sectionElem :: (Ord e, P.MonadParsec e String m, Annotated code) => m ident -> @@ -228,14 +211,9 @@ evalInline code = fmap EvalInline $ do s <- code inlineEvalClose pure s -embedTypeLink :: (Ord e, P.MonadParsec e String m) => m ident -> m (EmbedLink ident) -embedTypeLink ident = - EmbedTypeLink <$> do - _ <- typeOrAbilityAlt (wordyKw . 
Text.unpack) <* CP.space - tokenP ident <* CP.space - -embedTermLink :: (Ord e, P.MonadParsec e String m) => m ident -> m (EmbedLink ident) -embedTermLink ident = EmbedTermLink <$> tokenP ident <* CP.space +-- | Not an actual node, but this pattern is referenced in multiple places +embedLink :: (Ord e, P.MonadParsec e s m, P.TraversableStream s) => m ident -> m (EmbedLink ident) +embedLink = fmap EmbedLink . tokenP embedSignatureLink :: (Ord e, P.MonadParsec e String m) => m ident -> m (EmbedSignatureLink ident) embedSignatureLink ident = EmbedSignatureLink <$> tokenP ident <* CP.space @@ -530,10 +508,6 @@ section ident code docClose = do P.many (sectionElem ident code docClose <* CP.space) pure $ Section (wrap' title) body --- | Not an actual node, but this pattern is referenced in multiple places -embedLink :: (Ord e, P.MonadParsec e String m) => m ident -> m (EmbedLink ident) -embedLink ident = embedTypeLink ident <|> embedTermLink ident - -- | FIXME: This should just take a @`P` code@ and @`P` a@. group :: (P.MonadParsec e s m) => m (NonEmpty (Leaf ident code a)) -> m (Leaf ident code a) group = fmap Group . join diff --git a/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs b/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs index 56a14939b6..75bc3a621e 100644 --- a/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs +++ b/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs @@ -87,9 +87,9 @@ instance Bifunctor (Leaf ident) where Word x -> Word x Group join -> Group $ bimap f g <$> join -data EmbedLink ident - = EmbedTypeLink (Token ident) - | EmbedTermLink (Token ident) +-- | This is a deviation from the Unison Doc data model – in Unison, Doc distinguishes between type and term links, but +-- here Doc knows nothing about what namespaces may exist. +data EmbedLink ident = EmbedLink (Token ident) deriving (Eq, Ord, Show) data SourceElement ident a = SourceElement (EmbedLink ident) [EmbedAnnotation ident a] @@ -138,9 +138,7 @@ instance (Annotated code, Annotated a) => Annotated (Leaf ident code a) where Group (Join leaves) -> ann leaves instance Annotated (EmbedLink ident) where - ann = \case - EmbedTypeLink name -> ann name - EmbedTermLink name -> ann name + ann (EmbedLink name) = ann name instance (Annotated code) => Annotated (SourceElement ident code) where ann (SourceElement link target) = ann link <> ann target From c5a66d5608849dbe2f4d4b9164cae34ade4c6ca1 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Mon, 29 Jul 2024 16:49:21 -0600 Subject: [PATCH 21/22] Simplify Doc parser from `State` to `Reader` --- .../src/Unison/Syntax/Lexer/Unison.hs | 2 +- unison-syntax/src/Unison/Syntax/Parser/Doc.hs | 44 +++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs b/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs index 8a6c20d1a8..c641786505 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs @@ -355,7 +355,7 @@ doc2 = do (docTok, closeTok) <- local (\env -> env {inLayout = False}) do - body <- Doc.doc typeOrTerm lexemes' . P.lookAhead $ () <$ lit "}}" + body <- Doc.doc typeOrTerm lexemes' . 
P.lookAhead $ lit "}}" closeStart <- posP lit "}}" closeEnd <- posP diff --git a/unison-syntax/src/Unison/Syntax/Parser/Doc.hs b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs index 8ba6840dd2..1a03665493 100644 --- a/unison-syntax/src/Unison/Syntax/Parser/Doc.hs +++ b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs @@ -5,12 +5,12 @@ -- -- - an identifier parser -- - a code parser (that accepts a termination parser) --- - a termination parser (only used for lookahead), for this parser to know when to give up +-- - a termination parser, for this parser to know when to give up -- -- Each of those parsers is expected to satisfy @(`Ord` e, `P.MonadParsec` e `String` m)@. module Unison.Syntax.Parser.Doc ( Tree, - initialState, + initialEnv, doc, untitledSection, sectionElem, @@ -52,7 +52,7 @@ module Unison.Syntax.Parser.Doc where import Control.Comonad.Cofree (Cofree ((:<))) -import Control.Monad.State qualified as S +import Control.Monad.Reader qualified as R import Data.Char (isControl, isSpace) import Data.List qualified as List import Data.List.Extra qualified as List @@ -64,7 +64,7 @@ import Text.Megaparsec.Char qualified as CP import Text.Megaparsec.Char.Lexer qualified as LP import Unison.Parser.Ann (Ann, Annotated (..)) import Unison.Prelude hiding (join) -import Unison.Syntax.Lexer (column, line, lit, local, sepBy1', some', someTill', (<+>)) +import Unison.Syntax.Lexer (column, line, lit, sepBy1', some', someTill', (<+>)) import Unison.Syntax.Lexer.Token (Token (Token), posP, tokenP) import Unison.Syntax.Parser.Doc.Data @@ -79,16 +79,16 @@ data ParsingEnv = ParsingEnv } deriving (Show) -initialState :: ParsingEnv -initialState = ParsingEnv [0] 0 +initialEnv :: ParsingEnv +initialEnv = ParsingEnv [0] 0 doc :: (Ord e, P.MonadParsec e String m, Annotated code) => m ident -> (m () -> m code) -> - m () -> + m end -> m (UntitledSection (Tree ident code)) -doc ident code = flip S.evalStateT initialState . untitledSection . sectionElem ident code +doc ident code = flip R.runReaderT initialEnv . untitledSection . sectionElem ident code . void -- | This is the actual `Doc` lexer. Unlike `doc2`, it doesn’t do any Unison-side lexing (i.e., it doesn’t know that -- Unison wraps `Doc` literals in `}}`). 
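-- As a usage sketch (`docBody` is a hypothetical name, mirroring the `doc2` call site in the Unison lexer
-- above; the result type matches the payload of the `Doc` lexeme):
--
--   docBody :: P (UntitledSection (Tree (ReferenceType, HQ'.HashQualified Name) [Token Lexeme]))
--   docBody = Doc.doc typeOrTerm lexemes' (P.lookAhead (lit "}}"))
--
-- Since the terminator is now any `m end` and is voided internally, callers can pass `lit "}}"` directly
-- rather than wrapping it with `() <$`.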
@@ -100,7 +100,7 @@ sectionElem :: m ident -> (m () -> m code) -> m () -> - S.StateT ParsingEnv m (Tree ident code) + R.ReaderT ParsingEnv m (Tree ident code) sectionElem ident code docClose = fmap wrap' $ section ident code docClose @@ -390,13 +390,13 @@ list :: m ident -> (m () -> m code) -> m () -> - S.StateT ParsingEnv m (Top ident code (Tree ident code)) + R.ReaderT ParsingEnv m (Top ident code (Tree ident code)) list ident code docClose = bulletedList ident code docClose <|> numberedList ident code docClose -listSep :: (Ord e, S.MonadState ParsingEnv m, P.MonadParsec e String m) => m () +listSep :: (Ord e, R.MonadReader ParsingEnv m, P.MonadParsec e String m) => m () listSep = P.try $ newline *> nonNewlineSpaces *> P.lookAhead (void bulletedStart <|> void numberedStart) -bulletedStart :: (Ord e, S.MonadState ParsingEnv m, P.MonadParsec e String m) => m (Int, [a]) +bulletedStart :: (Ord e, R.MonadReader ParsingEnv m, P.MonadParsec e String m) => m (Int, [a]) bulletedStart = P.try $ do r <- listItemStart $ [] <$ P.satisfy bulletChar P.lookAhead (P.satisfy isSpace) @@ -404,15 +404,15 @@ bulletedStart = P.try $ do where bulletChar ch = ch == '*' || ch == '-' || ch == '+' -listItemStart :: (Ord e, S.MonadState ParsingEnv m, P.MonadParsec e String m) => m a -> m (Int, a) +listItemStart :: (Ord e, R.MonadReader ParsingEnv m, P.MonadParsec e String m) => m a -> m (Int, a) listItemStart gutter = P.try do nonNewlineSpaces col <- column <$> posP - parentCol <- S.gets parentListColumn + parentCol <- R.asks parentListColumn guard (col > parentCol) (col,) <$> gutter -numberedStart :: (Ord e, S.MonadState ParsingEnv m, P.MonadParsec e String m) => m (Int, Token Word64) +numberedStart :: (Ord e, R.MonadReader ParsingEnv m, P.MonadParsec e String m) => m (Int, Token Word64) numberedStart = listItemStart $ P.try (tokenP $ LP.decimal <* lit ".") -- | FIXME: This should take a @`P` a@ @@ -421,7 +421,7 @@ numberedList :: m ident -> (m () -> m code) -> m () -> - S.StateT ParsingEnv m (Top ident code (Tree ident code)) + R.ReaderT ParsingEnv m (Top ident code (Tree ident code)) numberedList ident code docClose = NumberedList <$> sepBy1' numberedItem listSep where numberedItem = P.label "numbered list (examples: 1. item1, 8. start numbering at '8')" do @@ -434,7 +434,7 @@ bulletedList :: m ident -> (m () -> m code) -> m () -> - S.StateT ParsingEnv m (Top ident code (Tree ident code)) + R.ReaderT ParsingEnv m (Top ident code (Tree ident code)) bulletedList ident code docClose = BulletedList <$> sepBy1' bullet listSep where bullet = P.label "bullet (examples: * item1, - item2)" do @@ -447,11 +447,11 @@ column' :: (m () -> m code) -> m () -> Int -> - S.StateT ParsingEnv m (Column (Tree ident code)) + R.ReaderT ParsingEnv m (Column (Tree ident code)) column' ident code docClose col = Column . 
wrap' <$> (nonNewlineSpaces *> listItemParagraph) - <*> local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> fmap wrap' (list ident code docClose)) + <*> R.local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> fmap wrap' (list ident code docClose)) where listItemParagraph = Paragraph <$> do @@ -497,14 +497,14 @@ section :: m ident -> (m () -> m code) -> m () -> - S.StateT ParsingEnv m (Top ident code (Tree ident code)) + R.ReaderT ParsingEnv m (Top ident code (Tree ident code)) section ident code docClose = do - ns <- S.gets parentSections + ns <- R.asks parentSections hashes <- lift $ P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp docClose title <- lift $ paragraph ident code docClose <* CP.space let m = length hashes + head ns body <- - local (\env -> env {parentSections = (m : (tail ns))}) $ + R.local (\env -> env {parentSections = m : tail ns}) $ P.many (sectionElem ident code docClose <* CP.space) pure $ Section (wrap' title) body From 96f865b37c4fcb31928d976ece21dbf3231d8e87 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Thu, 1 Aug 2024 22:53:33 -0600 Subject: [PATCH 22/22] Add a transcript showing that #5076 was fixed Some handling of blocks without final newlines was improved in the course of this PR. Fixes #5076. --- unison-src/transcripts/fix5076.md | 12 ++++++++++++ unison-src/transcripts/fix5076.output.md | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 unison-src/transcripts/fix5076.md create mode 100644 unison-src/transcripts/fix5076.output.md diff --git a/unison-src/transcripts/fix5076.md b/unison-src/transcripts/fix5076.md new file mode 100644 index 0000000000..d2c4b5a7b2 --- /dev/null +++ b/unison-src/transcripts/fix5076.md @@ -0,0 +1,12 @@ +```ucm:hide +scratch/main> builtins.mergeio +``` + +A nested call to the code lexer wasn’t terminating inline examples containing blocks properly. + +```unison +x = {{ + ``let "me"`` live + ``do "me"`` in + }} +``` diff --git a/unison-src/transcripts/fix5076.output.md b/unison-src/transcripts/fix5076.output.md new file mode 100644 index 0000000000..f92954cd23 --- /dev/null +++ b/unison-src/transcripts/fix5076.output.md @@ -0,0 +1,22 @@ +A nested call to the code lexer wasn’t terminating inline examples containing blocks properly. + +``` unison +x = {{ + ``let "me"`` live + ``do "me"`` in + }} +``` + +``` ucm + + Loading changes detected in scratch.u. + + I found and typechecked these definitions in scratch.u. If you + do an `add` or `update`, here's how your codebase would + change: + + ⍟ These new definitions are ok to `add`: + + x : Doc2 + +```
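A closing note on the `State`-to-`Reader` simplification in patch 21: it works because the parsing environment is only ever narrowed for the duration of a subtree and never threaded between siblings, so `R.local` provides exactly the save-and-restore discipline the `State` version maintained by hand. A minimal standalone sketch of that scoping pattern (`Env` and `sectionDepth` are illustrative stand-ins, not definitions from this PR):

```haskell
import Control.Monad.Reader (Reader, asks, local, runReader)

-- A cut-down stand-in for ParsingEnv: just the section-nesting stack.
newtype Env = Env {parentSections :: [Int]}

-- Entering a section pushes its computed level for the children only;
-- `local` undoes the change automatically once the subtree is parsed.
sectionDepth :: Int -> Reader Env [Int]
sectionDepth hashes = do
  ns <- asks parentSections
  let m = hashes + head ns
  local (\e -> e {parentSections = m : tail ns}) (asks parentSections)

-- runReader (sectionDepth 2) (Env [0]) == [2]
```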