-
Notifications
You must be signed in to change notification settings - Fork 272
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5235 from sellout/refactor-transcript-parser
- Loading branch information
Showing
9 changed files
with
356 additions
and
365 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
{-# LANGUAGE PatternSynonyms #-} | ||
|
||
-- | The data model for Unison transcripts. | ||
module Unison.Codebase.Transcript | ||
( ExpectingError, | ||
ScratchFileName, | ||
Hidden (..), | ||
UcmLine (..), | ||
UcmContext (..), | ||
APIRequest (..), | ||
pattern CMarkCodeBlock, | ||
Stanza, | ||
ProcessedBlock (..), | ||
) | ||
where | ||
|
||
import CMark qualified | ||
import Unison.Core.Project (ProjectBranchName, ProjectName) | ||
import Unison.Prelude | ||
import Unison.Project (ProjectAndBranch) | ||
|
||
type ExpectingError = Bool | ||
|
||
type ScratchFileName = Text | ||
|
||
data Hidden = Shown | HideOutput | HideAll | ||
deriving (Eq, Show) | ||
|
||
data UcmLine | ||
= UcmCommand UcmContext Text | ||
| -- | Text does not include the '--' prefix. | ||
UcmComment Text | ||
|
||
-- | Where a command is run: a project branch (myproject/mybranch>). | ||
data UcmContext | ||
= UcmContextProject (ProjectAndBranch ProjectName ProjectBranchName) | ||
|
||
data APIRequest | ||
= GetRequest Text | ||
| APIComment Text | ||
|
||
pattern CMarkCodeBlock :: (Maybe CMark.PosInfo) -> Text -> Text -> CMark.Node | ||
pattern CMarkCodeBlock pos info body = CMark.Node pos (CMark.CODE_BLOCK info body) [] | ||
|
||
type Stanza = Either CMark.Node ProcessedBlock | ||
|
||
data ProcessedBlock | ||
= Ucm Hidden ExpectingError [UcmLine] | ||
| Unison Hidden ExpectingError (Maybe ScratchFileName) Text | ||
| API [APIRequest] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
-- | Parse and print CommonMark (like Github-flavored Markdown) transcripts. | ||
module Unison.Codebase.Transcript.Parser | ||
( -- * printing | ||
formatAPIRequest, | ||
formatUcmLine, | ||
formatStanza, | ||
formatNode, | ||
formatProcessedBlock, | ||
|
||
-- * conversion | ||
processedBlockToNode, | ||
|
||
-- * parsing | ||
stanzas, | ||
ucmLine, | ||
apiRequest, | ||
fenced, | ||
hidden, | ||
expectingError, | ||
language, | ||
) | ||
where | ||
|
||
import CMark qualified | ||
import Data.Char qualified as Char | ||
import Data.Text qualified as Text | ||
import Data.These (These (..)) | ||
import Text.Megaparsec qualified as P | ||
import Unison.Codebase.Transcript | ||
import Unison.Prelude | ||
import Unison.Project (ProjectAndBranch (ProjectAndBranch)) | ||
|
||
formatAPIRequest :: APIRequest -> Text | ||
formatAPIRequest = \case | ||
GetRequest txt -> "GET " <> txt | ||
APIComment txt -> "-- " <> txt | ||
|
||
formatUcmLine :: UcmLine -> Text | ||
formatUcmLine = \case | ||
UcmCommand context txt -> formatContext context <> "> " <> txt | ||
UcmComment txt -> "--" <> txt | ||
where | ||
formatContext (UcmContextProject projectAndBranch) = into @Text projectAndBranch | ||
|
||
formatStanza :: Stanza -> Text | ||
formatStanza = either formatNode formatProcessedBlock | ||
|
||
formatNode :: CMark.Node -> Text | ||
formatNode = (<> "\n") . CMark.nodeToCommonmark [] Nothing | ||
|
||
formatProcessedBlock :: ProcessedBlock -> Text | ||
formatProcessedBlock = formatNode . processedBlockToNode | ||
|
||
processedBlockToNode :: ProcessedBlock -> CMark.Node | ||
processedBlockToNode = \case | ||
Ucm _ _ cmds -> CMarkCodeBlock Nothing "ucm" $ foldr ((<>) . formatUcmLine) "" cmds | ||
Unison _hide _ fname txt -> | ||
CMarkCodeBlock Nothing "unison" $ maybe txt (\fname -> Text.unlines ["---", "title: " <> fname, "---", txt]) fname | ||
API apiRequests -> CMarkCodeBlock Nothing "api" $ Text.unlines $ formatAPIRequest <$> apiRequests | ||
|
||
type P = P.Parsec Void Text | ||
|
||
stanzas :: FilePath -> Text -> Either (P.ParseErrorBundle Text Void) [Stanza] | ||
stanzas srcName = (\(CMark.Node _ _DOCUMENT blocks) -> traverse stanzaFromNode blocks) . CMark.commonmarkToNode [] | ||
where | ||
stanzaFromNode :: CMark.Node -> Either (P.ParseErrorBundle Text Void) Stanza | ||
stanzaFromNode node = case node of | ||
CMarkCodeBlock _ info body -> maybe (Left node) pure <$> P.parse (fenced info) srcName body | ||
_ -> pure $ Left node | ||
|
||
ucmLine :: P UcmLine | ||
ucmLine = ucmCommand <|> ucmComment | ||
where | ||
ucmCommand :: P UcmLine | ||
ucmCommand = do | ||
context <- | ||
P.try do | ||
contextString <- P.takeWhile1P Nothing (/= '>') | ||
context <- | ||
case (tryFrom @Text contextString) of | ||
(Right (These project branch)) -> pure (UcmContextProject (ProjectAndBranch project branch)) | ||
_ -> fail "expected project/branch or absolute path" | ||
void $ lineToken $ word ">" | ||
pure context | ||
line <- P.takeWhileP Nothing (/= '\n') <* spaces | ||
pure $ UcmCommand context line | ||
|
||
ucmComment :: P UcmLine | ||
ucmComment = do | ||
word "--" | ||
line <- P.takeWhileP Nothing (/= '\n') <* spaces | ||
pure $ UcmComment line | ||
|
||
apiRequest :: P APIRequest | ||
apiRequest = do | ||
apiComment <|> getRequest | ||
where | ||
getRequest = do | ||
word "GET" | ||
spaces | ||
path <- P.takeWhile1P Nothing (/= '\n') | ||
spaces | ||
pure (GetRequest path) | ||
apiComment = do | ||
word "--" | ||
comment <- P.takeWhileP Nothing (/= '\n') | ||
spaces | ||
pure (APIComment comment) | ||
|
||
-- | Produce the correct parser for the code block based on the provided info string. | ||
fenced :: Text -> P (Maybe ProcessedBlock) | ||
fenced info = do | ||
body <- P.getInput | ||
P.setInput info | ||
fenceType <- lineToken (word "ucm" <|> word "unison" <|> word "api" <|> language) | ||
case fenceType of | ||
"ucm" -> do | ||
hide <- hidden | ||
err <- expectingError | ||
P.setInput body | ||
pure . Ucm hide err <$> (spaces *> many ucmLine) | ||
"unison" -> | ||
do | ||
-- todo: this has to be more interesting | ||
-- ```unison:hide | ||
-- ```unison | ||
-- ```unison:hide:all scratch.u | ||
hide <- lineToken hidden | ||
err <- lineToken expectingError | ||
fileName <- optional untilSpace1 | ||
P.setInput body | ||
pure . Unison hide err fileName <$> (spaces *> P.getInput) | ||
"api" -> do | ||
P.setInput body | ||
pure . API <$> (spaces *> many apiRequest) | ||
_ -> pure Nothing | ||
|
||
word :: Text -> P Text | ||
word txt = P.try $ do | ||
chs <- P.takeP (Just $ show txt) (Text.length txt) | ||
guard (chs == txt) | ||
pure txt | ||
|
||
lineToken :: P a -> P a | ||
lineToken p = p <* nonNewlineSpaces | ||
|
||
nonNewlineSpaces :: P () | ||
nonNewlineSpaces = void $ P.takeWhileP Nothing (\ch -> ch == ' ' || ch == '\t') | ||
|
||
hidden :: P Hidden | ||
hidden = | ||
(HideAll <$ word ":hide:all") | ||
<|> (HideOutput <$ word ":hide") | ||
<|> pure Shown | ||
|
||
expectingError :: P ExpectingError | ||
expectingError = isJust <$> optional (word ":error") | ||
|
||
untilSpace1 :: P Text | ||
untilSpace1 = P.takeWhile1P Nothing (not . Char.isSpace) | ||
|
||
language :: P Text | ||
language = P.takeWhileP Nothing (\ch -> Char.isDigit ch || Char.isLower ch || ch == '_') | ||
|
||
spaces :: P () | ||
spaces = void $ P.takeWhileP (Just "spaces") Char.isSpace |
Oops, something went wrong.