From 27470205d8cb9a7556aa84c30f3bdfbca9e0e1ff Mon Sep 17 00:00:00 2001 From: Reid Spencer Date: Sat, 23 Nov 2024 15:48:31 -0500 Subject: [PATCH] Implement parsing to a list of tokens (#695) Implement parsing to a list of tokens Added to TopLevelParser: * def parseToTokens(input:RiddlParserInput): Either[Messages,List[Token]] Added these case classes to AST as the set of possible results returned from TopLevelParser.parseToTokens: * case class PunctuationTKN(at: At) extends Token * case class QuotedStringTKN(at: At) extends Token * case class ReadabilityTKN(at: At) extends Token * case class PredefinedTKN(at: At) extends Token * case class KeywordTKN(at: At) extends Token * case class CommentTKN(at: At) extends Token * case class LiteralStringTKN(at: At) extends Token * case class MarkdownLinesTKN(at: At) extends Token * case class IdentifierTKN(at: At) extends Token * case class OtherTKN(at: At) extends Token This allows for distinguishing the basic kinds of input being parsed. --- .../riddl/language/parsing/JVMTests.scala | 1 + .../language/parsing/TopLevelParserTest.scala | 15 + .../com/ossuminc/riddl/language/AST.scala | 16 + .../riddl/language/parsing/CommonParser.scala | 6 +- .../parsing/ExtensibleTopLevelParser.scala | 82 ++--- .../riddl/language/parsing/Keywords.scala | 281 +++++++++++++++++- .../language/parsing/ParsingContext.scala | 41 ++- .../riddl/language/parsing/PredefTypes.scala | 45 ++- .../riddl/language/parsing/Punctuation.scala | 26 ++ .../riddl/language/parsing/Readability.scala | 80 +++-- .../language/parsing/TokenStreamParser.scala | 87 ++++++ .../language/parsing/TopLevelParser.scala | 34 ++- .../riddl/language/parsing/KeywordsTest.scala | 6 +- .../parsing/TokenStreamParserTest.scala | 69 +++++ 14 files changed, 699 insertions(+), 90 deletions(-) create mode 100644 language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/TokenStreamParser.scala create mode 100644 
language/shared/src/test/scala/com/ossuminc/riddl/language/parsing/TokenStreamParserTest.scala diff --git a/language/jvm/src/test/scala/com/ossuminc/riddl/language/parsing/JVMTests.scala b/language/jvm/src/test/scala/com/ossuminc/riddl/language/parsing/JVMTests.scala index 32f670781..a31a6c046 100644 --- a/language/jvm/src/test/scala/com/ossuminc/riddl/language/parsing/JVMTests.scala +++ b/language/jvm/src/test/scala/com/ossuminc/riddl/language/parsing/JVMTests.scala @@ -19,3 +19,4 @@ class JVMRepositoryTest extends RepositoryTest class JVMStatementsTest extends StatementsTest class JVMStreamingParserTest extends StreamingParserTest class JVMTypeParserTest extends TypeParserTest +class JVMTokenStreamParserTest extends TokenStreamParserTest diff --git a/language/jvm/src/test/scala/com/ossuminc/riddl/language/parsing/TopLevelParserTest.scala b/language/jvm/src/test/scala/com/ossuminc/riddl/language/parsing/TopLevelParserTest.scala index 9395da045..6a3dea6c3 100644 --- a/language/jvm/src/test/scala/com/ossuminc/riddl/language/parsing/TopLevelParserTest.scala +++ b/language/jvm/src/test/scala/com/ossuminc/riddl/language/parsing/TopLevelParserTest.scala @@ -35,9 +35,22 @@ class TopLevelParserTest extends ParsingTest { "parse RiddlParserInput" in { (_: TestData) => TopLevelParser.parseInput(rpi) mustBe Right(simpleDomainResults) } + + "parse from a URL" in { (_: TestData) => + val url: URL = PathUtils.urlFromCwdPath(Path.of("language/jvm/src/test/input/everything.riddl")) + val future = TopLevelParser.parseURL(url) + Await.result(future, 10.seconds) match + case Right(r: Root) => + r.domains.head.id.value must be("Everything") + case Left(messages: Messages) => + fail(messages.format) + end match + } + "parse File" in { (_: TestData) => TopLevelParser.parseInput(rpi) mustBe Right(simpleDomainResults) } + "parse String" in { (_: TestData) => val source = Source.fromFile(simpleDomainFile.toFile) try { @@ -45,6 +58,7 @@ class TopLevelParserTest extends ParsingTest { result 
mustBe Right(simpleDomainResults) } finally { source.close() } } + "parse empty String" in { (_: TestData) => val parser = StringParser("") parser.parseRoot match { @@ -88,6 +102,7 @@ class TopLevelParserTest extends ParsingTest { paths must contain("language/jvm/src/test/input/everything_full.riddl") } } + "return URLs on failure" in { (td: TestData) => val rpi: RiddlParserInput = RiddlParserInput("some source that ain't riddl", td) val tlp = TopLevelParser(rpi, false) diff --git a/language/shared/src/main/scala/com/ossuminc/riddl/language/AST.scala b/language/shared/src/main/scala/com/ossuminc/riddl/language/AST.scala index 858e1cd10..5a007ac47 100644 --- a/language/shared/src/main/scala/com/ossuminc/riddl/language/AST.scala +++ b/language/shared/src/main/scala/com/ossuminc/riddl/language/AST.scala @@ -3959,6 +3959,22 @@ object AST: override def format: String = s"domain ${pathId.format}" } + /////////////////////////////////////////////////////////////////////////////////////////////////////////// TOKENS + sealed trait Token { + def at: At + } + + case class PunctuationTKN(at: At) extends Token + case class QuotedStringTKN(at: At) extends Token + case class ReadabilityTKN(at: At) extends Token + case class PredefinedTKN(at: At) extends Token + case class KeywordTKN(at: At) extends Token + case class CommentTKN(at: At) extends Token + case class LiteralStringTKN(at: At) extends Token + case class MarkdownLinesTKN(at: At) extends Token + case class IdentifierTKN(at: At) extends Token + case class OtherTKN(at: At) extends Token + /////////////////////////////////////////////////////////////////////////////////////////////////////////// FUNCTIONS /** Find the authors for some definition diff --git a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/CommonParser.scala b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/CommonParser.scala index 29827b6c2..4e82d65e2 100644 --- 
a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/CommonParser.scala +++ b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/CommonParser.scala @@ -68,7 +68,7 @@ private[parsing] trait CommonParser(using io: PlatformContext) def literalStrings[u: P]: P[Seq[LiteralString]] = { P(literalString.rep(1)) } - private def markdownLines[u: P]: P[Seq[LiteralString]] = { + def markdownLines[u: P]: P[Seq[LiteralString]] = { P(markdownLine.rep(1)) } @@ -228,12 +228,12 @@ private[parsing] trait CommonParser(using io: PlatformContext) private def portNum[u: P]: P[String] = { P(Index ~~ CharsWhileIn("0-9").rep(min = 1, max = 5).! ~~ Index).map { (i1, numStr: String, i2) => val num = numStr.toInt - if num > 0 && num < 65535 then + if num > 0 && num < 65535 then numStr else error(at(i1,i2), s"Invalid port number: $numStr. Must be in range 0 <= port < 65536") "0" - end if + end if } } diff --git a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/ExtensibleTopLevelParser.scala b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/ExtensibleTopLevelParser.scala index b9ee9af58..0eb8d37a6 100644 --- a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/ExtensibleTopLevelParser.scala +++ b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/ExtensibleTopLevelParser.scala @@ -38,6 +38,7 @@ trait ExtensibleTopLevelParser(using PlatformContext) SagaParser, StreamingParser, StatementParser, + TokenStreamParser, ParsingContext { def input: RiddlParserInput @@ -87,30 +88,30 @@ trait ExtensibleTopLevelParser(using PlatformContext) } } - /** Obtain the parser for any of the main AST definition types */ + /** Obtain the parser for any of the main AST definition types */ protected def parserFor[T <: Definition: ClassTag]: P[?] => P[T] = { val parser: P[?] => P[?] 
= classTag[T].runtimeClass match { - case x if x == classOf[Adaptor] => adaptor(_) - case x if x == classOf[Author] => author(_) - case x if x == classOf[Connector] => connector(_) - case x if x == classOf[Constant] => constant(_) + case x if x == classOf[Adaptor] => adaptor(_) + case x if x == classOf[Author] => author(_) + case x if x == classOf[Connector] => connector(_) + case x if x == classOf[Constant] => constant(_) case x if x == classOf[ContainedGroup] => containedGroup(_) - case x if x == classOf[Context] => context(_) - case x if x == classOf[Domain] => domain(_) - case x if x == classOf[Entity] => entity(_) - case x if x == classOf[Epic] => epic(_) - case x if x == classOf[Function] => function(_) - case x if x == classOf[Invariant] => invariant(_) - case x if x == classOf[Module] => module(_) - case x if x == classOf[Nebula] => nebula(_) - case x if x == classOf[Projector] => projector(_) - case x if x == classOf[Relationship] => relationship(_) - case x if x == classOf[Repository] => repository(_) - case x if x == classOf[Root] => root(_) - case x if x == classOf[Saga] => saga(_) - case x if x == classOf[Streamlet] => streamlet(_) - case x if x == classOf[Type] => typeDef(_) - case x if x == classOf[User] => user(_) + case x if x == classOf[Context] => context(_) + case x if x == classOf[Domain] => domain(_) + case x if x == classOf[Entity] => entity(_) + case x if x == classOf[Epic] => epic(_) + case x if x == classOf[Function] => function(_) + case x if x == classOf[Invariant] => invariant(_) + case x if x == classOf[Module] => module(_) + case x if x == classOf[Nebula] => nebula(_) + case x if x == classOf[Projector] => projector(_) + case x if x == classOf[Relationship] => relationship(_) + case x if x == classOf[Repository] => repository(_) + case x if x == classOf[Root] => root(_) + case x if x == classOf[Saga] => saga(_) + case x if x == classOf[Streamlet] => streamlet(_) + case x if x == classOf[Type] => typeDef(_) + case x if x == 
classOf[User] => user(_) case _ => throw new RuntimeException( s"No parser defined for ${classTag[T].runtimeClass}" @@ -119,19 +120,16 @@ trait ExtensibleTopLevelParser(using PlatformContext) parser.asInstanceOf[P[?] => P[T]] } - /** Parse the input expecting the contents of a Root node - * @returns - * Either the failure error messages or the Root parsed - */ + * @returns + * Either the failure error messages or the Root parsed + */ def parseRoot: Either[Messages, Root] = doParse[Root](root(_)) - /** Parse the input expecting the contents of a Root node but also return the - * list of files that were read - * @returns - * Either the failure messages and a list of files or the Root that was parsed - * and the list of files parsed. - */ + /** Parse the input expecting the contents of a Root node but also return the list of files that were read + * @returns + * Either the failure messages and a list of files or the Root that was parsed and the list of files parsed. + */ def parseRootWithURLs: Either[(Messages, Seq[URL]), (Root, Seq[URL])] = { doParse[Root](root(_)) match { case l @ Left(messages) => Left(messages -> this.getURLs) @@ -139,16 +137,15 @@ trait ExtensibleTopLevelParser(using PlatformContext) } } - /** Parse the input expecting main definitions in any order, a nebula. Each - * definition must be syntactically correct but the top level definitions do - * not require the hierarchical structure of parsing for Root contents. - * @returns - * Either the failure messages or the Nebula of definitions - */ + /** Parse the input expecting main definitions in any order, a nebula. Each definition must be syntactically correct + * but the top level definitions do not require the hierarchical structure of parsing for Root contents. + * @returns + * Either the failure messages or the Nebula of definitions + */ def parseNebula: Either[Messages, Nebula] = doParse[Nebula](nebula(_)) - /** Parse the input expecting definitions in any order, a nebula. 
Each definition must be syntactically correct - * but the top level definitions do not require the hierarchical structure of parsing for Root contents. + /** Parse the input expecting definitions in any order, a nebula. Each definition must be syntactically correct but + * the top level definitions do not require the hierarchical structure of parsing for Root contents. * @returns * Either the failure messages with the list of parsed URL or the Nebula of definitions with the list of parsed * URLs @@ -159,4 +156,11 @@ trait ExtensibleTopLevelParser(using PlatformContext) case r @ Right(nebula) => Right(nebula -> this.getURLs) } } + + def parseTokens: Either[Messages, List[Token]] = { + parse[List[Token]](input, parseAllTokens(_)) match + case Left((messages, _)) => Left(messages) + case Right((list, _)) => Right(list) + end match + } } diff --git a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/Keywords.scala b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/Keywords.scala index 160d356f5..8abc3ab0c 100644 --- a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/Keywords.scala +++ b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/Keywords.scala @@ -8,6 +8,142 @@ package com.ossuminc.riddl.language.parsing import fastparse.* import MultiLineWhitespace.* +import com.ossuminc.riddl.language.parsing.Keyword.{ + acquires, + adaptor, + all, + any, + append, + application, + attachment, + author, + become, + benefit, + body, + briefly, + call, + capability, + case_, + command, + commands, + condition, + connector, + constant, + container, + contains, + context, + create, + described, + details, + direct, + do_, + domain, + else_, + email, + end_, + entity, + epic, + error, + event, + example, + execute, + explained, + field, + fields, + file, + flow, + focus, + for_, + foreach, + from, + function, + graph, + group, + handler, + if_, + import_, + include, + index, + init, + inlet, + inlets, + input, + 
invariant, + items, + label, + link, + many, + mapping, + merge, + message, + module, + morph, + name, + nebula, + on, + one, + option, + optional, + options, + organization, + other, + outlet, + outlets, + output, + parallel, + pipe, + plant, + presents, + projector, + query, + range, + record, + reference, + relationship, + remove, + replica, + reply, + repository, + required, + requires, + result, + results, + return_, + returns, + reverted, + router, + saga, + schema, + selects, + send, + sequence, + set, + show, + shown, + sink, + source, + split, + state, + step, + stop, + story, + streamlet, + table, + take, + tell, + term, + then_, + title, + type_, + updates, + url, + user, + value, + void, + when, + where, + with_ +} import java.lang.Character.isLetterOrDigit @@ -247,6 +383,8 @@ object Keywords { def relationship[u: P]: P[Unit] = keyword(Keyword.relationship) + def remove[u: P]: P[Unit] = keyword(Keyword.remove) + def replica[u: P]: P[Unit] = keyword(Keyword.replica) def reply[u: P]: P[Unit] = keyword(Keyword.reply) @@ -332,6 +470,147 @@ object Keywords { def where[u: P]: P[Unit] = keyword(Keyword.where) def `with`[u: P]: P[Unit] = keyword(Keyword.with_) + + def anyKeyword[u: P]: P[Unit] = { + P( + StringIn( + Keyword.acquires, + Keyword.adaptor, + Keyword.all, + Keyword.any, + Keyword.append, + Keyword.application, + Keyword.attachment, + Keyword.author, + Keyword.become, + Keyword.benefit, + Keyword.briefly, + Keyword.body, + Keyword.call, + Keyword.case_, + Keyword.capability, + Keyword.command, + Keyword.commands, + Keyword.condition, + Keyword.connector, + Keyword.constant, + Keyword.container, + Keyword.contains, + Keyword.context, + Keyword.create, + Keyword.described, + Keyword.details, + Keyword.direct, + Keyword.presents, + Keyword.do_, + Keyword.domain, + Keyword.else_, + Keyword.email, + Keyword.end_, + Keyword.entity, + Keyword.epic, + Keyword.error, + Keyword.event, + Keyword.example, + Keyword.execute, + Keyword.explained, + Keyword.field, 
+ Keyword.fields, + Keyword.file, + Keyword.flow, + Keyword.focus, + Keyword.for_, + Keyword.foreach, + Keyword.from, + Keyword.function, + Keyword.graph, + Keyword.group, + Keyword.handler, + Keyword.if_, + Keyword.import_, + Keyword.include, + Keyword.index, + Keyword.init, + Keyword.inlet, + Keyword.inlets, + Keyword.input, + Keyword.invariant, + Keyword.items, + Keyword.label, + Keyword.link, + Keyword.many, + Keyword.mapping, + Keyword.merge, + Keyword.message, + Keyword.module, + Keyword.morph, + Keyword.name, + Keyword.nebula, + Keyword.on, + Keyword.one, + Keyword.organization, + Keyword.option, + Keyword.optional, + Keyword.options, + Keyword.other, + Keyword.outlet, + Keyword.outlets, + Keyword.output, + Keyword.parallel, + Keyword.pipe, + Keyword.plant, + Keyword.projector, + Keyword.query, + Keyword.range, + Keyword.reference, + Keyword.relationship, + Keyword.remove, + Keyword.replica, + Keyword.reply, + Keyword.repository, + Keyword.requires, + Keyword.required, + Keyword.record, + Keyword.result, + Keyword.results, + Keyword.return_, + Keyword.returns, + Keyword.reverted, + Keyword.router, + Keyword.saga, + Keyword.schema, + Keyword.selects, + Keyword.send, + Keyword.sequence, + Keyword.set, + Keyword.show, + Keyword.shown, + Keyword.sink, + Keyword.source, + Keyword.split, + Keyword.state, + Keyword.step, + Keyword.stop, + Keyword.story, + Keyword.streamlet, + Keyword.table, + Keyword.take, + Keyword.tell, + Keyword.term, + Keyword.then_, + Keyword.title, + Keyword.type_, + Keyword.url, + Keyword.updates, + Keyword.user, + Keyword.value, + Keyword.void, + Keyword.when, + Keyword.where, + Keyword.with_ + ) + ) + } } object Keyword { @@ -376,8 +655,8 @@ object Keyword { final val event = "event" final val example = "example" final val execute = "execute" - final val explanation = "explanation" final val explained = "explained" + final val explanation = "explanation" final val field = "field" final val fields = "fields" final val file = "file" diff 
--git a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/ParsingContext.scala b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/ParsingContext.scala index 470b1a5ef..700722d8c 100644 --- a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/ParsingContext.scala +++ b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/ParsingContext.scala @@ -29,6 +29,26 @@ trait ParsingContext(using pc: PlatformContext) extends ParsingErrors { private val urlSeen: mutable.ListBuffer[URL] = mutable.ListBuffer[URL]() def getURLs: Seq[URL] = urlSeen.toSeq + protected def parse[RESULT]( + rpi: RiddlParserInput, + rule: P[?] => P[RESULT], + withVerboseFailures: Boolean = false + ): Either[(Messages,Int), (RESULT,Int)] = { + try { + fastparse.parse[RESULT](rpi, rule, withVerboseFailures) match { + case fastparse.Parsed.Success(list, index) => + if messagesNonEmpty then Left(messagesAsList -> index) else Right(list -> index) + case failure: fastparse.Parsed.Failure => + makeParseFailureError(failure, rpi) + Left(messagesAsList -> failure.index) + } + } catch { + case scala.util.control.NonFatal(exception) => + makeParseFailureError(exception, At.empty) + Left(messagesAsList -> 0) + } + } + protected def parseRule[RESULT]( rpi: RiddlParserInput, rule: P[?] 
=> P[RESULT], @@ -42,21 +62,12 @@ trait ParsingContext(using pc: PlatformContext) extends ParsingErrors { result } ): Either[Messages, RESULT] = { - try { - fastparse.parse[RESULT](rpi, rule(_), withVerboseFailures) match { - case Success(root, index) => - if messagesNonEmpty then validate(Left(messagesAsList), rpi, index) - else validate(Right(root), rpi, index) - end if - case failure: Failure => - makeParseFailureError(failure, rpi) - validate(Left(messagesAsList), rpi, 0) - } - } catch { - case NonFatal(exception) => - makeParseFailureError(exception, At.empty) - validate(Left(messagesAsList), rpi, 0) - } + parse[RESULT](rpi, rule(_), withVerboseFailures) match + case Right((root, index)) => + validate(Right(root), rpi, index) + case Left((messages, index)) => + validate(Left(messagesAsList), rpi, index) + end match } def at(offset1: Int, offset2: Int)(implicit context: P[?]): At = { diff --git a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/PredefTypes.scala b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/PredefTypes.scala index 70c526e28..d97604f8e 100644 --- a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/PredefTypes.scala +++ b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/PredefTypes.scala @@ -29,7 +29,7 @@ object PredefTypes { StringIn(PredefType.Boolean, PredefType.Integer, PredefType.Whole, PredefType.Natural).! ) - def timeTypes[u:P]: P[String] = keywords( + def timeTypes[u: P]: P[String] = keywords( StringIn( PredefType.Duration, PredefType.DateTime, @@ -39,7 +39,7 @@ object PredefTypes { ).! 
) - def otherTypes[u:P]: P[String] = keywords( + def otherTypes[u: P]: P[String] = keywords( StringIn( // order matters in this list, because of common prefixes PredefType.Abstract, @@ -82,6 +82,11 @@ object PredefTypes { def UserId[u: P]: P[Unit] = keyword("UserId") def UUID[u: P]: P[Unit] = keyword("UUID") def Whole[u: P]: P[Unit] = keyword("Whole") + + def anyPredefType[u:P]: P[Unit] = + P(realTypes | integerTypes | timeTypes | otherTypes | Abstract | Boolean | Current | Currency | Date | DateTime | + Decimal | Duration | Id | Integer | Location | Length | Luminosity | Mass | Mole | Nothing | Natural | Number | + Pattern | Range | Real | String_ | Temperature | Time | TimeStamp | Unknown | URL | UserId | UUID | Whole) } object PredefType { @@ -117,4 +122,40 @@ object PredefType { final val UUID = "UUID" final val Whole = "Whole" final val ZonedDateTime = "ZonedDateTime" + + // NOTE: Keep this list in synch with the one in TokenStreamParser + final val allPredefTypes: Seq[String] = Seq( + Abstract, + Blob, + Boolean, + Current, + Currency, + Date, + DateTime, + Decimal, + Duration, + Id, + Integer, + Location, + Length, + Luminosity, + Mass, + Mole, + Nothing, + Natural, + Number, + Pattern, + Range, + Real, + String, + Temperature, + Time, + TimeStamp, + Unknown, + URI, + UserId, + UUID, + Whole, + ZonedDateTime + ) } diff --git a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/Punctuation.scala b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/Punctuation.scala index c979d83c7..e37ec9f21 100644 --- a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/Punctuation.scala +++ b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/Punctuation.scala @@ -5,6 +5,7 @@ */ package com.ossuminc.riddl.language.parsing +import fastparse.* object Punctuation { final val asterisk = "*" @@ -25,6 +26,7 @@ object Punctuation { final val undefinedMark = "???" 
final val verticalBar = "|" + // NOTE: Keep this list in sync with the list in TokenStreamParser def allPunctuation: Seq[String] = Seq( asterisk, atSign, @@ -44,4 +46,28 @@ object Punctuation { undefinedMark, verticalBar ) + + def anyPunctuation[u: P]: P[Unit] = { + P( + StringIn( + asterisk, + atSign, + comma, + colon, + curlyOpen, + curlyClose, + dot, + equalsSign, + plus, + question, + quote, + roundOpen, + roundClose, + squareOpen, + squareClose, + undefinedMark, + verticalBar + ) + ) + } } diff --git a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/Readability.scala b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/Readability.scala index b7680042d..09fe6b3e4 100644 --- a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/Readability.scala +++ b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/Readability.scala @@ -49,27 +49,69 @@ trait Readability { def wants[u: P]: P[Unit] = keyword("wants") def `with`[u: P]: P[Unit] = keyword("with") + + def anyReadability[u: P]: P[Unit] = { + P( + StringIn( + ReadabilityWords.and, + ReadabilityWords.are, + ReadabilityWords.as, + ReadabilityWords.at, + ReadabilityWords.by, + ReadabilityWords.colon, + ReadabilityWords.equals, + ReadabilityWords.`for`, + ReadabilityWords.from, + ReadabilityWords.in, + ReadabilityWords.is, + ReadabilityWords.of, + ReadabilityWords.so, + ReadabilityWords.that, + ReadabilityWords.to, + ReadabilityWords.wants, + ReadabilityWords.with_ + ) + ) + } } -object Readability { +object ReadabilityWords { + final val and = "and" + final val are = "are" + final val as = "as" + final val at = "at" + final val by = "by" + final val `for` = "for" + final val from = "from" + final val in = "in" + final val is = "is" + final val colon = ":" + final val equals = "=" + final val of = "of" + final val so = "so" + final val that = "that" + final val to = "to" + final val wants = "wants" + final val with_ = "with" + + // NOTE: Keep this list 
in synch with the list in TokenStreamParser def allReadability: Seq[String] = Seq( - "and", - "are", - "as", - "at", - "by", - "for", - "from", - "in", - "is", - "are", - ":", - "=", - "of", - "so", - "that", - "to", - "wants", - "with" + and, + are, + as, + at, + by, + `for`, + from, + in, + is, + colon, + equals, + of, + so, + that, + to, + wants, + with_ ) } diff --git a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/TokenStreamParser.scala b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/TokenStreamParser.scala new file mode 100644 index 000000000..4940cf425 --- /dev/null +++ b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/TokenStreamParser.scala @@ -0,0 +1,87 @@ +/* + * Copyright 2019 Ossum, Inc. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package com.ossuminc.riddl.language.parsing + +import com.ossuminc.riddl.language.AST.* +import com.ossuminc.riddl.language.{AST, At} +import com.ossuminc.riddl.language.Messages.Messages +import com.ossuminc.riddl.utils.{CommonOptions, PlatformContext, Timer} +import com.ossuminc.riddl.utils.SeqHelpers.* +import com.ossuminc.riddl.utils.URL +import fastparse.* +import fastparse.MultiLineWhitespace.* +import fastparse.Parsed.Failure +import fastparse.Parsed.Success +import jdk.jshell.SourceCodeAnalysis.Documentation + +import scala.util.control.NonFatal + +trait TokenStreamParser extends CommonParser with Readability { + + def punctuationToken[u: P]: P[PunctuationTKN] = { + P(Index ~~ Punctuation.anyPunctuation ~~ Index)./.map { case (start, end) => PunctuationTKN(at(start, end)) } + } + + def quotedStringToken[u: P]: P[QuotedStringTKN] = { + P(literalString)./.map { case litStr: LiteralString => QuotedStringTKN(litStr.loc) } + } + + def readabilityToken[u: P]: P[ReadabilityTKN] = { + P(Index ~~ anyReadability ~~ Index)./.map { case (start, end) => ReadabilityTKN(at(start, end)) } + } + + def predefinedToken[u: P]: P[PredefinedTKN] = { + import 
com.ossuminc.riddl.language.parsing.PredefType.* + P(Index ~~ PredefTypes.anyPredefType ~~ Index)./.map { case (start, end) => PredefinedTKN(at(start, end)) } + } + def keywordToken[u: P]: P[KeywordTKN] = { + P(Index ~~ Keywords.anyKeyword ~~ Index)./.map { case (start, end) => KeywordTKN(at(start, end)) } + } + def commentToken[u: P]: P[CommentTKN] = { + P(comment)./.map { case comment: Comment => CommentTKN(comment.loc) } + } + + def literalStringToken[u: P]: P[LiteralStringTKN] = { + P(literalString)./.map { case litStr: LiteralString => LiteralStringTKN(litStr.loc) } + } + + def markdownLinesToken[u: P]: P[MarkdownLinesTKN] = { + P(markdownLines)./.map { case mdl: Seq[LiteralString] => + require(mdl.nonEmpty, "markdownLines return empty list of lines") + val first = mdl.head.loc + val last = mdl.last.loc + MarkdownLinesTKN(At.range(first, last)) + } + } + + def identifierToken[u: P]: P[IdentifierTKN] = { + P(identifier)./.map { case id: Identifier => IdentifierTKN(id.loc) } + } + + def otherToken[u: P]: P[OtherTKN] = { + P(Index ~~ AnyChar.rep(1) ~~ Index)./.map { case (start, end) => OtherTKN(at(start, end)) } + } + + def parseAnyToken[u: P]: P[Token] = { + P( + keywordToken | + readabilityToken | + punctuationToken | + quotedStringToken | + predefinedToken | + identifierToken | + commentToken | + markdownLinesToken | + literalStringToken | + otherToken + )./ + } + + def parseAllTokens[u: P]: P[List[Token]] = { + P(Start ~ parseAnyToken.rep(0) ~ End).map(_.toList) + } +} diff --git a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/TopLevelParser.scala b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/TopLevelParser.scala index 4b25087f2..ac4e6cfc4 100644 --- a/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/TopLevelParser.scala +++ b/language/shared/src/main/scala/com/ossuminc/riddl/language/parsing/TopLevelParser.scala @@ -79,14 +79,15 @@ object TopLevelParser { } /** Parse a string directly - * - * 
@param input - * The input string to parse - * @param withVerboseFailures - * For the utility of RIDDL implementers. - * @return - * Left(messages) -> messages indicaitng the error Right(root) -> the resulting AST.Root from the parse - */ + * + * @param input + * The input string to parse + * @param withVerboseFailures + * For the utility of RIDDL implementers. + * @return + * Left(messages) -> messages indicating the error + * Right(root) -> the resulting AST.Root from the parse + */ def parseString( input: String, withVerboseFailures: Boolean = false @@ -115,4 +116,21 @@ object TopLevelParser { tlp.parseNebula } } + + /** Parse the input to a list of tokens. This is aimed at making highlighting + * in editors quick and simple. The input is not validated for syntactic + * correctness and likely succeeds on most input. + * @param input + * The input to be parsed + * @param withVerboseFailures + * Set to true to debug parsing failures. Probably of interest only to + * the implementors. The default, false, causes no functional difference. 
+ */ + def parseToTokens( + input: RiddlParserInput, + withVerboseFailures: Boolean = false + )(using io: PlatformContext): Either[Messages, List[Token]] = { + val tlp = new TopLevelParser(input, withVerboseFailures) + tlp.parseTokens + } } diff --git a/language/shared/src/test/scala/com/ossuminc/riddl/language/parsing/KeywordsTest.scala b/language/shared/src/test/scala/com/ossuminc/riddl/language/parsing/KeywordsTest.scala index 3d6de62a0..fb87c04de 100644 --- a/language/shared/src/test/scala/com/ossuminc/riddl/language/parsing/KeywordsTest.scala +++ b/language/shared/src/test/scala/com/ossuminc/riddl/language/parsing/KeywordsTest.scala @@ -15,16 +15,16 @@ class KeywordsTest extends AbstractTestingBasis { Keyword.allKeywords.size must be(139) } } - + "Punctuation" should { "produce all punctuation marks" in { Punctuation.allPunctuation.size must be (17) } } - + "Readability" should { "produce all readability words" in { - Readability.allReadability.size must be(18) + ReadabilityWords.allReadability.size must be(17) } } } diff --git a/language/shared/src/test/scala/com/ossuminc/riddl/language/parsing/TokenStreamParserTest.scala b/language/shared/src/test/scala/com/ossuminc/riddl/language/parsing/TokenStreamParserTest.scala new file mode 100644 index 000000000..e32014215 --- /dev/null +++ b/language/shared/src/test/scala/com/ossuminc/riddl/language/parsing/TokenStreamParserTest.scala @@ -0,0 +1,69 @@ +/* + * Copyright 2019 Ossum, Inc. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +package com.ossuminc.riddl.language.parsing + +import com.ossuminc.riddl.language.Messages.Messages +import com.ossuminc.riddl.language.{AST, At} +import com.ossuminc.riddl.language.parsing.{RiddlParserInput, TopLevelParser} +import com.ossuminc.riddl.utils.{Await, PlatformContext, Timer, URL} +import org.scalatest.TestData + +import scala.concurrent.ExecutionContext +import scala.io.AnsiColor.{GREEN, RED, RESET} + +abstract class TokenStreamParserTest(using pc: PlatformContext) extends AbstractParsingTest { + "TokenStreamParser" must { + "handle simple document fragment" in { (td: TestData) => + val rpi: RiddlParserInput = RiddlParserInput( + """module foo is { + | // this is a comment + | domain blah is { ??? } + |} + |""".stripMargin, + td + ) + val result = Timer.time("Token Collection: simple document") { + TopLevelParser.parseToTokens(rpi) + } + result match + case Left(messages) => + fail(messages.format) + case Right(tokens) => + val expected = Seq( + AST.KeywordTKN(At(rpi, 0, 6)), + AST.IdentifierTKN(At(rpi, 7, 10)), + AST.ReadabilityTKN(At(rpi, 11, 13)), + AST.PunctuationTKN(At(rpi, 14, 15)), + AST.CommentTKN(At(rpi, 19, 39)), + AST.KeywordTKN(At(rpi, 43, 49)), + AST.IdentifierTKN(At(rpi, 50, 54)), + AST.ReadabilityTKN(At(rpi, 55, 57)), + AST.PunctuationTKN(At(rpi, 58, 59)), + AST.PunctuationTKN(At(rpi, 60, 63)), + AST.PunctuationTKN(At(rpi, 64, 65)), + AST.PunctuationTKN(At(rpi, 66, 67)) + ) + tokens must be(expected) + } + } + "handle full document" in { (td: TestData) => + implicit val ec: ExecutionContext = pc.ec + val url = URL.fromCwdPath("language/jvm/src/test/input/everything.riddl") + val future = RiddlParserInput.fromURL(url, td).map { rpi => + val result = Timer.time("Token Collection: full document") { + TopLevelParser.parseToTokens(rpi) + } + result match + case Left(messages) => + fail(messages.format) + case Right(tokens) => + tokens + end match + } + Await.result(future,3) + } +}