diff --git a/sjsonnet/src-js/sjsonnet/Platform.scala b/sjsonnet/src-js/sjsonnet/Platform.scala
index c950f6d5..4bc89975 100644
--- a/sjsonnet/src-js/sjsonnet/Platform.scala
+++ b/sjsonnet/src-js/sjsonnet/Platform.scala
@@ -41,10 +41,61 @@ object Platform {
   }
 
   private val regexCache = new util.concurrent.ConcurrentHashMap[String, Pattern]
+  private val namedGroupPatternReplace = Pattern.compile("(\\(\\?P<)(.+?>.*?\\))")
 
   // scala.js does not rely on re2. Per https://www.scala-js.org/doc/regular-expressions.html.
   // Expect to see some differences in behavior.
-  def getPatternFromCache(pat: String) : Pattern = regexCache.computeIfAbsent(pat, _ => Pattern.compile(pat))
+  // Python-style named groups `(?P<name>...)` are rewritten to `(?<name>...)` before compiling.
+  def getPatternFromCache(pat: String) : Pattern =
+    // Rewrite inside the loader so that cache hits skip the rewrite regex.
+    regexCache.computeIfAbsent(pat, _ =>
+      Pattern.compile(namedGroupPatternReplace.matcher(pat).replaceAll("(?<$2")))
+
+  /**
+   * Maps each named capturing group of `pat` to its 1-based group index.
+   *
+   * The index is found by scanning the pattern source and counting every capturing
+   * group, named or not, so a named group that follows other groups gets its real
+   * index. Escapes, `\Q...\E` quotes, character classes and the lookbehinds `(?<=` and
+   * `(?<!` do not open a capturing group and are skipped.
+   */
+  def getNamedGroupsMap(pat: Pattern): Map[String, Int] = {
+    val source = pat.pattern()
+    val length = source.length
+    val namedGroups = Map.newBuilder[String, Int]
+    var groupIndex = 0
+    var classDepth = 0
+    var quoted = false
+    var i = 0
+    while (i < length) {
+      val c = source.charAt(i)
+      if (quoted) {
+        if (c == '\\' && i + 1 < length && source.charAt(i + 1) == 'E') {
+          quoted = false
+          i += 1
+        }
+      } else if (c == '\\') {
+        if (i + 1 < length && source.charAt(i + 1) == 'Q') quoted = true
+        i += 1 // whatever follows the backslash cannot open a group
+      } else if (classDepth > 0) {
+        if (c == '[') classDepth += 1 else if (c == ']') classDepth -= 1
+      } else if (c == '[') {
+        classDepth = 1
+      } else if (c == '(') {
+        if (!source.startsWith("?", i + 1)) {
+          groupIndex += 1
+        } else if (source.startsWith("?<", i + 1) &&
+                   !source.startsWith("?<=", i + 1) &&
+                   !source.startsWith("?<!", i + 1)) {
+          groupIndex += 1
+          val nameEnd = source.indexOf('>', i + 3)
+          if (nameEnd > i + 3) namedGroups += source.substring(i + 3, nameEnd) -> groupIndex
+        }
+      }
+      i += 1
+    }
+    namedGroups.result()
+  }
 
   def regexQuote(s: String): String = Pattern.quote(s)
 }
diff --git a/sjsonnet/src-jvm/sjsonnet/Platform.scala b/sjsonnet/src-jvm/sjsonnet/Platform.scala
index 17dcecb0..b3bb1678 100644
--- a/sjsonnet/src-jvm/sjsonnet/Platform.scala
+++ b/sjsonnet/src-jvm/sjsonnet/Platform.scala
@@ -112,7 +112,19 @@ object Platform {
   }
 
   private val regexCache = new util.concurrent.ConcurrentHashMap[String, Pattern]
+  private val dashPattern = getPatternFromCache("-")
+
   def getPatternFromCache(pat: String) : Pattern = regexCache.computeIfAbsent(pat, _ => Pattern.compile(pat))
 
-  def regexQuote(s: String): String = Pattern.quote(s)
+  def getNamedGroupsMap(pat: Pattern): Map[String, Int] = pat.namedGroups().asScala.view.mapValues(_.intValue()).toMap
+
+  def regexQuote(s: String): String = {
+    val quote = Pattern.quote(s)
+    val matcher = dashPattern.matcher(quote)
+    if (matcher.find()) {
+      matcher.replaceAll("\\\\-")
+    } else {
+      quote
+    }
+  }
 }
diff --git a/sjsonnet/src-native/sjsonnet/Platform.scala b/sjsonnet/src-native/sjsonnet/Platform.scala
index 4ded95f3..4ef3f34c 100644
--- a/sjsonnet/src-native/sjsonnet/Platform.scala
+++ b/sjsonnet/src-native/sjsonnet/Platform.scala
@@ -54,9 +54,20 @@ object Platform {
   }
 
   private val regexCache = new util.concurrent.ConcurrentHashMap[String, Pattern]
-  // scala native is powered by RE2, per https://scala-native.org/en/latest/lib/javalib.html#regular-expressions-java-util-regexp
-  // It should perform similarly to the JVM implementation.
+  private val dashPattern = getPatternFromCache("-")
+
   def getPatternFromCache(pat: String) : Pattern = regexCache.computeIfAbsent(pat, _ => Pattern.compile(pat))
 
-  def regexQuote(s: String): String = Pattern.quote(s)
+  def getNamedGroupsMap(pat: Pattern): Map[String, Int] = scala.jdk.javaapi.CollectionConverters.asScala(
+    pat.re2.namedGroups).view.mapValues(_.intValue()).toMap
+
+  def regexQuote(s: String): String = {
+    val quote = Pattern.quote(s)
+    val matcher = dashPattern.matcher(quote)
+    if (matcher.find()) {
+      matcher.replaceAll("\\\\-")
+    } else {
+      quote
+    }
+  }
 }
diff --git a/sjsonnet/src/sjsonnet/Std.scala b/sjsonnet/src/sjsonnet/Std.scala
index 9cb4bc57..4d594f7d 100644
--- a/sjsonnet/src/sjsonnet/Std.scala
+++ b/sjsonnet/src/sjsonnet/Std.scala
@@ -483,26 +483,14 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
   }
 
   private object StripUtils {
-    private val dashPattern = Platform.getPatternFromCache("-")
-
-    private def cleanupPattern(chars: String): String = {
-      val matcher = dashPattern.matcher(chars)
-      if (matcher.find()) {
-        matcher.replaceAll("") + "-"
-      } else {
-        chars
-      }
-    }
-
     private def getLeadingPattern(chars: String): String = "^[" + Platform.regexQuote(chars) + "]+"
 
     private def getTrailingPattern(chars: String): String = "[" + Platform.regexQuote(chars) + "]+$"
 
     def unspecializedStrip(str: String, chars: String, left: Boolean, right: Boolean): String = {
       var s = str
-      val cleanedUpPattern = cleanupPattern(chars)
-      if (right) s = Platform.getPatternFromCache(getTrailingPattern(cleanedUpPattern)).matcher(s).replaceAll("")
-      if (left) s = Platform.getPatternFromCache(getLeadingPattern(cleanedUpPattern)).matcher(s).replaceAll("")
+      if (right) s = Platform.getPatternFromCache(getTrailingPattern(chars)).matcher(s).replaceAll("")
+      if (left) s = Platform.getPatternFromCache(getLeadingPattern(chars)).matcher(s).replaceAll("")
       s
     }
 
@@ -512,9 +500,8 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
       right: Boolean,
       functionName: String
     ) extends Val.Builtin1(functionName, "str") {
-      private[this] val cleanedUpPattern = cleanupPattern(chars)
-      private[this] val leftPattern = Platform.getPatternFromCache(getLeadingPattern(cleanedUpPattern))
-      private[this] val rightPattern = Platform.getPatternFromCache(getTrailingPattern(cleanedUpPattern))
+      private[this] val leftPattern = Platform.getPatternFromCache(getLeadingPattern(chars))
+      private[this] val rightPattern = Platform.getPatternFromCache(getTrailingPattern(chars))
 
       def evalRhs(str: Val, ev: EvalScope, pos: Position): Val = {
         var s = str.asString
diff --git a/sjsonnet/src/sjsonnet/StdRegex.scala b/sjsonnet/src/sjsonnet/StdRegex.scala
index 18a355bb..0e38f498 100644
--- a/sjsonnet/src/sjsonnet/StdRegex.scala
+++ b/sjsonnet/src/sjsonnet/StdRegex.scala
@@ -4,66 +4,61 @@ import sjsonnet.Expr.Member.Visibility
 import sjsonnet.Val.Obj
 
 object StdRegex {
+  /**
+   * Searches `str` for `pattern` and describes what was found.
+   *
+   * Returns `Val.Null` when the pattern matches nowhere. Otherwise returns an object with
+   * `string` (the first match), `captures` (the groups of every match, in order, with ""
+   * for a group that did not participate) and `namedCaptures` (each named group mapped to
+   * its capture from the first match).
+   */
+  def regexPartialMatch(pos: Position, pattern: String, str: String): Val = {
+    val compiledPattern = Platform.getPatternFromCache(pattern)
+    val matcher = compiledPattern.matcher(str)
+    var returnStr: Val = null
+    val groupCount = matcher.groupCount()
+    val captures = Array.newBuilder[Val]
+    captures.sizeHint(groupCount)
+
+    while (matcher.find()) {
+      if (returnStr == null) {
+        val m = matcher.group(0)
+        returnStr = Val.Str(pos.noOffset, if (m != null) m else "")
+      }
+      for (i <- 1 to groupCount) {
+        val m = matcher.group(i)
+        captures += Val.Str(pos.noOffset, if (m != null) m else "")
+      }
+    }
+    if (returnStr == null) {
+      return Val.Null(pos.noOffset)
+    }
+
+    val result = captures.result()
+    // Group indices are 1-based; `result` is 0-based and starts with the first match's groups.
+    val namedCaptures = Platform.getNamedGroupsMap(compiledPattern).map {
+      case (k, v) =>
+        k -> new Obj.ConstMember(true, Visibility.Normal, result(v - 1))
+    }.toSeq
+
+    Val.Obj.mk(pos.noOffset,
+      "string" -> new Obj.ConstMember(true, Visibility.Normal, returnStr),
+      "captures" -> new Obj.ConstMember(true, Visibility.Normal, new Val.Arr(pos.noOffset, result)),
+      "namedCaptures" -> new Obj.ConstMember(true, Visibility.Normal, Val.Obj.mk(pos.noOffset, namedCaptures: _*))
+    )
+  }
+
   def functions: Map[String, Val.Builtin] = Map(
     "regexPartialMatch" -> new Val.Builtin2("regexPartialMatch", "pattern", "str") {
       override def evalRhs(pattern: Val, str: Val, ev: EvalScope, pos: Position): Val = {
-        val compiledPattern = Platform.getPatternFromCache(pattern.asString)
-        val matcher = compiledPattern.matcher(str.asString)
-        var returnStr: Val = null
-        val captures = Array.newBuilder[Val]
-        val groupCount = matcher.groupCount()
-        while (matcher.find()) {
-          if (returnStr == null) {
-            val m = matcher.group(0)
-            if (m != null) {
-              returnStr = Val.Str(pos.noOffset, matcher.group(0))
-            } else {
-              returnStr = Val.Null(pos.noOffset)
-            }
-          }
-          for (i <- 1 to groupCount) {
-            val m = matcher.group(i)
-            if (m == null) {
-              captures += Val.Null(pos.noOffset)
-            } else {
-              captures += Val.Str(pos.noOffset, m)
-            }
-          }
-        }
-        val result = captures.result()
-        Val.Obj.mk(pos.noOffset,
-          "string" -> new Obj.ConstMember(true, Visibility.Normal,
-            if (returnStr == null) Val.Null(pos.noOffset) else returnStr),
-          "captures" -> new Obj.ConstMember(true, Visibility.Normal, new Val.Arr(pos.noOffset, result))
-        )
+        regexPartialMatch(pos, pattern.asString, str.asString)
       }
     },
     "regexFullMatch" -> new Val.Builtin2("regexFullMatch", "pattern", "str") {
       override def evalRhs(pattern: Val, str: Val, ev: EvalScope, pos: Position): Val = {
-        val compiledPattern = Platform.getPatternFromCache(pattern.asString)
-        val matcher = compiledPattern.matcher(str.asString)
-        if (!matcher.matches()) {
-          Val.Obj.mk(pos.noOffset,
-            "string" -> new Obj.ConstMember(true, Visibility.Normal, Val.Null(pos.noOffset)),
-            "captures" -> new Obj.ConstMember(true, Visibility.Normal, new Val.Arr(pos.noOffset, Array.empty[Lazy]))
-          )
-        } else {
-          val captures = Array.newBuilder[Val]
-          val groupCount = matcher.groupCount()
-          for (i <- 0 to groupCount) {
-            val m = matcher.group(i)
-            if (m == null) {
-              captures += Val.Null(pos.noOffset)
-            } else {
-              captures += Val.Str(pos.noOffset, m)
-            }
-          }
-          val result = captures.result()
-          Val.Obj.mk(pos.noOffset,
-            "string" -> new Obj.ConstMember(true, Visibility.Normal, result.head),
-            "captures" -> new Obj.ConstMember(true, Visibility.Normal, new Val.Arr(pos.noOffset, result.drop(1)))
-          )
-        }
+        // Wrap the pattern in a non-capturing group so the anchors bind to the whole pattern
+        // rather than to its outer alternatives ("a|b" must not turn into "^a|b$").
+        regexPartialMatch(pos, s"^(?:${pattern.asString})$$", str.asString)
       }
     },
     "regexGlobalReplace" -> new Val.Builtin3("regexGlobalReplace", "str", "pattern", "to") {
diff --git a/sjsonnet/test/src/sjsonnet/StdRegexTests.scala b/sjsonnet/test/src/sjsonnet/StdRegexTests.scala
index 0f6e6bf9..0b000ba8 100644
--- a/sjsonnet/test/src/sjsonnet/StdRegexTests.scala
+++ b/sjsonnet/test/src/sjsonnet/StdRegexTests.scala
@@ -8,28 +8,37 @@ object StdRegexTests extends TestSuite {
     test("std.native - regex") {
       eval("""std.native("regexPartialMatch")("a(b)c", "cabc")""") ==> ujson.Obj(
         "string" -> "abc",
-        "captures" -> ujson.Arr("b")
+        "captures" -> ujson.Arr("b"),
+        "namedCaptures" -> ujson.Obj()
       )
-      eval("""std.native("regexPartialMatch")("a(b)c", "def")""") ==> ujson.Obj(
-        "string" -> ujson.Null,
-        "captures" -> ujson.Arr()
+      eval("""std.native("regexPartialMatch")("a(?P<foo>b)c", "cabc")""") ==> ujson.Obj(
+        "string" -> "abc",
+        "captures" -> ujson.Arr("b"),
+        "namedCaptures" -> ujson.Obj(
+          "foo" -> ujson.Str("b")
+        )
       )
+      eval("""std.native("regexPartialMatch")("a(b)c", "def")""") ==> ujson.Null
       eval("""std.native("regexPartialMatch")("a(b)c", "abcabc")""") ==> ujson.Obj(
         "string" -> "abc",
-        "captures" -> ujson.Arr("b", "b")
+        "captures" -> ujson.Arr("b", "b"),
+        "namedCaptures" -> ujson.Obj()
       )
       eval("""std.native("regexFullMatch")("a(b)c", "abc")""") ==> ujson.Obj(
         "string" -> "abc",
-        "captures" -> ujson.Arr("b")
-      )
-      eval("""std.native("regexFullMatch")("a(b)c", "cabc")""") ==> ujson.Obj(
-        "string" -> ujson.Null,
-        "captures" -> ujson.Arr()
+        "captures" -> ujson.Arr("b"),
+        "namedCaptures" -> ujson.Obj()
       )
-      eval("""std.native("regexFullMatch")("a(b)c", "def")""") ==> ujson.Obj(
-        "string" -> ujson.Null,
-        "captures" -> ujson.Arr()
+      eval("""std.native("regexFullMatch")("a(?P<foo>b)c", "abc")""") ==> ujson.Obj(
+        "string" -> "abc",
+        "captures" -> ujson.Arr("b"),
+        "namedCaptures" -> ujson.Obj(
+          "foo" -> ujson.Str("b")
+        )
       )
+      eval("""std.native("regexFullMatch")("a(b)c", "cabc")""") ==> ujson.Null
+      eval("""std.native("regexFullMatch")("a(b)c", "def")""") ==> ujson.Null
+      eval("""std.native("regexFullMatch")("a|b", "ab")""") ==> ujson.Null
       eval("""std.native("regexGlobalReplace")("abcbbb", "b", "d")""") ==> ujson.Str("adcddd")
       eval("""std.native("regexReplace")("abcbbb", "b", "d")""") ==> ujson.Str("adcbbb")
       eval("""std.native("regexQuoteMeta")("a.b")""") ==> ujson.Str(Platform.regexQuote("a.b"))