From 32a47849c34a246e1d83fcc5ad77d52d695f1a5e Mon Sep 17 00:00:00 2001 From: Jirka Marsik Date: Fri, 1 Mar 2024 11:50:11 +0100 Subject: [PATCH 1/5] Add tests with unsupported null-check behavior --- .../truffle/regex/tregex/test/OracleDBTests.java | 15 ++++++++++++++- .../tools/casefolding/src/main.rs | 10 +++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java index b5aa7ceb805a..abf278f69c47 100644 --- a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java +++ b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java @@ -48,7 +48,7 @@ public class OracleDBTests extends RegexTestBase { @Override String getEngineOptions() { - return "Flavor=OracleDB"; + return "Flavor=OracleDB,DumpAutomata=true"; } @Override @@ -918,6 +918,14 @@ public void generatedTests() { test("(a|())*?\\2", "", "a", 0, true, 0, 1, 1, 1, 1, 1); test("(a*)+", "", "a", 0, true, 0, 1, 1, 1); test("(\\1a|){2}", "", "aa", 0, true, 0, 0, 0, 0); + test("(|[ab]){3,3}b", "", "aab", 0, true, 0, 3, 2, 2); + test("(|[ab]){3}b", "", "aab", 0, true, 0, 3, 2, 2); + test("(|a){3}b", "", "aab", 0, true, 0, 3, 2, 2); + test("(|a){2}b", "", "ab", 0, true, 0, 2, 1, 1); + test("(|a){1}b", "", "b", 0, true, 0, 1, 0, 0); + test("(|a)b", "", "b", 0, true, 0, 1, 0, 0); + test("(|a)(|a)(|a)b", "", "aab", 0, true, 0, 3, 0, 0, 0, 1, 1, 2); + test("(|a)(|a)b", "", "ab", 0, true, 0, 2, 0, 0, 0, 1); /* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ } @@ -945,4 +953,9 @@ public void nfaTraversalTests() { test("()?*", "", "c", 0, true, 0, 0, 0, 0); test("X(.?){8,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); } + + @Test + public void gr52397() { + test("(|[ab]){3,3}b", "", "aab", 0, true, 0, 3, 2, 2); + } } 
diff --git a/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs b/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs index 04ef3acc8304..bc2f9a94dc86 100644 --- a/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs +++ b/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs @@ -2027,7 +2027,15 @@ fn oracledb_generate_tests() -> Result<()> { ("()??\\1", "", "a"), ("(a|())*?\\2", "", "a"), ("(a*)+", "", "a"), - ("(\\1a|){2}", "", "aa") + ("(\\1a|){2}", "", "aa"), + ("(|[ab]){3,3}b", "", "aab"), + ("(|[ab]){3}b", "", "aab"), + ("(|a){3}b", "", "aab"), + ("(|a){2}b", "", "ab"), + ("(|a){1}b", "", "b"), + ("(|a)b", "", "b"), + ("(|a)(|a)(|a)b", "", "aab"), + ("(|a)(|a)b", "", "ab") ] { let from_index = 1; let e_pattern = java_string_escape(pattern); From a9c650871187278b947ddca2a4c07ef743896c3b Mon Sep 17 00:00:00 2001 From: Jirka Marsik Date: Fri, 1 Mar 2024 11:50:44 +0100 Subject: [PATCH 2/5] Test empty-checks even before reaching needed no. 
of iterations --- .../tregex/nodes/nfa/TRegexBacktrackingNFAExecutorNode.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/nfa/TRegexBacktrackingNFAExecutorNode.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/nfa/TRegexBacktrackingNFAExecutorNode.java index 5ebb6d3a7305..fffa4208ac83 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/nfa/TRegexBacktrackingNFAExecutorNode.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/nfa/TRegexBacktrackingNFAExecutorNode.java @@ -952,8 +952,7 @@ protected boolean tryUpdateState(VirtualFrame frame, TRegexBacktrackingNFAExecut break; case exitZeroWidth: if (locals.getZeroWidthQuantifierGuardIndex(q) == index && - (!isMonitorCaptureGroupsInEmptyCheck() || locals.isResultUnmodifiedByZeroWidthQuantifier(q)) && - (!q.hasIndex() || locals.getQuantifierCount(q) > q.getMin())) { + (!isMonitorCaptureGroupsInEmptyCheck() || locals.isResultUnmodifiedByZeroWidthQuantifier(q))) { return false; } break; From 53a28299d4335243300f36a4b0a0b2ce1a226006 Mon Sep 17 00:00:00 2001 From: Jirka Marsik Date: Fri, 1 Mar 2024 11:58:01 +0100 Subject: [PATCH 3/5] Fix NPEs when dumping regexes --- .../src/com/oracle/truffle/regex/tregex/nfa/NFA.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nfa/NFA.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nfa/NFA.java index a51f0b3b35ac..139afb21b935 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nfa/NFA.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nfa/NFA.java @@ -166,7 +166,7 @@ private static int transitionListIndexOfTarget(NFAStateTransition[] transitions, private static boolean 
transitionListContainsTarget(NFAStateTransition[] transitions, NFAState target) { for (NFAStateTransition t : transitions) { - if (t.getTarget() == target) { + if (t != null && t.getTarget() == target) { return true; } } @@ -373,7 +373,7 @@ public JsonValue toJson(boolean forward) { @TruffleBoundary private static JsonArray fwdEntryToJson(NFAStateTransition[] entryArray) { - return Json.array(Arrays.stream(entryArray).map(x -> Json.val(x.getTarget().getId()))); + return Json.array(Arrays.stream(entryArray).map(x -> x == null ? Json.nullValue() : Json.val(x.getTarget().getId()))); } @TruffleBoundary From 61e5903252fa588063d1a1c47b8d241613564b73 Mon Sep 17 00:00:00 2001 From: Jirka Marsik Date: Fri, 1 Mar 2024 12:23:44 +0100 Subject: [PATCH 4/5] Run empty-checks in mandatory part of unrolled quantifiers --- .../tregex/parser/RegexASTPostProcessor.java | 113 ++++++++++-------- .../regex/tregex/parser/ast/RegexASTNode.java | 46 +++++-- .../visitors/NFATraversalRegexASTVisitor.java | 35 ++++-- 3 files changed, 123 insertions(+), 71 deletions(-) diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexASTPostProcessor.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexASTPostProcessor.java index 74819467c3f7..a814bedeacde 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexASTPostProcessor.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexASTPostProcessor.java @@ -41,6 +41,7 @@ package com.oracle.truffle.regex.tregex.parser; import java.util.ArrayList; +import java.util.function.Supplier; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.regex.RegexFlags; @@ -48,7 +49,6 @@ import com.oracle.truffle.regex.charset.Constants; import com.oracle.truffle.regex.tregex.TRegexOptions; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; -import 
com.oracle.truffle.regex.tregex.buffer.ObjectArrayBuffer; import com.oracle.truffle.regex.tregex.parser.ast.BackReference; import com.oracle.truffle.regex.tregex.parser.ast.CalcASTPropsVisitor; import com.oracle.truffle.regex.tregex.parser.ast.CharacterClass; @@ -87,7 +87,7 @@ public void prepareForDFA() { } OptimizeLookAroundsVisitor.optimizeLookArounds(ast, compilationBuffer); if (properties.hasQuantifiers()) { - UnrollQuantifiersVisitor.unrollQuantifiers(ast, compilationBuffer); + UnrollQuantifiersVisitor.unrollQuantifiers(ast); } CalcASTPropsVisitor.run(ast, compilationBuffer); ast.createPrefix(); @@ -135,13 +135,13 @@ private static final class UnrollQuantifiersVisitor extends DepthFirstTraversalR private final ShouldUnrollQuantifierVisitor shouldUnrollVisitor = new ShouldUnrollQuantifierVisitor(); private final QuantifierExpander quantifierExpander; - private UnrollQuantifiersVisitor(RegexAST ast, CompilationBuffer compilationBuffer) { + private UnrollQuantifiersVisitor(RegexAST ast) { this.ast = ast; - this.quantifierExpander = new QuantifierExpander(ast, compilationBuffer); + this.quantifierExpander = new QuantifierExpander(ast); } - public static void unrollQuantifiers(RegexAST ast, CompilationBuffer compilationBuffer) { - new UnrollQuantifiersVisitor(ast, compilationBuffer).run(ast.getRoot()); + public static void unrollQuantifiers(RegexAST ast) { + new UnrollQuantifiersVisitor(ast).run(ast.getRoot()); } @Override @@ -203,23 +203,25 @@ protected void visit(Group group) { private static final class QuantifierExpander { private final RegexAST ast; - private final CompilationBuffer compilationBuffer; - private final CopyVisitor copyVisitor; + private TermCopySupplier copySupplier; private Group curGroup; private Sequence curSequence; private Term curTerm; - QuantifierExpander(RegexAST ast, CompilationBuffer compilationBuffer) { + QuantifierExpander(RegexAST ast) { this.ast = ast; - this.compilationBuffer = compilationBuffer; - this.copyVisitor = new 
CopyVisitor(ast); } private void pushGroup() { - Group group = ast.createGroup(); - curSequence.add(group); - curGroup = group; + curGroup = ast.createGroup(); + curSequence.add(curGroup); + nextSequence(); + } + + private void replaceCurTermWithNewGroup() { + curGroup = ast.createGroup(); + curSequence.replace(curTerm.getSeqIndex(), curGroup); nextSequence(); } @@ -239,22 +241,19 @@ private void addTerm(Term term) { curTerm = term; } - private void createOptionalBranch(QuantifiableTerm term, Token.Quantifier quantifier, boolean copy, boolean unroll, int recurse) { - addTerm(copy ? copyVisitor.copy(term) : term); - curTerm.setExpandedQuantifier(false); + private void createOptionalBranch(QuantifiableTerm term, Token.Quantifier quantifier, boolean unroll, int recurse) { + addTerm(copySupplier.get()); + curTerm.setUnrolledQuantifer(false); ((QuantifiableTerm) curTerm).setQuantifier(null); curTerm.setEmptyGuard(true); - createOptional(term, quantifier, true, unroll, recurse - 1); + createOptional(term, quantifier, unroll, recurse - 1); } - private void createOptional(QuantifiableTerm term, Token.Quantifier quantifier, boolean copy, boolean unroll, int recurse) { + private void createOptional(QuantifiableTerm term, Token.Quantifier quantifier, boolean unroll, int recurse) { if (recurse < 0) { return; } - if (copy) { - // the outermost group is already generated by expandQuantifier if copy == false - pushGroup(); - } + pushGroup(); curGroup.setExpandedQuantifier(unroll); curGroup.setQuantifier(quantifier); if (term.isGroup()) { @@ -262,13 +261,13 @@ private void createOptional(QuantifiableTerm term, Token.Quantifier quantifier, curGroup.setEnclosedCaptureGroupsHigh(term.asGroup().getCaptureGroupsHigh()); } if (quantifier.isGreedy()) { - createOptionalBranch(term, quantifier, copy, unroll, recurse); + createOptionalBranch(term, quantifier, unroll, recurse); nextSequence(); curSequence.setExpandedQuantifier(true); } else { curSequence.setExpandedQuantifier(true); 
nextSequence(); - createOptionalBranch(term, quantifier, copy, unroll, recurse); + createOptionalBranch(term, quantifier, unroll, recurse); } popGroup(); } @@ -277,35 +276,26 @@ private void expandQuantifier(QuantifiableTerm toExpand, boolean unroll) { assert toExpand.hasNotUnrolledQuantifier(); Token.Quantifier quantifier = toExpand.getQuantifier(); assert !unroll || toExpand.isUnrollingCandidate(); + + copySupplier = new TermCopySupplier(ast, toExpand); curTerm = toExpand; curSequence = (Sequence) curTerm.getParent(); curGroup = curSequence.getParent(); - ObjectArrayBuffer buf = compilationBuffer.getObjectBuffer1(); - if (unroll && quantifier.getMin() > 0) { - // stash successors of toExpand to buffer - int size = curSequence.size(); - for (int i = toExpand.getSeqIndex() + 1; i < size; i++) { - buf.add(curSequence.getLastTerm()); - curSequence.removeLastTerm(); - } + // replace the term to expand with a new wrapper group + replaceCurTermWithNewGroup(); + + // unroll mandatory part ( x{3} -> xxx ) + if (unroll) { // unroll non-optional part ( x{3} -> xxx ) - toExpand.setExpandedQuantifier(true); - for (int i = 0; i < quantifier.getMin() - 1; i++) { - Term term = copyVisitor.copy(toExpand); + for (int i = 0; i < quantifier.getMin(); i++) { + Term term = copySupplier.get(); term.setExpandedQuantifier(true); - curSequence.add(term); - curTerm = term; + term.setUnrolledQuantifer(true); + addTerm(term); } - } else { - assert !unroll || quantifier.getMin() == 0; - // replace the term to expand with a new wrapper group - curGroup = ast.createGroup(); - curGroup.addSequence(ast); - curSequence.replace(toExpand.getSeqIndex(), curGroup); - curSequence = curGroup.getFirstAlternative(); - curTerm = null; } + // unroll optional part ( x{0,3} -> (x(x(x|)|)|) ) // In flavors like Python or Ruby, loops can be repeated past the point where the // position in the string keeps advancing (i.e. 
we are matching at least one @@ -314,14 +304,35 @@ private void expandQuantifier(QuantifiableTerm toExpand, boolean unroll) { // iteration is run because there is no backtracking after failing the empty check. // We can emulate this behavior by dropping empty guards in small bounded loops, // such as is the case for unrolled loops. - createOptional(toExpand, quantifier, unroll && quantifier.getMin() > 0, unroll, !unroll || quantifier.isInfiniteLoop() ? 0 : (quantifier.getMax() - quantifier.getMin()) - 1); + createOptional(toExpand, quantifier, unroll, !unroll || quantifier.isInfiniteLoop() ? 0 : (quantifier.getMax() - quantifier.getMin()) - 1); if (!unroll || quantifier.isInfiniteLoop()) { ((Group) curTerm).setLoop(true); } - if (unroll && quantifier.getMin() > 0) { - // restore the stashed successors - for (int i = buf.length() - 1; i >= 0; i--) { - curSequence.add(buf.get(i)); + } + + /** + * This class implements a stateful closure that produces copies of a given term, but + * reuses the original term for the first copy produced. 
+ */ + private static final class TermCopySupplier implements Supplier<Term> { + + private final Term term; + private boolean firstCopyIssued; + private final CopyVisitor copyVisitor; + + TermCopySupplier(RegexAST ast, Term term) { + this.term = term; + this.firstCopyIssued = false; + this.copyVisitor = new CopyVisitor(ast); + } + + @Override + public Term get() { + if (!firstCopyIssued) { + firstCopyIssued = true; + return term; + } else { + return copyVisitor.copy(term); } } } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/ast/RegexASTNode.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/ast/RegexASTNode.java index 9f1213a5ad40..fb82920a0c58 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/ast/RegexASTNode.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/ast/RegexASTNode.java @@ -63,16 +63,17 @@ public abstract class RegexASTNode implements JsonConvertible { static final int FLAG_BACK_REFERENCE_IS_IGNORE_CASE = 1 << 9; static final int FLAG_GROUP_LOOP = 1 << 10; static final int FLAG_GROUP_EXPANDED_QUANTIFIER = 1 << 11; - static final int FLAG_GROUP_LOCAL_FLAGS = 1 << 12; - static final int FLAG_EMPTY_GUARD = 1 << 13; - static final int FLAG_LOOK_AROUND_NEGATED = 1 << 14; - static final int FLAG_HAS_LOOPS = 1 << 15; - static final int FLAG_HAS_CAPTURE_GROUPS = 1 << 16; - static final int FLAG_HAS_QUANTIFIERS = 1 << 17; - static final int FLAG_HAS_LOOK_BEHINDS = 1 << 18; - static final int FLAG_HAS_LOOK_AHEADS = 1 << 19; - static final int FLAG_HAS_BACK_REFERENCES = 1 << 20; - static final int FLAG_CHARACTER_CLASS_WAS_SINGLE_CHAR = 1 << 21; + static final int FLAG_GROUP_UNROLLED_QUANTIFIER = 1 << 12; + static final int FLAG_GROUP_LOCAL_FLAGS = 1 << 13; + static final int FLAG_EMPTY_GUARD = 1 << 14; + static final int FLAG_LOOK_AROUND_NEGATED = 1 << 15; + static final int FLAG_HAS_LOOPS = 1 << 16; + static final
int FLAG_HAS_CAPTURE_GROUPS = 1 << 17; + static final int FLAG_HAS_QUANTIFIERS = 1 << 18; + static final int FLAG_HAS_LOOK_BEHINDS = 1 << 19; + static final int FLAG_HAS_LOOK_AHEADS = 1 << 20; + static final int FLAG_HAS_BACK_REFERENCES = 1 << 21; + static final int FLAG_CHARACTER_CLASS_WAS_SINGLE_CHAR = 1 << 22; private int id = -1; private RegexASTNode parent; @@ -364,8 +365,8 @@ public void setHasBackReferences() { *
<li>A+? is expanded as A(|A)* *
<li>A? is expanded as (A|) *
<li>A?? is expanded as (|A) - *
<li>A{2,4} is expanded as AA(A|)(A|) - *
<li>A{2,4}? is expanded as AA(|A)(|A) + *
<li>A{2,4} is expanded as AA(A(A|)|) + *
<li>A{2,4}? is expanded as AA(|A(|A)) * * where (X|Y) is a group with alternatives X and Y and (X|Y)* is a looping group with * alternatives X and Y. In the examples above, all of the occurrences of A in the expansions as @@ -385,6 +386,27 @@ public void setExpandedQuantifier(boolean expandedQuantifier) { setFlag(FLAG_GROUP_EXPANDED_QUANTIFIER, expandedQuantifier); } + /** + * Indicates whether this {@link RegexASTNode} represents a mandatory copy of a quantified term + * after unrolling. + * + * E.g., in the expansion of A{2,4}, which is AA(A(A|)|), the first two occurrences of A are + * marked with this flag. + */ + public boolean isUnrolledQuantifier() { + return isFlagSet(FLAG_GROUP_UNROLLED_QUANTIFIER); + } + + /** + * Marks this {@link RegexASTNode} as being inserted into the AST as part of unrolling the + * mandatory part of a quantified term. + * + * @see #isUnrolledQuantifier() + */ + public void setUnrolledQuantifer(boolean unrolledQuantifer) { + setFlag(FLAG_GROUP_UNROLLED_QUANTIFIER, unrolledQuantifer); + } + public int getMinPath() { return minPath; } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/ast/visitors/NFATraversalRegexASTVisitor.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/ast/visitors/NFATraversalRegexASTVisitor.java index 5039d53d8f63..78f984233ffb 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/ast/visitors/NFATraversalRegexASTVisitor.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/ast/visitors/NFATraversalRegexASTVisitor.java @@ -632,8 +632,7 @@ private boolean retreat() { } insideEmptyGuardGroup.remove(group); } - } else if (ast.getOptions().getFlavor().failingEmptyChecksDontBacktrack() && pathIsGroupExit(lastVisited) && group.hasQuantifier() && group.getQuantifier().hasZeroWidthIndex() && - (group.getFirstAlternative().isExpandedQuantifier() ||
group.getLastAlternative().isExpandedQuantifier())) { + } else if (ast.getOptions().getFlavor().failingEmptyChecksDontBacktrack() && pathIsGroupExit(lastVisited) && isZeroWidthGroup(group)) { // In Ruby, Python and OracleDB, when we finish an iteration of a loop, there is // an empty check. If we pass the empty check, we return to the beginning of the // loop where we get to make a non-deterministic choice whether we want to start @@ -649,7 +648,10 @@ private boolean retreat() { if (shouldRetreat) { return retreat(); } - return advanceTerm(group); + // When we expand quantifiers, we wrap them in a group. This lets us escape past + // the expansion of the quantifier even in cases when we are in the mandatory + // prefix (e.g. empty-check fails in the first A in (AA(A(A|)|))). + return advanceTerm(group.getParent().getParent().asGroup()); } else { if (pathIsGroupExit(lastVisited)) { popGroupExit(group); @@ -818,6 +820,15 @@ private static QuantifierGuard getConditionalBackReferenceGroupQuantifierGuard(G } } + /** + * Tests whether {@code group} is a group that has an empty-check implemented via + * enterZeroWidth/exitZeroWidth. 
+ */ + private boolean isZeroWidthGroup(Group group) { + return group.hasQuantifier() && group.getQuantifier().hasZeroWidthIndex() && ((ast.getOptions().getFlavor().failingEmptyChecksDontBacktrack() && group.isUnrolledQuantifier()) || + group.getFirstAlternative().isExpandedQuantifier() || group.getLastAlternative().isExpandedQuantifier()); + } + /// Pushing and popping group elements to and from the path private void pushGroupEnter(Group group, int groupAltIndex) { curPath.add(createPathElement(group) | (groupAltIndex << PATH_GROUP_ALT_INDEX_OFFSET) | PATH_GROUP_ACTION_ENTER); @@ -841,7 +852,7 @@ private void pushGroupEnter(Group group, int groupAltIndex) { pushQuantifierGuard(QuantifierGuard.createEnter(quantifier)); } } - if (quantifier.hasZeroWidthIndex() && (group.getFirstAlternative().isExpandedQuantifier() || group.getLastAlternative().isExpandedQuantifier())) { + if (isZeroWidthGroup(group)) { pushQuantifierGuard(QuantifierGuard.createEnterZeroWidth(quantifier)); } } @@ -891,9 +902,9 @@ private void pushGroupExit(Group group) { if (quantifier.hasIndex()) { quantifierGuardsLoop[quantifier.getIndex()]++; } - if (quantifier.hasZeroWidthIndex() && (group.getFirstAlternative().isExpandedQuantifier() || group.getLastAlternative().isExpandedQuantifier())) { - // exitZeroWidth quantifier guards are only used in flavors which can have empty - // loop iterations. Otherwise, checkEmptyMatch is used. + if (isZeroWidthGroup(group)) { + // exitZeroWidth quantifier guards are primarily used in flavors which can have + // empty loop iterations. Otherwise, checkEmptyMatch is used. 
if (ast.getOptions().getFlavor().canHaveEmptyLoopIterations() || !root.isCharacterClass()) { pushQuantifierGuard(QuantifierGuard.createExitZeroWidth(quantifier)); } @@ -981,6 +992,13 @@ private void switchExitToEscape(Group group) { private void pushGroupEscape(Group group) { curPath.add(createPathElement(group) | PATH_GROUP_ACTION_ESCAPE); + // Capture groups + if (group.isCapturing()) { + captureGroupUpdate(forward ? group.getBoundaryIndexEnd() : group.getBoundaryIndexStart()); + if (ast.getOptions().getFlavor().usesLastGroupResultField() && group.getGroupNumber() != 0) { + lastGroupUpdate(group.getGroupNumber()); + } + } // Quantifier guards if (useQuantifierGuards()) { if (group.hasQuantifier()) { @@ -988,7 +1006,7 @@ private void pushGroupEscape(Group group) { if (quantifier.hasIndex()) { quantifierGuardsExited[quantifier.getIndex()]++; } - if (quantifier.hasZeroWidthIndex() && (group.getFirstAlternative().isExpandedQuantifier() || group.getLastAlternative().isExpandedQuantifier())) { + if (isZeroWidthGroup(group)) { pushQuantifierGuard(QuantifierGuard.createEscapeZeroWidth(quantifier)); } } @@ -1010,6 +1028,7 @@ private void popGroupEscape(Group group) { } } } + popCaptureGroupEvents(); } /// Capture group data handling From a4628d086ee90a4b7b7627ab69f9b04aee9e5e14 Mon Sep 17 00:00:00 2001 From: Josef Haider Date: Fri, 1 Mar 2024 12:55:56 +0000 Subject: [PATCH 5/5] Drop DumpAutomata mode in OracleDBTests --- .../src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java index abf278f69c47..6e21398a2e04 100644 --- a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java +++ 
b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java @@ -48,7 +48,7 @@ public class OracleDBTests extends RegexTestBase { @Override String getEngineOptions() { - return "Flavor=OracleDB,DumpAutomata=true"; + return "Flavor=OracleDB"; } @Override