Skip to content

Commit

Permalink
[GR-52397] Fix empty-check issue in OracleDB flavor.
Browse files Browse the repository at this point in the history
PullRequest: graal/17157
  • Loading branch information
jirkamarsik committed Mar 3, 2024
2 parents 7e8c513 + a4628d0 commit 739a82c
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 76 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -918,6 +918,14 @@ public void generatedTests() {
test("(a|())*?\\2", "", "a", 0, true, 0, 1, 1, 1, 1, 1);
test("(a*)+", "", "a", 0, true, 0, 1, 1, 1);
test("(\\1a|){2}", "", "aa", 0, true, 0, 0, 0, 0);
test("(|[ab]){3,3}b", "", "aab", 0, true, 0, 3, 2, 2);
test("(|[ab]){3}b", "", "aab", 0, true, 0, 3, 2, 2);
test("(|a){3}b", "", "aab", 0, true, 0, 3, 2, 2);
test("(|a){2}b", "", "ab", 0, true, 0, 2, 1, 1);
test("(|a){1}b", "", "b", 0, true, 0, 1, 0, 0);
test("(|a)b", "", "b", 0, true, 0, 1, 0, 0);
test("(|a)(|a)(|a)b", "", "aab", 0, true, 0, 3, 0, 0, 0, 1, 1, 2);
test("(|a)(|a)b", "", "ab", 0, true, 0, 2, 0, 0, 0, 1);
/* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */
}

Expand Down Expand Up @@ -945,4 +953,9 @@ public void nfaTraversalTests() {
test("()?*", "", "c", 0, true, 0, 0, 0, 0);
test("X(.?){8,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8);
}

@Test
public void gr52397() {
test("(|[ab]){3,3}b", "", "aab", 0, true, 0, 3, 2, 2);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ private static int transitionListIndexOfTarget(NFAStateTransition[] transitions,

private static boolean transitionListContainsTarget(NFAStateTransition[] transitions, NFAState target) {
for (NFAStateTransition t : transitions) {
if (t.getTarget() == target) {
if (t != null && t.getTarget() == target) {
return true;
}
}
Expand Down Expand Up @@ -373,7 +373,7 @@ public JsonValue toJson(boolean forward) {

@TruffleBoundary
private static JsonArray fwdEntryToJson(NFAStateTransition[] entryArray) {
return Json.array(Arrays.stream(entryArray).map(x -> Json.val(x.getTarget().getId())));
return Json.array(Arrays.stream(entryArray).map(x -> x == null ? Json.nullValue() : Json.val(x.getTarget().getId())));
}

@TruffleBoundary
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -952,8 +952,7 @@ protected boolean tryUpdateState(VirtualFrame frame, TRegexBacktrackingNFAExecut
break;
case exitZeroWidth:
if (locals.getZeroWidthQuantifierGuardIndex(q) == index &&
(!isMonitorCaptureGroupsInEmptyCheck() || locals.isResultUnmodifiedByZeroWidthQuantifier(q)) &&
(!q.hasIndex() || locals.getQuantifierCount(q) > q.getMin())) {
(!isMonitorCaptureGroupsInEmptyCheck() || locals.isResultUnmodifiedByZeroWidthQuantifier(q))) {
return false;
}
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@
package com.oracle.truffle.regex.tregex.parser;

import java.util.ArrayList;
import java.util.function.Supplier;

import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.regex.RegexFlags;
import com.oracle.truffle.regex.charset.CodePointSet;
import com.oracle.truffle.regex.charset.Constants;
import com.oracle.truffle.regex.tregex.TRegexOptions;
import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer;
import com.oracle.truffle.regex.tregex.buffer.ObjectArrayBuffer;
import com.oracle.truffle.regex.tregex.parser.ast.BackReference;
import com.oracle.truffle.regex.tregex.parser.ast.CalcASTPropsVisitor;
import com.oracle.truffle.regex.tregex.parser.ast.CharacterClass;
Expand Down Expand Up @@ -87,7 +87,7 @@ public void prepareForDFA() {
}
OptimizeLookAroundsVisitor.optimizeLookArounds(ast, compilationBuffer);
if (properties.hasQuantifiers()) {
UnrollQuantifiersVisitor.unrollQuantifiers(ast, compilationBuffer);
UnrollQuantifiersVisitor.unrollQuantifiers(ast);
}
CalcASTPropsVisitor.run(ast, compilationBuffer);
ast.createPrefix();
Expand Down Expand Up @@ -135,13 +135,13 @@ private static final class UnrollQuantifiersVisitor extends DepthFirstTraversalR
private final ShouldUnrollQuantifierVisitor shouldUnrollVisitor = new ShouldUnrollQuantifierVisitor();
private final QuantifierExpander quantifierExpander;

private UnrollQuantifiersVisitor(RegexAST ast, CompilationBuffer compilationBuffer) {
private UnrollQuantifiersVisitor(RegexAST ast) {
this.ast = ast;
this.quantifierExpander = new QuantifierExpander(ast, compilationBuffer);
this.quantifierExpander = new QuantifierExpander(ast);
}

public static void unrollQuantifiers(RegexAST ast, CompilationBuffer compilationBuffer) {
new UnrollQuantifiersVisitor(ast, compilationBuffer).run(ast.getRoot());
public static void unrollQuantifiers(RegexAST ast) {
new UnrollQuantifiersVisitor(ast).run(ast.getRoot());
}

@Override
Expand Down Expand Up @@ -203,23 +203,25 @@ protected void visit(Group group) {
private static final class QuantifierExpander {

private final RegexAST ast;
private final CompilationBuffer compilationBuffer;
private final CopyVisitor copyVisitor;

private TermCopySupplier copySupplier;
private Group curGroup;
private Sequence curSequence;
private Term curTerm;

QuantifierExpander(RegexAST ast, CompilationBuffer compilationBuffer) {
QuantifierExpander(RegexAST ast) {
this.ast = ast;
this.compilationBuffer = compilationBuffer;
this.copyVisitor = new CopyVisitor(ast);
}

private void pushGroup() {
Group group = ast.createGroup();
curSequence.add(group);
curGroup = group;
curGroup = ast.createGroup();
curSequence.add(curGroup);
nextSequence();
}

private void replaceCurTermWithNewGroup() {
curGroup = ast.createGroup();
curSequence.replace(curTerm.getSeqIndex(), curGroup);
nextSequence();
}

Expand All @@ -239,36 +241,33 @@ private void addTerm(Term term) {
curTerm = term;
}

private void createOptionalBranch(QuantifiableTerm term, Token.Quantifier quantifier, boolean copy, boolean unroll, int recurse) {
addTerm(copy ? copyVisitor.copy(term) : term);
curTerm.setExpandedQuantifier(false);
private void createOptionalBranch(QuantifiableTerm term, Token.Quantifier quantifier, boolean unroll, int recurse) {
addTerm(copySupplier.get());
curTerm.setUnrolledQuantifer(false);
((QuantifiableTerm) curTerm).setQuantifier(null);
curTerm.setEmptyGuard(true);
createOptional(term, quantifier, true, unroll, recurse - 1);
createOptional(term, quantifier, unroll, recurse - 1);
}

private void createOptional(QuantifiableTerm term, Token.Quantifier quantifier, boolean copy, boolean unroll, int recurse) {
private void createOptional(QuantifiableTerm term, Token.Quantifier quantifier, boolean unroll, int recurse) {
if (recurse < 0) {
return;
}
if (copy) {
// the outermost group is already generated by expandQuantifier if copy == false
pushGroup();
}
pushGroup();
curGroup.setExpandedQuantifier(unroll);
curGroup.setQuantifier(quantifier);
if (term.isGroup()) {
curGroup.setEnclosedCaptureGroupsLow(term.asGroup().getCaptureGroupsLow());
curGroup.setEnclosedCaptureGroupsHigh(term.asGroup().getCaptureGroupsHigh());
}
if (quantifier.isGreedy()) {
createOptionalBranch(term, quantifier, copy, unroll, recurse);
createOptionalBranch(term, quantifier, unroll, recurse);
nextSequence();
curSequence.setExpandedQuantifier(true);
} else {
curSequence.setExpandedQuantifier(true);
nextSequence();
createOptionalBranch(term, quantifier, copy, unroll, recurse);
createOptionalBranch(term, quantifier, unroll, recurse);
}
popGroup();
}
Expand All @@ -277,35 +276,26 @@ private void expandQuantifier(QuantifiableTerm toExpand, boolean unroll) {
assert toExpand.hasNotUnrolledQuantifier();
Token.Quantifier quantifier = toExpand.getQuantifier();
assert !unroll || toExpand.isUnrollingCandidate();

copySupplier = new TermCopySupplier(ast, toExpand);
curTerm = toExpand;
curSequence = (Sequence) curTerm.getParent();
curGroup = curSequence.getParent();

ObjectArrayBuffer<Term> buf = compilationBuffer.getObjectBuffer1();
if (unroll && quantifier.getMin() > 0) {
// stash successors of toExpand to buffer
int size = curSequence.size();
for (int i = toExpand.getSeqIndex() + 1; i < size; i++) {
buf.add(curSequence.getLastTerm());
curSequence.removeLastTerm();
}
// replace the term to expand with a new wrapper group
replaceCurTermWithNewGroup();

// unroll mandatory part ( x{3} -> xxx )
if (unroll) {
// unroll non-optional part ( x{3} -> xxx )
toExpand.setExpandedQuantifier(true);
for (int i = 0; i < quantifier.getMin() - 1; i++) {
Term term = copyVisitor.copy(toExpand);
for (int i = 0; i < quantifier.getMin(); i++) {
Term term = copySupplier.get();
term.setExpandedQuantifier(true);
curSequence.add(term);
curTerm = term;
term.setUnrolledQuantifer(true);
addTerm(term);
}
} else {
assert !unroll || quantifier.getMin() == 0;
// replace the term to expand with a new wrapper group
curGroup = ast.createGroup();
curGroup.addSequence(ast);
curSequence.replace(toExpand.getSeqIndex(), curGroup);
curSequence = curGroup.getFirstAlternative();
curTerm = null;
}

// unroll optional part ( x{0,3} -> (x(x(x|)|)|) )
// In flavors like Python or Ruby, loops can be repeated past the point where the
// position in the string keeps advancing (i.e. we are matching at least one
Expand All @@ -314,14 +304,35 @@ private void expandQuantifier(QuantifiableTerm toExpand, boolean unroll) {
// iteration is run because there is no backtracking after failing the empty check.
// We can emulate this behavior by dropping empty guards in small bounded loops,
// such as is the case for unrolled loops.
createOptional(toExpand, quantifier, unroll && quantifier.getMin() > 0, unroll, !unroll || quantifier.isInfiniteLoop() ? 0 : (quantifier.getMax() - quantifier.getMin()) - 1);
createOptional(toExpand, quantifier, unroll, !unroll || quantifier.isInfiniteLoop() ? 0 : (quantifier.getMax() - quantifier.getMin()) - 1);
if (!unroll || quantifier.isInfiniteLoop()) {
((Group) curTerm).setLoop(true);
}
if (unroll && quantifier.getMin() > 0) {
// restore the stashed successors
for (int i = buf.length() - 1; i >= 0; i--) {
curSequence.add(buf.get(i));
}

/**
* This class implements a stateful closure that produces copies of a given term, but
* reuses the original term for the first copy produced.
*/
private static final class TermCopySupplier implements Supplier<Term> {

private final Term term;
private boolean firstCopyIssued;
private final CopyVisitor copyVisitor;

TermCopySupplier(RegexAST ast, Term term) {
this.term = term;
this.firstCopyIssued = false;
this.copyVisitor = new CopyVisitor(ast);
}

@Override
public Term get() {
if (!firstCopyIssued) {
firstCopyIssued = true;
return term;
} else {
return copyVisitor.copy(term);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,17 @@ public abstract class RegexASTNode implements JsonConvertible {
static final int FLAG_BACK_REFERENCE_IS_IGNORE_CASE = 1 << 9;
static final int FLAG_GROUP_LOOP = 1 << 10;
static final int FLAG_GROUP_EXPANDED_QUANTIFIER = 1 << 11;
static final int FLAG_GROUP_LOCAL_FLAGS = 1 << 12;
static final int FLAG_EMPTY_GUARD = 1 << 13;
static final int FLAG_LOOK_AROUND_NEGATED = 1 << 14;
static final int FLAG_HAS_LOOPS = 1 << 15;
static final int FLAG_HAS_CAPTURE_GROUPS = 1 << 16;
static final int FLAG_HAS_QUANTIFIERS = 1 << 17;
static final int FLAG_HAS_LOOK_BEHINDS = 1 << 18;
static final int FLAG_HAS_LOOK_AHEADS = 1 << 19;
static final int FLAG_HAS_BACK_REFERENCES = 1 << 20;
static final int FLAG_CHARACTER_CLASS_WAS_SINGLE_CHAR = 1 << 21;
static final int FLAG_GROUP_UNROLLED_QUANTIFIER = 1 << 12;
static final int FLAG_GROUP_LOCAL_FLAGS = 1 << 13;
static final int FLAG_EMPTY_GUARD = 1 << 14;
static final int FLAG_LOOK_AROUND_NEGATED = 1 << 15;
static final int FLAG_HAS_LOOPS = 1 << 16;
static final int FLAG_HAS_CAPTURE_GROUPS = 1 << 17;
static final int FLAG_HAS_QUANTIFIERS = 1 << 18;
static final int FLAG_HAS_LOOK_BEHINDS = 1 << 19;
static final int FLAG_HAS_LOOK_AHEADS = 1 << 20;
static final int FLAG_HAS_BACK_REFERENCES = 1 << 21;
static final int FLAG_CHARACTER_CLASS_WAS_SINGLE_CHAR = 1 << 22;

private int id = -1;
private RegexASTNode parent;
Expand Down Expand Up @@ -364,8 +365,8 @@ public void setHasBackReferences() {
* <li>A+? is expanded as A(|A)*
* <li>A? is expanded as (A|)
* <li>A?? is expanded as (|A)
* <li>A{2,4} is expanded as AA(A|)(A|)
* <li>A{2,4}? is expanded as AA(|A)(|A)
* <li>A{2,4} is expanded as AA(A(A|)|)
* <li>A{2,4}? is expanded as AA(|A(|A))
* </ul>
* where (X|Y) is a group with alternatives X and Y and (X|Y)* is a looping group with
* alternatives X and Y. In the examples above, all of the occurrences of A in the expansions as
Expand All @@ -385,6 +386,27 @@ public void setExpandedQuantifier(boolean expandedQuantifier) {
setFlag(FLAG_GROUP_EXPANDED_QUANTIFIER, expandedQuantifier);
}

/**
* Indicates whether this {@link RegexASTNode} represents a mandatory copy of a quantified term
* after unrolling.
*
* E.g., in the expansion of A{2,4}, which is AA(A(A|)|), the first two occurrences of A are
* marked with this flag.
*/
public boolean isUnrolledQuantifier() {
return isFlagSet(FLAG_GROUP_UNROLLED_QUANTIFIER);
}

/**
* Marks this {@link RegexASTNode} as being inserted into the AST as part of unrolling the
* mandatory part of a quantified term.
*
* @see #isUnrolledQuantifier()
*/
public void setUnrolledQuantifer(boolean unrolledQuantifer) {
setFlag(FLAG_GROUP_UNROLLED_QUANTIFIER, unrolledQuantifer);
}

public int getMinPath() {
return minPath;
}
Expand Down
Loading

0 comments on commit 739a82c

Please sign in to comment.