Skip to content

Commit

Permalink
Absent fn tests (#13)
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Jan 21, 2025
1 parent 5f0d78d commit 9789dc6
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 5 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -994,7 +994,7 @@ The table above doesn't include all aspects that Oniguruma-To-ES emulates (inclu
3. Target `ES2018` doesn't support nested *negated* character classes.
4. It's not an error for *numbered* backreferences to come before their referenced group in Oniguruma, but an error is the best path for Oniguruma-To-ES because ① most placements are mistakes and can never match (based on the Oniguruma behavior for backreferences to nonparticipating groups), ② erroring matches the behavior of named backreferences, and ③ the edge cases where they're matchable rely on rules for backreference resetting within quantified groups that are different in JavaScript and aren't emulatable. Note that it's not a backreference in the first place if using `\10` or higher and not as many capturing groups are defined to the left (it's an octal or identity escape).
5. Oniguruma's recursion depth limit is `20`. Oniguruma-To-ES uses the same limit by default but allows customizing it via the `rules.recursionLimit` option. Two rare uses of recursion aren't yet supported: overlapping recursions, and use of backreferences when a recursed subpattern contains captures. Patterns that would trigger an infinite recursion error in Oniguruma might find a match in Oniguruma-To-ES (since recursion is bounded), but future versions will detect this and error at transpilation time.
6. Exotic (and extremely rare) forms of absent functions that start with `(?~|` (absent expressions, stoppers, and clearers) aren't yet supported.
6. Exotic (and extremely rare) forms of absent functions that start with `(?~|` (absent expressions, stoppers, and clearers) aren't yet supported. Also note that Oniguruma's absent functions behave differently from Onigmo's.

## ❌ Unsupported features

Expand Down
51 changes: 51 additions & 0 deletions spec/match-absent-fn.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import {toDetails, toRegExp} from '../dist/esm/index.js';
import {r} from '../src/utils.js';

describe('AbsentFunction', () => {
  // Transpiles `pattern` with the `global` option and returns every match found in `str`
  const globalMatches = (str, pattern) => str.match(toRegExp(pattern, {global: true}));

  // Asserts that transpiling each of the given patterns throws
  const expectAllToThrow = (patterns) => {
    for (const pattern of patterns) {
      expect(() => toDetails(pattern)).toThrow();
    }
  };

  describe('absent repeater', () => {
    it('should match any input not matched by absent', () => {
      expect(globalMatches('abc', '(?~ab)')).toEqual(['', 'bc', '']);
      expect(globalMatches('abc', '(?~)')).toEqual(['', '', '', '']);
      expect(globalMatches('abc', '(?~a|b)')).toEqual(['', '', 'c', '']);
    });

    it('should not match atomically', () => {
      // The repeater must give back characters so the trailing `.` can match
      expect(globalMatches('abc', '(?~ab).')).toEqual(['a', 'bc']);
    });

    it('should allow quantification', () => {
      // Greedy, lazy, and possessive `?` applied to the absent repeater
      expect(globalMatches('abc', '(?~ab)?.')).toEqual(['a', 'bc']);
      expect(globalMatches('abc', '(?~ab)??.')).toEqual(['a', 'b', 'c']);
      expect(globalMatches('abc', '(?~ab)?+.')).toEqual(['a']);
    });

    it('should throw for nested absent repeaters', () => {
      expectAllToThrow([
        '(?~(?~))',
        '(?~a(?~))',
        '(?~(?~a))',
        '(?~a(?~b))',
      ]);
    });
  });

  describe('absent expression', () => {
    // Not supported
    it('should throw', () => {
      expectAllToThrow([r`(?~|abc|\O*)`]);
    });
  });

  describe('absent stopper', () => {
    // Not supported
    it('should throw', () => {
      expectAllToThrow(['(?~|abc)']);
    });
  });

  describe('absent clearer', () => {
    // Not supported
    it('should throw', () => {
      expectAllToThrow([
        '(?~|)',
        '(?~|abc)(?~|)',
      ]);
    });
  });
});
4 changes: 2 additions & 2 deletions src/generate.js
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ function generate(ast, options) {
case AstTypes.Recursion:
return genRecursion(node, state);
default:
// Node types `Directive`, `Subroutine`, and `VariableLengthCharacterSet` are never
// included in transformer output
// Node types `AbsentFunction`, `Directive`, `Subroutine`, and `VariableLengthCharacterSet`
// are never included in transformer output
throw new Error(`Unexpected node type "${node.type}"`);
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,9 @@ const FirstPassVisitor = {
}
} else if (kind === AstDirectiveKinds.keep) {
const firstAltFirstEl = ast.pattern.alternatives[0].elements[0];
// Supporting a full-pattern wrapper around `\K` enables use with flag modifiers
const hasWrapperGroup =
// Not emulatable if within a `CapturingGroup`
hasOnlyChild(ast.pattern, kid => kid.type === AstTypes.Group) &&
firstAltFirstEl.alternatives.length === 1;
const topLevel = hasWrapperGroup ? firstAltFirstEl : ast.pattern;
Expand Down
7 changes: 5 additions & 2 deletions src/utils-ast.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@ function isAlwaysNonZeroLength(node) {
}

// Consumptive groups add to the match.
// - Includes: capturing, named capturing, noncapturing, atomic, and flag groups
// - Excludes: lookarounds
// - Includes: Capturing, named capturing, noncapturing, atomic, and flag groups.
// - Excludes: Lookarounds.
// - Special case: Absent functions are consumptive (and negated, quantified) but are different
// in other ways so are excluded here.
// See also `AstTypeAliases.AnyGroup`.
function isConsumptiveGroup({type}) {
  // Only the `CapturingGroup` and `Group` node types are treated as consumptive groups;
  // every other node type (or a missing `type`) yields `false`
  switch (type) {
    case AstTypes.CapturingGroup:
    case AstTypes.Group:
      return true;
    default:
      return false;
  }
}
Expand Down

0 comments on commit 9789dc6

Please sign in to comment.