Skip to content

Commit

Permalink
refactor: simplify encoders (#88)
Browse files Browse the repository at this point in the history
  • Loading branch information
mdjastrzebski authored May 6, 2024
1 parent ea35d60 commit 7252a7e
Show file tree
Hide file tree
Showing 28 changed files with 265 additions and 521 deletions.
7 changes: 3 additions & 4 deletions src/builders.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import type { RegexFlags, RegexSequence } from './types';
import { encodeSequence } from './encoder/encoder';
import { ensureArray } from './utils/elements';
import { encode } from './encoder';

/**
* Generate RegExp object from elements with optional flags.
Expand All @@ -10,7 +9,7 @@ import { ensureArray } from './utils/elements';
* @returns RegExp object
*/
export function buildRegExp(sequence: RegexSequence, flags?: RegexFlags): RegExp {
const pattern = encodeSequence(ensureArray(sequence)).pattern;
const pattern = encode(sequence).pattern;
const flagsString = encodeFlags(flags ?? {});
return new RegExp(pattern, flagsString);
}
Expand All @@ -21,7 +20,7 @@ export function buildRegExp(sequence: RegexSequence, flags?: RegexFlags): RegExp
* @returns regex pattern string
*/
export function buildPattern(sequence: RegexSequence): string {
return encodeSequence(ensureArray(sequence)).pattern;
return encode(sequence).pattern;
}

function encodeFlags(flags: RegexFlags): string {
Expand Down
17 changes: 7 additions & 10 deletions src/constructs/__tests__/char-class.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ test('`charClass` joins character escapes', () => {
expect(charClass(word, nonDigit)).toEqualRegex(/[\w\D]/);
});

// Calling `charClass` with no elements must be rejected with a descriptive error.
test('`charClass` throws on empty text', () => {
expect(() => charClass()).toThrowErrorMatchingInlineSnapshot(
`"\`charClass\` should receive at least one element"`,
);
});

test('`charRange` pattern', () => {
expect(charRange('a', 'z')).toEqualRegex(/[a-z]/);
expect(['x', charRange('0', '9')]).toEqualRegex(/x[0-9]/);
Expand Down Expand Up @@ -115,16 +121,7 @@ test('`negated` character class matching', () => {
});

test('`encodeCharacterClass` throws on empty text', () => {
expect(() =>
buildRegExp(
// @ts-expect-error
negated({
type: 'characterClass',
chars: [],
ranges: [],
}),
),
).toThrowErrorMatchingInlineSnapshot(
expect(() => buildRegExp(negated({ chars: [], ranges: [] }))).toThrowErrorMatchingInlineSnapshot(
`"Character class should contain at least one character or character range"`,
);
});
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,11 @@ test('`buildRegExp` throws error on unknown element', () => {
expect(() =>
// @ts-expect-error intentionally passing incorrect object
buildRegExp({ type: 'unknown' }),
).toThrowErrorMatchingInlineSnapshot(`"\`encodeNode\`: unknown element type unknown"`);
).toThrowErrorMatchingInlineSnapshot(`
"\`encodeElement\`: unknown element: {
"type": "unknown"
}"
`);
});

test('`buildPattern` throws on empty text', () => {
Expand Down
43 changes: 13 additions & 30 deletions src/constructs/anchors.ts
Original file line number Diff line number Diff line change
@@ -1,43 +1,26 @@
import type { EncodeResult } from '../encoder/types';
import type { RegexConstruct } from '../types';
import type { EncodedRegex } from '../types';

export interface Anchor extends RegexConstruct {
type: 'anchor';
symbol: string;
}

export const startOfString: Anchor = {
type: 'anchor',
symbol: '^',
encode: encodeAnchor,
export const startOfString: EncodedRegex = {
precedence: 'atom',
pattern: '^',
};

export const endOfString: Anchor = {
type: 'anchor',
symbol: '$',
encode: encodeAnchor,
export const endOfString: EncodedRegex = {
precedence: 'atom',
pattern: '$',
};

export const wordBoundary: Anchor = {
type: 'anchor',
symbol: '\\b',
encode: encodeAnchor,
export const wordBoundary: EncodedRegex = {
precedence: 'atom',
pattern: '\\b',
};

export const nonWordBoundary: Anchor = {
type: 'anchor',
symbol: '\\B',
encode: encodeAnchor,
export const nonWordBoundary: EncodedRegex = {
precedence: 'atom',
pattern: '\\B',
};

/**
* @deprecated Renamed to `nonWordBoundary`.
*/
export const notWordBoundary = nonWordBoundary;

/**
 * Encodes the anchor bound as `this` by emitting its `symbol` verbatim.
 *
 * @returns encode result with `sequence` precedence and the anchor symbol as pattern.
 */
function encodeAnchor(this: Anchor): EncodeResult {
  const { symbol } = this;
  const encoded: EncodeResult = { precedence: 'sequence', pattern: symbol };
  return encoded;
}
57 changes: 16 additions & 41 deletions src/constructs/capture.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
import { encodeSequence } from '../encoder/encoder';
import type { EncodeResult } from '../encoder/types';
import { ensureArray } from '../utils/elements';
import type { RegexConstruct, RegexElement, RegexSequence } from '../types';

export interface Capture extends RegexConstruct {
type: 'capture';
children: RegexElement[];
options?: CaptureOptions;
}
import { encode } from '../encoder';
import type { EncodedRegex, RegexSequence } from '../types';

export type CaptureOptions = {
/**
Expand All @@ -16,8 +8,7 @@ export type CaptureOptions = {
name?: string;
};

export interface Reference extends RegexConstruct {
type: 'reference';
export interface Reference extends EncodedRegex {
name: string;
}

Expand All @@ -26,12 +17,18 @@ export interface Reference extends RegexConstruct {
* - in the match results (`String.match`, `String.matchAll`, or `RegExp.exec`)
* - in the regex itself, through {@link ref}
*/
export function capture(sequence: RegexSequence, options?: CaptureOptions): Capture {
export function capture(sequence: RegexSequence, options?: CaptureOptions): EncodedRegex {
const name = options?.name;
if (name) {
return {
precedence: 'atom',
pattern: `(?<${name}>${encode(sequence).pattern})`,
};
}

return {
type: 'capture',
children: ensureArray(sequence),
options,
encode: encodeCapture,
precedence: 'atom',
pattern: `(${encode(sequence).pattern})`,
};
}

Expand All @@ -45,31 +42,9 @@ export function capture(sequence: RegexSequence, options?: CaptureOptions): Capt
* @param name - Name of the capturing group to reference.
*/
export function ref(name: string): Reference {
return {
type: 'reference',
name,
encode: encodeReference,
};
}

/**
 * Encodes the capture bound as `this` as a capturing group.
 *
 * When `options.name` is a non-empty string the group is emitted as a named
 * group (`(?<name>...)`); otherwise a plain group (`(...)`) is produced.
 *
 * @returns encode result with `atom` precedence.
 */
function encodeCapture(this: Capture): EncodeResult {
  const groupName = this.options?.name;
  const opening = groupName ? `(?<${groupName}>` : '(';
  const inner = encodeSequence(this.children).pattern;

  return {
    precedence: 'atom',
    pattern: `${opening}${inner})`,
  };
}

function encodeReference(this: Reference): EncodeResult {
return {
precedence: 'atom',
pattern: `\\k<${this.name}>`,
pattern: `\\k<${name}>`,
name,
};
}
59 changes: 8 additions & 51 deletions src/constructs/char-class.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,14 @@
import type { EncodeResult } from '../encoder/types';
import type { RegexConstruct } from '../types';
import type { CharacterEscape } from './char-escape';

/**
* Character range from start to end (inclusive).
*/
export interface CharacterRange {
start: string;
end: string;
}

export interface CharacterClass extends RegexConstruct {
type: 'characterClass';
chars: string[];
ranges?: CharacterRange[];
}
import { encodeCharClass } from '../encoder';
import type { CharacterClass, CharacterEscape, EncodedRegex } from '../types';

export function charClass(...elements: Array<CharacterClass | CharacterEscape>): CharacterClass {
if (!elements.length) {
throw new Error('`charClass` should receive at least one element');
}

return {
type: 'characterClass',
chars: elements.map((c) => c.chars).flat(),
ranges: elements.map((c) => c.ranges ?? []).flat(),
encode: encodeCharacterClass,
};
}

Expand All @@ -39,10 +26,8 @@ export function charRange(start: string, end: string): CharacterClass {
}

return {
type: 'characterClass',
chars: [],
ranges: [{ start, end }],
encode: encodeCharacterClass,
};
}

Expand All @@ -54,47 +39,19 @@ export function anyOf(characters: string): CharacterClass {
}

return {
type: 'characterClass',
chars,
encode: encodeCharacterClass,
};
}

export function negated(element: CharacterClass | CharacterEscape): EncodeResult {
return encodeCharacterClass.call(element, true);
export function negated(element: CharacterClass | CharacterEscape): EncodedRegex {
return encodeCharClass(element, true);
}

/**
* @deprecated Renamed to `negated`.
*/
export const inverted = negated;

/**
 * Encodes a character class (or character escape) as a bracket expression.
 *
 * The element to encode is passed as `this`, e.g.
 * `encodeCharacterClass.call(element, true)`.
 *
 * @param isNegated - when truthy, emits a negated class (`[^...]`).
 * @returns encode result with `atom` precedence.
 * @throws Error when the element has neither characters nor ranges.
 */
export function encodeCharacterClass(
  this: CharacterClass | CharacterEscape,
  isNegated?: boolean,
): EncodeResult {
  if (!this.chars.length && !this.ranges?.length) {
    throw new Error('Character class should contain at least one character or character range');
  }

  // If the passed characters include a hyphen (`-`), it needs to be moved to the
  // first (or last) place so it is treated as a literal hyphen and not as a range.
  // See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes#types
  const hyphen = this.chars.includes('-') ? '-' : '';
  const caret = this.chars.includes('^') ? '^' : '';
  const otherChars = this.chars.filter((c) => c !== '-' && c !== '^').join('');
  const ranges = this.ranges?.map(({ start, end }) => `${start}-${end}`).join('') ?? '';

  const content = `${ranges}${otherChars}${caret}${hyphen}`;

  if (isNegated) {
    // After the negation marker every later `^` or `-` is already a literal,
    // so `[^-]` ("anything but a hyphen") must be emitted as-is.
    return {
      precedence: 'atom',
      pattern: `[^${content}]`,
    };
  }

  // In a non-negated class a leading caret would be read as the negation
  // marker (`[^]`, `[^-]`), so it is escaped to keep it a literal character.
  const body = content.startsWith('^') ? `\\${content}` : content;

  return {
    precedence: 'atom',
    pattern: `[${body}]`,
  };
}

/**
 * Prefixes every `]` and `\` in `text` with a backslash so the characters
 * stay literal when placed inside a character class.
 */
function escapeForCharacterClass(text: string): string {
  const needsEscaping = /[\]\\]/g;
  return text.replace(needsEscaping, (matched) => `\\${matched}`);
}
18 changes: 2 additions & 16 deletions src/constructs/char-escape.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
import type { EncodeResult } from '../encoder/types';

export interface CharacterEscape extends EncodeResult {
kind: 'escape';

// `CharacterClass` compatibility
chars: string[];
ranges?: never;
}
import type { CharacterEscape, EncodedRegex } from '../types';

/**
* Matches any single character.
* Specifically this one is NOT a character escape.
*/
export const any: EncodeResult = {
export const any: EncodedRegex = {
precedence: 'atom',
pattern: '.',
};
Expand All @@ -21,42 +13,36 @@ export const digit: CharacterEscape = {
precedence: 'atom',
pattern: '\\d',
chars: ['\\d'],
kind: 'escape',
};

/**
 * Character escape `\D`: matches any character that is not a digit.
 */
export const nonDigit: CharacterEscape = {
precedence: 'atom',
pattern: '\\D',
chars: ['\\D'],
kind: 'escape',
};

/**
 * Character escape `\w`: matches any word character.
 */
export const word: CharacterEscape = {
precedence: 'atom',
pattern: '\\w',
chars: ['\\w'],
kind: 'escape',
};

/**
 * Character escape `\W`: matches any character that is not a word character.
 */
export const nonWord: CharacterEscape = {
precedence: 'atom',
pattern: '\\W',
chars: ['\\W'],
kind: 'escape',
};

/**
 * Character escape `\s`: matches any whitespace character.
 */
export const whitespace: CharacterEscape = {
precedence: 'atom',
pattern: '\\s',
chars: ['\\s'],
kind: 'escape',
};

/**
 * Character escape `\S`: matches any character that is not whitespace.
 */
export const nonWhitespace: CharacterEscape = {
precedence: 'atom',
pattern: '\\S',
chars: ['\\S'],
kind: 'escape',
};

/**
Expand Down
Loading

0 comments on commit 7252a7e

Please sign in to comment.