Skip to content

Latest commit

 

History

History
206 lines (156 loc) · 7.02 KB

README.md

File metadata and controls

206 lines (156 loc) · 7.02 KB

Visual FA

Visual FA a Unicode enabled DFA regular expression engine

There's a series of articles on it starting here: https://www.codeproject.com/Articles/5375797/Visual-FA-Part-1-Understanding-Finite-Automata

License Notice

Portions of this code in the file FA.cs and FA.ToString.cs were derived from code created by other authors, and similarly licensed under MIT

Build Tools Notice

The two executables in the root folder are necessary for the build process. They are safe decompilable .NET assemblies

CSBrick - https://github.com/codewitch-honey-crisis/CSBrick

DSlang - https://github.com/codewitch-honey-crisis/Deslang

Performance

Microsoft Regex "Lexer": [■■■■■■■■■■] 100% Found 220000 matches in 33ms

Microsoft Regex compiled "Lexer": [■■■■■■■■■■] 100% Found 220000 matches in 20ms

FAStringRunner (proto): [■■■■■■■■■■] 100% Found 220000 matches in 7ms

FATextReaderRunner: (proto) [■■■■■■■■■■] 100% Found 220000 matches in 12ms

FAStringDfaTableRunner: [■■■■■■■■■■] 100% Found 220000 matches in 10ms

FATextReaderDfaTableRunner: [■■■■■■■■■■] 100% Found 220000 matches in 14ms

FAStringStateRunner (NFA): [■■■■■■■■■■] 100% Found 220000 matches in 144ms

FAStringStateRunner (Compact NFA): [■■■■■■■■■■] 100% Found 220000 matches in 42ms

FATextReaderStateRunner (Compact NFA): [■■■■■■■■■■] 100% Found 220000 matches in 47ms

FAStringStateRunner (DFA): [■■■■■■■■■■] 100% Found 220000 matches in 11ms

FATextReaderStateRunner (DFA): [■■■■■■■■■■] 100% Found 220000 matches in 15ms

FAStringRunner (Compiled): [■■■■■■■■■■] 100% Found 220000 matches in 7ms

FATextReaderRunner (Compiled): [■■■■■■■■■■] 100% Found 220000 matches in 12ms

Use

The C# source generation (requires C#9 or better)

partial class Tokenizers
{
    [FARule(@"\/\*", Symbol = "commentBlock", BlockEnd = @"\*\/")]
    [FARule(@"\/\/[^\n]*", Symbol = "lineComment")]
    [FARule(@"[ \t\r\n]+", Symbol = "whiteSpace")]
    [FARule(@"[A-Za-z_][A-Za-z0-9_]*", Symbol = "identifier")]
    [FARule(@"(0|([1-9][0-9]*))((\.[0-9]+[Ee]\-?[1-9][0-9]*)?|\.[0-9]+)", Symbol = "number")]
    [FARule(@"\+", Symbol = "plus")]
    [FARule(@"\-", Symbol = "minus")]
    [FARule(@"\*", Symbol = "multiply")]
    [FARule(@"\/", Symbol = "divide")]
    [FARule(@"%", Symbol = "modulo")]
    //internal static partial FATextReaderRunner CalcTextReaderRunner(TextReader text);
    internal static partial FAStringRunner CalcStringRunner(string text);
    //internal static partial FATextReaderDfaTableRunner CalcTextReaderTableRunner(TextReader text);
    //internal static partial FAStringDfaTableRunner CalcStringTableRunner(string text);
    static void Main(string[] args)
    {
        var exp = "the 10 quick brown #@%$! foxes jumped over 1.5 lazy dogs";
        foreach (var match in Tokenizers.Calc(exp))
        {
            Console.WriteLine(match);
        }
    }
}

The runtimes

string exp = @"[A-Z_a-z][A-Z_a-z0-9]*|0|\-?[1-9][0-9]*";
string text = "the quick brown fox jumped over the -10 lazy #@!*$ dog";
// lex a string
foreach (FAMatch match in FA.Parse(exp).Run(text))
{
	Console.WriteLine("{0} at {1}", match.Value, match.Position);
}
// *or* parse it into AST
RegexExpression ast = RegexExpression.Parse(exp);
// visit the AST
ast.Visit((parent, expr) => { Console.WriteLine(expr.GetType().Name +" "+ expr); return true; });
// turn it into a state machine
// (with expanded epsilons)
FA nfa = ast.ToFA(0,false);

// compact the expanded
// epsilons (if desired)
nfa.Compact();

// turn it into a DFA
FA dfa = nfa.ToDfa();

// *or* turn it into an optimized DFA
FA mdfa = nfa.ToMinimizedDfa();

// turn it back into a regular expression
Console.WriteLine(mdfa.ToString("e"));
// or do that, and reduce the expression
// (slower)
Console.WriteLine(mdfa.ToString("r"));

// FARunner has MatchNext()/Reset()
// and IEnumerable<FAMatch>

// If you reference FA.Compiler:

var compiledStr = mdfa.CompileString();
compiledStr.Set(text);
//or
var compiledRdr = mdfa.CompileTextReader();
compiledRdr.Set(text)
// to lex (as above)
foreach(FAMatch match in <compiledStr/compiledRdr>) {
	Console.WriteLine("{0}:{1}",match.Position,match.Value);
}


// for table DFA

int[] table = mdfa.ToArray();

// Note: You can create table NFAs as well
// but the following functions require a 
// DFA table

// to lex (as above)

foreach(FAMatch match in FA.Run(text,table)) {
	Console.WriteLine("{0}:{1}",match.Position,match.Value);
}

// to recreate an FA from an array
// (works with NFAs too)
FA newFA = FA.FromArray(table);

// on FAs if you have GraphViz installed
// from https://graphviz.org and in
// your PATH:

// can be dot/jpg/png/svg and more
newFA.RenderToFile("my_image.png");

using(Stream stream = nfa.RenderToStream("jpg")) {
	...
}

// If you reference FA.Generator and the CodeDom
// you can generate source code, potentially
// dependency free (when GenerateSharedCode is true)

var file = "MyLexer.cs";
using(var sw = new StreamWriter(file, false))
{					
	var ident = FA.Parse("[A-Z_a-z][0-9A-Z_a-z]*", 0, false);
	var num = FA.Parse("0|-?[1-9][0-9]*", 1, false);
	var ws = FA.Parse("[ ]+", 2, false);
	var commentStart = FA.Parse(@"\/\*", 3, false);
	var commentEnd = FA.Parse(@"\*\/", 3, false);
	var lexer = FA.ToLexer(new FA[] { ident, num, ws, commentStart }, true);
	var opts = new FAGeneratorOptions();
	opts.ClassName = "MyLexer";
	opts.Dependencies = FAGeneratorDependencies.GenerateSharedCode;
	opts.GenerateTables = true;
	opts.Symbols = new string[] { "ident","num","ws","comment" };
	var runner = lexer.Generate(new FA[] { null, null, null, commentEnd }, opts);
	var cprov = new CSharpCodeProvider();
	var copt = new CodeGeneratorOptions();
	cprov.GenerateCodeFromCompileUnit(runner, sw, copt);
}

Projects

  • VisualFA - The main project (.NET 6)
  • VisualFA.DNF - The main project (.NET Framework 4.8)
  • VisualFA.Compiler - The compiler (.NET 6)
  • VisualFA.Compiler.DNF - The compiler (.NET Framework 4.8)
  • VisualFA.Generator - The code generator (.NET 6)
  • VisualFA.Generator.DNF - The code generator (.NET Framework 4.8)
  • LexGen - A command line tool for generating regex lexer source code (.NET 6)
  • LexGen.DNF - The same tool (.NET Framework 4.8)
  • FsmExplorer - A visual app for stepping through regex state machines (.NET Framework 4.8, Windows only)
  • Benchmarks - Runs several benchmarks (.NET 7)
  • GeneratorDemo - Demonstrates source code generation
  • GeneratorDemoVB - Same as above but for VB.NET instead of C#
  • SimpleStuff - Various demonstrations
  • ArticleImages - Code I used to generate codeproject.com content
  • MinimizationStress - Just testing large state machine minimization