-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexamples.py
156 lines (118 loc) · 3.81 KB
/
examples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import re
import humre
import ke
import simplematch
from bourbaki.regex import ANYCHAR
from bourbaki.regex import C
from bourbaki.regex import Digit
from humre import CLOSE_BRACKET
from humre import CLOSE_PARENTHESIS
from humre import DIGIT
from humre import LETTER
from humre import OPEN_BRACKET
from humre import OPEN_PARENTHESIS
from humre import SOMETHING
from humre import either
from humre import group
from humre import noncap_group
from humre import one_or_more
from parse import parse
from pregex.core.classes import Any
from pregex.core.classes import AnyDigit
from pregex.core.classes import AnyUppercaseLetter
from pregex.core.groups import Capture
from pregex.core.operators import Either
from pregex.core.quantifiers import OneOrMore
from scanf import scanf
from verbalexpressions import VerEx
# Shared inputs for the examples in this file: the first string carries a
# free-form title followed by a bracketed KEY-number reference, the second is
# a bare reference with no title and is expected to be rejected.
STRING_TO_MATCH = "This is a title [KEY-123]"
STRING_NO_MATCH = "[KEY-123]"

# re -- standard-library baseline; named groups expose the three fields.
pattern = re.compile(r"(?P<title>.+) (\(|\[)(?P<key>[A-Z]+)-(?P<number>\d+)(\)|\])")
match, no_match = map(pattern.match, (STRING_TO_MATCH, STRING_NO_MATCH))

assert match is not None
assert match.groupdict() == {"title": "This is a title", "key": "KEY", "number": "123"}
assert no_match is None
# PythonVerbalExpressions -- fluent builder chain, written one call per line.
# NOTE(review): OR() may alternate the whole expression built so far rather
# than only the preceding bracket literal -- confirm the brackets are
# constrained as intended.
pattern = (
    VerEx()
    .anything()
    .then(" ")
    .then("[")
    .OR("(")
    .anything()
    .then("-")
    .anything()
    .then("]")
    .OR(")")
)
match = pattern.match(STRING_TO_MATCH)
no_match = pattern.match(STRING_NO_MATCH)

assert match
assert not no_match
# pregex -- the pattern is composed from objects joined with "+"; the three
# named captures are built first so the final composition reads left to right.
title_part = Capture(OneOrMore(Any()), name="title")
opening_bracket = Either("(", "[")
key_part = Capture(OneOrMore(AnyUppercaseLetter()), name="key")
number_part = Capture(OneOrMore(AnyDigit()), name="number")
closing_bracket = Either(")", "]")

pattern = (
    title_part + " " + opening_bracket + key_part + "-" + number_part + closing_bracket
)

captures = pattern.get_captures(STRING_TO_MATCH)
no_captures = pattern.get_captures(STRING_NO_MATCH)

assert captures == [("This is a title", "KEY", "123")]
assert no_captures == []
# humre -- named helpers and constants produce the pieces of the expression;
# the pieces are listed in order and joined into the final pattern.
pattern = "".join(
    [
        group(SOMETHING),
        " ",
        noncap_group(either(OPEN_PARENTHESIS, OPEN_BRACKET)),
        group(one_or_more(LETTER)),
        "-",
        group(one_or_more(DIGIT)),
        noncap_group(either(CLOSE_PARENTHESIS, CLOSE_BRACKET)),
    ]
)
compiled = humre.compile(pattern)
match = compiled.match(STRING_TO_MATCH)
no_match = compiled.match(STRING_NO_MATCH)

# Groups are positional here: title, key, number.
assert match.groups() == ("This is a title", "KEY", "123")
assert not no_match
# bourbaki.regex
# The pattern is composed from pieces joined with "+"; each named piece is
# written as <atom>[1:]("name").
# NOTE(review): [1:] presumably means "one or more" and the trailing call
# presumably names a capture group -- confirm against the bourbaki.regex docs.
# NOTE(review): unlike the other examples in this file, only square brackets
# are accepted around the key here (the literals are " [" and "]"); there is
# no parenthesis alternative.
pattern = (
ANYCHAR[1:] ("title") +
" [" +
C["A":"Z"][1:] ("key") +
"-" +
Digit[1:] ("number") +
"]"
)
result = pattern.match(STRING_TO_MATCH)
result_no_match = pattern.match(STRING_NO_MATCH)
# Only truthiness is checked; the captured values are not inspected.
assert result
assert not result_no_match
# scanf -- C-style format string: two %s tokens and one %d integer.
# As the expected tuple below shows, only the last word of the title
# ("title") is captured by the first %s, and %d yields an int.
pattern = "%s [%s-%d]"
result = scanf(pattern, STRING_TO_MATCH)
# The negative case must go through scanf() as well; calling parse() with a
# scanf format string would not exercise scanf at all.
result_no_parse = scanf(pattern, STRING_NO_MATCH)

assert result == ("title", "KEY", 123)
assert not result_no_parse
# parse -- format()-style template with named, typed fields; the matched
# values are read back from the result's ``named`` mapping.
pattern = "{title} [{key:l}-{id:3d}]"
expected_fields = {"title": "This is a title", "key": "KEY", "id": 123}

result, result_no_parse = (
    parse(pattern, text) for text in (STRING_TO_MATCH, STRING_NO_MATCH)
)

assert result.named == expected_fields
assert not result_no_parse
# simplematch -- brace-delimited wildcards; a successful match compares equal
# to a plain dict of the captured fields.
pattern = "{title} [{key}-{id:int}]"
expected_fields = {"title": "This is a title", "key": "KEY", "id": 123}

match, no_match = (
    simplematch.match(pattern, text) for text in (STRING_TO_MATCH, STRING_NO_MATCH)
)

# The actual result is attached as the assertion message to ease debugging.
assert match == expected_fields, match
assert not no_match
# grok
# Only the pattern string is defined; the matching code below is disabled.
# NOTE(review): `Grok` is not imported in the visible part of this file, so
# the commented-out lines cannot run as written -- confirm whether this
# example should be restored (with its import) or removed.
pattern = "%{GREEDYDATA:title} [%{WORD:key}-%{NUMBER:id}]"
# grok = Grok(pattern)
# match = grok.match(STRING_TO_MATCH)
# no_match = grok.match(STRING_NO_MATCH)
#
# assert match == {'title': 'This is a title', 'key': 'KEY', 'id': 123}, match
# assert not no_match
# kleenexp -- bracketed, keyword-based syntax; the result exposes groupdict()
# with every captured field as a string.
# NOTE(review): "#tag=[...]" appears to define the KEY-number part once so it
# can be reused inside either bracket style -- confirm against the kleenexp docs.
pattern = "[[capture:title 1+ #any] ' ' #tag=[[capture:key #letters] '-' [capture:id #digits]] ['(' #tag ')' | '[' #tag ']']]"
expected_fields = {"title": "This is a title", "key": "KEY", "id": "123"}

match, no_match = (
    ke.match(pattern, text) for text in (STRING_TO_MATCH, STRING_NO_MATCH)
)

assert match.groupdict() == expected_fields
assert not no_match