-
-
Notifications
You must be signed in to change notification settings - Fork 28
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Unit parsing #69
Comments
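I started looking at adding a unit classifier (diff below):
diff --git a/classification/UnitClassification.js b/classification/UnitClassification.js
new file mode 100644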
index 0000000..0c6fc02
--- /dev/null
+++ b/classification/UnitClassification.js
@@ -0,0 +1,11 @@
+const Classification = require('./Classification')
+
+class UnitClassification extends Classification {
+ constructor (confidence, meta) {
+ super(confidence, meta)
+ this.public = true
+ this.label = 'unit'
+ }
+}
+
+module.exports = UnitClassification
diff --git a/classification/UnitClassification.test.js b/classification/UnitClassification.test.js
new file mode 100644
index 0000000..bdf44e7
--- /dev/null
+++ b/classification/UnitClassification.test.js
@@ -0,0 +1,24 @@
+const Classification = require('./UnitClassification')
+
+module.exports.tests = {}
+
+module.exports.tests.constructor = (test) => {
+ test('constructor', (t) => {
+ let c = new Classification()
+ t.false(c.public)
+ t.equals(c.label, 'unit')
+ t.equals(c.confidence, 1.0)
+ t.deepEqual(c.meta, {})
+ t.end()
+ })
+}
+
+module.exports.all = (tape, common) => {
+ function test (name, testFunction) {
+ return tape(`UnitClassification: ${name}`, testFunction)
+ }
+
+ for (var testCase in module.exports.tests) {
+ module.exports.tests[testCase](test, common)
+ }
+}
diff --git a/classifier/UnitClassifier.js b/classifier/UnitClassifier.js
new file mode 100644
index 0000000..c3e12c3
--- /dev/null
+++ b/classifier/UnitClassifier.js
@@ -0,0 +1,12 @@
+const WordClassifier = require('./super/WordClassifier')
+const UnitClassification = require('../classification/UnitClassification')
+
+class UnitClassifier extends WordClassifier {
+ each (span) {
+ if (/^[a-zA-Z\u0400-\u04FF]{1}$/.test(span.body)) {
+ span.classify(new UnitClassification(1))
+ }
+ }
+}
+
+module.exports = UnitClassifier
diff --git a/classifier/UnitClassifier.test.js b/classifier/UnitClassifier.test.js
new file mode 100644
index 0000000..68e5089
--- /dev/null
+++ b/classifier/UnitClassifier.test.js
@@ -0,0 +1,45 @@
+const UnitClassifier = require('./UnitClassifier')
+const UnitClassification = require('../classification/UnitClassification')
+const Span = require('../tokenization/Span')
+const classifier = new UnitClassifier()
+
+module.exports.tests = {}
+
+function classify (body) {
+ let s = new Span(body)
+ classifier.each(s)
+ return s
+}
+
+module.exports.tests.alpha = (test) => {
+ test('UnitClassification: English letter uppercase', (t) => {
+ let s = classify('A')
+ t.deepEqual(s.classifications, { UnitClassification: new UnitClassification(1.0) })
+ t.end()
+ })
+ test('UnitClassification: English letter lowercase', (t) => {
+ let s = classify('a')
+ t.deepEqual(s.classifications, { UnitClassification: new UnitClassification(1.0) })
+ t.end()
+ })
+ test('UnitClassification: Cyrillic', (t) => {
+ let s = classify('в')
+ t.deepEqual(s.classifications, { UnitClassification: new UnitClassification(1.0) })
+ t.end()
+ })
+ test('UnitClassification: Cyrillic', (t) => {
+ let s = classify('б')
+ t.deepEqual(s.classifications, { UnitClassification: new UnitClassification(1.0) })
+ t.end()
+ })
+}
+
+module.exports.all = (tape, common) => {
+ function test (name, testFunction) {
+ return tape(`UnitClassifier: ${name}`, testFunction)
+ }
+
+ for (var testCase in module.exports.tests) {
+ module.exports.tests[testCase](test, common)
+ }
+}
diff --git a/parser/AddressParser.js b/parser/AddressParser.js
index d070631..8e8aa7d 100644
--- a/parser/AddressParser.js
+++ b/parser/AddressParser.js
@@ -2,6 +2,7 @@ const Parser = require('./Parser')
const AlphaNumericClassifier = require('../classifier/AlphaNumericClassifier')
const TokenPositionClassifier = require('../classifier/TokenPositionClassifier')
const HouseNumberClassifier = require('../classifier/HouseNumberClassifier')
+const UnitClassifier = require('../classifier/UnitClassifier')
const PostcodeClassifier = require('../classifier/PostcodeClassifier')
const StreetPrefixClassifier = require('../classifier/StreetPrefixClassifier')
const StreetSuffixClassifier = require('../classifier/StreetSuffixClassifier')
@@ -42,6 +43,7 @@ class AddressParser extends Parser {
// word classifiers
new HouseNumberClassifier(),
+ new UnitClassifier(),
new PostcodeClassifier(),
new StreetPrefixClassifier(),
new StreetSuffixClassifier(),
Closed
Is this the desired result?
|
Sorry for the delay, the desired result would be:
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The parser does not accurately detect the 'B' as a unit in the following addresses with unit numbers.
The text was updated successfully, but these errors were encountered: