From 3aa4bf24b920ec6b0158657a8182c274674c6883 Mon Sep 17 00:00:00 2001 From: Justin Parker Date: Wed, 18 Dec 2024 23:05:38 -0800 Subject: [PATCH] v1.0.2 --- CHANGELOG.md | 4 + package-lock.json | 756 +++++++++++++++++----------------------------- package.json | 7 +- run-test.js | 4 +- 4 files changed, 282 insertions(+), 489 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 403adca..5b00aad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ # Changelog All notable changes to this project will be documented in this file. +## [1.0.2] - 2024-12-18 +### 📦 Updated +- Changed sentence parsing library to `sentence-parse` + ## [1.0.1] - 2024-12-15 ### 📦 Updated - Updated TransformersJS to 3.2.0 diff --git a/package-lock.json b/package-lock.json index dfa670f..49e311c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,18 +1,18 @@ { "name": "fast-topic-analysis", - "version": "1.0.1", + "version": "1.0.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "fast-topic-analysis", - "version": "1.0.1", + "version": "1.0.2", "license": "ISC", "dependencies": { "@huggingface/transformers": "^3.2.0", - "@stdlib/nlp-sentencize": "^0.2.2", "chalk": "^5.3.0", - "dotenv": "^16.4.7" + "dotenv": "^16.4.7", + "sentence-parse": "^1.0.2" } }, "node_modules/@emnapi/runtime": { @@ -475,483 +475,6 @@ "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==" }, - "node_modules/@stdlib/assert-has-own-property": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/assert-has-own-property/-/assert-has-own-property-0.2.2.tgz", - "integrity": "sha512-m5rV4Z2/iNkwx2vRsNheM6sQZMzc8rQQOo90LieICXovXZy8wA5jNld4kRKjMNcRt/TjrNP7i2Rhh8hruRDlHg==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/assert-has-symbol-support": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/assert-has-symbol-support/-/assert-has-symbol-support-0.2.2.tgz", - "integrity": "sha512-vCsGGmDZz5dikGgdF26rIL0y0nHvH7qaVf89HLLTybceuZijAqFSJEqcB3Gpl5uaeueLNAWExHi2EkoUVqKHGg==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/assert-has-tostringtag-support": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/assert-has-tostringtag-support/-/assert-has-tostringtag-support-0.2.2.tgz", - "integrity": "sha512-bSHGqku11VH0swPEzO4Y2Dr+lTYEtjSWjamwqCTC8udOiOIOHKoxuU4uaMGKJjVfXG1L+XefLHqzuO5azxdRaA==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "dependencies": { - "@stdlib/assert-has-symbol-support": "^0.2.1" - }, - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/assert-is-boolean": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/assert-is-boolean/-/assert-is-boolean-0.2.2.tgz", - "integrity": "sha512-3KFLRTYZpX6u95baZ6PubBvjehJs2xBU6+zrenR0jx8KToUYCnJPxqqj7JXRhSD+cOURmcjj9rocVaG9Nz18Pg==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "dependencies": { - "@stdlib/assert-has-tostringtag-support": "^0.2.2", - "@stdlib/boolean-ctor": "^0.2.2", - "@stdlib/utils-define-nonenumerable-read-only-property": "^0.2.2", - "@stdlib/utils-native-class": "^0.2.1" - }, - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/assert-is-string": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/assert-is-string/-/assert-is-string-0.2.2.tgz", - "integrity": "sha512-SOkFg4Hq443hkadM4tzcwTHWvTyKP9ULOZ8MSnnqmU0nBX1zLVFLFGY8jnF6Cary0dL0V7QQBCfuxqKFM6u2PQ==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "dependencies": { - "@stdlib/assert-has-tostringtag-support": "^0.2.2", - "@stdlib/utils-define-nonenumerable-read-only-property": "^0.2.2", - "@stdlib/utils-native-class": "^0.2.1" - }, - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/boolean-ctor": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/boolean-ctor/-/boolean-ctor-0.2.2.tgz", - "integrity": "sha512-qIkHzmfxDvGzQ3XI9R7sZG97QSaWG5TvWVlrvcysOGT1cs6HtQgnf4D//SRzZ52VLm8oICP+6OKtd8Hpm6G7Ww==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/error-tools-fmtprodmsg": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/error-tools-fmtprodmsg/-/error-tools-fmtprodmsg-0.2.2.tgz", - "integrity": "sha512-2IliQfTes4WV5odPidZFGD5eYDswZrPXob7oOu95Q69ERqImo8WzSwnG2EDbHPyOyYCewuMfM5Ha6Ggf+u944Q==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/nlp-sentencize": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/nlp-sentencize/-/nlp-sentencize-0.2.2.tgz", - "integrity": "sha512-dWDkcyPoLNvuQA59OxGHorcEQ79Rk6ghJPGwuS8kt9c3Rp0YjSE1cs/+2C9crm8uQXRJr6dcp25hLGLrSiqgsw==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "dependencies": { - "@stdlib/assert-is-string": "^0.2.1", - "@stdlib/nlp-tokenize": "^0.2.1", - "@stdlib/string-base-trim": "^0.2.2" - }, - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/nlp-tokenize": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/nlp-tokenize/-/nlp-tokenize-0.2.2.tgz", - "integrity": "sha512-qoKCiOHrV/PuDQpgxY69ROyJcA+uxNORR6JWlLPL4K53rweCGwLf+htDXXuhRcTx/g7mPKNWqoBlGm0WHd8WWg==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "dependencies": { - "@stdlib/assert-has-own-property": "^0.2.1", - "@stdlib/assert-is-boolean": "^0.2.1", - "@stdlib/assert-is-string": "^0.2.1", - "@stdlib/error-tools-fmtprodmsg": "^0.2.2", - "@stdlib/string-format": "^0.2.2" - }, - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/string-base-format-interpolate": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/string-base-format-interpolate/-/string-base-format-interpolate-0.2.2.tgz", - "integrity": "sha512-i9nU9rAB2+o/RR66TS9iQ8x+YzeUDL1SGiAo6GY3hP6Umz5Dx9Qp/v8T69gWVsb4a1YSclz5+YeCWaFgwvPjKA==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/string-base-format-tokenize": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/string-base-format-tokenize/-/string-base-format-tokenize-0.2.2.tgz", - "integrity": "sha512-kXq2015i+LJjqth5dN+hYnvJXBSzRm8w0ABWB5tYAsIuQTpQK+mSo2muM8JBEFEnqUHAwpUsu2qNTK/9o8lsJg==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/string-base-replace": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/string-base-replace/-/string-base-replace-0.2.2.tgz", - "integrity": "sha512-Y4jZwRV4Uertw7AlA/lwaYl1HjTefSriN5+ztRcQQyDYmoVN3gzoVKLJ123HPiggZ89vROfC+sk/6AKvly+0CA==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/string-base-trim": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/string-base-trim/-/string-base-trim-0.2.2.tgz", - "integrity": "sha512-1BVoM1XYxPbZ7xQcf4+TNP4dya5KphlkRnj0e0FBrmlYacJITvPgmsywOREivHqF8osH1oQQBZ47J/J/hQ79Eg==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "dependencies": { - "@stdlib/string-base-replace": "^0.2.1" - }, - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/string-format": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/string-format/-/string-format-0.2.2.tgz", - "integrity": "sha512-GUa50uxgMAtoItsxTbMmwkyhIwrCxCrsjzk3nAbLnt/1Kt1EWOWMwsALqZdD6K4V/xSJ4ns6PZur3W6w+vKk9g==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "dependencies": { - "@stdlib/string-base-format-interpolate": "^0.2.1", - "@stdlib/string-base-format-tokenize": "^0.2.2" - }, - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/symbol-ctor": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/symbol-ctor/-/symbol-ctor-0.2.2.tgz", - "integrity": "sha512-XsmiTfHnTb9jSPf2SoK3O0wrNOXMxqzukvDvtzVur1XBKfim9+seaAS4akmV1H3+AroAXQWVtde885e1B6jz1w==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/utils-define-nonenumerable-read-only-property": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/utils-define-nonenumerable-read-only-property/-/utils-define-nonenumerable-read-only-property-0.2.2.tgz", - "integrity": "sha512-V3mpAesJemLYDKG376CsmoczWPE/4LKsp8xBvUxCt5CLNAx3J/1W39iZQyA5q6nY1RStGinGn1/dYZwa8ig0Uw==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "dependencies": { - "@stdlib/utils-define-property": "^0.2.3" - }, - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/utils-define-property": { - "version": "0.2.4", - "resolved": "https://registry.npmjs.org/@stdlib/utils-define-property/-/utils-define-property-0.2.4.tgz", - "integrity": "sha512-XlMdz7xwuw/sqXc9LbsV8XunCzZXjbZPC+OAdf4t4PBw4ZRwGzlTI6WED+f4PYR5Tp9F1cHgLPyMYCIBfA2zRg==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "dependencies": { - "@stdlib/error-tools-fmtprodmsg": "^0.2.1", - "@stdlib/string-format": "^0.2.1" - }, - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, - "node_modules/@stdlib/utils-native-class": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@stdlib/utils-native-class/-/utils-native-class-0.2.2.tgz", - "integrity": "sha512-cSn/FozbEpfR/FlJAoceQtZ8yUJFhZ8RFkbEsxW/7+H4o09yln3lBS0HSfUJISYNtpTNN/2/Fup88vmvwspvwA==", - "os": [ - "aix", - "darwin", - "freebsd", - "linux", - "macos", - "openbsd", - "sunos", - "win32", - "windows" - ], - "dependencies": { - "@stdlib/assert-has-own-property": "^0.2.1", - "@stdlib/assert-has-tostringtag-support": "^0.2.2", - "@stdlib/symbol-ctor": "^0.2.2" - }, - "engines": { - "node": ">=0.10.0", - "npm": ">2.7.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/stdlib" - } - }, "node_modules/@types/node": { "version": "22.10.2", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.2.tgz", @@ -987,6 +510,11 @@ "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==" + }, "node_modules/brace-expansion": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", @@ -996,9 +524,9 @@ } }, "node_modules/chalk": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.3.0.tgz", - "integrity": "sha512-dLitG79d+GV1Nb/VYcCDFivJeK1hiukt9QjRNVOsUtTy1rR1YJsmpGGTZ3qJos+uw7WmWF4wUwBd9jxjocFC2w==", + "version": "5.4.0", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.4.0.tgz", + "integrity": "sha512-ZkD35Mx92acjB2yNJgziGqT9oKHEOxjTBTDRpOsRWtdecL/0jM3z5kM/CTzHWvHIen1GvkM85p6TuFfDGfc8/Q==", "engines": { "node": "^12.17.0 || ^14.13 || >=16.0.0" }, @@ -1006,6 +534,46 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/cheerio": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0.tgz", + "integrity": "sha512-quS9HgjQpdaXOvsZz82Oz7uxtXiy6UIsIQcpBj7HRw2M63Skasm9qlDocAM7jNuaxdhpPU7c4kJN+gA5MCu4ww==", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.1.0", + "encoding-sniffer": "^0.2.0", + "htmlparser2": "^9.1.0", + "parse5": "^7.1.2", + "parse5-htmlparser2-tree-adapter": "^7.0.0", + "parse5-parser-stream": "^7.1.2", + "undici": "^6.19.5", + "whatwg-mimetype": "^4.0.0" + }, + "engines": { + "node": ">=18.17" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + "node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/chownr": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz", @@ -1064,6 +632,32 @@ "node": ">= 8" } }, + "node_modules/css-select": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz", + "integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", + "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/detect-libc": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.3.tgz", @@ -1072,6 +666,57 @@ "node": ">=8" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ] + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz", + "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/dotenv": { "version": "16.4.7", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.7.tgz", @@ -1093,6 +738,29 @@ "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==" }, + "node_modules/encoding-sniffer": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.0.tgz", + "integrity": "sha512-ju7Wq1kg04I3HtiYIOrUrdfdDvkyO9s5XM8QAj/bN61Yo/Vb4vgJxy5vi4Yxk01gWHbrofpPtpxM8bKger9jhg==", + "dependencies": { + "iconv-lite": "^0.6.3", + "whatwg-encoding": "^3.1.1" + }, + "funding": { + "url": "https://github.com/fb55/encoding-sniffer?sponsor=1" + } + }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/flatbuffers": { "version": "1.12.0", "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz", @@ -1137,6 +805,35 @@ "resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz", "integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==" }, + "node_modules/htmlparser2": { + "version": "9.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-9.1.0.tgz", + "integrity": "sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.1.0", + "entities": "^4.5.0" + } + }, + "node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/is-arrayish": { "version": "0.3.2", "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz", @@ -1227,6 +924,17 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, "node_modules/onnxruntime-common": { "version": "1.20.1", "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.20.1.tgz", @@ -1270,6 +978,40 @@ "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==" }, + "node_modules/parse5": { + "version": "7.2.1", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.2.1.tgz", + "integrity": "sha512-BuBYQYlv1ckiPdQi/ohiivi9Sagc9JG+Ozs0r7b/0iK3sKmrb0b9FdWdBbOdx6hBCM/F9Ir82ofnBhtZOjCRPQ==", + "dependencies": { + "entities": "^4.5.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-htmlparser2-tree-adapter": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz", + "integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==", + "dependencies": { + "domhandler": "^5.0.3", + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-parser-stream": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", + "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", + "dependencies": { + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, "node_modules/path-key": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", @@ -1335,6 +1077,11 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, "node_modules/semver": { "version": "7.6.3", "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz", @@ -1346,6 +1093,15 @@ "node": ">=10" } }, + "node_modules/sentence-parse": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/sentence-parse/-/sentence-parse-1.0.2.tgz", + "integrity": "sha512-ETWk1CNCNuonN700SiLzwsPsCWU42/CFSKRwotQyAxdp7bieGHRieFVeAQEnS9BX3m9GDJAeqZilvKVpaSwn8A==", + "dependencies": { + "cheerio": "^1.0.0", + "string-segmenter": "^1.2.0" + } + }, "node_modules/sharp": { "version": "0.33.5", "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.33.5.tgz", @@ -1422,6 +1178,11 @@ "is-arrayish": "^0.3.1" } }, + "node_modules/string-segmenter": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/string-segmenter/-/string-segmenter-1.2.0.tgz", + "integrity": "sha512-xJoXUcoQaMLs3vyFbdHadegLg1apJb7aPivFhxbH88y/1vJvRiKs+tTdB3QvlYau7whEZqXpWBkhKV8llFx03g==" + }, "node_modules/string-width": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", @@ -1532,11 +1293,38 @@ "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "optional": true }, + "node_modules/undici": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.0.tgz", + "integrity": "sha512-BUgJXc752Kou3oOIuU1i+yZZypyZRqNPW0vqoMPl8VaoalSfeR0D8/t4iAS3yirs79SSMTxTag+ZC86uswv+Cw==", + "engines": { + "node": ">=18.17" + } + }, "node_modules/undici-types": { "version": "6.20.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==" }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "engines": { + "node": ">=18" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index 206162a..1d79bf8 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "fast-topic-analysis", - "version": "1.0.1", + "version": "1.0.2", "description": "Semantically create chunks from large texts. Useful for workflows involving large language models (LLMs).", "repository": { "type": "git", @@ -14,6 +14,7 @@ "type": "module", "scripts": { "start": "node run-test.js", + "test": "node run-test.js", "generate": "node generate.js" }, "keywords": [ @@ -37,8 +38,8 @@ "license": "ISC", "dependencies": { "@huggingface/transformers": "^3.2.0", - "@stdlib/nlp-sentencize": "^0.2.2", "chalk": "^5.3.0", - "dotenv": "^16.4.7" + "dotenv": "^16.4.7", + "sentence-parse": "^1.0.2" } } diff --git a/run-test.js b/run-test.js index 1908de5..f8b2f4b 100644 --- a/run-test.js +++ b/run-test.js @@ -3,7 +3,7 @@ // ------------- import { generateEmbeddings } from "./modules/embedding.js"; import { cosineSimilarity } from "./modules/similarity.js"; -import sentencize from '@stdlib/nlp-sentencize'; +import { parseSentences } from 'sentence-parse'; import fs from 'fs'; import readline from 'readline'; import chalk from 'chalk'; @@ -48,7 +48,7 @@ let totalSentences = 0; async function testSimilarity(testMessage) { console.log(chalk.blue('\n-----------------------------------------------------\n')); - const sentences = sentencize(testMessage); + const sentences = await parseSentences(testMessage); totalSentences += sentences.length; let sentencesWithEmbeddings = await generateEmbeddings(sentences, true);