From 850f2906ea3bf45327321a04c1f3f82a87ef6de8 Mon Sep 17 00:00:00 2001 From: Jakob Kraus Date: Fri, 24 May 2024 03:42:58 +0200 Subject: [PATCH] feat: Add basic functionality --- .editorconfig | 4 + .github/workflows/format.yml | 8 + .github/workflows/lint.yml | 10 +- .github/workflows/test.yml | 25 + Cargo.lock | 1657 +++++++++++++++++++++++++++++++++- Cargo.toml | 13 +- Makefile | 22 +- README.md | 8 + src/datatypes/attributes.rs | 16 + src/datatypes/key.rs | 36 +- src/datatypes/mod.rs | 2 +- src/datatypes/value.rs | 30 +- src/errors.rs | 29 +- src/graph/edge.rs | 65 +- src/graph/mod.rs | 875 +++++++++++++++++- src/graph/node.rs | 72 +- src/graph/polars.rs | 747 +++++++++++++++ src/lib.rs | 10 +- src/macros.rs | 1 + src/prelude.rs | 3 + 20 files changed, 3567 insertions(+), 66 deletions(-) create mode 100644 .github/workflows/test.yml create mode 100644 src/graph/polars.rs diff --git a/.editorconfig b/.editorconfig index 74bd1ec..7f478a5 100644 --- a/.editorconfig +++ b/.editorconfig @@ -12,3 +12,7 @@ insert_final_newline = true [*.{cfg,toml,rst,rs}] indent_style = space indent_size = 4 + +[Makefile] +indent_style = tab +indent_size = 4 diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 4ebd1fa..1cd7af4 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -1,8 +1,16 @@ name: Format Pull Request on: + push: + branches: [main] + paths: + - src/** + - .github/workflows/test.yml pull_request: branches: [main] + paths: + - src/** + - .github/workflows/test.yml jobs: format: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 5503282..ff98385 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,8 +1,16 @@ name: Lint Pull Request on: + push: + branches: [main] + paths: + - src/** + - .github/workflows/test.yml pull_request: branches: [main] + paths: + - src/** + - .github/workflows/test.yml jobs: lint: @@ -15,4 +23,4 @@ jobs: with: components: clippy - name: Lint with 
clippy - run: cargo clippy --all-targets --all-features -- -D warnings + run: make lint diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..9085958 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,25 @@ +name: Run Tests + +on: + push: + branches: [main] + paths: + - src/** + - .github/workflows/test.yml + pull_request: + branches: [main] + paths: + - src/** + - .github/workflows/test.yml + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Rust + uses: dtolnay/rust-toolchain@nightly + - name: Run tests + run: make test + - name: Run doc-tests + run: make test-doc diff --git a/Cargo.lock b/Cargo.lock index d808b9a..d15a7e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,31 +2,1670 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" 
+dependencies = [ + "libc", +] + +[[package]] +name = "argminmax" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52424b59d69d69d5056d508b260553afd91c57e21849579cd1f50ee8b8b88eaa" +dependencies = [ + "num-traits", +] + +[[package]] +name = "array-init-cursor" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atoi_simd" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae037714f313c1353189ead58ef9eec30a8e8dc101b2622d461418fd59e28a9" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + [[package]] name = "bytemuck" version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.6.0" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "bytes" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" + +[[package]] +name = "cc" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" +dependencies = [ + "jobserver", + "libc", + "once_cell", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets", +] + +[[package]] +name = "chrono-tz" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", +] + +[[package]] +name = "comfy-table" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +dependencies = [ + "crossterm", + "strum", + "strum_macros", + "unicode-width", +] + +[[package]] +name = 
"core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crossterm" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +dependencies = [ + "bitflags", + "crossterm_winapi", + "libc", + "parking_lot", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + +[[package]] +name = "dyn-clone" +version = "1.0.17" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" + +[[package]] +name = "either" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" + +[[package]] +name = "enum_dispatch" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" +dependencies = [ + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] -name = "byteorder" +name = "ethnum" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "fast-float" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" + +[[package]] +name = "foreign_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + 
"wasm-bindgen", +] + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "graphster" -version = "0.0.0-a0" +version = "0.0.0-a1" dependencies = [ - "roaring", + "polars", + "rayon", ] [[package]] -name = "roaring" -version = "0.10.4" +name = "hashbrown" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b26f4c25a604fcb3a1bcd96dd6ba37c93840de95de8198d94c0d571a74a804d1" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ - "bytemuck", - "byteorder", + "ahash", + "allocator-api2", + "rayon", + "serde", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = 
"indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "itoap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" + +[[package]] +name = "jobserver" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "lz4" +version = "1.24.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" +dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "memmap2" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" +dependencies = [ + "libc", +] + +[[package]] +name = "multiversion" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4851161a11d3ad0bf9402d90ffc3967bf231768bfd7aeb61755ad06dbf1a142" +dependencies = [ + "multiversion-macros", + "target-features", +] + +[[package]] +name = "multiversion-macros" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79a74ddee9e0c27d2578323c13905793e91622148f138ba29738f9dddb835e90" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", + "target-features", +] + +[[package]] +name = "now" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d89e9874397a1f0a52fc1f197a8effd9735223cb2390e9dcc83ac6cd02923d0" +dependencies = [ + "chrono", +] + +[[package]] +name = "ntapi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +dependencies = [ + "winapi", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "parking_lot" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "parquet-format-safe" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1131c54b167dd4e4799ce762e1ab01549ebb94d5bdd13e6ec1b467491c378e1f" + +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator", + 
"phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "planus" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1691dd09e82f428ce8d6310bd6d5da2557c82ff17694d2a32cad7242aea89f" +dependencies = [ + "array-init-cursor", +] + +[[package]] +name = "polars" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e148396dca5496566880fa19374f3f789a29db94e3eb458afac1497b4bac5442" +dependencies = [ + "getrandom", + "polars-arrow", + "polars-core", + "polars-error", + "polars-io", + "polars-lazy", + "polars-ops", + "polars-parquet", + "polars-sql", + "polars-time", + "polars-utils", + "version_check", +] + +[[package]] +name = "polars-arrow" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cb5e11cd0752ae022fa6ca3afa50a14b0301b7ce53c0135828fbb0f4fa8303e" +dependencies = [ + "ahash", + "atoi", + "atoi_simd", + "bytemuck", + "chrono", + "chrono-tz", + "dyn-clone", + "either", + "ethnum", + "fast-float", + "foreign_vec", + "getrandom", + "hashbrown", + "itoa", + "itoap", + "lz4", + "multiversion", + "num-traits", + "polars-arrow-format", + "polars-error", + "polars-utils", + "ryu", + "simdutf8", + "streaming-iterator", + "strength_reduce", + "version_check", + "zstd", +] + 
+[[package]] +name = "polars-arrow-format" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b0ef2474af9396b19025b189d96e992311e6a47f90c53cd998b36c4c64b84c" +dependencies = [ + "planus", + "serde", +] + +[[package]] +name = "polars-compute" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89fc4578f826234cdecb782952aa9c479dc49373f81694a7b439c70b6f609ba0" +dependencies = [ + "bytemuck", + "either", + "num-traits", + "polars-arrow", + "polars-error", + "polars-utils", + "strength_reduce", + "version_check", +] + +[[package]] +name = "polars-core" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e490c6bace1366a558feea33d1846f749a8ca90bd72a6748752bc65bb4710b2a" +dependencies = [ + "ahash", + "bitflags", + "bytemuck", + "chrono", + "chrono-tz", + "comfy-table", + "either", + "hashbrown", + "indexmap", + "num-traits", + "once_cell", + "polars-arrow", + "polars-compute", + "polars-error", + "polars-row", + "polars-utils", + "rand", + "rand_distr", + "rayon", + "regex", + "smartstring", + "thiserror", + "version_check", + "xxhash-rust", +] + +[[package]] +name = "polars-error" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08888f58e61599b00f5ea0c2ccdc796b54b9859559cc0d4582733509451fa01a" +dependencies = [ + "polars-arrow-format", + "regex", + "simdutf8", + "thiserror", +] + +[[package]] +name = "polars-expr" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4173591920fe56ad55af025f92eb0d08421ca85705c326a640c43856094e3484" +dependencies = [ + "ahash", + "bitflags", + "once_cell", + "polars-arrow", + "polars-core", + "polars-io", + "polars-ops", + "polars-plan", + "polars-time", + "polars-utils", + "rayon", + "smartstring", +] + +[[package]] +name = "polars-io" +version = "0.40.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5842896aea46d975b425d63f156f412aed3cfde4c257b64fb1f43ceea288074e" +dependencies = [ + "ahash", + "atoi_simd", + "bytes", + "chrono", + "fast-float", + "home", + "itoa", + "memchr", + "memmap2", + "num-traits", + "once_cell", + "percent-encoding", + "polars-arrow", + "polars-core", + "polars-error", + "polars-time", + "polars-utils", + "rayon", + "regex", + "ryu", + "simdutf8", + "smartstring", +] + +[[package]] +name = "polars-lazy" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e805ea2ebbc6b7749b0afb31b7fc5d32b42b57ba29b984549d43d3a16114c4a5" +dependencies = [ + "ahash", + "bitflags", + "glob", + "once_cell", + "polars-arrow", + "polars-core", + "polars-expr", + "polars-io", + "polars-ops", + "polars-pipe", + "polars-plan", + "polars-time", + "polars-utils", + "rayon", + "smartstring", + "version_check", +] + +[[package]] +name = "polars-ops" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0aed7e169c81b98457641cf82b251f52239a668916c2e683abd1f38df00d58" +dependencies = [ + "ahash", + "argminmax", + "base64", + "bytemuck", + "chrono", + "chrono-tz", + "either", + "hashbrown", + "hex", + "indexmap", + "memchr", + "num-traits", + "polars-arrow", + "polars-compute", + "polars-core", + "polars-error", + "polars-utils", + "rayon", + "regex", + "smartstring", + "unicode-reverse", + "version_check", +] + +[[package]] +name = "polars-parquet" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c70670a9e51cac66d0e77fd20b5cc957dbcf9f2660d410633862bb72f846d5b8" +dependencies = [ + "ahash", + "base64", + "ethnum", + "num-traits", + "parquet-format-safe", + "polars-arrow", + "polars-error", + "polars-utils", + "seq-macro", + "simdutf8", + "streaming-decompression", +] + +[[package]] +name = "polars-pipe" +version = "0.40.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a40ae1b3c74ee07e2d1f7cbf56c5d6e15969e45d9b6f0903bd2acaf783ba436" +dependencies = [ + "crossbeam-channel", + "crossbeam-queue", + "enum_dispatch", + "hashbrown", + "num-traits", + "polars-arrow", + "polars-compute", + "polars-core", + "polars-expr", + "polars-io", + "polars-ops", + "polars-plan", + "polars-row", + "polars-utils", + "rayon", + "smartstring", + "uuid", + "version_check", +] + +[[package]] +name = "polars-plan" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8daa3541ae7e9af311a4389bc2b21f83349c34c723cc67fa524cdefdaa172d90" +dependencies = [ + "ahash", + "bytemuck", + "chrono-tz", + "either", + "hashbrown", + "once_cell", + "percent-encoding", + "polars-arrow", + "polars-core", + "polars-io", + "polars-ops", + "polars-time", + "polars-utils", + "rayon", + "recursive", + "regex", + "smartstring", + "strum_macros", + "version_check", +] + +[[package]] +name = "polars-row" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deb285f2f3a65b00dd06bef16bb9f712dbb5478f941dab5cf74f9f016d382e40" +dependencies = [ + "bytemuck", + "polars-arrow", + "polars-error", + "polars-utils", +] + +[[package]] +name = "polars-sql" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a724f699d194cb02c25124d3832f7d4d77f387f1a89ee42f6b9e88ec561d4ad9" +dependencies = [ + "hex", + "once_cell", + "polars-arrow", + "polars-core", + "polars-error", + "polars-lazy", + "polars-plan", + "rand", + "serde", + "serde_json", + "sqlparser", +] + +[[package]] +name = "polars-time" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ebec238d8b6200d9f0c3ce411c8441e950bd5a7df7806b8172d06c1d5a4b97" +dependencies = [ + "atoi", + "bytemuck", + "chrono", + "chrono-tz", + "now", + "once_cell", + "polars-arrow", + "polars-core", + 
"polars-error", + "polars-ops", + "polars-utils", + "regex", + "smartstring", +] + +[[package]] +name = "polars-utils" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34e1a907c63abf71e5f21467e2e4ff748896c28196746f631c6c25512ec6102c" +dependencies = [ + "ahash", + "bytemuck", + "hashbrown", + "indexmap", + "num-traits", + "once_cell", + "polars-error", + "raw-cpuid", + "rayon", + "smartstring", + "stacker", + "sysinfo", + "version_check", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "raw-cpuid" +version = "11.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e29830cbb1290e404f24c73af91c5d8d631ce7e128691e9477556b540cd01ecd" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.66", +] + +[[package]] +name = "redox_syscall" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.10.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" + +[[package]] +name = "rustversion" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "seq-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" + +[[package]] +name = "serde" +version = "1.0.202" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.202" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] 
+ +[[package]] +name = "serde_json" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "smartstring" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" +dependencies = [ + "autocfg", + "static_assertions", + "version_check", +] + +[[package]] +name = "sqlparser" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7" +dependencies = [ + "log", +] + +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "streaming-decompression" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bf6cc3b19bfb128a8ad11026086e31d3ce9ad23f8ea37354b31383a187c44cf3" +dependencies = [ + "fallible-streaming-iterator", +] + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "strength_reduce" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" + +[[package]] +name = "strum" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" + +[[package]] +name = "strum_macros" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.66", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sysinfo" +version = "0.30.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "732ffa00f53e6b2af46208fba5718d9662a421049204e156328b66791ffa15ae" +dependencies = [ + "cfg-if", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "windows", +] + +[[package]] +name = "target-features" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" + +[[package]] +name = "thiserror" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-reverse" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b6f4888ebc23094adfb574fdca9fdc891826287a6397d2cd28802ffd6f20c76" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "unicode-segmentation" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" + +[[package]] +name = "unicode-width" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" + +[[package]] +name = "uuid" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" +dependencies = [ + "getrandom", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.66", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +dependencies = [ + "windows-core", + "windows-targets", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "xxhash-rust" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "927da81e25be1e1a2901d59b81b37dd2efd1fc9c9345a55007f09bf5a2d3ee03" + +[[package]] +name = "zerocopy" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "zstd" +version = "0.13.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.10+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" +dependencies = [ + "cc", + "pkg-config", ] diff --git a/Cargo.toml b/Cargo.toml index ff9590d..edc74ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,19 @@ [package] name = "graphster" -version = "0.0.0-a0" +version = "0.0.0-a1" edition = "2021" description = "High-performance DataGraph Library" license = "BSD-3-Clause" readme = "README.md" +repository = "https://github.com/graphster-dev/graphster" [dependencies] -roaring = "0.10.4" +rayon = "1.10.0" +polars = { version = "0.40.0", optional = true } + +[features] +polars = ["dep:polars"] + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] diff --git a/Makefile b/Makefile index 683d8d5..57f1e57 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,22 @@ -.PHONY = build lint format clean +.PHONY = build clean docs format lint test test-doc build: - cargo build + cargo build -lint: - cargo clippy --all-targets --all-features -- -D warnings +clean: + rm -rf target + +docs: + RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --no-deps --all-features format: - cargo fmt + cargo fmt -clean: - rm -rf target +lint: + cargo clippy --all-targets --all-features -- -D warnings + +test: + cargo test --lib --all-features +test-doc: + cargo +nightly test --doc --all-features diff --git a/README.md b/README.md index deffc4a..5b2754e 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,15 @@
+ + Latest Release on crates.io + Graphster License + + Graphster Testing Status +
+ +## Graphster is a DataGraph library designed for rapid data manipulation, analysis, and processing diff --git a/src/datatypes/attributes.rs b/src/datatypes/attributes.rs index 3abc6a3..2de1558 100644 --- a/src/datatypes/attributes.rs +++ b/src/datatypes/attributes.rs @@ -2,3 +2,19 @@ use super::{AttributeKey, AttributeValue}; use std::collections::HashMap; pub type Attributes = HashMap; + +pub trait IntoAttributes { + fn into_attributes(self) -> Attributes; +} + +impl IntoAttributes for HashMap +where + K: Into, + V: Into, +{ + fn into_attributes(self) -> Attributes { + self.into_iter() + .map(|(key, value)| (key.into(), value.into())) + .collect() + } +} diff --git a/src/datatypes/key.rs b/src/datatypes/key.rs index 6e0981a..182ef59 100644 --- a/src/datatypes/key.rs +++ b/src/datatypes/key.rs @@ -1,12 +1,18 @@ use crate::implement_from_wrapper; +use std::fmt::Display; -#[derive(Debug, Clone, PartialEq, Hash, Eq)] +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] pub enum AttributeKey { + Boolean(bool), Int16(i16), Int32(i32), Int64(i64), Int8(i8), String(String), + UInt16(u16), + UInt32(u32), + UInt64(u64), + UInt8(u8), Usize(usize), } @@ -15,4 +21,32 @@ implement_from_wrapper!(AttributeKey, Int32, i32); implement_from_wrapper!(AttributeKey, Int64, i64); implement_from_wrapper!(AttributeKey, Int8, i8); implement_from_wrapper!(AttributeKey, String, String); +implement_from_wrapper!(AttributeKey, UInt16, u16); +implement_from_wrapper!(AttributeKey, UInt32, u32); +implement_from_wrapper!(AttributeKey, UInt64, u64); +implement_from_wrapper!(AttributeKey, UInt8, u8); implement_from_wrapper!(AttributeKey, Usize, usize); + +impl From<&str> for AttributeKey { + fn from(value: &str) -> Self { + value.to_string().into() + } +} + +impl Display for AttributeKey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + AttributeKey::Boolean(value) => write!(f, "{}", value), + AttributeKey::Int16(value) => write!(f, "{}", value), 
+ AttributeKey::Int32(value) => write!(f, "{}", value), + AttributeKey::Int64(value) => write!(f, "{}", value), + AttributeKey::Int8(value) => write!(f, "{}", value), + AttributeKey::String(value) => write!(f, "{}", value), + AttributeKey::UInt16(value) => write!(f, "{}", value), + AttributeKey::UInt32(value) => write!(f, "{}", value), + AttributeKey::UInt64(value) => write!(f, "{}", value), + AttributeKey::UInt8(value) => write!(f, "{}", value), + AttributeKey::Usize(value) => write!(f, "{}", value), + } + } +} diff --git a/src/datatypes/mod.rs b/src/datatypes/mod.rs index 521be34..b61dde2 100644 --- a/src/datatypes/mod.rs +++ b/src/datatypes/mod.rs @@ -2,6 +2,6 @@ mod attributes; mod key; mod value; -pub use attributes::Attributes; +pub use attributes::{Attributes, IntoAttributes}; pub use key::AttributeKey; pub use value::AttributeValue; diff --git a/src/datatypes/value.rs b/src/datatypes/value.rs index ec00465..4a2d027 100644 --- a/src/datatypes/value.rs +++ b/src/datatypes/value.rs @@ -3,20 +3,46 @@ use crate::implement_from_wrapper; #[derive(Debug, Clone, PartialEq)] pub enum AttributeValue { Boolean(bool), - Float(f64), + Float32(f32), + Float64(f64), Int16(i16), Int32(i32), Int64(i64), Int8(i8), + Null, String(String), + UInt16(u16), + UInt32(u32), + UInt64(u64), + UInt8(u8), Usize(usize), } implement_from_wrapper!(AttributeValue, Boolean, bool); -implement_from_wrapper!(AttributeValue, Float, f64); +implement_from_wrapper!(AttributeValue, Float32, f32); +implement_from_wrapper!(AttributeValue, Float64, f64); implement_from_wrapper!(AttributeValue, Int16, i16); implement_from_wrapper!(AttributeValue, Int32, i32); implement_from_wrapper!(AttributeValue, Int64, i64); implement_from_wrapper!(AttributeValue, Int8, i8); implement_from_wrapper!(AttributeValue, String, String); +implement_from_wrapper!(AttributeValue, UInt16, u16); +implement_from_wrapper!(AttributeValue, UInt32, u32); +implement_from_wrapper!(AttributeValue, UInt64, u64); 
+implement_from_wrapper!(AttributeValue, UInt8, u8); implement_from_wrapper!(AttributeValue, Usize, usize); + +impl From<&str> for AttributeValue { + fn from(value: &str) -> Self { + value.to_string().into() + } +} + +impl> From> for AttributeValue { + fn from(value: Option) -> Self { + match value { + Some(value) => value.into(), + None => AttributeValue::Null, + } + } +} diff --git a/src/errors.rs b/src/errors.rs index 4bcc98d..fa5785c 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,5 +1,30 @@ +use super::graph::NodeIndex; +use crate::graph::EdgeIndex; +use std::fmt::Display; + #[derive(Debug)] pub enum GraphsterError { - NodeNotFound(String), - EdgeNotFound(String), + NodeNotFound { node_index: NodeIndex }, + NodeAlreadyExists { node_index: NodeIndex }, + EdgeNotFound { edge_index: EdgeIndex }, + ConversionError(String), } + +impl Display for GraphsterError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + GraphsterError::NodeNotFound { node_index } => { + write!(f, "NodeNotFound: Node with index {} not found", node_index) + } + GraphsterError::NodeAlreadyExists { node_index } => { + write!(f, "Node with index {} already exists", node_index) + } + GraphsterError::EdgeNotFound { edge_index } => { + write!(f, "EdgeNotFound: Edge with index {} not found", edge_index) + } + GraphsterError::ConversionError(message) => write!(f, "ConversionError: {}", message), + } + } +} + +pub type GraphsterResult = Result; diff --git a/src/graph/edge.rs b/src/graph/edge.rs index 57bc1ff..255b538 100644 --- a/src/graph/edge.rs +++ b/src/graph/edge.rs @@ -1,7 +1,62 @@ use super::node::NodeIndex; -use crate::datatypes::Attributes; +use crate::datatypes::{Attributes, IntoAttributes}; +use std::{fmt::Display, ops::Deref}; -pub type EdgeIndex = usize; +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(transparent)] +pub struct EdgeIndex(usize); + +impl From for EdgeIndex { + fn from(value: usize) -> Self { + EdgeIndex(value) + } +} + 
+impl From for usize { + fn from(value: EdgeIndex) -> Self { + value.0 + } +} + +impl Display for EdgeIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "EdgeIndex({})", self.0) + } +} + +pub enum EdgeIndexRef<'a> { + Owned(EdgeIndex), + Borrowed(&'a EdgeIndex), +} + +impl<'a> AsRef for EdgeIndexRef<'a> { + fn as_ref(&self) -> &EdgeIndex { + match self { + EdgeIndexRef::Owned(value) => value, + EdgeIndexRef::Borrowed(value) => value, + } + } +} + +impl Deref for EdgeIndexRef<'_> { + type Target = EdgeIndex; + + fn deref(&self) -> &Self::Target { + self.as_ref() + } +} + +impl<'a> From<&'a EdgeIndex> for EdgeIndexRef<'a> { + fn from(value: &'a EdgeIndex) -> Self { + EdgeIndexRef::Borrowed(value) + } +} + +impl<'a, T: Into> From for EdgeIndexRef<'a> { + fn from(value: T) -> Self { + EdgeIndexRef::Owned(value.into()) + } +} #[derive(Debug, Clone)] pub struct Edge { @@ -19,3 +74,9 @@ impl Edge { } } } + +impl, N2: Into, A: IntoAttributes> From<(N1, N2, A)> for Edge { + fn from(value: (N1, N2, A)) -> Self { + Self::new(value.0.into(), value.1.into(), value.2.into_attributes()) + } +} diff --git a/src/graph/mod.rs b/src/graph/mod.rs index eea88a9..063924b 100644 --- a/src/graph/mod.rs +++ b/src/graph/mod.rs @@ -1,18 +1,67 @@ mod edge; mod node; +#[cfg(feature = "polars")] +#[cfg_attr(docsrs, doc(cfg(feature = "polars")))] +mod polars; -use crate::{datatypes::Attributes, GraphsterError}; -use edge::{Edge, EdgeIndex}; -use node::{Node, NodeIndex}; +pub use self::{edge::EdgeIndex, node::NodeIndex}; +use crate::{ + datatypes::IntoAttributes, + errors::{GraphsterError, GraphsterResult}, + prelude::Attributes, +}; +use edge::{Edge, EdgeIndexRef}; +use node::{IntoNodeTuple, Node, NodeIndexRef}; +use rayon::prelude::*; use std::{collections::HashMap, sync::atomic::AtomicUsize}; +/// A graph data structure that supports rapid data manipulation, analysis, and processing. 
+/// +/// # Examples +/// +/// ```rust +/// use graphster::prelude::*; +/// +/// let mut graph = DataGraph::new(); +/// +/// // Add a node with an index of 0 and some attributes +/// graph.add_node(0, Attributes::new()).unwrap(); +/// +/// // Add another node with an index of 1 +/// graph.add_node(1, Attributes::new()).unwrap(); +/// +/// // Add an edge between node 0 and node 1 +/// graph.add_edge(0, 1, Attributes::new()).unwrap(); +/// +/// // Get the number of nodes in the graph +/// let node_count = graph.node_count(); +/// assert_eq!(node_count, 2); +/// +/// // Get the number of edges in the graph +/// let edge_count = graph.edge_count(); +/// assert_eq!(edge_count, 1); +/// ``` pub struct DataGraph { nodes: HashMap, - edges: HashMap, + edges: HashMap, edge_index_counter: AtomicUsize, } impl DataGraph { + /// Creates a new empty graph. + /// + /// Returns a new `DataGraph` instance with no nodes or edges. + /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// + /// let graph = DataGraph::new(); + /// + /// assert_eq!(graph.node_count(), 0); + /// assert_eq!(graph.edge_count(), 0); + /// ``` pub fn new() -> Self { DataGraph { nodes: HashMap::new(), @@ -21,70 +70,834 @@ impl DataGraph { } } - pub fn add_node(&mut self, index: impl Into, attributes: impl Into) { - let node = Node::new(attributes.into()); + /// Creates a new graph from an iterator of nodes. 
+ /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// use std::collections::HashMap; + /// + /// let nodes = vec![ + /// (0, HashMap::from([("foo", "bar")])), + /// (1, HashMap::from([("bar", "foo")])), + /// ]; + /// let graph = DataGraph::from_nodes(nodes); + /// + /// assert_eq!(graph.node_count(), 2); + /// assert_eq!(graph.edge_count(), 0); + /// ``` + pub fn from_nodes(nodes: N) -> Self + where + N: IntoParallelIterator, + T: IntoNodeTuple, + { + let nodes = nodes + .into_par_iter() + .map(|node| { + let node = node.into_node_tuple(); - self.nodes.insert(index.into(), node); + (node.0, node.1.into()) + }) + .collect::>(); + + DataGraph { + nodes, + edges: HashMap::new(), + edge_index_counter: AtomicUsize::new(0), + } } - pub fn add_edge( + /// Creates a new graph from an iterator of nodes and an iterator of edges. + /// + /// This function returns a `GraphsterResult` because it can fail if a source or target node + /// referenced in an edge doesn't exist in the provided nodes iterator. 
+ /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// use std::collections::HashMap; + /// + /// let nodes = vec![ + /// (0, HashMap::from([("foo", "bar")])), + /// (1, HashMap::from([("bar", "foo")])), + /// ]; + /// let edges = vec![(0, 1, HashMap::from([("foo", "bar")]))]; + /// + /// let graph = DataGraph::from_nodes_and_edges(nodes, edges).unwrap(); + /// + /// assert_eq!(graph.node_count(), 2); + /// assert_eq!(graph.edge_count(), 1); + /// ``` + pub fn from_nodes_and_edges(nodes: N, edges: E) -> GraphsterResult + where + N: IntoParallelIterator, + NT: IntoNodeTuple, + E: IntoParallelIterator, + ET: Into, + { + let nodes_mapping = nodes + .into_par_iter() + .map(|node| { + let node = node.into_node_tuple(); + + (node.0, node.1.into()) + }) + .collect::>(); + + let edge_index_counter = AtomicUsize::new(0); + + let edges = edges + .into_par_iter() + .map(|edge| { + let edge = edge.into(); + + if !nodes_mapping.contains_key(&edge.source_index) { + return Err(GraphsterError::NodeNotFound { + node_index: edge.source_index, + }); + } + + if !nodes_mapping.contains_key(&edge.target_index) { + return Err(GraphsterError::NodeNotFound { + node_index: edge.target_index, + }); + } + + let edge_index = edge_index_counter + .fetch_add(1, std::sync::atomic::Ordering::Relaxed) + .into(); + + Ok((edge_index, edge)) + }) + .collect::>()?; + + Ok(DataGraph { + nodes: nodes_mapping, + edges, + edge_index_counter, + }) + } + + /// Adds a new node to the graph with the specified index and attributes. + /// + /// If a node with the same index already exists in the graph, this function returns + /// an error of type `GraphsterError::NodeAlreadyExists`. 
+ /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// + /// let mut graph = DataGraph::new(); + /// + /// graph.add_node(0, Attributes::new()).unwrap(); + /// + /// // Trying to add another node with the same index will fail + /// assert!(graph.add_node(0, Attributes::new()).is_err()); + /// + /// assert_eq!(graph.node_count(), 1); + /// ``` + pub fn add_node, A: IntoAttributes>( &mut self, - source_index: impl Into, - target_index: impl Into, - attributes: impl Into, + node_index: I, + attributes: A, + ) -> GraphsterResult<()> { + let node_index = node_index.into(); + + if self.nodes.contains_key(&node_index) { + return Err(GraphsterError::NodeAlreadyExists { node_index }); + } + + self.nodes + .insert(node_index, attributes.into_attributes().into()); + + Ok(()) + } + + /// Adds multiple nodes to the graph from an iterator. + /// + /// If a node with the same index already exists in the graph, this function returns + /// an error of type `GraphsterError::NodeAlreadyExists`. + /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// + /// let mut graph = DataGraph::new(); + /// + /// let nodes = vec![(0, Attributes::new()), (1, Attributes::new())]; + /// graph.add_nodes(nodes).unwrap(); + /// + /// let nodes = vec![(0, Attributes::new()), (1, Attributes::new())]; + /// // Trying to add the same nodes again will fail + /// assert!(graph.add_nodes(nodes).is_err()); + /// ``` + pub fn add_nodes(&mut self, nodes: N) -> GraphsterResult<()> + where + N: IntoParallelIterator, + T: IntoNodeTuple, + { + let nodes = nodes + .into_par_iter() + .map(|node| { + let (node_index, attributes) = node.into_node_tuple(); + + if self.nodes.contains_key(&node_index) { + return Err(GraphsterError::NodeAlreadyExists { node_index }); + } + + Ok((node_index, attributes.into())) + }) + .collect::>>()?; + + self.nodes.extend(nodes); + + Ok(()) + } + + /// Adds a new edge to the graph between the specified source and target nodes. 
+ /// + /// If either the source or target node doesn't exist in the graph, this function returns + /// an error of type `GraphsterError::NodeNotFound`. + /// + /// This function returns a `Result` containing the newly created edge's index on success + /// or a `GraphsterError` on failure. + /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// + /// let mut graph = DataGraph::new(); + /// + /// graph.add_node(0, Attributes::new()).unwrap(); + /// graph.add_node(1, Attributes::new()).unwrap(); + /// + /// let edge_index = graph.add_edge(0, 1, Attributes::new()).unwrap(); + /// assert_eq!(graph.edge_count(), 1); + /// ``` + pub fn add_edge, NT: Into, A: IntoAttributes>( + &mut self, + source_index: NS, + target_index: NT, + attributes: A, ) -> Result { let edge_index = self .edge_index_counter - .fetch_add(1, std::sync::atomic::Ordering::Relaxed); + .fetch_add(1, std::sync::atomic::Ordering::Relaxed) + .into(); let source_index = source_index.into(); let target_index = target_index.into(); - let source_node = self - .nodes - .get_mut(&source_index) - .ok_or(GraphsterError::NodeNotFound(format!( - "Node with index {:?} not found", - source_index - )))?; + let source_node = + self.nodes + .get_mut(&source_index) + .ok_or(GraphsterError::NodeNotFound { + node_index: source_index.clone(), + })?; source_node.outgoing_edges.insert(edge_index); - let target_node = self - .nodes - .get_mut(&target_index) - .ok_or(GraphsterError::NodeNotFound(format!( - "Node with index {:?} not found", - target_index, - )))?; + let target_node = + self.nodes + .get_mut(&target_index) + .ok_or(GraphsterError::NodeNotFound { + node_index: target_index.clone(), + })?; target_node.incoming_edges.insert(edge_index); - let edge = Edge::new(source_index, target_index.into(), attributes.into()); + let edge = (source_index, target_index, attributes).into(); self.edges.insert(edge_index, edge); Ok(edge_index) } + + /// Adds multiple edges to the graph from an iterator. 
+ /// + /// If a source or target node referenced in an edge doesn't exist in the graph, + /// this function returns a `GraphsterResult` containing a `GraphsterError::NodeNotFound` error. + /// + /// This function returns a `GraphsterResult` containing a vector of indices for the added edges + /// on success or a `GraphsterError` on failure. + /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// + /// let mut graph = DataGraph::new(); + /// + /// let nodes = vec![(0, Attributes::new()), (1, Attributes::new()), (2, Attributes::new())]; + /// graph.add_nodes(nodes).unwrap(); + /// + /// let edges = vec![ + /// (0, 1, Attributes::new()), + /// (1, 2, Attributes::new()), + /// ]; + /// let edge_indices = graph.add_edges(edges).unwrap(); + /// + /// assert_eq!(graph.edge_count(), 2); + /// ``` + pub fn add_edges(&mut self, edges: E) -> GraphsterResult> + where + E: IntoParallelIterator, + T: Into, + { + let (edge_indices, edges) = edges + .into_par_iter() + .map(|edge| { + let edge: Edge = edge.into(); + + if !self.nodes.contains_key(&edge.source_index) { + return Err(GraphsterError::NodeNotFound { + node_index: edge.source_index, + }); + } + + if !self.nodes.contains_key(&edge.target_index) { + return Err(GraphsterError::NodeNotFound { + node_index: edge.target_index, + }); + } + + let edge_index = self + .edge_index_counter + .fetch_add(1, std::sync::atomic::Ordering::Relaxed) + .into(); + + Ok((edge_index, (edge_index, edge))) + }) + .collect::, Vec<(EdgeIndex, Edge)>)>>()?; + + for (edge_index, edge) in edges { + let source_node = self + .nodes + .get_mut(&edge.source_index) + .expect("Node must exist"); + source_node.outgoing_edges.insert(edge_index); + + let target_node = self + .nodes + .get_mut(&edge.target_index) + .expect("Node must exist"); + target_node.incoming_edges.insert(edge_index); + + self.edges.insert(edge_index, edge); + } + + Ok(edge_indices) + } + + pub fn remove_node<'a, N: Into>>( + &mut self, + node_index: N, + ) -> 
GraphsterResult { + let node_index_ref = node_index.into(); + + let node = + self.nodes + .remove(node_index_ref.as_ref()) + .ok_or(GraphsterError::NodeNotFound { + node_index: node_index_ref.clone(), + })?; + + for edge_index in node.incoming_edges { + self.edges.remove(&edge_index).expect("Edge must exist"); + } + + for edge_index in node.outgoing_edges { + self.edges.remove(&edge_index).expect("Edge must exist"); + } + + Ok(node.attributes) + } + + pub fn remove_edge<'a, E: Into>>( + &mut self, + edge_index: E, + ) -> GraphsterResult { + let edge_index_ref: EdgeIndexRef = edge_index.into(); + + let edge = + self.edges + .remove(edge_index_ref.as_ref()) + .ok_or(GraphsterError::EdgeNotFound { + edge_index: *edge_index_ref, + })?; + + let source_node = self + .nodes + .get_mut(&edge.source_index) + .expect("Node must exist"); + source_node.outgoing_edges.remove(edge_index_ref.as_ref()); + + let target_node = self + .nodes + .get_mut(&edge.target_index) + .expect("Node must exist"); + target_node.incoming_edges.remove(edge_index_ref.as_ref()); + + Ok(edge.attributes) + } + + /// Returns the number of nodes in the graph. + /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// + /// let mut graph = DataGraph::new(); + /// + /// graph.add_node(1, Attributes::new()).unwrap(); + /// graph.add_node(2, Attributes::new()).unwrap(); + /// + /// assert_eq!(graph.node_count(), 2); + /// ``` + pub fn node_count(&self) -> usize { + self.nodes.len() + } + + /// Returns the number of valid edges in the graph. 
///
/// # Examples
///
/// ```rust
/// use graphster::prelude::*;
///
/// let mut graph = DataGraph::new();
///
/// let nodes = vec![(0, Attributes::new()), (1, Attributes::new()), (2, Attributes::new())];
/// graph.add_nodes(nodes).unwrap();
///
/// graph.add_edge(0, 1, Attributes::new()).unwrap();
/// graph.add_edge(1, 2, Attributes::new()).unwrap();
///
/// assert_eq!(graph.edge_count(), 2);
/// ```
pub fn edge_count(&self) -> usize {
    self.edges.len()
}
+ /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// + /// let mut graph = DataGraph::new(); + /// graph.add_node(0, Attributes::new()).unwrap(); + /// + /// assert!(graph.contains_node(0)); + /// assert!(!graph.contains_node(1)); + /// ``` + pub fn contains_node<'a, N: Into>>(&self, node_index: N) -> bool { + self.nodes.contains_key(node_index.into().as_ref()) + } + + /// Checks if an edge with the given index exists in the graph. + /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// + /// let mut graph = DataGraph::new(); + /// + /// let nodes = vec![(0, Attributes::new()), (1, Attributes::new())]; + /// graph.add_nodes(nodes).unwrap(); + /// + /// let edge_index = graph.add_edge(0, 1, Attributes::new()).unwrap(); + /// + /// assert!(graph.contains_edge(&edge_index)); + /// assert!(!graph.contains_edge(100_usize)); + /// ``` + pub fn contains_edge<'a, E: Into>>(&self, edge_index: E) -> bool { + self.edges.contains_key(edge_index.into().as_ref()) + } + + /// Returns a reference to the attributes of the node with the given index. + /// + /// Returns an error of type `GraphsterError::NodeNotFound` if the node does not exist. + /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// use std::collections::HashMap; + /// + /// let mut graph = DataGraph::new(); + /// graph.add_node(0, HashMap::from([("name", "Alice")])).unwrap(); + /// + /// let node_attributes = graph.node_attributes(0).unwrap(); + /// assert_eq!(node_attributes.get(&"name".into()), Some(&"Alice".into())); + /// ``` + pub fn node_attributes<'a, N: Into>>( + &self, + node_index: N, + ) -> GraphsterResult<&Attributes> { + let node_index_ref = node_index.into(); + + Ok(&self + .nodes + .get(node_index_ref.as_ref()) + .ok_or(GraphsterError::NodeNotFound { + node_index: node_index_ref.clone(), + })? + .attributes) + } + + /// Returns a mutable reference to the attributes of the node with the given index. 
+ /// + /// Returns an error of type `GraphsterError::NodeNotFound` if the node does not exist. + /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// + /// let mut graph = DataGraph::new(); + /// graph.add_node(0, Attributes::new()).unwrap(); + /// + /// let mut node_attributes = graph.node_attributes_mut(0).unwrap(); + /// node_attributes.insert("name".into(), "Alice".into()); + /// ``` + pub fn node_attributes_mut<'a, N: Into>>( + &mut self, + node_index: N, + ) -> GraphsterResult<&mut Attributes> { + let node_index_ref = node_index.into(); + + Ok(&mut self + .nodes + .get_mut(node_index_ref.as_ref()) + .ok_or(GraphsterError::NodeNotFound { + node_index: node_index_ref.clone(), + })? + .attributes) + } + + /// Returns a reference to the attributes of the edge with the given index. + /// + /// Returns an error of type `GraphsterError::EdgeNotFound` if the edge does not exist. + /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// use std::collections::HashMap; + /// + /// let mut graph = DataGraph::new(); + /// + /// let nodes = vec![(0, Attributes::new()), (1, Attributes::new())]; + /// graph.add_nodes(nodes).unwrap(); + /// + /// let edge_index = graph.add_edge(0, 1, HashMap::from([("weight", 10)])).unwrap(); + /// + /// let edge_attributes = graph.edge_attributes(&edge_index).unwrap(); + /// assert_eq!(edge_attributes.get(&"weight".into()), Some(&10.into())); + /// ``` + pub fn edge_attributes<'a, E: Into>>( + &self, + edge_index: E, + ) -> GraphsterResult<&Attributes> { + let edge_index_ref = edge_index.into(); + + Ok(&self + .edges + .get(edge_index_ref.as_ref()) + .ok_or(GraphsterError::EdgeNotFound { + edge_index: *edge_index_ref, + })? + .attributes) + } + + /// Returns a mutable reference to the attributes of the edge with the given index. + /// + /// Returns an error of type `GraphsterError::EdgeNotFound` if the edge does not exist. 
+ /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// + /// let mut graph = DataGraph::new(); + /// + /// let nodes = vec![(0, Attributes::new()), (1, Attributes::new())]; + /// graph.add_nodes(nodes).unwrap(); + /// + /// let edge_index = graph.add_edge(0, 1, Attributes::new()).unwrap(); + /// + /// let mut edge_attributes = graph.edge_attributes_mut(&edge_index).unwrap(); + /// edge_attributes.insert("weight".into(), 20.into()); + /// ``` + pub fn edge_attributes_mut<'a, E: Into>>( + &mut self, + edge_index: E, + ) -> GraphsterResult<&mut Attributes> { + let edge_index_ref = edge_index.into(); + + Ok(&mut self + .edges + .get_mut(edge_index_ref.as_ref()) + .ok_or(GraphsterError::EdgeNotFound { + edge_index: *edge_index_ref, + })? + .attributes) + } + + pub fn incoming_edge_indices<'a, N: Into>>( + &self, + node_index: N, + ) -> GraphsterResult> { + let node_index_ref = node_index.into(); + + Ok(self + .nodes + .get(node_index_ref.as_ref()) + .ok_or(GraphsterError::NodeNotFound { + node_index: node_index_ref.clone(), + })? + .incoming_edges + .iter()) + } + + pub fn outgoing_edge_indices<'a, N: Into>>( + &self, + node_index: N, + ) -> GraphsterResult> { + let node_index_ref = node_index.into(); + + Ok(self + .nodes + .get(node_index_ref.as_ref()) + .ok_or(GraphsterError::NodeNotFound { + node_index: node_index_ref.clone(), + })? 
+ .outgoing_edges + .iter()) + } + + pub fn edges_connecting<'a, 'b, N1: Into>, N2: Into>>( + &self, + source_index: N1, + target_index: N2, + ) -> GraphsterResult> { + let source_index_ref = source_index.into(); + let target_index_ref = target_index.into(); + + let source_node = + self.nodes + .get(source_index_ref.as_ref()) + .ok_or(GraphsterError::NodeNotFound { + node_index: source_index_ref.clone(), + })?; + + let target_node = + self.nodes + .get(target_index_ref.as_ref()) + .ok_or(GraphsterError::NodeNotFound { + node_index: target_index_ref.clone(), + })?; + + Ok(source_node + .outgoing_edges + .intersection(&target_node.incoming_edges)) + } + + pub fn edges_connecting_undirected< + 'a, + 'b, + N1: Into>, + N2: Into>, + >( + &self, + first_node_index: N1, + second_node_index: N2, + ) -> GraphsterResult> { + let first_node_index_ref = first_node_index.into(); + let second_node_index_ref = second_node_index.into(); + + let first_node = + self.nodes + .get(first_node_index_ref.as_ref()) + .ok_or(GraphsterError::NodeNotFound { + node_index: first_node_index_ref.clone(), + })?; + + let second_node = + self.nodes + .get(second_node_index_ref.as_ref()) + .ok_or(GraphsterError::NodeNotFound { + node_index: second_node_index_ref.clone(), + })?; + + Ok(first_node + .outgoing_edges + .intersection(&second_node.incoming_edges) + .chain( + first_node + .incoming_edges + .intersection(&second_node.outgoing_edges), + )) + } + + pub fn neighbors<'a, N: Into>>( + &self, + node_index: N, + ) -> GraphsterResult> { + let node_index_ref = node_index.into(); + + let node = self + .nodes + .get(node_index_ref.as_ref()) + .ok_or(GraphsterError::NodeNotFound { + node_index: node_index_ref.clone(), + })?; + + Ok(node.outgoing_edges.iter().map(|edge_index| { + &self + .edges + .get(edge_index) + .expect("Edge must exist") + .target_index + })) + } + + pub fn neighbors_undirected<'a, N: Into>>( + &self, + node_index: N, + ) -> GraphsterResult> { + let node_index_ref = 
node_index.into(); + + let node = self + .nodes + .get(node_index_ref.as_ref()) + .ok_or(GraphsterError::NodeNotFound { + node_index: node_index_ref.clone(), + })?; + + Ok(node + .outgoing_edges + .iter() + .map(|edge_index| { + &self + .edges + .get(edge_index) + .expect("Edge must exist") + .target_index + }) + .chain(node.incoming_edges.iter().map(|edge_index| { + &self + .edges + .get(edge_index) + .expect("Edge must exist") + .source_index + }))) + } + + pub fn clear(&mut self) { + self.nodes.clear(); + self.edges.clear(); + self.edge_index_counter = AtomicUsize::new(0); + } + + pub fn clear_edges(&mut self) { + self.edges.clear(); + self.edge_index_counter = AtomicUsize::new(0); + + for node in self.nodes.values_mut() { + node.incoming_edges.clear(); + node.outgoing_edges.clear(); + } + } +} + +impl Default for DataGraph { + /// Creates a new empty graph using the `DataGraph::new` function. + /// + /// # Examples + /// + /// ```rust + /// use graphster::prelude::*; + /// + /// let graph = DataGraph::default(); + /// assert_eq!(graph.node_count(), 0); + /// assert_eq!(graph.edge_count(), 0); + /// ``` + fn default() -> Self { + Self::new() + } } #[cfg(test)] mod test { - use super::*; + use crate::{datatypes::Attributes, prelude::DataGraph}; #[test] fn test_add_node() { let mut graph = DataGraph::new(); - graph.add_node(0, Attributes::new()); + graph.add_node(0, Attributes::new()).unwrap(); assert_eq!(graph.nodes.len(), 1); + + graph.remove_node(0).unwrap(); } #[test] fn test_add_edge() { let mut graph = DataGraph::new(); - graph.add_node(0, Attributes::new()); - graph.add_node(1, Attributes::new()); + graph.add_node(0, Attributes::new()).unwrap(); + graph.add_node(1, Attributes::new()).unwrap(); graph.add_edge(0, 1, Attributes::new()).unwrap(); diff --git a/src/graph/node.rs b/src/graph/node.rs index 03ebcd6..6290153 100644 --- a/src/graph/node.rs +++ b/src/graph/node.rs @@ -1,8 +1,56 @@ use super::edge::EdgeIndex; -use crate::datatypes::{AttributeKey, 
Attributes}; -use std::collections::HashSet; +use crate::datatypes::{AttributeKey, Attributes, IntoAttributes}; +use std::{collections::HashSet, fmt::Display, ops::Deref}; -pub type NodeIndex = AttributeKey; +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[repr(transparent)] +pub struct NodeIndex(AttributeKey); + +impl> From for NodeIndex { + fn from(value: T) -> Self { + NodeIndex(value.into()) + } +} + +impl Display for NodeIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "NodeIndex({})", self.0) + } +} + +pub enum NodeIndexRef<'a> { + Owned(NodeIndex), + Borrowed(&'a NodeIndex), +} + +impl<'a> AsRef for NodeIndexRef<'a> { + fn as_ref(&self) -> &NodeIndex { + match self { + NodeIndexRef::Owned(value) => value, + NodeIndexRef::Borrowed(value) => value, + } + } +} + +impl<'a> From<&'a NodeIndex> for NodeIndexRef<'a> { + fn from(value: &'a NodeIndex) -> Self { + NodeIndexRef::Borrowed(value) + } +} + +impl<'a, T: Into> From for NodeIndexRef<'a> { + fn from(value: T) -> Self { + NodeIndexRef::Owned(value.into()) + } +} + +impl Deref for NodeIndexRef<'_> { + type Target = NodeIndex; + + fn deref(&self) -> &Self::Target { + self.as_ref() + } +} #[derive(Debug, Clone)] pub struct Node { @@ -20,3 +68,21 @@ impl Node { } } } + +impl From for Node { + fn from(value: Attributes) -> Self { + Self::new(value) + } +} + +pub type NodeTuple = (NodeIndex, Attributes); + +pub trait IntoNodeTuple { + fn into_node_tuple(self) -> NodeTuple; +} + +impl, A: IntoAttributes> IntoNodeTuple for (N, A) { + fn into_node_tuple(self) -> NodeTuple { + (self.0.into(), self.1.into_attributes()) + } +} diff --git a/src/graph/polars.rs b/src/graph/polars.rs new file mode 100644 index 0000000..1fd19c3 --- /dev/null +++ b/src/graph/polars.rs @@ -0,0 +1,747 @@ +use super::{ + edge::{Edge, EdgeIndex}, + node::{Node, NodeIndex}, + DataGraph, +}; +use crate::{ + errors::{GraphsterError, GraphsterResult}, + prelude::{AttributeKey, AttributeValue, Attributes}, +}; 
+use polars::{datatypes::AnyValue, frame::DataFrame}; +use rayon::prelude::*; +use std::{collections::HashMap, sync::atomic::AtomicUsize}; + +impl TryFrom> for AttributeKey { + type Error = GraphsterError; + + fn try_from(value: AnyValue) -> GraphsterResult { + match value { + AnyValue::Boolean(value) => Ok(Self::Boolean(value)), + AnyValue::Int16(value) => Ok(Self::Int16(value)), + AnyValue::Int32(value) => Ok(Self::Int32(value)), + AnyValue::Int64(value) => Ok(Self::Int64(value)), + AnyValue::Int8(value) => Ok(Self::Int8(value)), + AnyValue::String(value) => Ok(Self::String(value.to_string())), + AnyValue::UInt16(value) => Ok(Self::UInt16(value)), + AnyValue::UInt32(value) => Ok(Self::UInt32(value)), + AnyValue::UInt64(value) => Ok(Self::UInt64(value)), + AnyValue::UInt8(value) => Ok(Self::UInt8(value)), + _ => Err(GraphsterError::ConversionError(format!( + "Could not convert {} into AttributeKey", + value + ))), + } + } +} + +impl TryFrom> for AttributeValue { + type Error = GraphsterError; + + fn try_from(value: AnyValue) -> GraphsterResult { + match value { + AnyValue::Boolean(value) => Ok(Self::Boolean(value)), + AnyValue::Float32(value) => Ok(Self::Float32(value)), + AnyValue::Float64(value) => Ok(Self::Float64(value)), + AnyValue::Int16(value) => Ok(Self::Int16(value)), + AnyValue::Int32(value) => Ok(Self::Int32(value)), + AnyValue::Int64(value) => Ok(Self::Int64(value)), + AnyValue::Int8(value) => Ok(Self::Int8(value)), + AnyValue::Null => Ok(Self::Null), + AnyValue::String(value) => Ok(Self::String(value.to_string())), + AnyValue::UInt16(value) => Ok(Self::UInt16(value)), + AnyValue::UInt32(value) => Ok(Self::UInt32(value)), + AnyValue::UInt64(value) => Ok(Self::UInt64(value)), + AnyValue::UInt8(value) => Ok(Self::UInt8(value)), + _ => Err(GraphsterError::ConversionError(format!( + "Could not convert {} into AttributeValue", + value + ))), + } + } +} + +#[derive(Debug, Clone)] +pub struct NodesDataFrame<'a> { + pub(crate) dataframe: &'a DataFrame, + 
pub(crate) index_column: &'a str, +} + +impl NodesDataFrame<'_> { + pub fn new<'a>(dataframe: &'a DataFrame, index_column: &'a str) -> NodesDataFrame<'a> { + NodesDataFrame { + dataframe, + index_column, + } + } +} + +impl<'a> From<(&'a DataFrame, &'a str)> for NodesDataFrame<'a> { + fn from(value: (&'a DataFrame, &'a str)) -> Self { + NodesDataFrame::new(value.0, value.1) + } +} + +pub enum NodeDataFrameInput<'a> { + Single(NodesDataFrame<'a>), + Multiple(Vec>), +} + +impl<'a> From> for NodeDataFrameInput<'a> { + fn from(value: NodesDataFrame<'a>) -> Self { + NodeDataFrameInput::Single(value) + } +} + +impl<'a> From>> for NodeDataFrameInput<'a> { + fn from(value: Vec>) -> Self { + NodeDataFrameInput::Multiple(value) + } +} + +impl<'a, const N: usize> From<&[NodesDataFrame<'a>; N]> for NodeDataFrameInput<'a> { + fn from(value: &[NodesDataFrame<'a>; N]) -> Self { + NodeDataFrameInput::Multiple(value.to_vec()) + } +} + +#[derive(Debug, Clone)] +pub struct EdgesDataFrame<'a> { + pub(crate) dataframe: &'a DataFrame, + pub(crate) source_index_column: &'a str, + pub(crate) target_index_column: &'a str, +} + +impl EdgesDataFrame<'_> { + pub fn new<'a>( + dataframe: &'a DataFrame, + source_index_column: &'a str, + target_index_column: &'a str, + ) -> EdgesDataFrame<'a> { + EdgesDataFrame { + dataframe, + source_index_column, + target_index_column, + } + } +} + +impl<'a> From<(&'a DataFrame, &'a str, &'a str)> for EdgesDataFrame<'a> { + fn from(value: (&'a DataFrame, &'a str, &'a str)) -> Self { + EdgesDataFrame::new(value.0, value.1, value.2) + } +} + +pub enum EdgeDataFrameInput<'a> { + Single(EdgesDataFrame<'a>), + Multiple(Vec>), +} + +impl<'a> From> for EdgeDataFrameInput<'a> { + fn from(value: EdgesDataFrame<'a>) -> Self { + EdgeDataFrameInput::Single(value) + } +} + +impl<'a> From>> for EdgeDataFrameInput<'a> { + fn from(value: Vec>) -> Self { + EdgeDataFrameInput::Multiple(value) + } +} + +impl<'a, const N: usize> From<&[EdgesDataFrame<'a>; N]> for 
EdgeDataFrameInput<'a> { + fn from(value: &[EdgesDataFrame<'a>; N]) -> Self { + EdgeDataFrameInput::Multiple(value.to_vec()) + } +} + +fn dataframe_to_nodes<'a>( + nodes_dataframe: NodesDataFrame<'a>, +) -> GraphsterResult> + 'a> { + let dataframe = nodes_dataframe.dataframe; + let index_column = nodes_dataframe.index_column; + + let attribute_column_names = dataframe + .get_column_names() + .into_iter() + .filter(|name| *name != index_column) + .collect::>(); + + let index_column = dataframe + .column(index_column) + .map_err(|_| { + GraphsterError::ConversionError(format!( + "Could not find column with name {} in dataframe", + index_column + )) + })? + .iter(); + + let mut attribute_columns = dataframe + .columns(&attribute_column_names) + .expect("Columns must exist") + .iter() + .map(|s| s.iter()) + .zip(attribute_column_names) + .collect::>(); + + Ok(index_column.map(move |index_value| { + Ok(( + AttributeKey::try_from(index_value)?.into(), + attribute_columns + .iter_mut() + .map(|(column, column_name)| { + Ok(( + (*column_name).into(), + column + .next() + .expect("Has as many iterations as index_column") + .try_into()?, + )) + }) + .collect::>()? + .into(), + )) + })) +} + +fn dataframe_to_edges<'a>( + edges_dataframe: EdgesDataFrame<'a>, + edge_index_counter: &'a mut AtomicUsize, +) -> GraphsterResult> + 'a> { + let dataframe = edges_dataframe.dataframe; + let source_index_column = edges_dataframe.source_index_column; + let target_index_column = edges_dataframe.target_index_column; + + let attribute_column_names = dataframe + .get_column_names() + .into_iter() + .filter(|name| *name != source_index_column && *name != target_index_column) + .collect::>(); + + let source_index_column = dataframe + .column(source_index_column) + .map_err(|_| { + GraphsterError::ConversionError(format!( + "Could find column with name {} in dataframe", + source_index_column + )) + })? 
+ .iter(); + + let target_index_column = dataframe + .column(target_index_column) + .map_err(|_| { + GraphsterError::ConversionError(format!( + "Could find column with name {} in dataframe", + target_index_column + )) + })? + .iter(); + + let mut attribute_columns = dataframe + .columns(&attribute_column_names) + .expect("Columns must exist") + .iter() + .map(|s| s.iter()) + .zip(attribute_column_names) + .collect::>(); + + Ok(source_index_column.zip(target_index_column).map( + move |(source_index_value, target_index_value)| { + let index = edge_index_counter + .fetch_add(1, std::sync::atomic::Ordering::Relaxed) + .into(); + + Ok(( + index, + ( + NodeIndex::from(AttributeKey::try_from(source_index_value)?), + NodeIndex::from(AttributeKey::try_from(target_index_value)?), + attribute_columns + .iter_mut() + .map(|(column, column_name)| { + Ok(( + (*column_name).into(), + column + .next() + .expect("Has as many iterations as index_columns") + .try_into()?, + )) + }) + .collect::>()?, + ) + .into(), + )) + }, + )) +} + +fn create_nodes_from_single_dataframe( + nodes_dataframe: NodesDataFrame, +) -> GraphsterResult> { + dataframe_to_nodes(nodes_dataframe)?.par_bridge().collect() +} + +fn create_nodes_from_multiple_dataframes( + nodes_dataframes: Vec, +) -> GraphsterResult> { + let nodes = nodes_dataframes + .into_par_iter() + .map(|nodes_dataframe| { + dataframe_to_nodes(nodes_dataframe)?.collect::>>() + }) + .collect::>>()? 
+ .into_par_iter() + .flatten() + .collect(); + + Ok(nodes) +} + +fn create_edges_from_single_dataframe<'a>( + edges_dataframe: EdgesDataFrame<'a>, + nodes: &'a mut HashMap, + edge_index_counter: &'a mut AtomicUsize, +) -> GraphsterResult> + 'a> { + let edges = dataframe_to_edges(edges_dataframe, edge_index_counter)?; + + Ok(edges.map(|result| { + let (edge_index, edge) = result?; + + let source_node = + nodes + .get_mut(&edge.source_index) + .ok_or(GraphsterError::NodeNotFound { + node_index: edge.source_index.clone(), + })?; + source_node.outgoing_edges.insert(edge_index); + + let target_node = + nodes + .get_mut(&edge.target_index) + .ok_or(GraphsterError::NodeNotFound { + node_index: edge.target_index.clone(), + })?; + target_node.incoming_edges.insert(edge_index); + + Ok((edge_index, edge)) + })) +} + +fn create_edges_from_multiple_dataframes<'a>( + edges_dataframes: Vec, + nodes: &'a mut HashMap, + edge_index_counter: &'a mut AtomicUsize, +) -> GraphsterResult + 'a> { + let edges = edges_dataframes + .into_par_iter() + .map(|edges_dataframe| { + let mut counter = AtomicUsize::new(edge_index_counter.fetch_add( + edges_dataframe.dataframe.height(), + std::sync::atomic::Ordering::Relaxed, + )); + + let raw_edges = dataframe_to_edges(edges_dataframe, &mut counter)?; + + raw_edges.collect::>>() + }) + .collect::>>()? + .into_par_iter() + .flatten(); + + Ok(edges + .map(|result| { + let (edge_index, edge) = result; + + if !nodes.contains_key(&edge.source_index) { + return Err(GraphsterError::NodeNotFound { + node_index: edge.source_index, + }); + } + + if !nodes.contains_key(&edge.target_index) { + return Err(GraphsterError::NodeNotFound { + node_index: edge.target_index, + }); + } + + Ok((edge_index, edge)) + }) + .collect::>>()? 
+ .into_iter() + .map(|(edge_index, edge)| { + let source_node = nodes + .get_mut(&edge.source_index) + .expect("Source node must exist"); + source_node.outgoing_edges.insert(edge_index); + + let target_node = nodes + .get_mut(&edge.target_index) + .expect("Target node must exist"); + target_node.incoming_edges.insert(edge_index); + + (edge_index, edge) + })) +} + +impl DataGraph { + /// Test + pub fn from_nodes_dataframes<'a, N: Into>>( + nodes: N, + ) -> GraphsterResult { + match nodes.into() { + NodeDataFrameInput::Single(dataframe_tuple) => { + Self::_from_single_nodes_dataframe(dataframe_tuple) + } + NodeDataFrameInput::Multiple(dataframe_tuples) => { + Self::_from_multiple_nodes_dataframes(dataframe_tuples) + } + } + } + + pub fn from_nodes_and_edges_dataframes< + 'a, + N: Into>, + E: Into>, + >( + nodes: N, + edges: E, + ) -> GraphsterResult { + match (nodes.into(), edges.into()) { + ( + NodeDataFrameInput::Single(nodes_dataframe_tuple), + EdgeDataFrameInput::Single(edges_dataframe_tuple), + ) => Self::_from_single_nodes_single_edges_dataframe( + nodes_dataframe_tuple, + edges_dataframe_tuple, + ), + ( + NodeDataFrameInput::Single(nodes_dataframe_tuple), + EdgeDataFrameInput::Multiple(edges_dataframe_tuples), + ) => Self::_from_single_nodes_multiple_edges_dataframe( + nodes_dataframe_tuple, + edges_dataframe_tuples, + ), + ( + NodeDataFrameInput::Multiple(nodes_dataframe_tuples), + EdgeDataFrameInput::Single(edges_dataframe_tuple), + ) => Self::_from_multiple_nodes_single_edges_dataframe( + nodes_dataframe_tuples, + edges_dataframe_tuple, + ), + ( + NodeDataFrameInput::Multiple(nodes_dataframe_tuples), + EdgeDataFrameInput::Multiple(edges_dataframe_tuples), + ) => Self::_from_multiple_nodes_multiple_edges_dataframe( + nodes_dataframe_tuples, + edges_dataframe_tuples, + ), + } + } + + pub fn add_nodes_dataframes<'a, N: Into>>( + &mut self, + nodes: N, + ) -> GraphsterResult<()> { + match nodes.into() { + NodeDataFrameInput::Single(nodes_dataframe_tuple) => { 
+ self._add_single_nodes_dataframe(nodes_dataframe_tuple) + } + NodeDataFrameInput::Multiple(nodes_dataframe_tuples) => { + self._add_multiple_nodes_dataframes(nodes_dataframe_tuples) + } + } + } + + pub fn add_edges_dataframes<'a, E: Into>>( + &mut self, + edges: E, + ) -> GraphsterResult<()> { + match edges.into() { + EdgeDataFrameInput::Single(edges_dataframe_tuple) => { + self._add_single_edges_dataframe(edges_dataframe_tuple) + } + EdgeDataFrameInput::Multiple(edges_dataframe_tuples) => { + self._add_multiple_edges_dataframes(edges_dataframe_tuples) + } + } + } + + fn _from_single_nodes_dataframe(nodes_dataframe: NodesDataFrame) -> GraphsterResult { + let nodes = create_nodes_from_single_dataframe(nodes_dataframe)?; + + let mut nodes_map = HashMap::::new(); + + for (node_index, node) in nodes { + match nodes_map.entry(node_index) { + std::collections::hash_map::Entry::Occupied(entry) => { + return Err(GraphsterError::NodeAlreadyExists { + node_index: entry.key().clone(), + }); + } + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(node); + } + } + } + + Ok(Self { + nodes: nodes_map, + edges: HashMap::new(), + edge_index_counter: AtomicUsize::new(0), + }) + } + + fn _from_multiple_nodes_dataframes( + nodes_dataframes: Vec, + ) -> GraphsterResult { + let nodes = create_nodes_from_multiple_dataframes(nodes_dataframes)?; + + let mut nodes_map = HashMap::::new(); + + for (node_index, node) in nodes { + match nodes_map.entry(node_index) { + std::collections::hash_map::Entry::Occupied(entry) => { + return Err(GraphsterError::NodeAlreadyExists { + node_index: entry.key().clone(), + }); + } + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(node); + } + } + } + + Ok(Self { + nodes: nodes_map, + edges: HashMap::new(), + edge_index_counter: AtomicUsize::new(0), + }) + } + + fn _from_single_nodes_single_edges_dataframe( + nodes_dataframe: NodesDataFrame, + edges_dataframe: EdgesDataFrame, + ) -> GraphsterResult { + let nodes = 
create_nodes_from_single_dataframe(nodes_dataframe)?; + + let mut nodes_map = HashMap::::new(); + + for (node_index, node) in nodes { + match nodes_map.entry(node_index) { + std::collections::hash_map::Entry::Occupied(entry) => { + return Err(GraphsterError::NodeAlreadyExists { + node_index: entry.key().clone(), + }); + } + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(node); + } + } + } + + let mut edge_index_counter = AtomicUsize::new(0); + + let edges = create_edges_from_single_dataframe( + edges_dataframe, + &mut nodes_map, + &mut edge_index_counter, + )? + .collect::>()?; + + Ok(DataGraph { + nodes: nodes_map, + edges, + edge_index_counter, + }) + } + + fn _from_single_nodes_multiple_edges_dataframe( + nodes_dataframe: NodesDataFrame, + edges_dataframes: Vec, + ) -> GraphsterResult { + let mut nodes = dataframe_to_nodes(nodes_dataframe)? + .par_bridge() + .collect::>()?; + + let mut edge_index_counter = AtomicUsize::new(0); + + let edges = create_edges_from_multiple_dataframes( + edges_dataframes, + &mut nodes, + &mut edge_index_counter, + )? + .collect(); + + Ok(DataGraph { + nodes, + edges, + edge_index_counter, + }) + } + + fn _from_multiple_nodes_single_edges_dataframe( + nodes_dataframes: Vec, + edges_dataframe: EdgesDataFrame, + ) -> GraphsterResult { + let nodes = create_nodes_from_multiple_dataframes(nodes_dataframes)?; + + let mut nodes_map = HashMap::::new(); + + for (node_index, node) in nodes { + match nodes_map.entry(node_index) { + std::collections::hash_map::Entry::Occupied(entry) => { + return Err(GraphsterError::NodeAlreadyExists { + node_index: entry.key().clone(), + }); + } + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(node); + } + } + } + + let mut edge_index_counter = AtomicUsize::new(0); + + let edges = create_edges_from_single_dataframe( + edges_dataframe, + &mut nodes_map, + &mut edge_index_counter, + )? 
+ .collect::>()?; + + Ok(DataGraph { + nodes: nodes_map, + edges, + edge_index_counter, + }) + } + + fn _from_multiple_nodes_multiple_edges_dataframe( + nodes_dataframes: Vec, + edges_dataframes: Vec, + ) -> GraphsterResult { + let nodes = create_nodes_from_multiple_dataframes(nodes_dataframes)?; + + let mut nodes_map = HashMap::::new(); + + for (node_index, node) in nodes { + match nodes_map.entry(node_index) { + std::collections::hash_map::Entry::Occupied(entry) => { + return Err(GraphsterError::NodeAlreadyExists { + node_index: entry.key().clone(), + }); + } + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(node); + } + } + } + + let mut edge_index_counter = AtomicUsize::new(0); + + let edges = create_edges_from_multiple_dataframes( + edges_dataframes, + &mut nodes_map, + &mut edge_index_counter, + )? + .collect(); + + Ok(DataGraph { + nodes: nodes_map, + edges, + edge_index_counter, + }) + } + + fn _add_single_nodes_dataframe( + &mut self, + nodes_dataframe: NodesDataFrame, + ) -> GraphsterResult<()> { + let nodes = create_nodes_from_single_dataframe(nodes_dataframe)?; + + let mut nodes_map = HashMap::::new(); + + for (node_index, node) in nodes { + if !self.nodes.contains_key(&node_index) { + return Err(GraphsterError::NodeAlreadyExists { node_index }); + } + + match nodes_map.entry(node_index) { + std::collections::hash_map::Entry::Occupied(entry) => { + return Err(GraphsterError::NodeAlreadyExists { + node_index: entry.key().clone(), + }); + } + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(node); + } + } + } + + self.nodes.extend(nodes_map); + + Ok(()) + } + + fn _add_multiple_nodes_dataframes( + &mut self, + nodes_dataframes: Vec, + ) -> GraphsterResult<()> { + let nodes = create_nodes_from_multiple_dataframes(nodes_dataframes)?; + + let mut nodes_map = HashMap::::new(); + + for (node_index, node) in nodes { + if !self.nodes.contains_key(&node_index) { + return Err(GraphsterError::NodeAlreadyExists { node_index 
}); + } + + match nodes_map.entry(node_index) { + std::collections::hash_map::Entry::Occupied(entry) => { + return Err(GraphsterError::NodeAlreadyExists { + node_index: entry.key().clone(), + }); + } + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(node); + } + } + } + + self.nodes.extend(nodes_map); + + Ok(()) + } + + fn _add_single_edges_dataframe( + &mut self, + edges_dataframe: EdgesDataFrame, + ) -> GraphsterResult<()> { + let edges = create_edges_from_single_dataframe( + edges_dataframe, + &mut self.nodes, + &mut self.edge_index_counter, + )? + .collect::>>()?; + + self.edges.extend(edges); + + Ok(()) + } + + fn _add_multiple_edges_dataframes( + &mut self, + edges_dataframes: Vec, + ) -> GraphsterResult<()> { + let edges = create_edges_from_multiple_dataframes( + edges_dataframes, + &mut self.nodes, + &mut self.edge_index_counter, + )?; + + self.edges.extend(edges); + + Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs index 8416420..bde4402 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ +#![cfg_attr(docsrs, feature(doc_cfg))] + pub mod datatypes; -mod errors; -mod graph; +pub mod errors; +pub mod graph; mod macros; - -pub use errors::GraphsterError; -pub use graph::DataGraph; +pub mod prelude; diff --git a/src/macros.rs b/src/macros.rs index 9fd8199..eb6b4f2 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1,3 +1,4 @@ +#[doc(hidden)] #[macro_export] macro_rules! implement_from_wrapper { ($for:ty, $variant:ident, $from:ty) => { diff --git a/src/prelude.rs b/src/prelude.rs index e69de29..cf997fa 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -0,0 +1,3 @@ +pub use crate::datatypes::{AttributeKey, AttributeValue, Attributes}; +pub use crate::errors::GraphsterError; +pub use crate::graph::{DataGraph, EdgeIndex, NodeIndex};