From 8d091eae185eea7c864fe891a90970597de1b6af Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Sun, 12 Jan 2025 20:34:10 -0500 Subject: [PATCH] feat: bring vortex-tui into tree As we change/break format makes it easier to keep this tool up to date. Also helpful for debugging/exploring format --- .cargo/config.toml | 1 + Cargo.lock | 212 ++++++++++++++++++++++++-- Cargo.toml | 1 + vortex-cli/Cargo.toml | 29 ++++ vortex-cli/README.md | 58 +++++++ vortex-cli/src/browse/app.rs | 203 ++++++++++++++++++++++++ vortex-cli/src/browse/mod.rs | 83 ++++++++++ vortex-cli/src/browse/ui.rs | 62 ++++++++ vortex-cli/src/browse/ui/encodings.rs | 8 + vortex-cli/src/browse/ui/layouts.rs | 203 ++++++++++++++++++++++++ vortex-cli/src/main.rs | 34 +++++ vortex-cli/src/tree.rs | 31 ++++ 12 files changed, 915 insertions(+), 10 deletions(-) create mode 100644 vortex-cli/Cargo.toml create mode 100644 vortex-cli/README.md create mode 100644 vortex-cli/src/browse/app.rs create mode 100644 vortex-cli/src/browse/mod.rs create mode 100644 vortex-cli/src/browse/ui.rs create mode 100644 vortex-cli/src/browse/ui/encodings.rs create mode 100644 vortex-cli/src/browse/ui/layouts.rs create mode 100644 vortex-cli/src/main.rs create mode 100644 vortex-cli/src/tree.rs diff --git a/.cargo/config.toml b/.cargo/config.toml index 19c5afd3d6..1ea71cef3a 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -15,3 +15,4 @@ rustflags = [ [alias] xtask = "run -p xtask --" +vx = "run -p vortex-cli --" diff --git a/Cargo.lock b/Cargo.lock index 85cafdc295..9e5a369810 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -619,12 +619,27 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "cassowary" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" + [[package]] name = "cast" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "castaway" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.2.7" @@ -766,7 +781,21 @@ checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" dependencies = [ "strum", "strum_macros", - "unicode-width", + "unicode-width 0.2.0", +] + +[[package]] +name = "compact_str" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b79c4069c6cad78e2e0cdfcbd26275770669fb39fd308a752dc110e83b9af32" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "static_assertions", ] [[package]] @@ -1002,7 +1031,7 @@ dependencies = [ "encode_unicode", "libc", "once_cell", - "unicode-width", + "unicode-width 0.2.0", "windows-sys 0.59.0", ] @@ -1166,6 +1195,31 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + "bitflags 2.6.0", + "crossterm_winapi", + "mio", + "parking_lot", + "rustix", + "signal-hook", + "signal-hook-mio", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + [[package]] name = "crunchy" version = "0.2.2" @@ -1203,6 +1257,41 @@ dependencies = [ "memchr", ] +[[package]] +name = "darling" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.95", +] + +[[package]] +name = "darling_macro" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.95", +] + [[package]] name = "dashmap" version = "6.1.0" @@ -1729,7 +1818,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2382,6 +2471,12 @@ dependencies = [ "syn 2.0.95", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.0.3" @@ -2422,7 +2517,7 @@ dependencies = [ "console", "number_prefix", "portable-atomic", - "unicode-width", + "unicode-width 0.2.0", "web-time", ] @@ -2432,6 +2527,19 @@ version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +[[package]] +name = "instability" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf9fed6d91cfb734e7476a06bde8300a1b94e217e1b523b6f0cd1a01998c71d" +dependencies = [ + "darling", + "indoc", + "proc-macro2", + "quote", + "syn 2.0.95", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -2516,7 +2624,7 
@@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2710,6 +2818,15 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.2", +] + [[package]] name = "lz4_flex" version = "0.11.3" @@ -2790,6 +2907,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ "libc", + "log", "wasi", "windows-sys 0.52.0", ] @@ -3137,7 +3255,7 @@ checksum = "d2b0f8def1f117e13c895f3eda65a7b5650688da29d6ad04635f61bc7b92eebd" dependencies = [ "bytecount", "fnv", - "unicode-width", + "unicode-width 0.2.0", ] [[package]] @@ -3631,7 +3749,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3673,6 +3791,27 @@ dependencies = [ "getrandom", ] +[[package]] +name = "ratatui" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabd94c2f37801c20583fc49dd5cd6b0ba68c716787c2dd6ed18571e1e63117b" +dependencies = [ + "bitflags 2.6.0", + "cassowary", + "compact_str", + "crossterm", + "indoc", + "instability", + "itertools 0.13.0", + "lru", + "paste", + "strum", + "unicode-segmentation", + "unicode-truncate", + "unicode-width 0.2.0", +] + [[package]] name = "rayon" version = "1.10.0" @@ -3886,7 +4025,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -4133,6 +4272,27 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "signal-hook" +version = "0.3.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-mio" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd" +dependencies = [ + "libc", + "mio", + "signal-hook", +] + [[package]] name = "signal-hook-registry" version = "1.4.2" @@ -4285,6 +4445,9 @@ name = "strum" version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] [[package]] name = "strum_macros" @@ -4489,7 +4652,7 @@ dependencies = [ "getrandom", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -4867,6 +5030,23 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-truncate" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3644627a5af5fa321c95b9b235a72fd24cd29c648c2c379431e6628655627bf" +dependencies = [ + "itertools 0.13.0", + "unicode-segmentation", + "unicode-width 0.1.14", +] + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "unicode-width" version = "0.2.0" @@ -5090,6 +5270,18 @@ dependencies = [ "vortex-scalar", ] +[[package]] +name = "vortex-cli" +version = "0.21.1" +dependencies = [ + "bytes", + "clap", + "crossterm", + "ratatui", + "tokio", + "vortex", +] + [[package]] name = "vortex-datafusion" version = "0.21.1" @@ -5656,7 +5848,7 @@ version = "0.1.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 6af2ec045d..3dad2a1d46 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ members = [ "vortex", "vortex-array", "vortex-buffer", + "vortex-cli", "vortex-datafusion", "vortex-datetime-dtype", "vortex-dtype", diff --git a/vortex-cli/Cargo.toml b/vortex-cli/Cargo.toml new file mode 100644 index 0000000000..d92d8d4d26 --- /dev/null +++ b/vortex-cli/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "vortex-cli" +description = "a small but mighty tool for working with Vortex files" +version.workspace = true +homepage.workspace = true +repository.workspace = true +authors.workspace = true +license.workspace = true +keywords.workspace = true +include.workspace = true +edition.workspace = true +rust-version.workspace = true +readme.workspace = true +categories.workspace = true + +[dependencies] +bytes = { workspace = true } +clap = { version = "4", features = ["derive"] } +crossterm = "0.28" +ratatui = "0.29" +tokio = { workspace = true, features = ["rt-multi-thread"] } +vortex = { workspace = true, features = ["tokio"] } + +[lints] +workspace = true + +[[bin]] +name = "vx" +path = "src/main.rs" diff --git a/vortex-cli/README.md b/vortex-cli/README.md new file mode 100644 index 0000000000..8d362eaa63 --- /dev/null +++ b/vortex-cli/README.md @@ -0,0 +1,58 @@ +# `vx` Vortex CLI + +A small, helpful CLI tool for exploring and analyzing Vortex files.
+ +* `browse`: Browse the structure of your Vortex file with a rich TUI +* `tree`: Print the encoding tree of your Vortex file + + +## Examples + +Using the `tree` subcommand to print the encoding tree for a file: + +``` +$ vx tree ./bench-vortex/data/tpch/1/vortex_compressed/nation.vortex + +root: vortex.struct(0x04)({n_nationkey=i64, n_name=utf8, n_regionkey=i64, n_comment=utf8?}, len=25) nbytes=3.04 kB (100.00%) + metadata: StructMetadata { validity: NonNullable } + n_nationkey: $vortex.primitive(0x03)(i64, len=25) nbytes=201 B (6.62%) + metadata: PrimitiveMetadata { validity: NonNullable } + buffer (align=8): 200 B + n_name: $vortex.varbinview(0x06)(utf8, len=25) nbytes=461 B (15.18%) + metadata: VarBinViewMetadata { validity: NonNullable, buffer_lens: [27] } + views: $vortex.primitive(0x03)(u8, len=400) nbytes=401 B (13.20%) + metadata: PrimitiveMetadata { validity: NonNullable } + buffer (align=1): 400 B + bytes_0: $vortex.primitive(0x03)(u8, len=27) nbytes=28 B (0.92%) + metadata: PrimitiveMetadata { validity: NonNullable } + buffer (align=1): 27 B + n_regionkey: $vortex.dict(0x14)(i64, len=25) nbytes=83 B (2.73%) + metadata: DictMetadata { codes_ptype: U8, values_len: 5 } + values: $vortex.primitive(0x03)(i64, len=5) nbytes=41 B (1.35%) + metadata: PrimitiveMetadata { validity: NonNullable } + buffer (align=8): 40 B + codes: $vortex.primitive(0x03)(u8, len=25) nbytes=26 B (0.86%) + metadata: PrimitiveMetadata { validity: NonNullable } + buffer (align=1): 25 B + n_comment: $vortex.varbinview(0x06)(utf8?, len=25) nbytes=2.29 kB (75.44%) + metadata: VarBinViewMetadata { validity: AllValid, buffer_lens: [1857] } + views: $vortex.primitive(0x03)(u8, len=400) nbytes=401 B (13.20%) + metadata: PrimitiveMetadata { validity: NonNullable } + buffer (align=1): 400 B + bytes_0: $vortex.primitive(0x03)(u8, len=1857) nbytes=1.86 kB (61.18%) + metadata: PrimitiveMetadata { validity: NonNullable } + buffer (align=1): 1.86 kB +``` + +Opening an interactive TUI to browse the sample
file: + +``` +vx browse ./bench-vortex/data/tpch/1/vortex_compressed/nation.vortex +``` + +## Development + +TODO: + +- [ ] `cat` to print a Vortex file as JSON to stdout +- [ ] `compress` to ingest JSON/CSV/other formats that are Arrow-compatible diff --git a/vortex-cli/src/browse/app.rs b/vortex-cli/src/browse/app.rs new file mode 100644 index 0000000000..d0717a3a18 --- /dev/null +++ b/vortex-cli/src/browse/app.rs @@ -0,0 +1,203 @@ +use std::ops::Range; +use std::os::unix::fs::FileExt; +use std::path::Path; +use std::sync::Arc; + +use bytes::{Bytes, BytesMut}; +use ratatui::widgets::ListState; +use vortex::dtype::{DType, Field, Nullability, StructDType}; +use vortex::error::VortexResult; +use vortex::file::{ + read_initial_bytes, InitialRead, CHUNKED_LAYOUT_ID, COLUMNAR_LAYOUT_ID, FLAT_LAYOUT_ID, +}; +use vortex::flatbuffers::footer; +use vortex::io::{TokioFile, VortexReadAt}; +use vortex::stats::stats_from_bitset_bytes; +// Add a shared Tokio Runtime for use in the app. + +#[derive(Default, Copy, Clone, Eq, PartialEq)] +pub enum Tab { + /// The layout tree browser. + #[default] + Layout, + /// The encoding tree viewer + Encodings, + // TODO(aduffy): SQL query page powered by DF + // Query, +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Encoding { + Flat, + Chunked, + Columnar, + Unknown, +} + +impl From for Encoding { + fn from(value: u16) -> Self { + if value == FLAT_LAYOUT_ID.0 { + Encoding::Flat + } else if value == CHUNKED_LAYOUT_ID.0 { + Encoding::Chunked + } else if value == COLUMNAR_LAYOUT_ID.0 { + Encoding::Columnar + } else { + Encoding::Unknown + } + } +} + +/// A pointer into the `Layout` hierarchy that can be advanced. +/// +/// The pointer wraps an InitialRead. 
+pub struct LayoutCursor { + path: Vec, + initial_read: InitialRead, + dtype: DType, + encoding: Encoding, +} + +impl LayoutCursor { + pub fn new(initial_read: InitialRead) -> Self { + Self { + dtype: initial_read.dtype(), + encoding: initial_read.fb_layout().encoding().into(), + initial_read, + path: Vec::new(), + } + } + + pub fn new_with_path(initial_read: InitialRead, path: Vec) -> Self { + let mut layout = initial_read.fb_layout(); + let mut dtype = initial_read.dtype(); + let mut encoding = Encoding::from(layout.encoding()); + + // Traverse the layout tree at each element of the path. + for component in path.iter().copied() { + // Find the DType of the child based on the DType of the current node. + dtype = match encoding { + Encoding::Chunked => { + if component == 0 && layout.metadata().is_some() { + let stats = stats_from_bitset_bytes( + layout.metadata().expect("extracting stats").bytes(), + ); + + // When Chunked layout has a metadata field set, it will have a DType with + // STRUCT type and one field for each of the statistics. 
+ let struct_dtype = StructDType::new( + stats + .iter() + .map(|stat| Arc::from(stat.to_string().as_str())) + .collect::>>() + .into(), + stats + .iter() + .map(|stat| stat.dtype(&dtype)) + .collect::>(), + ); + DType::Struct(struct_dtype, Nullability::NonNullable) + } else { + // If there is no metadata, all children + dtype.clone() + } + } + Encoding::Columnar => dtype + .as_struct() + .expect("struct dtype") + .field_info(&Field::Index(component)) + .expect("struct dtype component access") + .dtype + .value() + .expect("dtype value"), + Encoding::Unknown => todo!("unknown DType"), + // Flat layouts have no children + Encoding::Flat => unreachable!(), + }; + + layout = layout.children().expect("children").get(component); + encoding = layout.encoding().into(); + } + + Self { + path, + initial_read, + dtype, + encoding, + } + } + + /// Create a new LayoutCursor indexing into the n-th child of the layout at the current + /// cursor position. + pub fn child(&self, n: usize) -> Self { + let mut path = self.path.clone(); + path.push(n); + + Self::new_with_path(self.initial_read.clone(), path) + } + + pub fn parent(&self) -> Self { + let mut path = self.path.clone(); + path.pop(); + + Self::new_with_path(self.initial_read.clone(), path) + } + + pub fn dtype(&self) -> &DType { + &self.dtype + } + + pub fn encoding(&self) -> Encoding { + self.encoding + } + + pub fn layout_fb(&self) -> footer::Layout { + let mut layout = self.initial_read.fb_layout(); + for component in self.path.iter().copied() { + layout = layout.children().expect("children").get(component); + } + + layout + } +} + +/// State saved across all Tabs. +/// +/// Holding them all allows us to switch between tabs without resetting view state. +pub struct AppState { + pub reader: TokioFile, + pub cursor: LayoutCursor, + pub current_tab: Tab, + + /// List state for the Layouts view + pub layouts_list_state: ListState, +} + +impl AppState { + // Read the given byte range. 
+ // We're cheating by doing the reads synchronously since we're given byte offsets. + pub fn read_bytes_sync(&self, range: Range) -> Bytes { + let mut buf = BytesMut::zeroed((range.end - range.start).try_into().expect("zeroed")); + self.reader + .read_exact_at(&mut buf, range.start) + .expect("read_exact_at sync"); + + buf.freeze() + } +} + +/// Create an app backed from a file path. +pub async fn create_file_app(path: impl AsRef) -> VortexResult { + let reader = TokioFile::open(path)?; + let size = reader.size().await?; + let initial_read = read_initial_bytes(&reader, size).await?; + + let cursor = LayoutCursor::new(initial_read); + + Ok(AppState { + reader, + cursor, + current_tab: Tab::default(), + layouts_list_state: ListState::default().with_selected(Some(0)), + }) +} diff --git a/vortex-cli/src/browse/mod.rs b/vortex-cli/src/browse/mod.rs new file mode 100644 index 0000000000..cc9ef95530 --- /dev/null +++ b/vortex-cli/src/browse/mod.rs @@ -0,0 +1,83 @@ +use std::path::Path; + +use app::{create_file_app, AppState, Tab}; +use crossterm::event; +use crossterm::event::{Event, KeyCode, KeyEventKind}; +use ratatui::widgets::ListState; +use ratatui::DefaultTerminal; +use ui::render_app; +use vortex::error::VortexResult; + +use crate::TOKIO_RUNTIME; + +mod app; +mod ui; + +// Use the VortexResult and potentially launch a Backtrace. +fn run(mut terminal: DefaultTerminal, mut app: AppState) -> VortexResult<()> { + loop { + terminal.draw(|frame| render_app(&mut app, frame))?; + + if let Event::Key(key) = event::read()? { + if key.kind == KeyEventKind::Press { + match key.code { + KeyCode::Char('q') => break Ok(()), + KeyCode::Tab => { + // toggle between tabs + app.current_tab = match app.current_tab { + Tab::Layout => Tab::Encodings, + Tab::Encodings => Tab::Layout, + }; + } + KeyCode::Up => { + // We send the key-up to the list state if we're looking at + // the Layouts tab. 
+ if app.current_tab == Tab::Layout { + app.layouts_list_state.scroll_up_by(1); + } + } + KeyCode::Down => { + if app.current_tab == Tab::Layout { + app.layouts_list_state.scroll_down_by(1); + } + } + KeyCode::Enter => { + if app.current_tab == Tab::Layout { + // Descend into the layout subtree for the selected child. + let selected = app.layouts_list_state.selected().unwrap_or_default(); + app.cursor = app.cursor.child(selected); + + // Reset the list scroll state. + app.layouts_list_state = ListState::default().with_selected(Some(0)); + } + } + KeyCode::Left => { + if app.current_tab == Tab::Layout { + // Ascend back up to the Parent node + app.cursor = app.cursor.parent(); + // Reset the list scroll state. + app.layouts_list_state = ListState::default().with_selected(Some(0)); + } + } + // Most events not handled + _ => {} + } + } + } + } +} + +// TODO: add tui_logger and have a logs tab so we can see the log output from +// doing Vortex things. + +pub fn exec_tui(file: impl AsRef) -> VortexResult<()> { + let app = TOKIO_RUNTIME.block_on(create_file_app(file))?; + + let mut terminal = ratatui::init(); + terminal.clear()?; + + run(terminal, app)?; + + ratatui::restore(); + Ok(()) +} diff --git a/vortex-cli/src/browse/ui.rs b/vortex-cli/src/browse/ui.rs new file mode 100644 index 0000000000..48701d9f31 --- /dev/null +++ b/vortex-cli/src/browse/ui.rs @@ -0,0 +1,62 @@ +mod encodings; +mod layouts; + +use encodings::encodings_ui; +use layouts::render_layouts; +use ratatui::prelude::*; +use ratatui::widgets::{Block, BorderType, Borders, Tabs}; + +use super::app::{AppState, Tab}; + +pub fn render_app(app: &mut AppState, frame: &mut Frame) { + // Render the outer tab view, then render the inner frame view.
+ let shell = Block::default() + .borders(Borders::ALL) + .border_type(BorderType::Rounded) + .border_style(Style::default().magenta()) + .title_top("vx-browse") + .title_bottom("press q to quit | ← to go up a level | ENTER to go down a level") + .title_alignment(Alignment::Center); + + // The rest of the app is rendered inside the shell. + let inner_area = shell.inner(frame.area()); + + frame.render_widget(shell, frame.area()); + + // Split the inner area into a Tab view area and the rest of the screen. + let [tab_view, app_view] = Layout::vertical([ + // Tab bar area - 1 line + Constraint::Length(1), + // Rest of the interior space for app view + Constraint::Min(0), + ]) + .areas(inner_area); + + // Display a tab indicator. + let selected_tab = match app.current_tab { + Tab::Layout => 0, + Tab::Encodings => 1, + }; + + let tabs = Tabs::new([ + "File Layout", + "Arrays", + // TODO(aduffy): add SQL query interface + // "Query", + ]) + .style(Style::default().bold().white()) + .highlight_style(Style::default().bold().black().on_white()) + .select(Some(selected_tab)); + + frame.render_widget(tabs, tab_view); + + // Render the view for the current tab. 
+ match app.current_tab { + Tab::Layout => { + render_layouts(app, app_view, frame.buffer_mut()); + } + Tab::Encodings => { + frame.render_widget(encodings_ui(app), app_view); + } + } +} diff --git a/vortex-cli/src/browse/ui/encodings.rs b/vortex-cli/src/browse/ui/encodings.rs new file mode 100644 index 0000000000..11488d8dfb --- /dev/null +++ b/vortex-cli/src/browse/ui/encodings.rs @@ -0,0 +1,8 @@ +use ratatui::prelude::Widget; +use ratatui::widgets::Paragraph; + +use crate::browse::app::AppState; + +pub fn encodings_ui(_app_state: &AppState) -> impl Widget { + Paragraph::new("TODO: Encodings View").centered() +} diff --git a/vortex-cli/src/browse/ui/layouts.rs b/vortex-cli/src/browse/ui/layouts.rs new file mode 100644 index 0000000000..f75759ce69 --- /dev/null +++ b/vortex-cli/src/browse/ui/layouts.rs @@ -0,0 +1,203 @@ +use std::sync::Arc; + +use bytes::Bytes; +use ratatui::buffer::Buffer; +use ratatui::layout::{Constraint, Layout, Rect}; +use ratatui::style::{Color, Style, Stylize}; +use ratatui::text::Text; +use ratatui::widgets::{ + Block, BorderType, Borders, List, ListState, Paragraph, StatefulWidget, Widget, +}; +use vortex::dtype::{DType, Field}; +use vortex::error::{vortex_bail, VortexResult}; +use vortex::ipc::messages::{BufMessageReader, DecoderMessage}; +use vortex::sampling_compressor::ALL_ENCODINGS_CONTEXT; +use vortex::stats::stats_from_bitset_bytes; +use vortex::{ArrayData, Context}; + +use crate::browse::app::{AppState, Encoding, LayoutCursor}; + +/// Render the Layouts tab. +pub fn render_layouts(app_state: &mut AppState, area: Rect, buf: &mut Buffer) { + let [header_area, detail_area] = + Layout::vertical([Constraint::Length(10), Constraint::Min(1)]).areas(area); + + // Render the header area. 
+ render_layout_header(&app_state.cursor, header_area, buf); + + // Render the list view if the layout has children + if app_state.cursor.encoding() == Encoding::Flat { + render_array(app_state, detail_area, buf); + } else { + render_children_list( + &app_state.cursor, + detail_area, + buf, + &mut app_state.layouts_list_state, + ); + } +} + +fn render_layout_header(cursor: &LayoutCursor, area: Rect, buf: &mut Buffer) { + // We want the header to have some padding, and all elements to be horizontally aligned. + // let [area] = Layout::default() + // .constraints([Constraint::Min(0)]) + // .margin(10) + // .areas(area); + + let layout_kind = match cursor.encoding() { + Encoding::Flat => "FLAT".to_string(), + Encoding::Chunked => "CHUNKED".to_string(), + Encoding::Columnar => "COLUMNAR".to_string(), + Encoding::Unknown => "UNKNOWN".to_string(), + }; + + // If using a FlatLayout, read the array and parse the metadata. + + let row_count = cursor.layout_fb().row_count(); + + let mut rows = vec![ + Text::from(format!("Kind: {layout_kind}")).bold(), + Text::from(format!("Row Count: {row_count}")).bold(), + Text::from(format!("Schema: {}", cursor.dtype())) + .bold() + .green(), + ]; + + if cursor.encoding() == Encoding::Chunked { + // Push any columnar stats. 
+ if cursor.layout_fb().metadata().is_none() { + rows.push(Text::from("No chunk statistics found")); + } else { + let available_stats = stats_from_bitset_bytes( + cursor + .layout_fb() + .metadata() + .expect("layout_fb metadata") + .bytes(), + ); + let mut line = String::new(); + line.push_str("Statistics: "); + for stat in available_stats { + line.push_str(stat.to_string().as_str()); + line.push(' '); + } + + rows.push(Text::from(line)); + } + } + + let container = Block::new() + .title("Layout Info") + .borders(Borders::ALL) + .border_type(BorderType::Rounded) + .border_style(Style::default().fg(Color::DarkGray)); + + let inner_area = container.inner(area); + + container.render(area, buf); + + Widget::render(List::new(rows), inner_area, buf); +} + +// Render the inner Array for a FlatLayout +fn render_array(app: &AppState, area: Rect, buf: &mut Buffer) { + // FlatLayout must have exactly 1 buffer. + let buffer = app + .cursor + .layout_fb() + .buffers() + .expect("layout_fb buffers") + .get(0); + let bytes = app.read_bytes_sync(buffer.begin()..buffer.end()); + let array = array_from_bytes( + bytes, + ALL_ENCODINGS_CONTEXT.clone(), + app.cursor.dtype().clone(), + ) + .expect("array_from_bytes"); + + // Show the metadata as JSON. (show count of encoded bytes as well) + // let metadata_size = array.metadata_bytes().unwrap_or_default().len(); + let container = Block::new() + .title("Array Info") + .borders(Borders::ALL) + .border_type(BorderType::Rounded) + .border_style(Style::default().fg(Color::DarkGray)); + + let widget_area = container.inner(area); + + container.render(area, buf); + + Paragraph::new(array.tree_display().to_string()).render(widget_area, buf); +} + +fn render_children_list( + cursor: &LayoutCursor, + area: Rect, + buf: &mut Buffer, + state: &mut ListState, +) { + // TODO: add selection state. 
+ let layout = cursor.layout_fb(); + if let Some(child_layouts) = layout.children() { + let list_items: Vec = (0..child_layouts.len()) + .map(|idx| child_name(cursor, idx)) + .collect(); + + let container = Block::new() + .title("Child Layouts") + .borders(Borders::ALL) + .border_type(BorderType::Rounded) + .border_style(Style::default().fg(Color::DarkGray)); + + let inner_area = container.inner(area); + + container.render(area, buf); + + // Render the List view. + // TODO: add state so we can scroll + StatefulWidget::render( + List::new(list_items).highlight_style(Style::default().black().on_white().bold()), + inner_area, + buf, + state, + ); + } +} + +fn child_name(cursor: &LayoutCursor, nth: usize) -> String { + match cursor.encoding() { + Encoding::Columnar => { + let field_info = cursor + .dtype() + .as_struct() + .expect("struct dtype") + .field_info(&Field::Index(nth)) + .expect("struct dtype component"); + let field_name = field_info.name; + let field_dtype = field_info.dtype.value().expect("dtype value"); + format!("Column {nth} - {field_name} ({field_dtype})") + } + Encoding::Chunked => { + if cursor.layout_fb().metadata().is_none() { + format!("Chunk {nth}") + } else if nth == 0 { + "Chunk Statistics".to_string() + } else { + format!("Chunk {}", nth - 1) + } + } + Encoding::Flat => format!("Page {nth}"), + Encoding::Unknown => format!("Unknown {nth}"), + } +} + +fn array_from_bytes(buf: Bytes, ctx: Arc, dtype: DType) -> VortexResult { + let mut reader = BufMessageReader::new(buf); + match reader.next().transpose()? 
{ + Some(DecoderMessage::Array(array_parts)) => array_parts.decode(ctx, dtype), + Some(msg) => vortex_bail!("Expected Array message, got {:?}", msg), + None => vortex_bail!("Expected Array message, got EOF"), + } +} diff --git a/vortex-cli/src/main.rs b/vortex-cli/src/main.rs new file mode 100644 index 0000000000..91199c77ac --- /dev/null +++ b/vortex-cli/src/main.rs @@ -0,0 +1,34 @@ +#![allow(clippy::expect_used)] +mod browse; +mod tree; + +use std::path::PathBuf; +use std::sync::LazyLock; + +use browse::exec_tui; +use clap::Parser; +use tokio::runtime::Runtime; +use tree::exec_tree; + +static TOKIO_RUNTIME: LazyLock = + LazyLock::new(|| Runtime::new().expect("Tokio Runtime::new()")); + +#[derive(clap::Parser)] +struct Cli { + #[clap(subcommand)] + command: Commands, +} + +#[derive(Debug, clap::Subcommand)] +enum Commands { + Tree { file: PathBuf }, + Browse { file: PathBuf }, +} + +fn main() { + let cli = Cli::parse(); + match cli.command { + Commands::Tree { file } => exec_tree(file).expect("exec_tre"), + Commands::Browse { file } => exec_tui(file).expect("exec_tui"), + } +} diff --git a/vortex-cli/src/tree.rs b/vortex-cli/src/tree.rs new file mode 100644 index 0000000000..dca3ee0b9d --- /dev/null +++ b/vortex-cli/src/tree.rs @@ -0,0 +1,31 @@ +use std::path::Path; + +use vortex::error::VortexResult; +use vortex::file::{LayoutContext, LayoutDeserializer, VortexReadBuilder}; +use vortex::io::TokioFile; +use vortex::sampling_compressor::ALL_ENCODINGS_CONTEXT; + +use crate::TOKIO_RUNTIME; + +pub fn exec_tree(file: impl AsRef) -> VortexResult<()> { + let opened = TokioFile::open(file)?; + + let full = TOKIO_RUNTIME.block_on(async move { + let builder: vortex::file::handle::VortexReadHandle = VortexReadBuilder::new( + opened, + LayoutDeserializer::new( + ALL_ENCODINGS_CONTEXT.clone(), + LayoutContext::default().into(), + ), + ) + .build() + .await?; + + // If the array is chunked, stream the chunks. Otherwise, we print the tree view of the entire array this way. 
+ builder.into_stream().read_all().await + })?; + + println!("{}", full.tree_display()); + + Ok(()) +}