From a6669bfd679d8edabad8ad23cd363cb4c34e2ef0 Mon Sep 17 00:00:00 2001 From: Serial <69764315+Serial-ATA@users.noreply.github.com> Date: Tue, 17 Sep 2024 01:14:28 -0400 Subject: [PATCH] EBML: Start defining structure for writes --- lofty/src/ebml/element_reader.rs | 26 +- lofty/src/ebml/mod.rs | 2 +- lofty/src/ebml/read.rs | 7 +- lofty/src/ebml/read/segment.rs | 4 +- lofty/src/ebml/read/segment_attachments.rs | 2 +- lofty/src/ebml/read/segment_tracks.rs | 4 +- lofty/src/ebml/tag/attached_file.rs | 2 +- lofty/src/ebml/tag/target.rs | 13 + .../ebml/tag/write/elements/attached_file.rs | 81 +++ lofty/src/ebml/tag/write/elements/mod.rs | 2 + lofty/src/ebml/tag/write/elements/target.rs | 102 ++++ lofty/src/ebml/tag/write/mod.rs | 48 ++ lofty/src/ebml/tag/write/type_encodings.rs | 110 ++++ lofty/src/ebml/vint.rs | 523 +++++++++++------- lofty/src/lib.rs | 1 + lofty_attr/src/lib.rs | 8 +- 16 files changed, 695 insertions(+), 240 deletions(-) create mode 100644 lofty/src/ebml/tag/write/elements/attached_file.rs create mode 100644 lofty/src/ebml/tag/write/elements/mod.rs create mode 100644 lofty/src/ebml/tag/write/elements/target.rs create mode 100644 lofty/src/ebml/tag/write/type_encodings.rs diff --git a/lofty/src/ebml/element_reader.rs b/lofty/src/ebml/element_reader.rs index 9b0143d38..94bb1e2d9 100644 --- a/lofty/src/ebml/element_reader.rs +++ b/lofty/src/ebml/element_reader.rs @@ -1,4 +1,4 @@ -use crate::ebml::vint::VInt; +use crate::ebml::vint::{ElementId, VInt}; use crate::error::Result; use crate::macros::{decode_err, try_vec}; @@ -10,8 +10,8 @@ use lofty_attr::ebml_master_elements; #[derive(Copy, Clone, Eq, PartialEq, Debug)] pub struct ElementHeader { - pub(crate) id: VInt, - pub(crate) size: VInt, + pub(crate) id: ElementId, + pub(crate) size: VInt<u64>, } impl ElementHeader { @@ -20,8 +20,8 @@ impl ElementHeader { R: Read, { Ok(Self { - id: VInt::parse_from_element_id(reader, max_id_length)?, - size: VInt::parse(reader, max_vint_length)?, + id: ElementId::parse(reader, max_id_length)?, + size: VInt::<u64>::parse(reader, max_vint_length)?, }) } } @@ -41,7 +41,7 @@ pub enum ElementDataType { #[derive(Copy, Clone, Debug)] struct MasterElement { id: ElementIdent, - children: &'static [(VInt, ChildElementDescriptor)], + children: &'static [(ElementId, ChildElementDescriptor)], } #[derive(Copy, Clone, Debug)] @@ -247,7 +247,7 @@ const ROOT_DEPTH: u8 = 1; #[derive(Copy, Clone, Debug)] struct Depth { level: u8, - length: VInt, + length: VInt<u64>, } #[derive(Copy, Clone, Debug)] @@ -302,7 +302,7 @@ impl ElementReaderContext { self.masters.get((self.depth - 1) as usize).copied() } - fn current_master_length(&self) -> VInt { + fn current_master_length(&self) -> VInt<u64> { assert!(self.depth > 0); self.current_master() .expect("should have current master element") @@ -316,7 +316,7 @@ impl ElementReaderContext { } } - fn remaining_lock_length(&self) -> VInt { + fn remaining_lock_length(&self) -> VInt<u64> { assert!(self.locked && !self.lock_depths.is_empty()); let lock_depth = *self.lock_depths.last().unwrap(); @@ -326,8 +326,8 @@ impl ElementReaderContext { #[derive(Debug)] pub(crate) enum ElementReaderYield { - Master((ElementIdent, VInt)), - Child((ChildElementDescriptor, VInt)), + Master((ElementIdent, VInt<u64>)), + Child((ChildElementDescriptor, VInt<u64>)), Unknown(ElementHeader), Eof, } @@ -412,7 +412,7 @@ where self.ctx.max_size_length = len } - fn push_new_master(&mut self, master: MasterElement, size: VInt) -> Result<()> { + fn push_new_master(&mut self, master: MasterElement, size: VInt<u64>) -> Result<()> { log::debug!("New master element: {:?}", master.id); if self.ctx.depth == MAX_DEPTH { @@ -662,7 +662,7 @@ where // https://www.rfc-editor.org/rfc/rfc8794.html#section-7.8 // A Binary Element MUST declare a length in octets from zero to VINTMAX. - if element_length > VInt::MAX { + if element_length > VInt::<u64>::MAX { decode_err!(@BAIL Ebml, "Binary element length is too large") } diff --git a/lofty/src/ebml/mod.rs b/lofty/src/ebml/mod.rs index b8ebd32e3..b10b5320f 100644 --- a/lofty/src/ebml/mod.rs +++ b/lofty/src/ebml/mod.rs @@ -11,7 +11,7 @@ use lofty_attr::LoftyFile; pub use properties::EbmlProperties; pub use tag::*; -pub use vint::VInt; +pub use vint::*; /// An EBML file #[derive(LoftyFile, Default)] diff --git a/lofty/src/ebml/read.rs b/lofty/src/ebml/read.rs index e79068f23..0b0325058 100644 --- a/lofty/src/ebml/read.rs +++ b/lofty/src/ebml/read.rs @@ -9,7 +9,7 @@ mod segment_tracks; use super::EbmlFile; use crate::config::ParseOptions; use crate::ebml::element_reader::{ElementHeader, ElementIdent, ElementReader, ElementReaderYield}; -use crate::ebml::vint::VInt; +use crate::ebml::vint::ElementId; use crate::ebml::EbmlProperties; use crate::error::Result; use crate::macros::decode_err; @@ -18,6 +18,9 @@ use std::io::{Read, Seek}; const SUPPORTED_DOC_TYPES: &[&str] = &["matroska", "webm"]; +const CRC32_ID: ElementId = ElementId(0xBF); +const VOID_ID: ElementId = ElementId(0xEC); + pub(super) fn read_from<R>(reader: &mut R, parse_options: ParseOptions) -> Result<EbmlFile> where R: Read + Seek, @@ -45,7 +48,7 @@ where // CRC-32 (0xBF) and Void (0xEC) elements can occur at the top level. // This is valid, and we can just skip them. ElementReaderYield::Unknown(ElementHeader { - id: VInt(id @ (0xBF | 0xEC)), + id: id @ (CRC32_ID | VOID_ID), size, }) => { log::debug!("Skipping global element: {:X}", id); diff --git a/lofty/src/ebml/read/segment.rs b/lofty/src/ebml/read/segment.rs index 524e1db17..606b8cd6f 100644 --- a/lofty/src/ebml/read/segment.rs +++ b/lofty/src/ebml/read/segment.rs @@ -3,7 +3,7 @@ use crate::config::ParseOptions; use crate::ebml::element_reader::{ElementHeader, ElementIdent, ElementReader, ElementReaderYield}; use crate::ebml::properties::EbmlProperties; use crate::ebml::tag::EbmlTag; -use crate::ebml::VInt; +use crate::ebml::ElementId; use crate::error::Result; use std::io::{Read, Seek}; @@ -72,7 +72,7 @@ where // elements, so we can just skip any useless ones. children_reader.skip_element(ElementHeader { - id: VInt(id as u64), + id: ElementId(id as u64), size, })?; }, diff --git a/lofty/src/ebml/read/segment_attachments.rs b/lofty/src/ebml/read/segment_attachments.rs index 309d0edb3..6bb10d1fe 100644 --- a/lofty/src/ebml/read/segment_attachments.rs +++ b/lofty/src/ebml/read/segment_attachments.rs @@ -74,7 +74,7 @@ where uid = Some(children_reader.read_unsigned_int(size)?); }, ElementIdent::FileReferral => { - referral = Some(children_reader.read_string(size)?); + referral = Some(children_reader.read_binary(size)?); }, ElementIdent::FileUsedStartTime => { used_start_time = Some(children_reader.read_unsigned_int(size)?); diff --git a/lofty/src/ebml/read/segment_tracks.rs b/lofty/src/ebml/read/segment_tracks.rs index 25da6ba9e..9210e77d6 100644 --- a/lofty/src/ebml/read/segment_tracks.rs +++ b/lofty/src/ebml/read/segment_tracks.rs @@ -10,7 +10,7 @@ use std::io::{Read, Seek}; pub(super) fn read_from<R>( children_reader: &mut ElementChildIterator<'_, R>, parse_options: ParseOptions, - properties: &mut EbmlProperties, + _properties: &mut EbmlProperties, ) -> Result<()> where R: Read + Seek, @@ -44,7 +44,7 @@ const AUDIO_TRACK_TYPE: u64 = 2; fn read_track_entry<R>( children_reader: &mut ElementChildIterator<'_, R>, - parse_options: ParseOptions, + _parse_options: ParseOptions, audio_tracks: &mut Vec<AudioTrack>, ) -> Result<()> where diff --git a/lofty/src/ebml/tag/attached_file.rs b/lofty/src/ebml/tag/attached_file.rs index 22158c52b..704cf7eeb 100644 --- a/lofty/src/ebml/tag/attached_file.rs +++ b/lofty/src/ebml/tag/attached_file.rs @@ -25,7 +25,7 @@ pub struct AttachedFile { /// Unique ID representing the file, as random as possible. pub uid: u64, /// A binary value that a track/codec can refer to when the attachment is needed. - pub referral: Option<String>, + pub referral: Option<Vec<u8>>, /// The timestamp at which this optimized font attachment comes into context. /// /// This is expressed in Segment Ticks which is based on `TimestampScale`. This element is diff --git a/lofty/src/ebml/tag/target.rs b/lofty/src/ebml/tag/target.rs index 760177f30..8228f96f7 100644 --- a/lofty/src/ebml/tag/target.rs +++ b/lofty/src/ebml/tag/target.rs @@ -102,3 +102,16 @@ impl From<TargetType> for Target { } } } + +impl Target { + // TargetType::Album is the default value. If nothing else is set, it is valid to write + // a zero-sized Targets element. + pub(super) fn is_empty_candidate(&self) -> bool { + self.target_type == TargetType::Album + && self.name.is_none() + && self.track_uids.is_none() + && self.edition_uids.is_none() + && self.chapter_uids.is_none() + && self.attachment_uids.is_none() + } +} diff --git a/lofty/src/ebml/tag/write/elements/attached_file.rs b/lofty/src/ebml/tag/write/elements/attached_file.rs new file mode 100644 index 000000000..81ee803c2 --- /dev/null +++ b/lofty/src/ebml/tag/write/elements/attached_file.rs @@ -0,0 +1,81 @@ +use crate::ebml::tag::write::{write_element, ElementWriterCtx, WriteableElement}; +use crate::ebml::{AttachedFile, ElementId, VInt}; +use crate::io::FileLike; + +const FileDescription_ID: ElementId = ElementId(0x467E); +const FileName_ID: ElementId = ElementId(0x466E); +const FileMediaType_ID: ElementId = ElementId(0x4660); +const FileData_ID: ElementId = ElementId(0x465C); +const FileUID_ID: ElementId = ElementId(0x46AE); +const FileReferral_ID: ElementId = ElementId(0x4675); +const FileUsedStartTime_ID: ElementId = ElementId(0x4661); +const FileUsedEndTime_ID: ElementId = ElementId(0x4662); + +impl WriteableElement for AttachedFile { + const ID: ElementId = ElementId(0x61A7); + + fn write_element<F: FileLike>( + &self, + ctx: ElementWriterCtx, + writer: &mut F, + ) -> crate::error::Result<()> { + self.validate()?; + + let mut element_children = Vec::new(); + if let Some(description) = &self.description { + write_element( + ctx, + FileDescription_ID, + &description.as_str(), + &mut element_children, + )?; + } + + write_element( + ctx, + FileName_ID, + &self.file_name.as_str(), + &mut element_children, + )?; + + write_element( + ctx, + FileMediaType_ID, + &self.mime_type.as_str(), + &mut element_children, + )?; + + write_element( + ctx, + FileData_ID, + &self.file_data.as_slice(), + &mut element_children, + )?; + + let uid = VInt::<u64>::try_from(self.uid)?; + write_element(ctx, FileUID_ID, &uid, &mut element_children)?; + + if let Some(referral) = &self.referral { + write_element( + ctx, + FileReferral_ID, + &referral.as_slice(), + &mut element_children, + )?; + } + + if let Some(start_time) = &self.used_start_time { + let vint = VInt::<u64>::try_from(*start_time)?; + write_element(ctx, FileUsedStartTime_ID, &vint, &mut element_children)?; + } + + if let Some(end_time) = &self.used_end_time { + let vint = VInt::<u64>::try_from(*end_time)?; + write_element(ctx, FileUsedEndTime_ID, &vint, &mut element_children)?; + } + + write_element(ctx, Self::ID, &element_children.as_slice(), writer)?; + + Ok(()) + } +} diff --git a/lofty/src/ebml/tag/write/elements/mod.rs b/lofty/src/ebml/tag/write/elements/mod.rs new file mode 100644 index 000000000..0baa74293 --- /dev/null +++ b/lofty/src/ebml/tag/write/elements/mod.rs @@ -0,0 +1,2 @@ +pub(super) mod attached_file; +pub(super) mod target; diff --git a/lofty/src/ebml/tag/write/elements/target.rs b/lofty/src/ebml/tag/write/elements/target.rs new file mode 100644 index 000000000..a26362558 --- /dev/null +++ b/lofty/src/ebml/tag/write/elements/target.rs @@ -0,0 +1,102 @@ +use crate::ebml::tag::write::{write_element, EbmlWriteExt, ElementWriterCtx, WriteableElement}; +use crate::ebml::{ElementId, Target, TargetType, VInt}; +use crate::io::FileLike; + +const TargetTypeValue_ID: ElementId = ElementId(0x68CA); +const TargetType_ID: ElementId = ElementId(0x63CA); +const TagTrackUID_ID: ElementId = ElementId(0x63C5); +const TagEditionUID_ID: ElementId = ElementId(0x63C9); +const TagChapterUID_ID: ElementId = ElementId(0x63C4); +const TagAttachmentUID_ID: ElementId = ElementId(0x63C6); + +impl WriteableElement for Target { + const ID: ElementId = ElementId(0x63C0); + + fn write_element<F: FileLike>( + &self, + ctx: ElementWriterCtx, + writer: &mut F, + ) -> crate::error::Result<()> { + if self.is_empty_candidate() { + writer.write_id(ctx, Self::ID)?; + writer.write_size(ctx, VInt::<u64>::ZERO)?; + return Ok(()); + } + + let mut element_children = Vec::new(); + if self.target_type == TargetType::Album { + write_element( + ctx, + TargetTypeValue_ID, + &[].as_slice(), + &mut element_children, + )?; + } else { + let vint = VInt::<u64>::try_from(self.target_type as u64)?; + write_element(ctx, TargetTypeValue_ID, &vint, &mut element_children)?; + } + + if let Some(name) = &self.name { + write_element(ctx, TargetType_ID, &name.as_str(), &mut element_children)?; + } + + if let Some(track_uids) = &self.track_uids { + for &uid in track_uids { + let vint = VInt::<u64>::try_from(uid)?; + write_element(ctx, TagTrackUID_ID, &vint, &mut element_children)?; + } + } + + if let Some(edition_uids) = &self.edition_uids { + for &uid in edition_uids { + let vint = VInt::<u64>::try_from(uid)?; + write_element(ctx, TagEditionUID_ID, &vint, &mut element_children)?; + } + } + + if let Some(chapter_uids) = &self.chapter_uids { + for &uid in chapter_uids { + let vint = VInt::<u64>::try_from(uid)?; + write_element(ctx, TagChapterUID_ID, &vint, &mut element_children)?; + } + } + + if let Some(attachment_uids) = &self.attachment_uids { + for &uid in attachment_uids { + let vint = VInt::<u64>::try_from(uid)?; + write_element(ctx, TagAttachmentUID_ID, &vint, &mut element_children)?; + } + } + + write_element(ctx, Self::ID, &element_children.as_slice(), writer)?; + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::io::Cursor; + + #[test_log::test] + fn write_empty_default() { + let target = Target::default(); + + let mut buf = Cursor::new(Vec::new()); + target + .write_element( + ElementWriterCtx { + max_id_len: 4, + max_size_len: 8, + }, + &mut buf, + ) + .unwrap(); + + let expected = vec![0x63, 0xC0, 0x80]; + + assert_eq!(buf.into_inner(), expected); + } +} diff --git a/lofty/src/ebml/tag/write/mod.rs b/lofty/src/ebml/tag/write/mod.rs index e69de29bb..27ace950a 100644 --- a/lofty/src/ebml/tag/write/mod.rs +++ b/lofty/src/ebml/tag/write/mod.rs @@ -0,0 +1,48 @@ +mod elements; +mod type_encodings; + +use crate::ebml::{ElementId, VInt}; +use crate::error::Result; +use crate::io::FileLike; + +use std::io::Write; + +use type_encodings::ElementEncodable; + +#[derive(Copy, Clone)] +pub(crate) struct ElementWriterCtx { + pub(crate) max_id_len: u8, + pub(crate) max_size_len: u8, +} + +pub(crate) trait EbmlWriteExt: Write + Sized { + fn write_id(&mut self, ctx: ElementWriterCtx, id: ElementId) -> Result<()> { + id.write_to(Some(ctx.max_id_len), self)?; + Ok(()) + } + + fn write_size(&mut self, ctx: ElementWriterCtx, size: VInt<u64>) -> Result<()> { + VInt::<u64>::write_to(size.value(), None, Some(ctx.max_size_len), self)?; + Ok(()) + } +} + +impl<T> EbmlWriteExt for T where T: Write {} + +pub(crate) trait WriteableElement { + const ID: ElementId; + + fn write_element<F: FileLike>(&self, ctx: ElementWriterCtx, writer: &mut F) -> Result<()>; +} + +pub(crate) fn write_element<W: Write, E: ElementEncodable>( + ctx: ElementWriterCtx, + id: ElementId, + element: &E, + writer: &mut W, +) -> Result<()> { + writer.write_id(ctx, id)?; + element.write_to(ctx, writer)?; + + Ok(()) +} diff --git a/lofty/src/ebml/tag/write/type_encodings.rs b/lofty/src/ebml/tag/write/type_encodings.rs new file mode 100644 index 000000000..13d643a05 --- /dev/null +++ b/lofty/src/ebml/tag/write/type_encodings.rs @@ -0,0 +1,110 @@ +use super::{EbmlWriteExt, ElementWriterCtx}; +use crate::ebml::{TagValue, VInt}; +use crate::error::Result; +use std::io::Write; + +use byteorder::WriteBytesExt; + +pub(crate) trait ElementEncodable { + fn len(&self) -> Result<VInt<u64>>; + + fn write_to<W: Write>(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()>; +} + +impl ElementEncodable for VInt<u64> { + fn len(&self) -> Result<VInt<u64>> { + Ok(VInt(u64::from(self.octet_length()))) + } + + fn write_to<W: Write>(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + writer.write_size(ctx, self.len()?)?; + VInt::<u64>::write_to(self.value(), None, None, writer)?; + Ok(()) + } +} + +impl ElementEncodable for VInt<i64> { + fn len(&self) -> Result<VInt<u64>> { + Ok(VInt(u64::from(self.octet_length()))) + } + + fn write_to<W: Write>(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + writer.write_size(ctx, self.len()?)?; + VInt::<i64>::write_to(self.value() as u64, None, None, writer)?; + Ok(()) + } +} + +impl ElementEncodable for f32 { + fn len(&self) -> Result<VInt<u64>> { + Ok(VInt(size_of::<f32>() as u64)) + } + + fn write_to<W: Write>(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + if *self == 0.0 { + VInt::<u64>::write_to(VInt::<u64>::ZERO.value(), None, None, writer)?; + return Ok(()); + } + + writer.write_size(ctx, self.len()?)?; + writer.write_f32::<byteorder::BigEndian>(*self)?; + Ok(()) + } +} + +impl ElementEncodable for f64 { + fn len(&self) -> Result<VInt<u64>> { + Ok(VInt(size_of::<f64>() as u64)) + } + + fn write_to<W: Write>(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + if *self == 0.0 { + VInt::<u64>::write_to(VInt::<u64>::ZERO.value(), None, None, writer)?; + return Ok(()); + } + + writer.write_size(ctx, self.len()?)?; + writer.write_f64::<byteorder::BigEndian>(*self)?; + Ok(()) + } +} + +impl ElementEncodable for &[u8] { + fn len(&self) -> Result<VInt<u64>> { + VInt::try_from(<[u8]>::len(self) as u64) + } + + fn write_to<W: Write>(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + writer.write_size(ctx, <&[u8] as ElementEncodable>::len(self)?)?; + writer.write_all(self)?; + Ok(()) + } +} + +impl ElementEncodable for &str { + fn len(&self) -> Result<VInt<u64>> { + VInt::try_from(str::len(self) as u64) + } + + fn write_to<W: Write>(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + writer.write_size(ctx, <&str as ElementEncodable>::len(self)?)?; + writer.write_all(self.as_bytes())?; + Ok(()) + } +} + +impl ElementEncodable for TagValue<'_> { + fn len(&self) -> Result<VInt<u64>> { + match self { + TagValue::String(s) => <&str as ElementEncodable>::len(&&**s), + TagValue::Binary(b) => <&[u8] as ElementEncodable>::len(&&**b), + } + } + + fn write_to<W: Write>(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + match self { + TagValue::String(s) => <&str as ElementEncodable>::write_to(&&**s, ctx, writer), + TagValue::Binary(b) => <&[u8] as ElementEncodable>::write_to(&&**b, ctx, writer), + } + } +} diff --git a/lofty/src/ebml/vint.rs b/lofty/src/ebml/vint.rs index e555f6a11..ac19b1b3e 100644 --- a/lofty/src/ebml/vint.rs +++ b/lofty/src/ebml/vint.rs @@ -1,289 +1,382 @@ use crate::error::Result; use crate::macros::err; +use std::fmt::UpperHex; -use std::io::Read; +use std::io::{Read, Write}; use std::ops::{Add, Sub}; use byteorder::{ReadBytesExt, WriteBytesExt}; +macro_rules! impl_vint { + ($($t:ty),*) => { + $( + paste::paste! { + #[allow(trivial_numeric_casts)] + impl VInt<$t> { + /// The maximum value that can be represented by a `VInt` + pub const MAX: $t = <$t>::MAX >> (<$t>::BITS as u64 - Self::USABLE_BITS); + /// The minimum value that can be represented by a `VInt` + pub const MIN: $t = <$t>::MIN; + /// A `VInt` with a value of 0 + pub const ZERO: Self = Self(0); + + /// Gets the inner value of the `VInt` + /// + /// # Examples + /// + /// ```rust + /// use lofty::ebml::VInt; + /// + /// # fn main() -> lofty::error::Result<()> { + #[doc = " let vint = VInt::<" $t ">::try_from(2)?;"] + /// assert_eq!(vint.value(), 2); + /// # Ok(()) } + /// ``` + pub fn value(&self) -> $t { + self.0 + } + + /// Parse a `VInt` from a reader + /// + /// `max_length` can be used to specify the maximum number of octets the number should + /// occupy, otherwise it should be `8`. + /// + /// # Errors + /// + /// * The int cannot fit within the maximum width of 54 bits + /// + /// # Examples + /// + /// ```rust + /// use lofty::ebml::VInt; + /// + /// # fn main() -> lofty::error::Result<()> { + /// // This octet count (9) is too large to represent + /// let mut invalid_vint_reader = &[0b0000_0000_1]; + #[doc = " let invalid_vint = VInt::<" $t ">::parse(&mut &invalid_vint_reader[..], 8);"] + /// assert!(invalid_vint.is_err()); + /// + /// // This octet count (4) is too large to represent given our `max_length` + /// let mut invalid_vint_reader2 = &[0b0001_1111]; + #[doc = " let invalid_vint2 = VInt::<" $t ">::parse(&mut &invalid_vint_reader2[..], 3);"] + /// assert!(invalid_vint2.is_err()); + /// + /// // This value is small enough to represent + /// let mut valid_vint_reader = &[0b1000_0010]; + #[doc = " let valid_vint = VInt::<" $t ">::parse(&mut &valid_vint_reader[..], 8)?;"] + /// assert_eq!(valid_vint.value(), 2); + /// # Ok(()) } + /// ``` + pub fn parse<R>(reader: &mut R, max_length: u8) -> Result<Self> + where + R: Read, + { + Ok(Self(parse_vint(reader, max_length, false)? as $t)) + } + + /// Represents the length of the `VInt` in octets + /// + /// NOTE: The value returned will always be <= 8 + /// + /// # Examples + /// + /// ```rust + /// use lofty::ebml::VInt; + /// + /// # fn main() -> lofty::error::Result<()> { + /// // Anything <= 254 will fit into a single octet + /// let vint = VInt::try_from(100u64)?; + /// assert_eq!(vint.octet_length(), 1); + /// + /// // A larger number will need to + /// let vint = VInt::try_from(500_000u64)?; + /// assert_eq!(vint.octet_length(), 3); + /// # Ok(()) } + /// ``` + pub fn octet_length(&self) -> u8 { + octet_length(self.0 as u64) + } + + /// Converts the `VInt` into a byte Vec + /// + /// * `min_length` can be used to specify the minimum number of octets the number should + /// occupy. + /// * `max_length` can be used to specify the maximum number of octets the number should + /// occupy. + /// + /// # Errors + /// + /// * The octet length is greater than `max_length` (if provided) + /// * `min_length` is greater than `max_length` OR `8` + /// * Unable to write to the buffer + /// + /// # Examples + /// + /// ```rust + /// use lofty::ebml::VInt; + /// + /// # fn main() -> lofty::error::Result<()> { + /// let vint = VInt::try_from(10u64)?; + /// let bytes = vint.as_bytes(None, None)?; + /// + /// assert_eq!(bytes, &[0b1000_1010]); + /// # Ok(()) } + /// ``` + pub fn as_bytes(&self, min_length: Option<u8>, max_length: Option<u8>) -> Result<Vec<u8>> { + let mut ret = Vec::with_capacity(8); + VInt::<$t>::write_to(self.0 as u64, min_length, max_length, &mut ret)?; + Ok(ret) + } + + #[inline] + pub(crate) fn saturating_sub(self, other: $t) -> Self { + let v = self.0.saturating_sub(other); + if v < Self::MIN { + return Self(Self::MIN); + } + + Self(v) + } + } + + impl Add for VInt<$t> { + type Output = Self; + + fn add(self, other: Self) -> Self::Output { + let val = self.0 + other.0; + assert!(val <= Self::MAX, "VInt overflow"); + + Self(val) + } + } + + impl Sub for VInt<$t> { + type Output = Self; + + fn sub(self, other: Self) -> Self::Output { + Self(self.0 - other.0) + } + } + + impl PartialEq<$t> for VInt<$t> { + fn eq(&self, other: &$t) -> bool { + self.0 == *other + } + } + + impl TryFrom<$t> for VInt<$t> { + type Error = crate::error::LoftyError; + + fn try_from(value: $t) -> Result<Self> { + if value > Self::MAX { + err!(BadVintSize); + } + + Ok(Self(value)) + } + } + } + )* + }; +} + /// An EMBL variable-size integer /// /// A `VInt` is an unsigned integer composed of up to 8 octets, with 7 usable bits per octet. /// -/// To ensure safe construction of `VInt`s, users must create them through [`VInt::parse`] or [`VInt::from_u64`]. +/// To ensure safe construction of `VInt`s, users must create them through the `TryFrom` implementations or [`VInt::parse`]. #[repr(transparent)] #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] -pub struct VInt(pub(crate) u64); +pub struct VInt<T>(pub(crate) T); -impl VInt { +impl<T> VInt<T> { // Each octet will shave a single bit off each byte const USABLE_BITS_PER_BYTE: u64 = 7; const MAX_OCTET_LENGTH: u64 = 8; const USABLE_BITS: u64 = Self::MAX_OCTET_LENGTH * Self::USABLE_BITS_PER_BYTE; - /// The maximum value that can be represented by a `VInt` - pub const MAX: u64 = u64::MAX >> (u64::BITS as u64 - Self::USABLE_BITS); - /// The minimum value that can be represented by a `VInt` - pub const MIN: u64 = 0; - /// A `VInt` with a value of 0 - pub const ZERO: Self = Self(0); - - /// Create a signed `VInt` from a `u64` - /// - /// # Errors - /// - /// * `uint` cannot fit within the maximum width of 56 bits - /// - /// # Examples - /// - /// ```rust - /// use lofty::ebml::VInt; - /// - /// # fn main() -> lofty::error::Result<()> { - /// // This value is too large to represent - /// let invalid_vint = VInt::from_u64(u64::MAX); - /// assert!(invalid_vint.is_err()); - /// - /// // This value is small enough to represent - /// let valid_vint = VInt::from_u64(500)?; - /// # Ok(()) } - /// ``` - pub fn from_u64(uint: u64) -> Result<Self> { - if uint > Self::MAX { + pub(crate) fn write_to<W>( + mut value: u64, + min_length: Option<u8>, + max_length: Option<u8>, + writer: &mut W, + ) -> Result<()> + where + W: Write, + { + let octets = std::cmp::max(octet_length(value), min_length.unwrap_or(0)); + if octets > max_length.unwrap_or(Self::MAX_OCTET_LENGTH as u8) { err!(BadVintSize); } - Ok(Self(uint)) + // Add the octet length + value |= 1 << (octets * (Self::USABLE_BITS_PER_BYTE as u8)); + + let mut byte_shift = (octets - 1) as i8; + while byte_shift >= 0 { + writer.write_u8((value >> (byte_shift * 8)) as u8)?; + byte_shift -= 1; + } + + Ok(()) } +} - /// Gets the inner value of the `VInt` - /// - /// # Examples - /// - /// ```rust - /// use lofty::ebml::VInt; - /// - /// # fn main() -> lofty::error::Result<()> { - /// let vint = VInt::from_u64(2)?; - /// assert_eq!(vint.value(), 2); - /// # Ok(()) } - /// ``` - pub fn value(&self) -> u64 { - self.0 +impl_vint!(u64, i64); + +fn parse_vint<R>(reader: &mut R, max_length: u8, retain_marker: bool) -> Result<u64> +where + R: Read, +{ + let start = reader.read_u8()?; + let octet_length = verify_length(start, max_length)?; + + let mut bytes_read = 1; + + let mut val = u64::from(start); + if !retain_marker { + val ^= 1 << start.ilog2(); } - /// Parse a `VInt` from a reader - /// - /// `max_length` can be used to specify the maximum number of octets the number should - /// occupy, otherwise it should be `8`. - /// - /// # Errors - /// - /// * `uint` cannot fit within the maximum width of 54 bits - /// - /// # Examples - /// - /// ```rust - /// use lofty::ebml::VInt; - /// - /// # fn main() -> lofty::error::Result<()> { - /// // This octet count (9) is too large to represent - /// let mut invalid_vint_reader = &[0b0000_0000_1]; - /// let invalid_vint = VInt::parse(&mut &invalid_vint_reader[..], 8); - /// assert!(invalid_vint.is_err()); - /// - /// // This octet count (4) is too large to represent given our `max_length` - /// let mut invalid_vint_reader2 = &[0b0001_1111]; - /// let invalid_vint2 = VInt::parse(&mut &invalid_vint_reader2[..], 3); - /// assert!(invalid_vint2.is_err()); - /// - /// // This value is small enough to represent - /// let mut valid_vint_reader = &[0b1000_0010]; - /// let valid_vint = VInt::parse(&mut &valid_vint_reader[..], 8)?; - /// assert_eq!(valid_vint.value(), 2); - /// # Ok(()) } - /// ``` - pub fn parse<R>(reader: &mut R, max_length: u8) -> Result<Self> - where - R: Read, - { - let start = reader.read_u8()?; - let octet_length = Self::verify_length(start, max_length)?; - - let mut bytes_read = 1; - let mut val = u64::from(start) ^ (1 << start.ilog2()) as u64; - while bytes_read < octet_length { - bytes_read += 1; - val = (val << 8) | u64::from(reader.read_u8()?); - } + while bytes_read < octet_length { + bytes_read += 1; + val = (val << 8) | u64::from(reader.read_u8()?); + } - Ok(Self(val)) + Ok(val) +} + +// Verify that the octet length is nonzero and <= 8 +fn verify_length(first_byte: u8, max_length: u8) -> Result<u32> { + // A value of 0b0000_0000 indicates either an invalid VInt, or one with an octet length > 8 + if first_byte == 0b0000_0000 { + err!(BadVintSize); + } + + let octet_length = (VInt::<()>::MAX_OCTET_LENGTH as u32) - first_byte.ilog2(); + if octet_length > 8 || octet_length as u8 > max_length { + err!(BadVintSize); } - /// Parse a `VInt` from a reader, given the element ID + Ok(octet_length) +} + +fn octet_length(mut value: u64) -> u8 { + let mut octets = 0; + loop { + octets += 1; + + value >>= VInt::<()>::USABLE_BITS_PER_BYTE; + if value == 0 { + break; + } + } + + octets +} + +/// An EBML element ID +/// +/// An `ElementId` is a [`VInt`], but with the following conditions: +/// +/// * The `VINT_MARKER` is retained after parsing +/// * When encoding, the minimum number of octets must be used +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] +pub struct ElementId(pub(crate) u64); + +impl ElementId { + /// Parse an `ElementId` from a reader /// - /// An element ID is parsed similarly to a normal `VInt`, but the `VINT_MARKER` is not - /// removed. + /// An element ID is parsed similarly to a normal [`VInt`], but the `VINT_MARKER` is retained. /// /// # Errors /// - /// * `uint` cannot fit within the maximum width + /// * The ID cannot fit within the maximum width /// /// # Examples /// /// ```rust - /// use lofty::ebml::VInt; + /// use lofty::ebml::ElementId; /// /// # fn main() -> lofty::error::Result<()> { /// // Parse the EBML header element ID /// let mut reader = &[0x1A, 0x45, 0xDF, 0xA3][..]; - /// let vint = VInt::parse_from_element_id(&mut reader, 8)?; - /// assert_eq!(vint.value(), 0x1A45DFA3); + /// let id = ElementId::parse(&mut reader, 8)?; + /// assert_eq!(id, 0x1A45DFA3); /// # Ok(()) } - pub fn parse_from_element_id<R>(reader: &mut R, max_id_length: u8) -> Result<Self> + pub fn parse<R>(reader: &mut R, max_id_length: u8) -> Result<Self> where R: Read, { - let start = reader.read_u8()?; - let octet_length = Self::verify_length(start, max_id_length)?; - - let mut bytes_read = 1; - let mut val = u64::from(start); - while bytes_read < octet_length { - bytes_read += 1; - val = (val << 8) | u64::from(reader.read_u8()?); - } - + let val = parse_vint(reader, max_id_length, true)?; Ok(Self(val)) } - // Verify that the octet length is nonzero and <= 8 - fn verify_length(first_byte: u8, max_length: u8) -> Result<u32> { - // A value of 0b0000_0000 indicates either an invalid VInt, or one with an octet length > 8 - if first_byte == 0b0000_0000 { - err!(BadVintSize); - } - - let octet_length = (Self::MAX_OCTET_LENGTH as u32) - first_byte.ilog2(); - if octet_length > 8 || octet_length as u8 > max_length { - err!(BadVintSize); - } - - Ok(octet_length) - } - - /// Represents the length of the `VInt` in octets - /// - /// NOTE: The value returned will always be <= 8 + /// Get the inner value of the `ElementId` /// /// # Examples /// /// ```rust - /// use lofty::ebml::VInt; + /// use lofty::ebml::ElementId; /// /// # fn main() -> lofty::error::Result<()> { - /// // Anything <= 254 will fit into a single octet - /// let vint = VInt::from_u64(100)?; - /// assert_eq!(vint.octet_length(), 1); - /// - /// // A larger number will need to - /// let vint = VInt::from_u64(500_000)?; - /// assert_eq!(vint.octet_length(), 3); + /// let id = ElementId::parse(&mut &[0x1A, 0x45, 0xDF, 0xA3][..], 8)?; + /// assert_eq!(id.value(), 0x1A45DFA3); /// # Ok(()) } - /// ``` - pub fn octet_length(&self) -> u8 { - let mut octets = 0; - let mut v = self.0; - loop { - octets += 1; - - v >>= Self::USABLE_BITS_PER_BYTE; - if v == 0 { - break; - } - } - - octets + pub fn value(&self) -> u64 { + self.0 } - /// Converts the `VInt` into a byte Vec + /// Converts the `ElementId` into a byte Vec /// - /// `length` can be used to specify the number of bytes to use to write the integer. If unspecified, - /// the integer will be represented in the minimum number of bytes. + /// Unlike a [`VInt`], an `ElementId` **MUST** be encoded with the shortest possible octet length. + /// + /// * `max_length` can be used to specify the maximum number of octets the number should + /// occupy. /// /// # Errors /// - /// * `length` > 8 or `length` == 0 + /// * The octet length is greater than `max_length` (if provided) /// * Unable to write to the buffer /// /// # Examples /// /// ```rust - /// use lofty::ebml::VInt; + /// use lofty::ebml::ElementId; + /// + /// const EBML_ID: [u8; 4] = [0x1A, 0x45, 0xDF, 0xA3]; /// /// # fn main() -> lofty::error::Result<()> { - /// let vint = VInt::from_u64(10)?; - /// let bytes = vint.as_bytes(None)?; + /// let id = ElementId::parse(&mut &EBML_ID[..], 8)?; + /// let bytes = id.as_bytes(None)?; /// - /// assert_eq!(bytes, &[0b1000_1010]); + /// assert_eq!(bytes, &EBML_ID); /// # Ok(()) } /// ``` - pub fn as_bytes(&self, length: Option<u8>) -> Result<Vec<u8>> { - let octets: u8; - if let Some(length) = length { - if length > (Self::MAX_OCTET_LENGTH as u8) || length == 0 { - err!(BadVintSize); - } - - octets = length; - } else { - octets = self.octet_length() - } - - let mut ret = Vec::with_capacity(octets as usize); - - let mut val = self.value(); - - // Add the octet length - val |= 1 << (octets * (Self::USABLE_BITS_PER_BYTE as u8)); - - let mut byte_shift = (octets - 1) as i8; - while byte_shift >= 0 { - ret.write_u8((val >> (byte_shift * 8)) as u8)?; - byte_shift -= 1; - } - - Ok(ret) + pub fn as_bytes(self, max_length: Option<u8>) -> Result<Vec<u8>> { + let mut buf = Vec::with_capacity(8); + self.write_to(max_length, &mut buf)?; + Ok(buf) } - #[inline] - pub(crate) fn saturating_sub(self, other: u64) -> Self { - Self(self.0.saturating_sub(other)) - } -} - -impl Sub for VInt { - type Output = Self; - - fn sub(self, other: Self) -> Self::Output { - Self(self.0 - other.0) + // Same as writing a VInt, but we need to remove the VINT_MARKER from the value first + pub(crate) fn write_to<W: Write>(self, max_length: Option<u8>, writer: &mut W) -> Result<()> { + let mut val = self.0; + val ^= 1 << val.ilog2(); + VInt::<()>::write_to(val, None, max_length, writer)?; + Ok(()) } } -impl Add for VInt { - type Output = Self; - - fn add(self, other: Self) -> Self::Output { - let val = self.0 + other.0; - assert!(val <= Self::MAX, "VInt overflow"); - - Self(val) +impl PartialEq<u64> for ElementId { + fn eq(&self, other: &u64) -> bool { + self.0 == *other } } -impl PartialEq<u64> for VInt { - fn eq(&self, other: &u64) -> bool { - self.0 == *other +impl UpperHex for ElementId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::UpperHex::fmt(&self.0, f) } } @@ -331,7 +424,7 @@ mod tests { fn bytes_to_vint() { for representation in VALID_REPRESENTATIONS_OF_2 { assert_eq!( - VInt::parse(&mut Cursor::new(representation), 8) + VInt::<u64>::parse(&mut Cursor::new(representation), 8) .unwrap() .value(), 2 @@ -342,9 +435,10 @@ mod tests { #[test_log::test] fn vint_to_bytes() { for representation in VALID_REPRESENTATIONS_OF_2 { - let vint = VInt::parse(&mut Cursor::new(representation), 8).unwrap(); + let vint = VInt::<u64>::parse(&mut Cursor::new(representation), 8).unwrap(); assert_eq!( - vint.as_bytes(Some(representation.len() as u8)).unwrap(), + vint.as_bytes(Some(representation.len() as u8), None) + .unwrap(), representation ); } @@ -352,25 +446,26 @@ mod tests { #[test_log::test] fn large_integers_should_fail() { - assert!(VInt::from_u64(u64::MAX).is_err()); + assert!(VInt::try_from(u64::MAX).is_err()); + assert!(VInt::try_from(i64::MAX).is_err()); let mut acc = 1000; for _ in 0..16 { - assert!(VInt::from_u64(u64::MAX - acc).is_err()); + assert!(VInt::try_from(u64::MAX - acc).is_err()); acc *= 10; } } #[test_log::test] fn maximum_possible_representable_vint() { - assert!(VInt::from_u64(u64::MAX >> 8).is_ok()); + assert!(VInt::try_from(u64::MAX >> 8).is_ok()); } #[test_log::test] fn octet_lengths() { let n = u64::MAX >> 8; for i in 1u8..=7 { - assert_eq!(VInt::from_u64(n >> (i * 7)).unwrap().octet_length(), 8 - i); + assert_eq!(VInt::try_from(n >> (i * 7)).unwrap().octet_length(), 8 - i); } } } diff --git a/lofty/src/lib.rs b/lofty/src/lib.rs index 64796fbf7..986b128af 100644 --- a/lofty/src/lib.rs +++ b/lofty/src/lib.rs @@ -104,6 +104,7 @@ // proc macro hacks extern crate self as lofty; + pub(crate) mod _this_is_internal {} pub mod config; diff --git a/lofty_attr/src/lib.rs b/lofty_attr/src/lib.rs index 47daafe20..ad96a45f2 100644 --- a/lofty_attr/src/lib.rs +++ b/lofty_attr/src/lib.rs @@ -87,7 +87,7 @@ pub fn ebml_master_elements(input: TokenStream) -> TokenStream { let id = child.info.id; let data_type = &child.info.data_type; quote! { - (VInt(#id), ChildElementDescriptor { + (ElementId(#id), ChildElementDescriptor { ident: ElementIdent::#readable_ident, data_type: ElementDataType::#data_type, }) @@ -96,7 +96,7 @@ pub fn ebml_master_elements(input: TokenStream) -> TokenStream { quote! { m.insert( - VInt(#id), + ElementId(#id), MasterElement { id: ElementIdent::#readable_ident, children: &[#( #children ),*][..] @@ -119,8 +119,8 @@ pub fn ebml_master_elements(input: TokenStream) -> TokenStream { #( #ident_variants )* } - fn master_elements() -> &'static ::std::collections::HashMap<VInt, MasterElement> { - static INSTANCE: ::std::sync::OnceLock<::std::collections::HashMap<VInt, MasterElement>> = ::std::sync::OnceLock::new(); + fn master_elements() -> &'static ::std::collections::HashMap<ElementId, MasterElement> { + static INSTANCE: ::std::sync::OnceLock<::std::collections::HashMap<ElementId, MasterElement>> = ::std::sync::OnceLock::new(); INSTANCE.get_or_init(|| { let mut m = ::std::collections::HashMap::new(); #( #elements_map_inserts )*