Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Directly Exportable dec!() as Macro by Example #688

Open
daniel-pfeiffer opened this issue Oct 24, 2024 · 9 comments
Open

Directly Exportable dec!() as Macro by Example #688

daniel-pfeiffer opened this issue Oct 24, 2024 · 9 comments

Comments

@daniel-pfeiffer
Copy link

daniel-pfeiffer commented Oct 24, 2024

Since the companion macro does very little to justify a separate crate and proc-macro, I’ve sketched a solution with macro by example. For fun I also propose dec!(a123, radix) and dec!(123 >> scale).

tldr: Parsing the normal, scientific and base 36 numbers at compile time, and hence in const seems feasible. That is probably the same as doing it only with core. However this restricts error messages and does not seem compatible with the current Error.

I’ve done a little dummy Decimal with a dummy const parser (that calculates a checksum just for feasibility.) Instead of iterating over chars, it manually loops over bytes. This is good enough, as we’re only parsing ASCII chars.

Surprisingly, const fns aren’t evaluated at compile time – even when called with constant arguments. So I pack it into const {}.

mod rust_decimal {
    /// Stand-in for the real `Decimal`, just for demo: the input text, the radix it was parsed
    /// with and a checksum of the input.
    pub struct Decimal(&'static str, u8, u32);

    /// Demo error type: a fixed message.
    type Error = &'static str;

    impl Decimal {
        /// Dummy `const` parser: walks the bytes of `num` and sums up a checksum.
        ///
        /// Digits valid in `radix` contribute their value; `.`, `-`, the `b`/`o`/`x` prefix
        /// letters and `e` contribute fixed marker values; `_` and space contribute nothing.
        /// Any other byte yields `Err("Malformed number")`.
        ///
        /// `num` is `'static` only so it can be stored for `dbg`.
        pub const fn try_parse_with_radix(num: &'static str, radix: u32) -> Result<Self, Error> {
            let mut checksum: u32 = 0;
            let mut pending = num.as_bytes();
            // No iterators in `const`, so peel off one byte at a time.
            while let [byte, tail @ ..] = pending {
                let ch = *byte as char;
                checksum += match ch.to_digit(radix) {
                    Some(value) => value,
                    None => match ch {
                        '.' => 37,
                        '-' => 38,
                        'b' | 'o' | 'x' => 39,
                        'e' => 40,
                        // `_` separates digits; stringify!(-$num) inserts a space
                        '_' | ' ' => 0,
                        _ => return Err("Malformed number"),
                    },
                };
                pending = tail;
            }
            Ok(Decimal(num, radix as u8, checksum))
        }

        /// Parse without an explicit radix.
        ///
        /// Must handle scientific notation when found, as `num.contains(['e', 'E'])` is not
        /// `const`. Uses radix 16 for now, which is wrong, as `0x` is not parsed yet.
        pub const fn try_parse(num: &'static str) -> Result<Self, Error> {
            Decimal::try_parse_with_radix(num, 16)
        }

        /// Dummy implementation: smuggles `scale` through as radix `scale + 30`.
        pub const fn try_parse_with_scale(num: &'static str, scale: u32) -> Result<Self, Error> {
            let fake_radix = scale + 30;
            Decimal::try_parse_with_radix(num, fake_radix)
        }

        /// Print checksum, radix and original input on one line.
        pub fn dbg(&self) {
            let Decimal(input, radix, checksum) = self;
            println!("sum {:3}, radix {:2}, input \"{}\"", checksum, radix, input);
        }
    }
}

// Sketch of `dec!()` as a macro by example. Every public arm stringifies the number tokens and
// forwards them, via the `@` helper arm, to one of the `const` parsers on
// `rust_decimal::Decimal`. Arms are tried top to bottom, so their order matters.
macro_rules! dec {
    // inner helper
    // Wraps the parser call in `const {}` so it is evaluated at compile time, and unwraps the
    // result.
    (@ $dec:expr) => {
        const {
            match $dec {
                Ok(dec) => dec,
                // No args. All errors need to be formulated here.
                _ => panic!("Malformed number")
            }
        }
    };

    // Shell-like `radix#number` syntax.
    // While 1a is a literal, a1 is an ident. Negation is for num, so get it explicitly.
    (- $radix:literal # $num:ident) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!(-$num), $radix))
    };
    ($radix:literal # $num:ident) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!($num), $radix))
    };
    (- $radix:literal # $num:literal) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!(-$num), $radix))
    };
    ($radix:literal # $num:literal) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!($num), $radix))
    };

    // `number >> scale`: decimal shift right, handed to the (dummy) scale parser.
    ($num:literal >> $scale:literal) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_scale(stringify!($num), $scale))
    };

    // `radix: number` syntax, optionally followed by `>>> scale`.
    ($radix:literal: $num:ident) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!($num), $radix))
    };
    ($radix:literal: -$num:ident) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!(-$num), $radix))
    };
    // The `>>> scale` arms only parse a placeholder identifier; `$num` and `$scale` are matched
    // but not used yet.
    ($radix:literal: $num:ident >>> $scale:literal) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!(scale_not_implemented_yet), $radix))
    };
    ($radix:literal: -$num:ident >>> $scale:literal) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!(-scale_not_implemented_yet), $radix))
    };
    ($radix:literal: $num:literal) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!($num), $radix))
    };
    ($radix:literal: $num:literal >>> $scale:literal) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!(scale_not_implemented_yet), $radix))
    };

    // `radix#number >> scale`, negative ident form only; likewise a placeholder.
    (- $radix:literal # $num:ident >> $scale:literal) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!(-scale_also_not_implemented_yet), $radix))
    };
    /* Old idea doesn't combine well with >>
    ($num:ident, $radix:literal) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!($num), $radix))
    };
    (- $num:ident, $radix:literal) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!(-$num), $radix))
    };
    ($num:literal, $radix:literal) => {
        dec!(@ rust_decimal::Decimal::try_parse_with_radix(stringify!($num), $radix))
    }; */

    // Plain literal without radix or scale; comes last as the fallback.
    ($num:literal) => {
        dec!(@ rust_decimal::Decimal::try_parse(stringify!($num)))
    };
}

// Demo driver: feeds every syntax accepted by the `dec!()` sketch through the macro and prints
// what the dummy parser stored for it, grouped under a headline per notation.
fn _main() {
    println!("Integers in various bases:");
    dec!(1).dbg();
    dec!(-1).dbg();
    dec!(0b10).dbg();
    dec!(-0b10).dbg();
    dec!(0o755).dbg();
    dec!(-0o755).dbg();
    dec!(0x1f).dbg();
    dec!(-0x1f).dbg();

    println!("\nScientific notation:");
    dec!(1e6).dbg();
    dec!(-1e6).dbg();
    dec!(1e-6).dbg();
    dec!(-1e-6).dbg();

    println!("\nFloat-like notation:");
    dec!(1.2).dbg();
    dec!(-1.2).dbg();
    dec!(1.2e6).dbg();
    dec!(-1.2e6).dbg();
    dec!(1.2e-6).dbg();
    dec!(-1.2e-6).dbg();

    println!("\nStructured notation:");
    dec!(1_000_000_000).dbg();
    dec!(1_000.200_000).dbg();
    dec!(-1_000.200_000).dbg();

    println!("\nDecimal shift right (π, as demo abuse radix for scale):");
    dec!(314159 >> 5).dbg();

    // These exercise the `radix: number` arms; the `>>>` variants only hit placeholder arms.
    println!("\nNew radix: number syntax with >>> decimal shift right:");
    dec!(36: z1).dbg();
    dec!(36: z1 >>> 5).dbg();
    dec!(36: -z1).dbg();
    dec!(36: -z1 >>> 5).dbg();
    dec!(36: 1z).dbg();
    dec!(36: 1z >>> 5).dbg();
    dec!(36: -1z).dbg();
    dec!(36: -1z >>> 5).dbg();

    println!("\nShell radix#number syntax:");
    dec!(2#10).dbg();
    dec!(-2#10).dbg();
    dec!(8#755).dbg();
    dec!(-8#755).dbg();
    dec!(16#f1).dbg();
    dec!(-16#1f).dbg();
    dec!(36#1z).dbg();
    dec!(-36#z1).dbg();

    println!("\nShell radix#number syntax with decimal shift right:");
    dec!(-36#z1 >> 5).dbg();
    // Calls for the old `number, radix` syntax, whose macro arms are commented out.
    /* dec!(1a, 16).dbg();
    dec!(-1a, 36).dbg();
    dec!(a1, 11).dbg();
    dec!(-a1, 16).dbg(); */
}
@Tony-Samuels
Copy link
Collaborator

I guess it probably is possible and I'd happily take a look at an MR that implemented it. Even if it's not super efficient, it would probably still be faster than building a proc-macro.

@Tony-Samuels
Copy link
Collaborator

As for version compat, we promise 4 minor versions, so right now we need to provide support for 1.78+. Once 1.83 is released though, we'll be able to move to 1.79+; so any MR proposing using newer syntax can still be raised, it'll just have to wait before it can be merged.

@daniel-pfeiffer
Copy link
Author

The macro itself is almost 0-cost. The parser will be highly efficient. Haven’t looked at splitting into 3 parts yet, but it’ll be at least as good as what you do now.

Is there any plan for Decimal<const PARTS: usize = 3> { parts: [u32; PARTS] … }? While I’m at it, I could consider generalising the parser, even if for now the output would still be non-generic.

In const I can only produce empty Error::ConversionTo(String::new()), so that’s neither nice nor a showstopper. In the V2.x future, for meaningful errors, the parser should return differentiated Error::RadixOutOfBounds(u32) or Error::BadByteAt(usize). The macro can then turn these into nice messages.

I have updated my example. Since the discussion on IRLO about Rust syntax for arbitrary bases is going nowhere, I came up with a new syntax idea, with both radix and scale being optional: dec!(radix: num <<< scale)/ dec!(radix: num >>> scale). I propose decimal left/right shift to have a fatter op than binary, but I can also do <</ >>.

@Tony-Samuels
Copy link
Collaborator

There's been discussion over a const generic Decimal, but I wouldn't start doing any work around it yet.

For dec! it's reasonable to panic imo on error, given that we're enforcing this as a compile-time macro via const { ... } there's no way to handle the error anyway.

As for radix/scale, I'd be open to improvements, but it's probably simplest for a replacement macro to just be a drop-in to start; and it can be extended later down the line.

@daniel-pfeiffer
Copy link
Author

daniel-pfeiffer commented Nov 20, 2024

Edit: Kinda finished.

I have added radix, because, being needed for parsing 0b, 0o & 0x, that was easy to generalise and makes dec!() more versatile. For consistency I have opted for the same syntax that the logging macros have for an optional target.

For the same reasons, I have also added scale. But I have avoided that fishy word, which I frankly find confusing. Of its many meanings, you use the one that’s not obvious to me. It would be, if it were scale_down_by_factor_10 (or at least scale_by to show you mean the verb.) What you’re doing here is called shifting in programming. Only you shift decimal digits, not bits. (That you do it symbolically, by storing the shift for later, is an implementation detail.) I wish you’d just say: “we’re Decimal, so we shift by digits.” If you implemented the shift traits that way, I’d gladly use << & >>. Till that’s decided I disambiguate with pseudo operators <<< & >>>. Another way to look at it is that this is exactly the same as float with exponent (what you call scientific.) Except you flip the sign.

So here’s what works:

    // Usage listing (body of a demo function): every input form the reworked `dec!()` accepts,
    // each printed through the dummy `Decimal::dbg`.
    println!("Integers in various bases:");
    rust_decimal::Decimal::ZERO.dbg();
    rust_decimal::Decimal::ONE.dbg();
    rust_decimal::Decimal::TWO.dbg();
    dec!(1).dbg();
    dec!(+1).dbg();
    dec!(-1).dbg();
    dec!(0b10).dbg();
    dec!(+0b10).dbg();
    dec!(-0b10).dbg();
    dec!(0o755).dbg();
    dec!(-0o755).dbg();
    dec!(0x1f).dbg();
    dec!(-0x1f).dbg();

    // `>>> n` shifts the decimal point n digits to the left (scale n).
    println!("\nDecimal shift right (π, as demo):");
    dec!(314159 >>> 5).dbg();

    println!("\nNew radix: number syntax with >>> decimal shift right:");
    dec!(radix: 4, 33).dbg();
    dec!(radix: 4, 33 >>> 5).dbg(); // or better: dec!(radix: 4, exp: -5, 33) or dec!(radix: 4, tens_exp: -5, 33)?
    dec!(radix: 36, z1).dbg();
    dec!(radix: 36, z1 >>> 5).dbg();
    dec!(radix: 36, -z1).dbg();
    dec!(radix: 36, +z1).dbg();
    dec!(radix: 36, -z1 <<< 5).dbg(); // or better: dec!(radix: 36, exp: 5, -z1) or dec!(radix: 36, tens_exp: 5, -z1)?
    dec!(radix: 36, 1z).dbg();
    dec!(radix: 36, 1z >>> 5).dbg();
    dec!(radix: 36, -1z).dbg();
    dec!(radix: 36, -1z >>> 5).dbg();

    println!("\nStructured notation:");
    dec!(1_000_000_000).dbg();
    dec!(1_000.200_000).dbg();
    dec!(-1_000.200_000).dbg();

    println!("\nScientific notation:");
    dec!(1e6).dbg();
    dec!(-1e6).dbg();
    dec!(1e-6).dbg();
    dec!(-1e-6).dbg();

    println!("\nFloat-like notation:");
    dec!(1.2).dbg();
    dec!(-1.2).dbg();
    dec!(1.2e6).dbg();
    dec!(1.2e+6).dbg();
    dec!(+1.2e+6).dbg();
    dec!(-1.2e6).dbg();
    dec!(1.2e-6).dbg();
    dec!(-1.2e-6).dbg();
// str.rs

use crate::Decimal;

// Public entry point. Peels off the optional `radix: …,` option and the optional sign, then
// forwards to `dec_inner!` in the form `% [radix,] is_positive rest…`, where `is_positive` is
// the token `true` or `false`. Arms are tried top to bottom.
#[macro_export]
macro_rules! dec {
    // explicit minus: number is negative
    ($(radix: $radix:expr,)? - $($rest:tt)+) => {
        $crate::dec_inner!(% $($radix,)? false $($rest)+)
    };
    // Can’t unify next 3 like above, as $(…)? $(+)? $($rest:tt)+ would be twice ambiguous.
    // explicit plus: number is positive
    ($(radix: $radix:expr,)? + $($rest:tt)+) => {
        $crate::dec_inner!(% $($radix,)? true $($rest)+)
    };
    // radix without sign: positive
    (radix: $radix:expr, $($rest:tt)+) => {
        $crate::dec_inner!(% $radix, true $($rest)+)
    };
    // neither radix nor sign: positive
    ($($rest:tt)+) => {
        $crate::dec_inner!(% true $($rest)+)
    };
}

// Implementation detail of `dec!`. The `%` arms pick the parser function and build its argument
// list, the `@` arms translate the optional shift into a signed scale, and the first arm emits
// the final `const {}` block that calls the parser and maps its `MacroResult` to a value or a
// panic.
#[macro_export]
#[doc(hidden)]
macro_rules! dec_inner {
    // The actual created code
    // `$fn` is the parser in `$crate::str`, `$args` its parenthesised argument list.
    ($fn:ident $args:tt) => {
        const {
            use $crate::str::MacroResult::*;
            match $crate::str::$fn$args {
                Ok(dec) => dec,
                // Putting the panics into the macro expansion reports the right file & line.
                Empty => panic!("number is empty, must have an integer part"),
                FractionEmpty => panic!("consider adding a `0` after the period"),
                InvalidRadix(_) => panic!("invalid radix -- radix must be in the range 2 to 36 inclusive"),
                ExceedsMaximumPossibleValue => panic!("number too big"),
                LessThanMinimumPossibleValue => panic!("number too small"),
                ScaleExceedsMaximumPrecision(_) => panic!("scale (shift) too big"),
                // The messages are fixed strings, so the common offending bytes get an arm each.
                Unparseable(b'.') => panic!("cannot parse decimal, unexpected `.`"),
                Unparseable(b'+') => panic!("cannot parse decimal, unexpected `+`"),
                Unparseable(b'-') => panic!("cannot parse decimal, unexpected `-`"),
                Unparseable(b'e' | b'E') => panic!("cannot parse decimal, unexpected `e` or `E`"),
                _ => panic!("cannot parse decimal, unexpected character"),
            }
        }
    };

    // Helpers for parsing after or without radix
    // `$pos` is the `true`/`false` sign token, `$num` the single number token, and `$rest` an
    // optional `>>> scale` / `<<< scale`.
    (% $radix:expr, $pos:ident $num:tt $($rest:tt)*) => {
        $crate::dec_inner!(parse_radix_dec($radix, $pos, stringify!($num), $crate::dec_inner!(@ $($rest)*)))
    };
    (% $pos:ident $num:tt $($rest:tt)*) => {
        $crate::dec_inner!(parse_dec($pos, stringify!($num), $crate::dec_inner!(@ $($rest)*)))
    };

    // Helpers for shifting (scale)
    // no shift given -> scale 0; shift right -> positive scale; shift left -> negative scale
    (@) => { 0 };
    (@ >>> $scale:expr) => { $scale };
    (@ <<< $scale:expr) => { -($scale) };
}

// workaround for `Result<…String…>` not being droppable in `const {}`
//
// Outcome of the `const` parsers behind `dec!()`: either the parsed `Decimal` or the reason the
// input was rejected. `dec_inner!` turns every variant other than `Ok` into a panic.
#[doc(hidden)]
pub enum MacroResult {
    // successfully parsed value
    Ok(Decimal),
    // no digit found where the number has to start
    Empty,
    // a `.` followed by no digit, but by further input (such as an exponent)
    FractionEmpty,
    // the given radix, which is outside 2..=36
    InvalidRadix(u32),
    // shifting the number left overflowed
    ExceedsMaximumPossibleValue,
    // NOTE(review): not produced by the parsers visible in this file
    LessThanMinimumPossibleValue,
    // the scale that was rejected (or a dummy value when it wasn't storable)
    ScaleExceedsMaximumPrecision(i32),
    // the first byte that could not be parsed
    Unparseable(u8),
}

// Accumulate the digits of `src`, read in `radix`, onto `num`, skipping `_` separators.
//
// Can’t use `from_str_radix`, as that neither groks '_', nor allows to continue after '.' or 'e'.
// For multi-step parsing (see test) the returned number can be fed back in as `num`, which is
// how integer and fraction digits get joined.
//
// Returns: number parsed, digits count, (offending byte, rest). The last part is `None` when
// all of `src` was consumed, otherwise it holds the first byte that is neither a digit in
// `radix` nor `_`, along with the bytes after it.
//
// num saturates at i128::MAX, which is currently not a valid Decimal. The digit count saturates
// at u8::MAX, so absurdly long input (e.g. hundreds of leading zeros) neither panics on
// overflow nor wraps around to look empty.
//
// `radix` must not be larger than 36, as `char::to_digit` panics on that.
const fn parse_bytes_inner(
    radix: u32,
    src: &[u8],
    mut num: i128,
) -> (i128, u8, Option<(u8, &[u8])>) {
    let mut count: u8 = 0;
    let mut next = src;
    // No iterators in `const`, so peel off one byte at a time.
    while let [byte, rest @ ..] = next {
        if let Some(digit) = (*byte as char).to_digit(radix) {
            count = count.saturating_add(1);
            num = num
                .saturating_mul(radix as i128)
                .saturating_add(digit as i128);
        } else if *byte != b'_' {
            return (num, count, Some((*byte, rest)));
        }
        next = rest;
    }
    (num, count, None)
}

// Build the `Decimal` from sign, magnitude and a bi-directional scale.
//
// The rest of the lib only knows scaling down (unsigned scale), so a negative `scale`, i.e. a
// shift left, is applied to the number right here and the scale reset to 0.
const fn to_decimal(is_positive: bool, num: i128, scale: i32) -> MacroResult {
    // todo, put the const for max possible shift left
    if scale < -28 {
        return MacroResult::ScaleExceedsMaximumPrecision(scale);
    }

    let (magnitude, scale_down) = if scale < 0 {
        // shift left by multiplying with the matching power of ten
        match num.checked_mul(10_i128.pow(scale.unsigned_abs())) {
            Some(shifted) => (shifted, 0),
            None => return MacroResult::ExceedsMaximumPossibleValue,
        }
    } else {
        (num, scale)
    };

    let signed = if is_positive { magnitude } else { -magnitude };
    match Decimal::try_from_i128_with_scale(signed, scale_down as u32) {
        // todo deal with all Errs that can come from the real fn
        Err(()) => MacroResult::ScaleExceedsMaximumPrecision(scale_down),
        Ok(dec) => MacroResult::Ok(dec),
    }
}

// Parse an integer (no fraction, no exponent) in the given radix and turn it into a Decimal.
// Any byte that is neither digit nor `_` is reported before an empty number is.
const fn parse_bytes(radix: u32, is_positive: bool, src: &[u8], scale: i32) -> MacroResult {
    let (num, digits, leftover) = parse_bytes_inner(radix, src, 0);
    if let Some((byte, _)) = leftover {
        return MacroResult::Unparseable(byte);
    }
    if digits == 0 {
        return MacroResult::Empty;
    }
    to_decimal(is_positive, num, scale)
}

// parse normal (radix 10) numbers with optional float-like .fraction and 10’s exponent
//
// `src` is the number without its sign, which comes in as `is_positive`. `scale` is the shift
// requested by the caller; every fraction digit adds one to it, a positive exponent is
// subtracted from it and a negative exponent is added to it.
//
// Returns `Empty` if there is no digit before the optional `.` or `e`, `FractionEmpty` if a `.`
// is followed by no digit but by further input, `Unparseable(byte)` for the first byte that
// fits nowhere and `ScaleExceedsMaximumPrecision` if exponent or combined scale don't fit an
// `i32`. Anything else is passed on to `to_decimal`.
const fn parse_10(is_positive: bool, src: &[u8], mut scale: i32) -> MacroResult {
    // parse 1st part (upto optional . or e)
    let (mut num, len, mut more) = parse_bytes_inner(10, src, 0);
    // Numbers can’t be empty (before optional . or e)
    if len == 0 {
        return MacroResult::Empty;
    }

    // parse optional fraction
    if let Some((b'.', rest)) = more {
        // seeding with `num` appends the fraction digits to the integer digits
        let (whole_num, extra_scale, after_fraction) = parse_bytes_inner(10, rest, num);
        more = after_fraction;
        // May only be empty if no exp
        if extra_scale == 0 && more.is_some() {
            return MacroResult::FractionEmpty;
        }
        num = whole_num;
        // checked, so a huge caller-supplied scale gives a proper error rather than an overflow
        let Some(widened) = scale.checked_add(extra_scale as i32) else {
            return MacroResult::ScaleExceedsMaximumPrecision(i32::MAX);
        };
        scale = widened;
    }

    // parse optional 10’s exponent
    if let Some((b'e' | b'E', mut rest)) = more {
        let exp_is_positive = if let [sign @ b'-' | sign @ b'+', signed @ ..] = rest {
            rest = signed;
            *sign == b'+'
        } else {
            true
        };
        // if this gives Some more, we’ll return that below
        let (exp, _, after_exp) = parse_bytes_inner(10, rest, 0);
        more = after_exp;
        // dummy value, more than MAX not storable
        if exp > i32::MAX as i128 {
            return MacroResult::ScaleExceedsMaximumPrecision(i32::MAX);
        }
        // lossless after the check above; exp is never negative
        let exp = exp as i32;
        // scale has flipped sign; checked, as e.g. `1.5e-2147483647` would overflow otherwise
        let adjusted = if exp_is_positive {
            scale.checked_sub(exp)
        } else {
            scale.checked_add(exp)
        };
        let Some(adjusted) = adjusted else {
            // dummy value again, the true scale is not storable
            return MacroResult::ScaleExceedsMaximumPrecision(if exp_is_positive {
                i32::MIN
            } else {
                i32::MAX
            });
        };
        scale = adjusted;
    }

    if let Some((byte, _)) = more {
        MacroResult::Unparseable(byte)
    } else {
        to_decimal(is_positive, num, scale)
    }
}

// dec!() entrypoint with radix
//
// Checks that `radix` is within 2 to 36 inclusive before parsing `src` as an integer in that
// radix; anything else is reported as `InvalidRadix`.
pub const fn parse_radix_dec(radix: u32, is_positive: bool, src: &str, scale: i32) -> MacroResult {
    match radix {
        2..=36 => parse_bytes(radix, is_positive, src.as_bytes(), scale),
        _ => MacroResult::InvalidRadix(radix),
    }
}

// dec!() entrypoint without radix
//
// A leading `0b`, `0o` or `0x` selects an integer in radix 2, 8 or 16 (the prefix itself is
// dropped); everything else is parsed as a radix 10 number, which may be float-like.
pub const fn parse_dec(is_positive: bool, src: &str, scale: i32) -> MacroResult {
    let bytes = src.as_bytes();
    if let [b'0', marker, digits @ ..] = bytes {
        // 10 doubles as "no prefix found"
        let radix = match *marker {
            b'b' => 2,
            b'o' => 8,
            b'x' => 16,
            _ => 10,
        };
        if radix != 10 {
            return parse_bytes(radix, is_positive, digits, scale);
        }
    }
    parse_10(is_positive, bytes, scale)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src` from scratch (seed 0) and keep only the resulting number.
    fn number_of(radix: u32, src: &[u8]) -> i128 {
        parse_bytes_inner(radix, src, 0).0
    }

    #[test]
    pub fn test_parse_bytes_inner() {
        // the usual radices, plus the biggest one with a `_` separator
        assert_eq!(number_of(2, b"111"), 0b111);
        assert_eq!(number_of(8, b"177"), 0o177);
        assert_eq!(number_of(10, b"199"), 199);
        assert_eq!(number_of(16, b"1ff"), 0x1ff);
        assert_eq!(
            number_of(36, b"1_zzz"),
            i128::from_str_radix("1zzz", 36).unwrap()
        );

        // right below and at the upper limit
        assert_eq!(
            number_of(16, b"7fff_ffff_ffff_ffff_ffff_ffff_ffff_fffE"),
            i128::MAX - 1
        );
        assert_eq!(
            number_of(16, b"7fff_ffff_ffff_ffff_ffff_ffff_ffff_fffF"),
            i128::MAX
        );
        // must saturate at MAX
        assert_eq!(
            number_of(16, b"Ffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff"),
            i128::MAX
        );

        // Assemble floaty: number parsed, digits count, (offending byte, rest)
        let integer_part = parse_bytes_inner(10, b"01_234.567_8", 0);
        assert_eq!(integer_part, (1234, 5, Some((b'.', &b"567_8"[..]))));
        // … and feed it back in, to get whole number & scale
        let with_fraction = parse_bytes_inner(10, b"567_8", 1234);
        assert_eq!(with_fraction, (12345678, 4, None));
    }
}

Here’s a dummy Decimal::try_from_i128_with_scale for later plugging it into the real thing.

// decimal.rs

/// Minimal stand-in for the real `Decimal`, just for testing: the number and its scale.
pub struct Decimal(i128, u32);

impl Decimal {
    // Are these still useful?
    pub const ZERO: Self = crate::dec!(0);
    pub const ONE: Self = crate::dec!(1);
    pub const NEGATIVE_ONE: Self = crate::dec!(-1);
    pub const TWO: Self = crate::dec!(2);
    pub const TEN: Self = crate::dec!(10);
    pub const ONE_HUNDRED: Self = crate::dec!(100);
    pub const ONE_THOUSAND: Self = crate::dec!(1000);

    // Two helper fns follow.

    /// Placeholder for the real constructor: stores `num` and `scale` unchecked and never fails.
    pub const fn try_from_i128_with_scale(num: i128, scale: u32) -> Result<Self, ()> {
        Ok(Decimal(num, scale))
    }

    /// Print the stored number and scale on one line.
    pub fn dbg(&self) {
        let Decimal(num, scale) = self;
        println!("{:12}, scale {:3}", num, scale);
    }
}

@daniel-pfeiffer
Copy link
Author

daniel-pfeiffer commented Nov 23, 2024

Now that this is “finished”, there are a few loose ends (besides more tests and doc comments:)

  1. BIG: the more specific arithmetic fns, like checked_mul don’t have a trait, partially making generics as useless as Java’s. And even if they did, trait methods still can’t be const. The only way to use the smallest fitting size, would be to replicate all these functions (by macro.) But even then, my current 1-pass parser (for the string parsing – Rust already went over it and tokenised) would find the 10’s exponent too late. But that might require a wider type. This brings me back to my original idea: parse into [u32; 3]. But not sure how to implement that, so it’s not around the corner.

  2. ROUND: this doesn’t seem to be documented on any of your high level parsing fns, so not sure what you’re trying to achieve. Nor why it’s done only in some cases. For now, I don’t do that. On a related topic, it would be easy to eliminate trailing fractional 0s. Not sure if you do that?

  3. Your constants would become much prettier, if converted like I show. But, even if my macro is rather efficient, it can’t beat the hand-crafted values you provide. Not sure if it would be measurable compilation-time wise. If you think so, it might be better to put the macro variants only into a test, which would check for identical results.

  4. I have enhanced the panics, but without details they can still be mysterious. I might add a non-const try_dec!() that returns Result<Decimal>. Adding a hint that this is just a debugging helper. Or dec!(DEBUG …) could produce similar code without const, but with parametrised panics – alas only at run time.

  5. Rust macros are wonderful when fed correctly. But they don’t report the one place where the syntax error is. Rather they then complain that they can’t match a whole thing. Therefore I’d move the whole number parsing into the functions, including the optional sign. (This would be necessary anyway, if this is to replace your old parser.) To achieve that, I’d have to switch to the alternate syntax suggested above: dec!(12e5) == dec!(exp: 5, 0xc) == dec!(radix: 2, exp: 5, 1100)

@Tony-Samuels
Copy link
Collaborator

Tony-Samuels commented Nov 26, 2024

I think at this point it might be worth a review by paupino. Can you raise an MR and carry the questions across to it?

@daniel-pfeiffer
Copy link
Author

I’ve discovered a problem – super easy to fix with rustc telling you how, but nonetheless a small semver breakage. Users should follow your documented way of explicitly using rust_decimal_macros::dec. If they don’t, instead saving 2 chars by wildcarding it and also wildcarding the upcoming rust_decimal::* (in either order) this happens:

error[E0659]: `dec` is ambiguous
  --> src/main.rs:36:5
   |
36 |     dec!(1);
   |     ^^^ ambiguous name
   |
   = note: ambiguous because of multiple glob imports of a name in the same module
note: `dec` could refer to the macro imported here
  --> src/main.rs:8:5
   |
8  | use rust_decimal::*;
   |     ^^^^^^^^^^^^^^^
   = help: consider adding an explicit import of `dec` to disambiguate
note: `dec` could also refer to the macro imported here
  --> src/main.rs:9:5
   |
9  | use rust_decimal_macros::*;
   |     ^^^^^^^^^^^^^^^^^^^
   = help: consider adding an explicit import of `dec` to disambiguate

Since the announcement would encourage users to drop rust_decimal_macros, I see this as minor. And should they have any reason to continue using the old macro, all they need to do is use it explicitly by name. This is backward compatible, should they fall back to rust_decimal for another reason. Just in case, the announcement might mention this.

@daniel-pfeiffer
Copy link
Author

daniel-pfeiffer commented Nov 30, 2024

@paupino This is not quite ready for a PR, pending your nod off. To sum up the previous discussion, and fixing some worries, this is now a feature complete, very versatile drop in for your proc-macro. It is completely compile-time, even slightly more so than the old one.

Efficiency

I am wondering, whether all your manipulation of multiple u32s can be efficient. Granted, i128 on a smaller architecture probably does the same thing, but at least only in 2 steps, and hopefully very optimised. The following variants should be benchmarked across all operations. Ideally the optimal representation should then be chosen by me and everywhere, to avoid conversions.

  1. With variable sized Int<96> etc. under discussion, the optimal future might be an improved range and precision Decimal { exp: i16, bits: Int<112> } or whatever distribution you like. This can already be done now, by repurposing the top bits of i128 for exp (signed hopefully!) Any unshifted number (incl. 0) would just stand for itself. Otherwise exp needs to be copied out and the sign extended over it (all 0s or all 1s) to do arithmetic. Afterwards exp would be put back in. If this is viable, my parser is almost already there!

  2. The old parser being generic over BIG seems currently impossible in const. If you want that, it would mean duplicating my functions. And a non-radix number would have to first be parsed backwards, to see if it is float-like with e exp, which might influence the needed size. Feasible but a bit messy…

  3. If you want to stick with u32s, the parser could use a constified variant of ops::array::mul_by_10 with overflow seeded with the digit to shift in. However it seems rather expensive to always extend to the next bigger size, and resplit it, when an overflow is probably the exception. That behaviour could be gated with the following array of limits up to which a plain * radix + digit can’t overflow:

/// For every radix, the biggest `u32` for which a plain `* radix + digit` still can’t overflow,
/// whatever the digit. Indexed by the radix itself, so entries 0 & 1 are wasted and stay 0.
const NO_OVERFLOW: [u32; 37] = {
    let mut limits = [0_u32; 37];
    let mut base: u32 = 2;
    // no iter in const, hence the manual loop over all radices up to 36
    while base <= 36 {
        let biggest_digit = base - 1;
        limits[base as usize] = (u32::MAX - biggest_digit) / base;
        base += 1;
    }
    limits
};

Implementation

There is currently no rounding, as that should probably not happen in precise source code. Later, when also using this to parse numbers from various sources at run-time, rounding can be added. Option syntax is easy to change, if you prefer ’=’ or ’;’.

use crate::Decimal;

/// Transform a literal number directly to a `Decimal` at compile time. Any Rust number format works.
///
/// - `dec!(1)`, `dec!(-1)`, `dec!(1_999)`, `dec!(- 1_999)`
/// - `dec!(0b1)`, `dec!(-0b1_1111)`, `dec!(0o1)`, `dec!(-0o1_777)`, `dec!(0x1)`, `dec!(-0x1_Ffff)`
/// - `dec!(1.)`, `dec!(-1.111_009)`, `dec!(1e6)`, `dec!(-1.2e+6)`, `dec!(12e-6)`, `dec!(-1.2e-6)`
///
/// ### Option `radix:`
///
/// You can give it integers (not float-like) in any radix from 2 to 36 inclusive, using the letters too:
/// `dec!(radix: 2, 100) == 4`, `dec!(radix: 3, -1_222) == -53`, `dec!(radix: 36, z1) == 1261`,
/// `dec!(radix: 36, -1_xyz) == -90683`
///
/// ### Option `exp:`
///
/// This is the same as the `e` 10’s exponent in float syntax (except as a Rust expression it doesn’t accept
/// a unary `+`.) You need this for other radices. Currently it must be between -28 and +28 inclusive:
/// `dec!(radix: 2, exp: 5, 10) == 200_000`, `dec!(exp: -3, radix: 8, -1_777) == dec!(-1.023)`
///
/// ### Inner attribute `#![run_time]`
///
/// Normally this macro is performed at compile time. Alas that only allows limited error messages. If you
/// put this attribute before any other parameters, evaluation is deferred. This gives richer error messages,
/// useful if you don’t understand why your input is not being accepted.
///
/// Furthermore the expressions you pass to the options, which are normally `const`, become dynamic:
/// `dec!(#![run_time] radix: my_radix, exp: my_exp, 10)`
///
/// ### Panics
///
/// Input that is not accepted makes the expansion panic; without `#![run_time]` that happens
/// while compiling.
#[macro_export]
macro_rules! dec {
    // Both arms hand over to `dec_inner!` with an option accumulator `[run_time?, radix?, exp?]`
    // in front of the untouched input; only the first slot can be filled here.
    (#![run_time] $($rest:tt)+) => {
        $crate::dec_inner!([run_time, , ] $($rest)+)
    };
    ($($rest:tt)+) => {
        $crate::dec_inner!([ , , ] $($rest)+)
    };
}

// Implementation detail of `dec!`. Works in three stages: collect the options into the
// accumulator `[run_time?, radix?, exp?]`, pick the parser function for the remaining tokens,
// and emit the block that calls it and maps the `MacroResult` to a value or a panic. Arms are
// tried top to bottom.
#[macro_export]
#[doc(hidden)]
macro_rules! dec_inner {
    // TT muncher of options
    // `radix:` is only taken while the radix slot is still empty.
    ([$($run_time:ident)?, , $($exp:expr)?] radix: $radix:expr, $($rest:tt)+) => {
        $crate::dec_inner!([$($run_time)?, $radix, $($exp)?] $($rest)+)
    };
    // `exp:` is only taken while the exp slot is still empty.
    ([$($run_time:ident)?, $($radix:expr)?, ] exp: $exp:expr, $($rest:tt)+) => {
        $crate::dec_inner!([$($run_time)?, $($radix)?, $exp] $($rest)+)
    };
    // No (further) option while the exp slot is empty: default exp to 0.
    ([$($run_time:ident)?, $($radix:expr)?, ] $($rest:tt)+) => {
        $crate::dec_inner!([$($run_time)?, $($radix)?, 0] $($rest)+)
    };

    // TT munching finished
    // The remaining tokens, including any sign, are stringified as a whole for the parser.
    ([$($run_time:ident)?, , $exp:expr] $($rest:tt)+) => {
        $crate::dec_inner!($($run_time)? parse_dec(stringify!($($rest)+), $exp))
    };
    ([$($run_time:ident)?, $radix:expr, $exp:expr] $($rest:tt)+) => {
        $crate::dec_inner!($($run_time)? parse_radix_dec($radix, stringify!($($rest)+), $exp))
    };

    // Intermediate step under run_time
    // No `const` is passed on, so the panics may format the offending values.
    (run_time $fn:ident $args:tt) => {
        $crate::dec_inner!(@ @ $fn $args;
            InvalidRadix(r) => panic!("invalid radix {r} -- radix must be in the range 2 to 36 inclusive"),
            InvalidExp(s) => panic!("exp {s} out of bounds"),
            Unparseable(b) => if b < 128 {
                    panic!("cannot parse decimal, unexpected {:?}",
                        char::from_u32(b as u32).unwrap())
                } else {
                    panic!("cannot parse decimal, unexpected non-Ascii 1st byte '\\x{b:x}'")
                }
        )
    };
    // Intermediate step compile-time
    // Passes `const` on; the messages are fixed strings, with an arm each for common bytes.
    ($fn:ident $args:tt) => {
        $crate::dec_inner!(@ const @ $fn $args;
            InvalidRadix(_) => panic!("invalid radix -- radix must be in the range 2 to 36 inclusive"),
            InvalidExp(_) => panic!("exp out of bounds"),
            Unparseable(b'.') => panic!("cannot parse decimal, unexpected '.'"),
            Unparseable(b'+') => panic!("cannot parse decimal, unexpected '+'"),
            Unparseable(b'-') => panic!("cannot parse decimal, unexpected '-'"),
            Unparseable(b'e' | b'E') => panic!("cannot parse decimal, unexpected 'e' or 'E'"),
            Unparseable(b' ' | b'\t' | b'\n') => panic!("cannot parse decimal, unexpected white space"),
            _ => panic!("cannot parse decimal, unexpected character")
        )
    };
    // The actual created code
    // `$const` is either `const` or absent, giving a `const {}` or a plain block. `$panic` holds
    // the match arms supplied by the intermediate step above.
    (@ $($const:ident)? @ $fn:ident $args:tt; $($panic:tt)+) => {
        $($const)? {
            use $crate::str::MacroResult::*;
            match $crate::str::$fn$args {
                Ok(dec) => dec,
                // Putting the panics into the macro expansion reports the right file & line.
                Empty => panic!("number is empty, must have an integer part"),
                FractionEmpty => panic!("consider adding a `0` after the period"),
                ExceedsMaximumPossibleValue => panic!("number too big"),
                LessThanMinimumPossibleValue => panic!("number too small"),
                $($panic)+
            }
        }
    };
}

// workaround for `Result<…String…>` not being droppable in `const {}`
/// Outcome of the `const` parsers that back the `dec!()` macro.
///
/// The macro matches on this value and turns every non-`Ok` variant into a
/// `panic!` at the expansion site.
#[doc(hidden)]
pub enum MacroResult {
    /// The input was parsed into a `Decimal`.
    Ok(Decimal),
    /// No digit was found where the number has to start
    /// (reported as "number is empty, must have an integer part").
    Empty,
    /// A `.` was followed by no digits although more input came after it
    /// (reported as "consider adding a `0` after the period").
    FractionEmpty,
    /// The requested radix, carried as payload, is outside `2..=36`.
    InvalidRadix(u32),
    /// Reported by the macro as "number too big".
    ExceedsMaximumPossibleValue,
    /// Reported by the macro as "number too small".
    LessThanMinimumPossibleValue,
    /// The exponent is out of bounds; the payload is the exponent, or a
    /// placeholder when the real value does not fit an `i32`.
    InvalidExp(i32),
    /// The payload is the first byte the parser could not consume.
    Unparseable(u8),
}

// Can’t use `from_str_radix`, as that neither groks '_', nor allows to continue after '.' or 'e'.
/// Parses a run of `radix` digits from the front of `src`, continuing from `num`.
///
/// `_` is skipped as a digit separator, but only after at least one digit has
/// been seen; a leading `_` is treated as an offending byte.
///
/// Returns `(number, digit count, stop)`:
/// * `number` – `num` extended by the parsed digits. It saturates at
///   `i128::MAX`, which is currently not a valid `Decimal`.
/// * `digit count` – number of digits consumed (separators excluded). It
///   saturates at `u8::MAX` rather than overflowing on very long inputs;
///   callers only compare it with 0 or use it as a scale, for which such a
///   value is far out of range anyway.
/// * `stop` – `Some((offending byte, rest after it))` when parsing stopped
///   before the end of `src`, `None` when all of `src` was consumed. This
///   allows multi-step parsing (integer part, fraction, exponent).
///
/// `radix` must not exceed 36, because `char::to_digit` panics otherwise.
const fn parse_bytes_inner(
    radix: u32,
    src: &[u8],
    mut num: i128,
) -> (i128, u8, Option<(u8, &[u8])>) {
    let mut count = 0_u8;
    let mut next = src;
    while let [byte, rest @ ..] = next {
        if let Some(digit) = (*byte as char).to_digit(radix) {
            // A plain `+= 1` would overflow on the 256th digit: a panic in
            // debug/const evaluation, a silent wrap to 0 ("empty") in release.
            count = count.saturating_add(1);
            num = num
                .saturating_mul(radix as i128)
                .saturating_add(digit as i128);
        } else if *byte != b'_' || count == 0 {
            return (num, count, Some((*byte, rest)));
        }
        next = rest;
    }
    (num, count, None)
}

/// Translates the bi-directional `exp` to the rest of the lib’s scale-down-only
/// representation and creates the `Decimal`.
///
/// * `is_positive` – sign of the result; `num` itself is the magnitude.
/// * `num` – magnitude as parsed, without any decimal point.
/// * `exp` – power of ten to apply: negative values become the scale,
///   non-negative values are multiplied into `num` (scale 0).
///
/// Returns `InvalidExp` for a positive exponent above 28 or when the `Decimal`
/// constructor rejects the value, and — if multiplying the exponent in
/// overflows — `ExceedsMaximumPossibleValue` for positive numbers or
/// `LessThanMinimumPossibleValue` for negative ones.
const fn to_decimal(is_positive: bool, mut num: i128, mut exp: i32) -> MacroResult {
    // Why is scale unsigned? :-(
    if exp >= 0 {
        // todo, put the const for max possible shift left
        if exp > 28 {
            return MacroResult::InvalidExp(exp);
        };
        let shift_factor = 10_i128.pow(exp as u32);
        let Some(shifted) = num.checked_mul(shift_factor) else {
            // The magnitude overflowed; which bound was crossed depends on the sign.
            return if is_positive {
                MacroResult::ExceedsMaximumPossibleValue
            } else {
                MacroResult::LessThanMinimumPossibleValue
            };
        };
        num = shifted;
        exp = 0;
    }
    match Decimal::try_from_i128_with_scale(
        if is_positive { num } else { -num },
        exp.unsigned_abs(),
    ) {
        Ok(dec) => MacroResult::Ok(dec),
        // todo deal with all Errs that can come from the real fn
        Err(()) => MacroResult::InvalidExp(exp),
    }
}

/// Parses all of `src` as an integer in `radix` and converts it to a `Decimal`.
///
/// Any byte that stops the digit parser yields `Unparseable`, an input
/// without digits yields `Empty`; otherwise the sign and `exp` are applied.
const fn parse_bytes(radix: u32, is_positive: bool, src: &[u8], exp: i32) -> MacroResult {
    let (value, digits, stopped_at) = parse_bytes_inner(radix, src, 0);
    if let Some((byte, _)) = stopped_at {
        MacroResult::Unparseable(byte)
    } else if digits == 0 {
        MacroResult::Empty
    } else {
        to_decimal(is_positive, value, exp)
    }
}

/// Parses normal (radix 10) numbers with optional float-like `.fraction` and
/// 10’s exponent (`e`/`E`, optionally signed).
///
/// * `is_positive` – sign of the number, already split off by the caller.
/// * `src` – the remaining bytes, starting at the first digit.
/// * `exp` – externally supplied power of ten; fraction digits and the parsed
///   exponent are combined with it.
///
/// Returns `Empty` without an integer part, `FractionEmpty` for a `.` that is
/// followed by no digit but more input, `Unparseable` for any unexpected byte
/// (including an exponent marker that is not followed by digits) and
/// `InvalidExp` when the combined exponent does not fit an `i32`.
const fn parse_10(is_positive: bool, src: &[u8], mut exp: i32) -> MacroResult {
    // parse 1st part (upto optional . or e)
    let (mut num, len, mut more) = parse_bytes_inner(10, src, 0);
    // Numbers can’t be empty (before optional . or e)
    if len == 0 {
        return MacroResult::Empty;
    }

    // parse optional fraction
    if let Some((b'.', rest)) = more {
        let (whole_num, scale, _more) = parse_bytes_inner(10, rest, num);
        more = _more;
        // May only be empty if no exp
        if scale == 0 && more.is_some() {
            return MacroResult::FractionEmpty;
        }
        num = whole_num;
        // Checked, so an extreme caller-supplied `exp` can’t overflow.
        let Some(scaled) = exp.checked_sub(scale as i32) else {
            return MacroResult::InvalidExp(exp);
        };
        exp = scaled;
    }

    // parse optional 10’s exponent
    if let Some((marker @ (b'e' | b'E'), mut rest)) = more {
        let exp_is_positive = if let [sign @ b'-' | sign @ b'+', signed @ ..] = rest {
            rest = signed;
            *sign == b'+'
        } else {
            true
        };
        // if this gives Some more, we’ll return that below
        let (e_part, e_len, _more) = parse_bytes_inner(10, rest, 0);
        more = _more;
        // An exponent marker needs at least one digit: "1e" or "1e+" is not a number.
        if e_len == 0 && more.is_none() {
            return MacroResult::Unparseable(marker);
        }
        // dummy value, more than MAX not storable
        if e_part > i32::MAX as i128 {
            return MacroResult::InvalidExp(i32::MAX);
        }
        // Combine with the exponent so far; overflow is reported, not panicked on.
        let combined = if exp_is_positive {
            exp.checked_add(e_part as i32)
        } else {
            exp.checked_sub(e_part as i32)
        };
        let Some(combined) = combined else {
            return MacroResult::InvalidExp(if exp_is_positive { i32::MAX } else { i32::MIN });
        };
        exp = combined;
    }

    if let Some((byte, _)) = more {
        MacroResult::Unparseable(byte)
    } else {
        to_decimal(is_positive, num, exp)
    }
}

/// Splits an optional leading `-` off `src`.
///
/// Returns `(is_positive, rest)`. After a `-`, any run of spaces, tabs and
/// newlines is skipped as well; without a leading `-` the bytes of `src` are
/// returned unchanged.
const fn parse_sign(src: &str) -> (bool, &[u8]) {
    let bytes = src.as_bytes();
    let [b'-', after_minus @ ..] = bytes else {
        return (true, bytes);
    };
    let mut tail = after_minus;
    while let [b' ' | b'\t' | b'\n', remaining @ ..] = tail {
        tail = remaining;
    }
    (false, tail)
}

/// `dec!()` entrypoint with radix.
///
/// Rejects any `radix` outside `2..=36` with `InvalidRadix`; otherwise splits
/// off the sign and parses the rest of `src` as an integer in that radix,
/// applying `exp` as power of ten.
pub const fn parse_radix_dec(radix: u32, src: &str, exp: i32) -> MacroResult {
    if radix < 2 || radix > 36 {
        return MacroResult::InvalidRadix(radix);
    }
    let (is_positive, digits) = parse_sign(src);
    parse_bytes(radix, is_positive, digits, exp)
}

/// `dec!()` entrypoint without radix.
///
/// After the sign, a `0b`, `0o` or `0x` prefix selects radix 2, 8 or 16; any
/// `_` directly following the prefix is skipped before the digits are parsed.
/// Everything else is handed to the radix 10 parser.
pub const fn parse_dec(src: &str, exp: i32) -> MacroResult {
    let (is_positive, src) = parse_sign(src);
    let (radix, mut digits) = match src {
        [b'0', b'b', rest @ ..] => (2, rest),
        [b'0', b'o', rest @ ..] => (8, rest),
        [b'0', b'x', rest @ ..] => (16, rest),
        _ => return parse_10(is_positive, src, exp),
    };
    // Leading separators right after the prefix are allowed, e.g. `0x_ff`.
    while let [b'_', rest @ ..] = digits {
        digits = rest;
    }
    parse_bytes(radix, is_positive, digits, exp)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises the digit parser: radixes, `_` separators, saturation and
    /// the multi-step protocol used for fractions.
    #[test]
    pub fn test_parse_bytes_inner() {
        // Only the parsed number is of interest for the plain radix checks.
        fn value_of(radix: u32, src: &[u8]) -> i128 {
            let (value, _, _) = parse_bytes_inner(radix, src, 0);
            value
        }

        assert_eq!(value_of(2, b"111"), 0b111);
        assert_eq!(value_of(8, b"177"), 0o177);
        assert_eq!(value_of(10, b"199"), 199);
        assert_eq!(value_of(16, b"1ff"), 0x1ff);
        assert_eq!(
            value_of(36, b"1_zzz"),
            i128::from_str_radix("1zzz", 36).unwrap()
        );

        // Right below and exactly at the largest representable value.
        assert_eq!(
            value_of(16, b"7fff_ffff_ffff_ffff_ffff_ffff_ffff_fffE"),
            i128::MAX - 1
        );
        assert_eq!(
            value_of(16, b"7fff_ffff_ffff_ffff_ffff_ffff_ffff_fffF"),
            i128::MAX
        );
        // must saturate at MAX
        assert_eq!(
            value_of(16, b"Ffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff"),
            i128::MAX
        );

        // Assemble floaty: number parsed, digits count, (offending byte, rest)
        let (int_part, int_digits, stop) = parse_bytes_inner(10, b"01_234.567_8", 0);
        assert_eq!((int_part, int_digits), (1234, 5));
        assert_eq!(stop, Some((b'.', &b"567_8"[..])));

        // … and feed it back in, to get whole number & exp
        let (whole, frac_digits, stop) = parse_bytes_inner(10, b"567_8", 1234);
        assert_eq!((whole, frac_digits), (12345678, 4));
        assert_eq!(stop, None);
    }
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants