Skip to content

Commit

Permalink
Allow for extended ASCII chars in attribute values (#113)
Browse files Browse the repository at this point in the history
Allow for extended ASCII chars in attribute values, making the parser no
longer fully conformant with RFC 2622. Since commonly used WHOIS
databases do not fully conform to the standard either, we need to follow
suit to be a useful parser.
  • Loading branch information
SRv6d authored Sep 28, 2024
1 parent bc15e4f commit 9d9f883
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 25 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rpsl-rs"
description = "An RFC 2622 conformant Routing Policy Specification Language (RPSL) parser with a focus on speed and correctness."
description = "A Routing Policy Specification Language (RPSL) parser with a focus on speed and correctness."
version = "1.0.1"
keywords = ["rpsl", "parser", "routing", "policy", "whois"]
categories = ["parsing", "database"]
Expand Down
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
</div>
<br>

An [RFC 2622] conformant Routing Policy Specification Language (RPSL) parser with a focus on speed and correctness.
A Routing Policy Specification Language (RPSL) parser with a focus on speed and correctness.

⚡️ 130-250x faster than other parsers\
📰 Complete implementation for multiline RPSL values\
Expand Down Expand Up @@ -173,7 +173,6 @@ To get started, please read the [contribution guidelines](.github/CONTRIBUTING.m

The source code of this project is licensed under the MIT License. For more information, see [LICENSE](LICENSE).

[RFC 2622]: https://datatracker.ietf.org/doc/html/rfc2622
[Object]: https://docs.rs/rpsl-rs/latest/rpsl/struct.Object.html
[Attribute]: https://docs.rs/rpsl-rs/latest/rpsl/struct.Attribute.html
[parse_object]: https://docs.rs/rpsl-rs/latest/rpsl/fn.parse_object.html
Expand Down
64 changes: 51 additions & 13 deletions src/attribute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,20 @@ impl<'a> Name<'a> {
fn unchecked(name: &'a str) -> Self {
Self(Cow::Borrowed(name))
}

fn validate(name: &str) -> Result<(), InvalidNameError> {
if name.trim().is_empty() {
return Err(InvalidNameError::Empty);
} else if !name.is_ascii() {
return Err(InvalidNameError::NonAscii);
} else if !name.chars().next().unwrap().is_ascii_alphabetic() {
return Err(InvalidNameError::NonAsciiAlphabeticFirstChar);
} else if !name.chars().last().unwrap().is_ascii_alphanumeric() {
return Err(InvalidNameError::NonAsciiAlphanumericLastChar);
}

Ok(())
}
}

impl FromStr for Name<'_> {
Expand All @@ -111,16 +125,7 @@ impl FromStr for Name<'_> {
/// # Errors
/// Returns an error if the name is empty or invalid.
fn from_str(name: &str) -> Result<Self, Self::Err> {
if name.trim().is_empty() {
return Err(InvalidNameError::Empty);
} else if !name.is_ascii() {
return Err(InvalidNameError::NonAscii);
} else if !name.chars().next().unwrap().is_ascii_alphabetic() {
return Err(InvalidNameError::NonAsciiAlphabeticFirstChar);
} else if !name.chars().last().unwrap().is_ascii_alphanumeric() {
return Err(InvalidNameError::NonAsciiAlphanumericLastChar);
}

Self::validate(name)?;
Ok(Self(Cow::Owned(name.to_string())))
}
}
Expand Down Expand Up @@ -215,9 +220,16 @@ impl<'a> Value<'a> {
}

fn validate(value: &str) -> Result<(), InvalidValueError> {
if !value.is_ascii() {
return Err(InvalidValueError::NonAscii);
} else if value.chars().any(|c| c.is_ascii_control()) {
value.chars().try_for_each(Self::validate_char)
}

/// Even though RFC 2622 requires values to be ASCII, in practice some WHOIS databases
/// (e.g. RIPE) do not enforce this so, to be useful in the real world, we don't either.
#[inline]
pub(crate) fn validate_char(c: char) -> Result<(), InvalidValueError> {
if !is_extended_ascii(c) {
return Err(InvalidValueError::NonExtendedAscii);
} else if c.is_ascii_control() {
return Err(InvalidValueError::ContainsControlChar);
}

Expand Down Expand Up @@ -413,6 +425,12 @@ where
}
}

/// Checks if the given char is part of the extended ASCII set.
#[inline]
fn is_extended_ascii(char: char) -> bool {
matches!(char, '\u{0000}'..='\u{00FF}')
}

#[cfg(test)]
mod tests {
use proptest::prelude::*;
Expand Down Expand Up @@ -561,6 +579,26 @@ mod tests {
assert_eq!(Value::from_str(s).unwrap(), Value::SingleLine(None));
}

proptest! {
#[test]
fn value_validation_any_non_control_extended_ascii_valid(
s in r"[\x00-\xFF]+"
.prop_filter("Must not contain control chars", |s| !s.chars().any(|c| c.is_ascii_control())))
{
Value::validate(&s).unwrap();
}

#[test]
fn value_validation_any_non_extended_ascii_is_err(s in r"[^\x00-\xFF]+") {
matches!(Value::validate(&s).unwrap_err(), InvalidValueError::NonExtendedAscii);
}

#[test]
fn value_validation_any_ascii_control_is_err(s in r"[\x00-\x1F\x7F]+") {
matches!(Value::validate(&s).unwrap_err(), InvalidValueError::ContainsControlChar);
}
}

#[rstest]
#[case(
Value::unchecked_single("32934"),
Expand Down
6 changes: 3 additions & 3 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use thiserror::Error;
pub enum InvalidNameError {
#[error("cannot be empty")]
Empty,
#[error("cannot contain non-ASCII characters")]
#[error("cannot contain characters that are not part of the extended ASCII set")]
NonAscii,
#[error("cannot start with a non-letter ASCII character")]
NonAsciiAlphabeticFirstChar,
Expand All @@ -16,8 +16,8 @@ pub enum InvalidNameError {

#[derive(Error, Debug)]
pub enum InvalidValueError {
#[error("cannot contain non-ASCII characters")]
NonAscii,
#[error("cannot contain characters that are not part of the extended ASCII set")]
NonExtendedAscii,
#[error("cannot contain ASCII control characters")]
ContainsControlChar,
}
Expand Down
16 changes: 13 additions & 3 deletions src/parser/component.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use winnow::{
PResult, Parser,
};

use crate::Attribute;
use crate::{Attribute, Value};

// A response code or message sent by the whois server.
// Starts with the "%" character and extends until the end of the line.
Expand Down Expand Up @@ -54,9 +54,9 @@ pub fn attribute_name<'s>(input: &mut &'s str) -> PResult<&'s str> {
.parse_next(input)
}

// An ASCII sequence of characters, excluding control.
// An extended ASCII sequence of characters, excluding control.
pub fn attribute_value<'s>(input: &mut &'s str) -> PResult<&'s str> {
take_while(0.., |c: char| c.is_ascii() && !c.is_ascii_control()).parse_next(input)
take_while(0.., |c| Value::validate_char(c).is_ok()).parse_next(input)
}

// Extends an attributes value over multiple lines.
Expand All @@ -77,6 +77,7 @@ pub fn consume_continuation_char(input: &mut &str) -> PResult<()> {

#[cfg(test)]
mod tests {
use proptest::prelude::*;
use rstest::*;

use super::*;
Expand Down Expand Up @@ -222,6 +223,15 @@ mod tests {
assert_eq!(*given, remaining);
}

proptest! {
/// Any non extended ASCII is not returned by the value parser.
#[test]
fn attribute_value_non_extended_ascii_not_parsed(s in r"[^\x00-\xFF]+") {
let parsed = attribute_value(&mut s.as_str()).unwrap();
prop_assert_eq!(parsed, "");
}
}

#[rstest]
#[case(
&mut " continuation value prefixed by a space\n",
Expand Down
13 changes: 10 additions & 3 deletions tests/test_parsing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,17 @@ mod strategies {

/// A valid attribute value.
///
/// An attribute value may consist of any ASCII characters excluding control and
/// must not start with, or consist entirely of whitespace.
/// Creates a string of extended ASCII characters while excluding control and not
/// starting with, or consisting entirely of whitespace.
fn attribute_value_content() -> impl Strategy<Value = String> {
proptest::string::string_regex("[!-~][ -~]*").unwrap()
proptest::string::string_regex(r"[\x20-\x7E\x80-\xFF]+")
.unwrap()
.prop_filter("Cannot start with whitespace", |s| {
!s.starts_with(|c: char| c.is_whitespace())
})
.prop_filter("Cannot consist of whitespace only", |s| {
!s.trim().is_empty()
})
}

/// An empty attribute value.
Expand Down

0 comments on commit 9d9f883

Please sign in to comment.