From ed8a57fee1c00d8e5843711c78fac6cdb5255395 Mon Sep 17 00:00:00 2001 From: Alec Smrekar Date: Tue, 12 Dec 2023 16:37:51 +0100 Subject: [PATCH] Decode the HTML before loading static assets --- src/lib.rs | 12 ++++++++++-- tests/validate.rs | 48 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c33c7a8..de87860 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -983,6 +983,11 @@ fn valid_local_uri(user: &mut GooseUser, uri: &str) -> bool { } } +/// Decodes the HTML. Currently it just decodes the encoded ampersand character. +fn decode_html(html: &str) -> String { + html.replace("&amp;", "&") +} + /// Extract all local static elements defined with a `src=` tag from the the provided html. /// /// While you can invoke this function directly, it's generally preferred to invoke @@ -991,9 +996,11 @@ pub async fn get_src_elements(user: &mut GooseUser, html: &str) -> Vec<String> { // Use a case-insensitive regular expression to find all src=<foo> in the html, where // <foo> is the URL to local image and js assets. // @TODO: parse HTML5 srcset= also + + let html = decode_html(html); let src_elements = Regex::new(r#"(?i)src="(.*?)""#).unwrap(); let mut elements: Vec<String> = Vec::new(); - for url in src_elements.captures_iter(html) { + for url in src_elements.captures_iter(html.as_str()) { if valid_local_uri(user, &url[1]) { elements.push(url[1].to_string()); } @@ -1008,9 +1015,10 @@ pub async fn get_src_elements(user: &mut GooseUser, html: &str) -> Vec<String> { pub async fn get_css_elements(user: &mut GooseUser, html: &str) -> Vec<String> { // Use a case-insensitive regular expression to find all href=<foo> in the html, where // <foo> is the URL to local css assets.
+ let html = decode_html(html); let css = Regex::new(r#"(?i)href="(.*?\.css.*?)""#).unwrap(); let mut elements: Vec<String> = Vec::new(); - for url in css.captures_iter(html) { + for url in css.captures_iter(html.as_str()) { if valid_local_uri(user, &url[1]) { elements.push(url[1].to_string()); } diff --git a/tests/validate.rs b/tests/validate.rs index 2cd3d5f..c81fd3a 100644 --- a/tests/validate.rs +++ b/tests/validate.rs @@ -2,7 +2,10 @@ use gumdrop::Options; use httpmock::{Method::GET, MockServer}; use goose::config::GooseConfiguration; +use goose::goose::get_base_url; +use goose::metrics::GooseCoordinatedOmissionMitigation::Disabled; use goose::prelude::*; +use goose_eggs::load_static_elements; // Paths used in load tests performed during these tests. const PATH: &str = "/one"; @@ -212,3 +215,48 @@ async fn test_invalid_header_value() { } assert!(goose_metrics.errors.len() == 1); } + +#[tokio::test] +// Loads static elements and checks that characters are decoded properly. +async fn test_html_decoding() { + let html: &str = r#" + + + + + + + Title 1234ABCD + + +

Test text on the page.

+ + "#; + + let server = MockServer::start(); + + let mock_endpoint1 = server.mock(|when, then| { + when.method(GET) + .path("/test1.js") + .query_param("foo", "1") + .query_param("bar", "2"); + then.status(200).body("test"); + }); + let mock_endpoint2 = server.mock(|when, then| { + when.method(GET) + .path("/test2.js") + .query_param("foo", "1") + .query_param("bar", "2"); + then.status(200).body("test"); + }); + + let config: Vec<&str> = vec![]; + let mut configuration = GooseConfiguration::parse_args_default(&config).unwrap(); + configuration.co_mitigation = Some(Disabled); + let base_url = get_base_url(Some(server.base_url()), None, None).unwrap(); + let mut user = GooseUser::new(0, "".to_string(), base_url, &configuration, 0, None).unwrap(); + + load_static_elements(&mut user, html).await; + assert_eq!(mock_endpoint1.hits(), 1); + assert_eq!(mock_endpoint2.hits(), 1); +}