Skip to content

Commit

Permalink
Decode the HTML before loading static assets
Browse files Browse the repository at this point in the history
  • Loading branch information
alecsmrekar committed Dec 12, 2023
1 parent d8c2078 commit ed8a57f
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 2 deletions.
12 changes: 10 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -983,6 +983,11 @@ fn valid_local_uri(user: &mut GooseUser, uri: &str) -> bool {
}
}

/// Decodes HTML-encoded text so that URLs extracted from it can be requested as-is.
///
/// Currently only the encoded ampersand character is handled: every occurrence of
/// `&amp;` is replaced with a literal `&`. All other text, including any other
/// character references, is returned unchanged.
///
/// Returns a newly allocated `String`; the input is not modified.
fn decode_html(html: &str) -> String {
    // The pattern must be the encoded entity itself; replacing "&" with "&" would be a no-op
    // and leave query strings such as `?foo=1&amp;bar=2` undecoded.
    html.replace("&amp;", "&")
}

/// Extract all local static elements defined with a `src=` tag from the provided html.
///
/// While you can invoke this function directly, it's generally preferred to invoke
Expand All @@ -991,9 +996,11 @@ pub async fn get_src_elements(user: &mut GooseUser, html: &str) -> Vec<String> {
// Use a case-insensitive regular expression to find all src=<foo> in the html, where
// <foo> is the URL to local image and js assets.
// @TODO: parse HTML5 srcset= also

let html = decode_html(html);
let src_elements = Regex::new(r#"(?i)src="(.*?)""#).unwrap();
let mut elements: Vec<String> = Vec::new();
for url in src_elements.captures_iter(html) {
for url in src_elements.captures_iter(html.as_str()) {
if valid_local_uri(user, &url[1]) {
elements.push(url[1].to_string());
}
Expand All @@ -1008,9 +1015,10 @@ pub async fn get_src_elements(user: &mut GooseUser, html: &str) -> Vec<String> {
pub async fn get_css_elements(user: &mut GooseUser, html: &str) -> Vec<String> {
// Use a case-insensitive regular expression to find all href=<foo> in the html, where
// <foo> is the URL to local css assets.
let html = decode_html(html);
let css = Regex::new(r#"(?i)href="(.*?\.css.*?)""#).unwrap();
let mut elements: Vec<String> = Vec::new();
for url in css.captures_iter(html) {
for url in css.captures_iter(html.as_str()) {
if valid_local_uri(user, &url[1]) {
elements.push(url[1].to_string());
}
Expand Down
48 changes: 48 additions & 0 deletions tests/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ use gumdrop::Options;
use httpmock::{Method::GET, MockServer};

use goose::config::GooseConfiguration;
use goose::goose::get_base_url;
use goose::metrics::GooseCoordinatedOmissionMitigation::Disabled;
use goose::prelude::*;
use goose_eggs::load_static_elements;

// Paths used in load tests performed during these tests.
const PATH: &str = "/one";
Expand Down Expand Up @@ -212,3 +215,48 @@ async fn test_invalid_header_value() {
}
assert!(goose_metrics.errors.len() == 1);
}

#[tokio::test]
// Loads static elements from a page and checks that both an HTML-encoded asset URL and an
// already-decoded asset URL result in exactly one request each.
async fn test_html_decoding() {
    // Page fixture: the first script URL carries an encoded ampersand, the second a plain one.
    let page = r#"
<!DOCTYPE html>
<head>
<!-- Check that encoded paths are decoded properly -->
<script type="text/javascript" src="/test1.js?foo=1&amp;bar=2"></script>
<!-- Check that decoded paths still work -->
<script type="text/javascript" src="/test2.js?foo=1&bar=2"></script>
<title>Title 1234ABCD</title>
</head>
<body>
<p>Test text on the page.</p>
</body>
"#;

    let server = MockServer::start();

    // Build a user pointed at the mock server, using default options with
    // coordinated omission mitigation disabled.
    let no_args: Vec<&str> = Vec::new();
    let mut configuration = GooseConfiguration::parse_args_default(&no_args).unwrap();
    configuration.co_mitigation = Some(Disabled);
    let base_url = get_base_url(Some(server.base_url()), None, None).unwrap();
    let mut user =
        GooseUser::new(0, String::new(), base_url, &configuration, 0, None).unwrap();

    // Each mock only matches when both query parameters arrive as separate, decoded pairs.
    let encoded_asset = server.mock(|when, then| {
        when.method(GET)
            .path("/test1.js")
            .query_param("foo", "1")
            .query_param("bar", "2");
        then.status(200).body("test");
    });
    let plain_asset = server.mock(|when, then| {
        when.method(GET)
            .path("/test2.js")
            .query_param("foo", "1")
            .query_param("bar", "2");
        then.status(200).body("test");
    });

    load_static_elements(&mut user, page).await;

    // Both assets must have been requested exactly once.
    assert_eq!(encoded_asset.hits(), 1);
    assert_eq!(plain_asset.hits(), 1);
}

0 comments on commit ed8a57f

Please sign in to comment.