Skip to content

Commit

Permalink
Bugfix nofollow instructions in rel tags ignored when the tag has more than one value (#1362)
Browse files Browse the repository at this point in the history

Signed-off-by: Julien Nioche <[email protected]>
  • Loading branch information
jnioche authored Oct 21, 2024
1 parent 6a1fafa commit 2fe8d92
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Stream;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.metric.api.MultiCountMetric;
import org.apache.storm.task.OutputCollector;
Expand Down Expand Up @@ -269,7 +270,10 @@ public void execute(Tuple tuple) {
final URL baseURL = new URL(url);
for (Element link : links) {
// nofollow
boolean noFollow = "nofollow".equalsIgnoreCase(link.attr("rel"));
String[] relkeywords = link.attr("rel").split(" ");
boolean noFollow =
Stream.of(relkeywords).anyMatch(x -> x.equalsIgnoreCase("nofollow"));

// remove altogether
if (noFollow && robots_noFollow_strict) {
continue;
Expand Down
1 change: 1 addition & 0 deletions core/src/test/resources/digitalpebble.com.html
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
</script>

<a rel="nofollow" href="inexistent.html"/>
<a rel="nofollow somevalue" href="another_inexistent.html"/>

<body style="color: rgb(0, 0, 0); background-color: rgb(255, 255, 255);"
alink="#000000" link="#000000" vlink="#000000">
Expand Down

0 comments on commit 2fe8d92

Please sign in to comment.