Skip to content

Commit

Permalink
Improve Selenium tests,fix #1115
Browse files Browse the repository at this point in the history
Signed-off-by: Julien Nioche <[email protected]>
  • Loading branch information
jnioche committed Nov 1, 2023
1 parent cfe61d7 commit ef31e50
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.thrift.annotation.Nullable;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.openqa.selenium.remote.RemoteWebDriver;
import org.slf4j.LoggerFactory;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import com.digitalpebble.stormcrawler.protocol.AbstractHttpProtocol;
import com.digitalpebble.stormcrawler.protocol.HttpHeaders;
import com.digitalpebble.stormcrawler.protocol.ProtocolResponse;
import java.time.Instant;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.storm.Config;
import org.openqa.selenium.remote.RemoteWebDriver;
Expand All @@ -31,6 +32,9 @@ public abstract class SeleniumProtocol extends AbstractHttpProtocol {

private NavigationFilters filters;

public static final String MD_KEY_START = "selenium.protocol.start";
public static final String MD_KEY_END = "selenium.protocol.end";

@Override
public void configure(Config conf) {
super.configure(conf);
Expand All @@ -40,8 +44,14 @@ public void configure(Config conf) {

public ProtocolResponse getProtocolOutput(String url, Metadata metadata) throws Exception {
RemoteWebDriver driver;
while ((driver = getDriver()) == null) {}
while ((driver = getDriver()) == null) {
// we only get here if there has been an InterruptedException:
// just try again
}
try {
final Metadata outputMeta = new Metadata();
outputMeta.addValue(MD_KEY_START, Instant.now().toString());

// This will block for the page load and any
// associated AJAX requests
driver.get(url);
Expand All @@ -62,9 +72,11 @@ public ProtocolResponse getProtocolOutput(String url, Metadata metadata) throws
return new ProtocolResponse(content, 307, m);
}

outputMeta.addValue(MD_KEY_END, Instant.now().toString());

// if no filters got triggered
byte[] content = driver.getPageSource().getBytes();
return new ProtocolResponse(content, 200, new Metadata());
return new ProtocolResponse(content, 200, outputMeta);

} finally {
// finished with this driver - return it to the queue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@
package com.digitalpebble.stormcrawler.protocol.selenium;

import com.digitalpebble.stormcrawler.Metadata;
import com.digitalpebble.stormcrawler.protocol.Protocol;
import com.digitalpebble.stormcrawler.protocol.ProtocolResponse;
import java.time.Instant;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.mutable.MutableBoolean;
import org.apache.storm.Config;
import org.junit.After;
import org.apache.storm.utils.MutableObject;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
Expand All @@ -45,19 +45,16 @@ public class ProtocolTest {

private static final Logger LOG = LoggerFactory.getLogger(ProtocolTest.class);

private Protocol protocol;

private static final DockerImageName IMAGE =
DockerImageName.parse("selenium/standalone-chrome:116.0");
DockerImageName.parse("selenium/standalone-chrome:118.0");

@Rule
public BrowserWebDriverContainer<?> chrome =
new BrowserWebDriverContainer<>(IMAGE)
.withCapabilities(new ChromeOptions())
.withRecordingMode(VncRecordingMode.SKIP, null);

@Before
public void setupProtocol() {
public RemoteDriverProtocol getProtocol() {

LOG.info(
"Configuring protocol instance to connect to {}",
Expand Down Expand Up @@ -91,28 +88,74 @@ public void setupProtocol() {

conf.put("selenium.capabilities", capabilities);

protocol = new RemoteDriverProtocol();
RemoteDriverProtocol protocol = new RemoteDriverProtocol();
protocol.configure(conf);
return protocol;
}

@Test
// not working yet
public void test() {
Metadata m = new Metadata();
boolean noException = true;
try {
// find better examples later
ProtocolResponse response = protocol.getProtocolOutput("https://stormcrawler.net", m);
Assert.assertEquals(307, response.getStatusCode());
} catch (Exception e) {
noException = false;
LOG.info("Exception caught", e);
/**
* You can configure one instance of Selenium to talk to multiple drivers, but you can't have
* multiple instances of the protocol. If there is only one instance and one target, you must
* wait...
*
* @throws InterruptedException if the test thread is interrupted while sleeping between checks
*     for the two calls to complete
*/

// TODO find a way of not hitting a real URL
public void testBlocking() throws InterruptedException {
RemoteDriverProtocol protocol = getProtocol();

MutableBoolean noException = new MutableBoolean(true);

MutableObject endTimeFirst = new MutableObject();
MutableObject startTimeSecond = new MutableObject();

new Thread(
() -> {
try {
ProtocolResponse response =
protocol.getProtocolOutput(
"https://stormcrawler.net/", new Metadata());
endTimeFirst.setObject(
Instant.parse(
response.getMetadata()
.getFirstValue(
SeleniumProtocol.MD_KEY_END)));
} catch (Exception e) {
noException.setValue(false);
}
})
.start();

new Thread(
() -> {
try {
ProtocolResponse response =
protocol.getProtocolOutput(
"https://stormcrawler.net/", new Metadata());
startTimeSecond.setObject(
Instant.parse(
response.getMetadata()
.getFirstValue(
SeleniumProtocol.MD_KEY_START)));
} catch (Exception e) {
noException.setValue(false);
}
})
.start();

while (endTimeFirst.getObject() == null || startTimeSecond.getObject() == null) {
Thread.sleep(10);
}
Assert.assertEquals(true, noException);
}

@After
public void close() {
Instant etf = (Instant) endTimeFirst.getObject();
Instant sts = (Instant) startTimeSecond.getObject();

// check that the second call started AFTER the first one finished
Assert.assertEquals(true, etf.isBefore(sts));

Assert.assertEquals(true, noException.booleanValue());
protocol.cleanup();
}
}

0 comments on commit ef31e50

Please sign in to comment.