Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add form action #35

Merged
merged 12 commits into from
Aug 15, 2024
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ Here is the list of available actions:
- `Scroll(selector, wait_options)` - scroll page
- `Screenshot(options)` - take screenshot
- `Har()` - to get the HAR file, pass the `har_recording=True` argument to `PuppeteerRequest` at the start of execution.
- `FillForm(input_mapping, submit_button)` - to fill out and submit forms on the page.
- `RecaptchaSolver(solve_recaptcha)` - find or solve recaptcha on page
- `CustomJsAction(js_function)` - evaluate JS function on page

Expand Down Expand Up @@ -174,4 +175,4 @@ In this case RecaptchaMiddleware will just skip the request.
- [ ] headers and cookies management
- [ ] proxy support for puppeteer
- [x] error handling for requests
- [ ] har support
- [x] har support
2 changes: 2 additions & 0 deletions examples/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@
}

PUPPETEER_SERVICE_URL = "http://localhost:3000"

PUPPETEER_LOCAL = False
38 changes: 38 additions & 0 deletions examples/spiders/fill_form.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import scrapy
from scrapypuppeteer import PuppeteerRequest, PuppeteerScreenshotResponse
from scrapypuppeteer.actions import Screenshot, FillForm
import base64


class FormActionSpider(scrapy.Spider):
    """Fill out a test form with ``FillForm``, then save a full-page screenshot.

    Flow: start_requests -> form_action (fill fields) -> screenshot (capture)
    -> make_screenshot (decode and write to disk). Every request keeps the
    browser page open (``close_page=False``) so follow-up actions reuse it.
    """

    name = "fill_form"
    start_urls = ["https://www.roboform.com/filling-test-all-fields"]

    def start_requests(self):
        for url in self.start_urls:
            yield PuppeteerRequest(url, callback=self.form_action, close_page=False)

    def form_action(self, response):
        """Type values into the form; ``delay`` is per-keystroke, in milliseconds."""
        input_mapping = {
            'input[name="02frstname"]': {"value": "SomeName", "delay": 50},
            'input[name="05_company"]': {"value": "SomeCompany", "delay": 100},
            'input[name="06position"]': {"value": "SomePosition", "delay": 100},
        }

        yield response.follow(
            FillForm(input_mapping), close_page=False, callback=self.screenshot
        )

    def screenshot(self, response):
        """Request a full-page screenshot of the filled form."""
        action = Screenshot(
            options={
                "fullPage": True,
            }
        )
        yield response.follow(action, callback=self.make_screenshot, close_page=False)

    @staticmethod
    def make_screenshot(response: PuppeteerScreenshotResponse, **kwargs):
        """Decode the base64-encoded screenshot and write it to screenshot.png."""
        data = response.screenshot
        # Fix: plain string literal — the original used an f-string with no placeholders.
        with open("screenshot.png", "wb") as fh:
            fh.write(base64.b64decode(data))
29 changes: 29 additions & 0 deletions examples/spiders/har.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import scrapy
from scrapypuppeteer import PuppeteerRequest
from scrapypuppeteer.actions import Har


def write_to_file(file_path, content):
    """Append *content* to the UTF-8 text file at *file_path* (created if missing)."""
    with open(file_path, mode="a", encoding="utf-8") as out:
        out.write(content)


class HarSpider(scrapy.Spider):
    """Record a HAR for a page load and append it to ``result.har``."""

    name = "har"
    start_urls = ["https://github.com/pyppeteer/pyppeteer"]

    def start_requests(self):
        # har_recording=True is what enables HAR capture for the session.
        for url in self.start_urls:
            yield PuppeteerRequest(
                url,
                callback=self.har,
                close_page=False,
                har_recording=True,
            )

    def har(self, response):
        # Ask the service for the HAR accumulated since the initial request.
        yield response.follow(Har(), close_page=False, callback=self.save_har)

    def save_har(self, response):
        write_to_file("result.har", response.har)
48 changes: 45 additions & 3 deletions scrapypuppeteer/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ class GoTo(PuppeteerServiceAction):
endpoint = "goto"

def __init__(
self, url: str, navigation_options: dict = None, wait_options: dict = None, har_recording: bool = False
self,
url: str,
navigation_options: dict = None,
wait_options: dict = None,
har_recording: bool = False,
):
self.url = url
self.navigation_options = navigation_options
Expand Down Expand Up @@ -223,15 +227,53 @@ def __init__(self, options: dict = None, **kwargs):

def payload(self):
return {"options": self.options}


class Har(PuppeteerServiceAction):
    """
    Retrieve the HTTP Archive (HAR) for the current session.

    The HAR file holds detailed information about the network requests and
    responses made during the session.

    Takes no arguments. HAR capture must be enabled up front by passing
    ``har_recording=True`` to the initial ``PuppeteerRequest``.
    """

    endpoint = "har"

    def payload(self):
        # The service endpoint requires no parameters.
        return {}


class FillForm(PuppeteerServiceAction):
    """
    Fill out, and optionally submit, a form on the current page.

    :param input_mapping: dict keyed by CSS selector; each value is a dict
        describing how to fill that element:

        * ``value`` (str): the text to type into the element.
        * ``delay`` (int, optional): milliseconds to wait between keystrokes.
          Defaults to 0 when omitted.

    :param submit_button: optional CSS selector of the form's submit button;
        when given, it is clicked after all inputs are filled.
    """

    endpoint = "fill_form"

    def __init__(self, input_mapping: dict, submit_button: str = None):
        self.input_mapping = input_mapping
        self.submit_button = submit_button

    def payload(self):
        # Keys follow the service's camelCase naming convention.
        return {
            "inputMapping": self.input_mapping,
            "submitButton": self.submit_button,
        }


class RecaptchaSolver(PuppeteerServiceAction):
"""
Expand Down
5 changes: 3 additions & 2 deletions scrapypuppeteer/browser_managers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@

from abc import ABC, abstractmethod


class BrowserManager(ABC):
    """Abstract interface the download middleware uses to drive a browser."""

    @abstractmethod
    def process_request(self, request, spider):
        """Handle an outgoing request for the given spider."""

    @abstractmethod
    def close_used_contexts(self):
        """Release browser contexts that were opened during the crawl."""

    @abstractmethod
    def process_response(self, middleware, request, response, spider):
        """Post-process a response before it is handed back to the spider."""
Loading
Loading