Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add form action #35

Merged
merged 12 commits into from
Aug 15, 2024
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ Here is the list of available actions:
- `Scroll(selector, wait_options)` - scroll page
- `Screenshot(options)` - take screenshot
- `Har()` - to get the HAR file, pass the `har_recording=True` argument to `PuppeteerRequest` at the start of execution.
- `FillForm(input_mapping, submit_button)` - to fill out and submit forms on the page.
- `RecaptchaSolver(solve_recaptcha)` - find or solve recaptcha on page
- `CustomJsAction(js_function)` - evaluate JS function on page

Expand Down Expand Up @@ -174,4 +175,4 @@ In this case RecaptchaMiddleware will just skip the request.
- [ ] headers and cookies management
- [ ] proxy support for puppeteer
- [x] error handling for requests
- [ ] har support
- [x] har support
2 changes: 2 additions & 0 deletions examples/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@
}

PUPPETEER_SERVICE_URL = "http://localhost:3000"

PUPPETEER_LOCAL = False
38 changes: 38 additions & 0 deletions examples/spiders/fill_form.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import scrapy
from scrapypuppeteer import PuppeteerRequest, PuppeteerScreenshotResponse
from scrapypuppeteer.actions import Screenshot, FillForm
import base64


class FormActionSpider(scrapy.Spider):
    """Fill out a test form with ``FillForm``, then save a full-page screenshot.

    Flow: start_requests -> form_action (fill fields) -> screenshot (capture)
    -> make_screenshot (decode and write to disk). Every request keeps the
    browser page open (``close_page=False``) so follow-up actions reuse it.
    """

    name = "fill_form"
    start_urls = ["https://www.roboform.com/filling-test-all-fields"]

    def start_requests(self):
        for url in self.start_urls:
            yield PuppeteerRequest(url, callback=self.form_action, close_page=False)

    def form_action(self, response):
        """Type values into the form; ``delay`` is per-keystroke, in milliseconds."""
        input_mapping = {
            'input[name="02frstname"]': {"value": "SomeName", "delay": 50},
            'input[name="05_company"]': {"value": "SomeCompany", "delay": 100},
            'input[name="06position"]': {"value": "SomePosition", "delay": 100},
        }

        yield response.follow(
            FillForm(input_mapping), close_page=False, callback=self.screenshot
        )

    def screenshot(self, response):
        """Request a full-page screenshot of the filled form."""
        action = Screenshot(
            options={
                "fullPage": True,
            }
        )
        yield response.follow(action, callback=self.make_screenshot, close_page=False)

    @staticmethod
    def make_screenshot(response: PuppeteerScreenshotResponse, **kwargs):
        """Decode the base64-encoded screenshot and write it to screenshot.png."""
        data = response.screenshot
        # Fix: plain string literal — the original used an f-string with no placeholders.
        with open("screenshot.png", "wb") as fh:
            fh.write(base64.b64decode(data))
29 changes: 29 additions & 0 deletions examples/spiders/har.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import scrapy
from scrapypuppeteer import PuppeteerRequest
from scrapypuppeteer.actions import Har


def write_to_file(file_path, content):
    """Append *content* to the UTF-8 text file at *file_path* (created if missing)."""
    with open(file_path, mode="a", encoding="utf-8") as out:
        out.write(content)


class HarSpider(scrapy.Spider):
    """Record a HAR for a page load and append it to ``result.har``."""

    name = "har"
    start_urls = ["https://github.com/pyppeteer/pyppeteer"]

    def start_requests(self):
        # har_recording=True is what enables HAR capture for the session.
        for url in self.start_urls:
            yield PuppeteerRequest(
                url,
                callback=self.har,
                close_page=False,
                har_recording=True,
            )

    def har(self, response):
        # Ask the service for the HAR accumulated since the initial request.
        yield response.follow(Har(), close_page=False, callback=self.save_har)

    def save_har(self, response):
        write_to_file("result.har", response.har)
48 changes: 45 additions & 3 deletions scrapypuppeteer/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ class GoTo(PuppeteerServiceAction):
endpoint = "goto"

def __init__(
self, url: str, navigation_options: dict = None, wait_options: dict = None, har_recording: bool = False
self,
url: str,
navigation_options: dict = None,
wait_options: dict = None,
har_recording: bool = False,
):
self.url = url
self.navigation_options = navigation_options
Expand Down Expand Up @@ -223,15 +227,53 @@ def __init__(self, options: dict = None, **kwargs):

def payload(self):
return {"options": self.options}


class Har(PuppeteerServiceAction):
    """
    Retrieve the HTTP Archive (HAR) for the current session.

    The HAR file holds detailed information about the network requests and
    responses made during the session.

    Takes no arguments. HAR capture must be enabled up front by passing
    ``har_recording=True`` to the initial ``PuppeteerRequest``.
    """

    endpoint = "har"

    def payload(self):
        # The service endpoint requires no parameters.
        return {}


class FillForm(PuppeteerServiceAction):
    """
    Fill out, and optionally submit, a form on the current page.

    :param input_mapping: dict keyed by CSS selector; each value is a dict
        describing how to fill that element:

        * ``value`` (str): the text to type into the element.
        * ``delay`` (int, optional): milliseconds to wait between keystrokes.
          Defaults to 0 when omitted.

    :param submit_button: optional CSS selector of the form's submit button;
        when given, it is clicked after all inputs are filled.
    """

    endpoint = "fill_form"

    def __init__(self, input_mapping: dict, submit_button: str = None):
        self.input_mapping = input_mapping
        self.submit_button = submit_button

    def payload(self):
        # Keys follow the service's camelCase naming convention.
        return {
            "inputMapping": self.input_mapping,
            "submitButton": self.submit_button,
        }


class RecaptchaSolver(PuppeteerServiceAction):
"""
Expand Down
5 changes: 3 additions & 2 deletions scrapypuppeteer/browser_managers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@

from abc import ABC, abstractmethod


class BrowserManager(ABC):
    """Abstract interface the download middleware uses to drive a browser."""

    @abstractmethod
    def process_request(self, request, spider):
        """Handle an outgoing request for the given spider."""

    @abstractmethod
    def close_used_contexts(self):
        """Release browser contexts that were opened during the crawl."""

    @abstractmethod
    def process_response(self, middleware, request, response, spider):
        """Post-process a response before it is handed back to the spider."""
Loading
Loading