From 6a9ee75da6cbc3a4515bfb912f9446106d64ce4e Mon Sep 17 00:00:00 2001 From: AndrewKorzh <92707967+AndrewKorzh@users.noreply.github.com> Date: Fri, 26 Jul 2024 15:45:59 +0300 Subject: [PATCH] Getting rid of redundancy --- scrapypuppeteer/middleware.py | 33 +++++++++-------------------- scrapypuppeteer/scrappypyppeteer.py | 31 --------------------------- 2 files changed, 10 insertions(+), 54 deletions(-) diff --git a/scrapypuppeteer/middleware.py b/scrapypuppeteer/middleware.py index 1011fda..258fda5 100644 --- a/scrapypuppeteer/middleware.py +++ b/scrapypuppeteer/middleware.py @@ -48,31 +48,21 @@ def __init__(self): self.local_scrapy_pyppeteer = LocalScrapyPyppeteer() def process_request(self, request): - pyp_request = self.process_puppeteer_request(request) - return pyp_request - - def process_puppeteer_request(self, request: PuppeteerRequest): - action = request.action - service_url = 'http://_running_local_' - service_params = self._encode_service_params(request) - if service_params: - service_url += "?" + service_params - - meta = { + + action_request = ActionRequest( + url='http://_running_local_', + action=request.action, + cookies=request.cookies, + meta={ "puppeteer_request": request, "dont_obey_robotstxt": True, "proxy": None, - } - - action_request = ActionRequest( - url=service_url, - action=action, - cookies=request.cookies, - meta=meta, + }, ) - puppeteer_response = self.local_scrapy_pyppeteer.process_puppeteer_request(action_request) + + return self.local_scrapy_pyppeteer.process_puppeteer_request(action_request) + - return puppeteer_response @staticmethod def _encode_service_params(request): @@ -89,11 +79,8 @@ def close_used_contexts(self): self.local_scrapy_pyppeteer.context_manager.close_browser() - - class ServiceBrowserManager(BrowserManager): def __init__(self, service_base_url, include_meta, include_headers, crawler): - #### добавить передачу этих параметров #### self.service_base_url = service_base_url self.include_meta = include_meta self.include_headers = include_headers diff --git a/scrapypuppeteer/scrappypyppeteer.py b/scrapypuppeteer/scrappypyppeteer.py index 8247acd..0064614 100644 --- a/scrapypuppeteer/scrappypyppeteer.py +++ b/scrapypuppeteer/scrappypyppeteer.py @@ -42,9 +42,7 @@ class ContextManager: def __init__(self): - #self.browser = "browser" self.browser = syncer.sync(launch()) - #тут инициализация брацщера self.contexts = {} self.pages = {} self.context_page_map = {} @@ -60,11 +58,9 @@ async def open_new_page(self): context_id = uuid.uuid4().hex.upper() page_id = uuid.uuid4().hex.upper() - # --- Создание страницы и добавление её в структуру --- # self.contexts[context_id] = await self.browser.createIncognitoBrowserContext() self.pages[page_id] = await self.contexts[context_id].newPage() self.context_page_map[context_id] = page_id - #-------------------------------------------------------# return context_id, page_id @@ -86,7 +82,6 @@ def __del__(self): class LocalScrapyPyppeteer: -#class BrowserManager: def __init__(self): self.context_manager = ContextManager() @@ -134,12 +129,8 @@ async def async_goto(): cookies = action_request.cookies navigation_options = action_request.action.navigation_options await page.goto(url, navigation_options) - - #Wait options wait_options = action_request.action.payload().get("waitOptions", {}) or {} await self.wait_with_options(page, wait_options) - #Wait options - response_html = await page.content() puppeteer_html_response = PuppeteerHtmlResponse(service_url, @@ -163,14 +154,9 @@ async def async_click(): click_options = action_request.action.click_options or {} navigation_options = action_request.action.navigation_options or {} options = merged = {**click_options, **navigation_options} - await page.click(selector, options) - #navigation_options = action_request.action.navigation_options - #await page.waitForNavigation(navigation_options) - #Wait options wait_options = action_request.action.payload().get("waitOptions", {}) or {} await self.wait_with_options(page, wait_options) - #Wait options response_html = await page.content() service_url = action_request.url @@ -195,12 +181,8 @@ async def async_go_back(): cookies = action_request.cookies navigation_options = action_request.action.navigation_options await page.goBack(navigation_options) - - #Wait options wait_options = action_request.action.payload().get("waitOptions", {}) or {} await self.wait_with_options(page, wait_options) - #Wait options - response_html = await page.content() service_url = action_request.url puppeteer_html_response = PuppeteerHtmlResponse(service_url, @@ -224,12 +206,8 @@ async def async_go_forward(): cookies = action_request.cookies navigation_options = action_request.action.navigation_options await page.goForward(navigation_options) - - #Wait options wait_options = action_request.action.payload().get("waitOptions", {}) or {} await self.wait_with_options(page, wait_options) - #Wait options - response_html = await page.content() service_url = action_request.url puppeteer_html_response = PuppeteerHtmlResponse(service_url, @@ -251,16 +229,11 @@ def screenshot(self, action_request: ActionRequest): page = self.context_manager.get_page_by_id(context_id, page_id) async def async_screenshot(): - cookies = action_request.cookies - request_options = action_request.action.options or {} screenshot_options = {'encoding': 'binary'} screenshot_options.update(request_options) - screenshot_bytes = await page.screenshot(screenshot_options) screenshot_base64 = base64.b64encode(screenshot_bytes).decode('utf-8') - - service_url = action_request.url puppeteer_screenshot_response = PuppeteerScreenshotResponse(service_url, @@ -296,11 +269,8 @@ async def async_scroll(): """ await page.evaluate(script) - - #Wait options wait_options = action_request.action.payload().get("waitOptions", {}) or {} await self.wait_with_options(page, wait_options) - #Wait options response_html = await page.content() service_url = action_request.url @@ -317,7 +287,6 @@ async def async_scroll(): def action(self, action_request: ActionRequest): - raise ValueError("CustomJsAction is not available in local mode")