Skip to content

Commit

Permalink
Getting rid of redundancy
Browse files Browse the repository at this point in the history
  • Loading branch information
AndrewKorzh committed Jul 26, 2024
1 parent 2958815 commit 6a9ee75
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 54 deletions.
33 changes: 10 additions & 23 deletions scrapypuppeteer/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,31 +48,21 @@ def __init__(self):
self.local_scrapy_pyppeteer = LocalScrapyPyppeteer()

def process_request(self, request):
pyp_request = self.process_puppeteer_request(request)
return pyp_request

def process_puppeteer_request(self, request: PuppeteerRequest):
action = request.action
service_url = 'http://_running_local_'
service_params = self._encode_service_params(request)
if service_params:
service_url += "?" + service_params

meta = {

action_request = ActionRequest(
url='http://_running_local_',
action=request.action,
cookies=request.cookies,
meta={
"puppeteer_request": request,
"dont_obey_robotstxt": True,
"proxy": None,
}

action_request = ActionRequest(
url=service_url,
action=action,
cookies=request.cookies,
meta=meta,
},
)
puppeteer_response = self.local_scrapy_pyppeteer.process_puppeteer_request(action_request)

return self.local_scrapy_pyppeteer.process_puppeteer_request(action_request)


return puppeteer_response

@staticmethod
def _encode_service_params(request):
Expand All @@ -89,11 +79,8 @@ def close_used_contexts(self):
self.local_scrapy_pyppeteer.context_manager.close_browser()




class ServiceBrowserManager(BrowserManager):
def __init__(self, service_base_url, include_meta, include_headers, crawler):
#### добавить передачу этих параметров ####
self.service_base_url = service_base_url
self.include_meta = include_meta
self.include_headers = include_headers
Expand Down
31 changes: 0 additions & 31 deletions scrapypuppeteer/scrappypyppeteer.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@
class ContextManager:

def __init__(self):
#self.browser = "browser"
self.browser = syncer.sync(launch())
#тут инициализация браузера
self.contexts = {}
self.pages = {}
self.context_page_map = {}
Expand All @@ -60,11 +58,9 @@ async def open_new_page(self):
context_id = uuid.uuid4().hex.upper()
page_id = uuid.uuid4().hex.upper()

# --- Создание страницы и добавление её в структуру --- #
self.contexts[context_id] = await self.browser.createIncognitoBrowserContext()
self.pages[page_id] = await self.contexts[context_id].newPage()
self.context_page_map[context_id] = page_id
#-------------------------------------------------------#

return context_id, page_id

Expand All @@ -86,7 +82,6 @@ def __del__(self):


class LocalScrapyPyppeteer:
#class BrowserManager:
def __init__(self):
self.context_manager = ContextManager()

Expand Down Expand Up @@ -134,12 +129,8 @@ async def async_goto():
cookies = action_request.cookies
navigation_options = action_request.action.navigation_options
await page.goto(url, navigation_options)

#Wait options
wait_options = action_request.action.payload().get("waitOptions", {}) or {}
await self.wait_with_options(page, wait_options)
#Wait options

response_html = await page.content()

puppeteer_html_response = PuppeteerHtmlResponse(service_url,
Expand All @@ -163,14 +154,9 @@ async def async_click():
click_options = action_request.action.click_options or {}
navigation_options = action_request.action.navigation_options or {}
options = merged = {**click_options, **navigation_options}

await page.click(selector, options)
#navigation_options = action_request.action.navigation_options
#await page.waitForNavigation(navigation_options)
#Wait options
wait_options = action_request.action.payload().get("waitOptions", {}) or {}
await self.wait_with_options(page, wait_options)
#Wait options
response_html = await page.content()
service_url = action_request.url

Expand All @@ -195,12 +181,8 @@ async def async_go_back():
cookies = action_request.cookies
navigation_options = action_request.action.navigation_options
await page.goBack(navigation_options)

#Wait options
wait_options = action_request.action.payload().get("waitOptions", {}) or {}
await self.wait_with_options(page, wait_options)
#Wait options

response_html = await page.content()
service_url = action_request.url
puppeteer_html_response = PuppeteerHtmlResponse(service_url,
Expand All @@ -224,12 +206,8 @@ async def async_go_forward():
cookies = action_request.cookies
navigation_options = action_request.action.navigation_options
await page.goForward(navigation_options)

#Wait options
wait_options = action_request.action.payload().get("waitOptions", {}) or {}
await self.wait_with_options(page, wait_options)
#Wait options

response_html = await page.content()
service_url = action_request.url
puppeteer_html_response = PuppeteerHtmlResponse(service_url,
Expand All @@ -251,16 +229,11 @@ def screenshot(self, action_request: ActionRequest):
page = self.context_manager.get_page_by_id(context_id, page_id)

async def async_screenshot():
cookies = action_request.cookies

request_options = action_request.action.options or {}
screenshot_options = {'encoding': 'binary'}
screenshot_options.update(request_options)

screenshot_bytes = await page.screenshot(screenshot_options)
screenshot_base64 = base64.b64encode(screenshot_bytes).decode('utf-8')


service_url = action_request.url

puppeteer_screenshot_response = PuppeteerScreenshotResponse(service_url,
Expand Down Expand Up @@ -296,11 +269,8 @@ async def async_scroll():
"""

await page.evaluate(script)

#Wait options
wait_options = action_request.action.payload().get("waitOptions", {}) or {}
await self.wait_with_options(page, wait_options)
#Wait options

response_html = await page.content()
service_url = action_request.url
Expand All @@ -317,7 +287,6 @@ async def async_scroll():


def action(self, action_request: ActionRequest):

raise ValueError("CustomJsAction is not available in local mode")


Expand Down

0 comments on commit 6a9ee75

Please sign in to comment.