Skip to content

Commit

Permalink
Merge pull request #826 from ScrapeGraphAI/fix-runtime-error
Browse files Browse the repository at this point in the history
fix: runtime error when fetching page content
  • Loading branch information
VinciGit00 authored Nov 25, 2024
2 parents b98dd39 + 7285ab0 commit c84ff56
Showing 1 changed file with 4 additions and 12 deletions.
16 changes: 4 additions & 12 deletions scrapegraphai/docloaders/chromium.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,18 +100,11 @@ async def ascrape_undetected_chromedriver(self, url: str) -> str:
async def ascrape_playwright(self, url: str) -> str:
"""
Asynchronously scrape the content of a given URL using Playwright's async API.
Args:
url (str): The URL to scrape.
Returns:
str: The scraped HTML content or an error message if an exception occurs.
"""
from playwright.async_api import async_playwright
from undetected_playwright import Malenia

logger.info(f"Starting scraping with {self.backend}...")
results = ""
attempt = 0

while attempt < self.RETRY_LIMIT:
Expand All @@ -127,16 +120,15 @@ async def ascrape_playwright(self, url: str) -> str:
await page.wait_for_load_state(self.load_state)
results = await page.content()
logger.info("Content scraped")
break
return results
except (aiohttp.ClientError, asyncio.TimeoutError, Exception) as e:
attempt += 1
logger.error(f"Attempt {attempt} failed: {e}")
if attempt == self.RETRY_LIMIT:
results = f"Error: Network error after {self.RETRY_LIMIT} attempts - {e}"
raise RuntimeError(f"Failed to fetch {url} after {self.RETRY_LIMIT} attempts: {e}")
finally:
await browser.close()

return results
if 'browser' in locals():
await browser.close()

async def ascrape_with_js_support(self, url: str) -> str:
"""
Expand Down

0 comments on commit c84ff56

Please sign in to comment.