Skip to content

Commit

Permalink
added incremental load
Browse files Browse the repository at this point in the history
  • Loading branch information
dishadas168 committed Nov 7, 2023
1 parent 03604b9 commit 3b2425c
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 6 deletions.
4 changes: 2 additions & 2 deletions scraper_azure_function/function_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ def scrape_ebay_fns(req: func.HttpRequest) -> func.HttpResponse:
country = body['country']
tea_type = body['tea_type']

if scrape_date and country and tea_type:
if country and tea_type:

scrape_ebay_and_store(scrape_date, country, tea_type)
scrape_ebay_and_store(country, tea_type, scrape_date)
return func.HttpResponse(
f"This HTTP-triggered function "
f"executed successfully for date {scrape_date}, country {country} and tea type {tea_type}")
Expand Down
25 changes: 21 additions & 4 deletions scraper_azure_function/utils/scrape_ebay.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import hashlib
from base64 import b64encode
import pymongo
import calendar
from dateutil import tz

# SECURITY NOTE(review): this connection string embeds what looks like the
# database account key directly in source control. Consider loading the URI
# from configuration (for example an environment variable or the function
# app's settings) and rotating the exposed key -- TODO confirm with owner.
mongodb_uri="mongodb://ebay-storage-db:Z07RYTm2ZOMWBNeioffbKYDQ7Vgm4IfspqOX8SMk7TT0vDgnOadG1GelNUaeRBAvCIL5CjOM7zm8ACDbZGMfpQ==@ebay-storage-db.mongo.cosmos.azure.com:10255/?ssl=true&retrywrites=false&replicaSet=globaldb&maxIdleTimeMS=120000&appName=@ebay-storage-db@"

Expand All @@ -23,7 +25,24 @@ def make_uid(*args):
def encode_hyphen(string):
    """Return *string* with every ``-`` replaced by the text ``%252D``.

    ``%252D`` is ``%2D`` (the percent-encoding of a hyphen) with its own
    percent sign encoded once more as ``%25``.
    """
    hyphen_token = "%252D"
    # Splitting on the hyphen and re-joining with the token rewrites each
    # occurrence while leaving all other characters untouched.
    pieces = string.split("-")
    return hyphen_token.join(pieces)

def scrape_ebay_and_store(scrape_date, country, tea_type):
def get_scrape_date(date_string):
    """Convert a UTC timestamp string into a ``"Sold <Mon> <day>, <year>"`` label.

    The timestamp is interpreted as UTC, converted to the America/Chicago
    time zone, and rendered as e.g. ``"Sold Nov 7, 2023"`` (abbreviated month
    name, day without zero padding, four-digit year).

    Args:
        date_string: Timestamp shaped like ``YYYY-MM-DDTHH:MM:SS``, optionally
            followed by fractional seconds and/or a trailing ``Z``.

    Returns:
        The formatted label string.

    Raises:
        ValueError: If the timestamp does not match the expected layout.
    """
    from_zone = tz.gettz('UTC')
    to_zone = tz.gettz('America/Chicago')

    # Drop fractional seconds (and anything following them); the strptime
    # format below has no field for them.
    date_string = date_string.split(".")[0]
    # A timestamp without fractional seconds may still end in the UTC "Z"
    # designator, which would otherwise make strptime raise ValueError.
    if date_string.endswith("Z"):
        date_string = date_string[:-1]

    date_format = "%Y-%m-%dT%H:%M:%S"
    dt_obj = datetime.strptime(date_string, date_format)
    # The parsed value is naive: tag it as UTC first, then convert.
    dt_obj = dt_obj.replace(tzinfo=from_zone)
    dt_obj = dt_obj.astimezone(to_zone)
    month = calendar.month_abbr[dt_obj.month]

    return f"Sold {month} {dt_obj.day}, {dt_obj.year}"

def scrape_ebay_and_store(country, tea_type, scrape_date):

if len(scrape_date) > 0:
scrape_date = get_scrape_date(scrape_date)

page=1
item_count = 0
Expand Down Expand Up @@ -51,9 +70,7 @@ def scrape_ebay_and_store(scrape_date, country, tea_type):

try:
item_data["date_sold"] = item.find('div', attrs={'class':'s-item__caption-section'}).find('span', attrs={'class':'POSITIVE'}).text
if item_data["date_sold"] != scrape_date:
print(item_data["date_sold"])
print(scrape_date)
if len(scrape_date) > 0 and item_data["date_sold"] != scrape_date:
continue
item_data["title"] = item.find('span', attrs={'role':'heading'}).text
item_data['subtitle'] = item.find('div', attrs={'class':'s-item__subtitle'}).find('span', attrs={'class':'SECONDARY_INFO'}).text
Expand Down

0 comments on commit 3b2425c

Please sign in to comment.