Skip to content

Commit

Permalink
Indeed country support (#38)
Browse files Browse the repository at this point in the history
  • Loading branch information
cullenwatson authored Sep 5, 2023
1 parent 1598d4f commit 1c264b8
Show file tree
Hide file tree
Showing 9 changed files with 1,404 additions and 533 deletions.
1,469 changes: 1,042 additions & 427 deletions JobSpy_Demo.ipynb

Large diffs are not rendered by default.

91 changes: 88 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ import pandas as pd
jobs: pd.DataFrame = scrape_jobs(
site_name=["indeed", "linkedin", "zip_recruiter"],
search_term="software engineer",
results_wanted=10
location="Dallas, TX",
results_wanted=10,

# country: only needed for indeed
country='USA'
)

if jobs.empty:
Expand Down Expand Up @@ -65,8 +69,10 @@ Optional
├── is_remote (bool)
├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
├── easy_apply (bool): filters for jobs on LinkedIn that have the 'Easy Apply' option
├── country (enum): uses the corresponding subdomain on Indeed (e.g. Canada on Indeed is ca.indeed.com)
```


### JobPost Schema
```plaintext
JobPost
Expand All @@ -81,12 +87,91 @@ JobPost
├── job_type (enum)
├── compensation (object)
│ ├── interval (CompensationInterval): yearly, monthly, weekly, daily, hourly
│ ├── min_amount (float)
│ ├── max_amount (float)
│ ├── min_amount (int)
│ ├── max_amount (int)
│ └── currency (str)
└── date_posted (datetime)
```

## Supported Countries for Job Searching


### **LinkedIn**

LinkedIn searches globally. Use the `location` parameter

### **ZipRecruiter**

ZipRecruiter searches for jobs in US/Canada. Use the `location` parameter


### **Indeed**
For Indeed, use the `location` parameter along with the `country` parameter.

You can specify the following countries when searching on Indeed (use the exact name; matching ignores letter case and surrounding whitespace):

- Argentina
- Australia
- Austria
- Bahrain
- Belgium
- Brazil
- Canada
- Chile
- China
- Colombia
- Costa Rica
- Czech Republic
- Denmark
- Ecuador
- Egypt
- Finland
- France
- Germany
- Greece
- Hong Kong
- Hungary
- India
- Indonesia
- Ireland
- Israel
- Italy
- Japan
- Kuwait
- Luxembourg
- Malaysia
- Mexico
- Morocco
- Netherlands
- New Zealand
- Nigeria
- Norway
- Oman
- Pakistan
- Panama
- Peru
- Philippines
- Poland
- Portugal
- Qatar
- Romania
- Saudi Arabia
- Singapore
- South Africa
- South Korea
- Spain
- Sweden
- Switzerland
- Taiwan
- Thailand
- Turkey
- Ukraine
- United Arab Emirates
- UK
- USA
- Uruguay
- Venezuela
- Vietnam

## Frequently Asked Questions

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "python-jobspy"
version = "1.0.3"
version = "1.1.0"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <[email protected]>", "Cullen Watson <[email protected]>"]
readme = "README.md"
Expand Down
39 changes: 16 additions & 23 deletions src/jobspy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
import pandas as pd
from typing import List, Tuple

from .jobs import JobType
from .jobs import JobType, Location
from .scrapers.indeed import IndeedScraper
from .scrapers.ziprecruiter import ZipRecruiterScraper
from .scrapers.linkedin import LinkedInScraper
from .scrapers import (
ScraperInput,
Site,
JobResponse,
)
from .scrapers import ScraperInput, Site, JobResponse, Country


SCRAPER_MAPPING = {
Expand All @@ -32,6 +28,7 @@ def scrape_jobs(
job_type: JobType = None,
easy_apply: bool = False, # linkedin
results_wanted: int = 15,
country: str = "usa",
) -> pd.DataFrame:
"""
Asynchronously scrapes job data from multiple job sites.
Expand All @@ -41,9 +38,12 @@ def scrape_jobs(
if type(site_name) == str:
site_name = _map_str_to_site(site_name)

country_enum = Country.from_string(country)

site_type = [site_name] if type(site_name) == Site else site_name
scraper_input = ScraperInput(
site_type=site_type,
country=country_enum,
search_term=search_term,
location=location,
distance=distance,
Expand Down Expand Up @@ -71,22 +71,15 @@ def scrape_site(site: Site) -> Tuple[str, JobResponse]:
for job in job_response.jobs:
data = job.dict()
data["site"] = site

# Formatting JobType
data["job_type"] = data["job_type"].value if data["job_type"] else None

# Formatting Location
location_obj = data.get("location")
if location_obj and isinstance(location_obj, dict):
data["city"] = location_obj.get("city", "")
data["state"] = location_obj.get("state", "")
data["country"] = location_obj.get("country", "USA")
data["company"] = data["company_name"]
if data["job_type"]:
# Take the first value from the job type tuple
data["job_type"] = data["job_type"].value[0]
else:
data["city"] = None
data["state"] = None
data["country"] = None
data["job_type"] = None

data["location"] = Location(**data["location"]).display_location()

# Formatting Compensation
compensation_obj = data.get("compensation")
if compensation_obj and isinstance(compensation_obj, dict):
data["interval"] = (
Expand All @@ -111,13 +104,13 @@ def scrape_site(site: Site) -> Tuple[str, JobResponse]:
desired_order = [
"site",
"title",
"company_name",
"city",
"state",
"company",
"location",
"job_type",
"interval",
"min_amount",
"max_amount",
"currency",
"job_url",
"description",
]
Expand Down
164 changes: 150 additions & 14 deletions src/jobspy/jobs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,160 @@


class JobType(Enum):
# Employment categories that a scraped job posting can be tagged with.
#
# NOTE(review): this span is a rendered diff, so two generations of the
# members appear back to back: first the earlier form, where each member's
# value is a single string, then the later form, where each value is a tuple
# of strings. Only one generation can exist in the real module, because an
# Enum body cannot bind the same member name twice.

# Earlier form: one lowercase, space-free label per member.
FULL_TIME = "fulltime"
PART_TIME = "parttime"
CONTRACT = "contract"
TEMPORARY = "temporary"
INTERNSHIP = "internship"

PER_DIEM = "perdiem"
NIGHTS = "nights"
OTHER = "other"
SUMMER = "summer"
VOLUNTEER = "volunteer"
# Later form: each value is a tuple of labels. The first element is the
# English label (scrape_jobs reads `.value[0]` for output); the remaining
# entries appear to be the same label in other languages, written lowercase
# with spaces removed -- presumably so text scraped from localized sites can
# be matched; confirm against the scrapers.
# NOTE(review): "estágio/trainee" looks like an internship/trainee label
# rather than a full-time one, and "vollzeit" and "tempsplein" are each
# listed twice -- confirm these are intended.
FULL_TIME = (
"fulltime",
"períodointegral",
"estágio/trainee",
"cunormăîntreagă",
"tiempocompleto",
"vollzeit",
"voltijds",
"tempointegral",
"全职",
"plnýúvazek",
"fuldtid",
"دوامكامل",
"kokopäivätyö",
"tempsplein",
"vollzeit",
"πλήρηςαπασχόληση",
"teljesmunkaidő",
"tempopieno",
"tempsplein",
"heltid",
"jornadacompleta",
"pełnyetat",
"정규직",
"100%",
"全職",
"งานประจำ",
"tamzamanlı",
"повназайнятість",
"toànthờigian",
)
PART_TIME = ("parttime", "teilzeit")
CONTRACT = ("contract", "contractor")
# Single-label members keep the trailing comma so the value is still a tuple.
TEMPORARY = ("temporary",)
INTERNSHIP = ("internship", "prácticas", "ojt(onthejobtraining)", "praktikum")

PER_DIEM = ("perdiem",)
NIGHTS = ("nights",)
OTHER = ("other",)
SUMMER = ("summer",)
VOLUNTEER = ("volunteer",)


class Country(Enum):
    """Countries a search can target, each paired with a site subdomain.

    Members are declared as ``(name, domain)`` pairs. The lowercase name
    becomes the member's ``value``; the second element is kept on the
    ``domain`` attribute (per the README, the subdomain used on Indeed,
    e.g. ``ca`` for ca.indeed.com).
    """

    ARGENTINA = ("argentina", "ar")
    AUSTRALIA = ("australia", "au")
    AUSTRIA = ("austria", "at")
    BAHRAIN = ("bahrain", "bh")
    BELGIUM = ("belgium", "be")
    BRAZIL = ("brazil", "br")
    CANADA = ("canada", "ca")
    CHILE = ("chile", "cl")
    CHINA = ("china", "cn")
    COLOMBIA = ("colombia", "co")
    COSTARICA = ("costa rica", "cr")
    CZECHREPUBLIC = ("czech republic", "cz")
    DENMARK = ("denmark", "dk")
    ECUADOR = ("ecuador", "ec")
    EGYPT = ("egypt", "eg")
    FINLAND = ("finland", "fi")
    FRANCE = ("france", "fr")
    GERMANY = ("germany", "de")
    GREECE = ("greece", "gr")
    HONGKONG = ("hong kong", "hk")
    HUNGARY = ("hungary", "hu")
    INDIA = ("india", "in")
    INDONESIA = ("indonesia", "id")
    IRELAND = ("ireland", "ie")
    ISRAEL = ("israel", "il")
    ITALY = ("italy", "it")
    JAPAN = ("japan", "jp")
    KUWAIT = ("kuwait", "kw")
    LUXEMBOURG = ("luxembourg", "lu")
    MALAYSIA = ("malaysia", "malaysia")
    MEXICO = ("mexico", "mx")
    MOROCCO = ("morocco", "ma")
    NETHERLANDS = ("netherlands", "nl")
    NEWZEALAND = ("new zealand", "nz")
    NIGERIA = ("nigeria", "ng")
    NORWAY = ("norway", "no")
    OMAN = ("oman", "om")
    PAKISTAN = ("pakistan", "pk")
    PANAMA = ("panama", "pa")
    PERU = ("peru", "pe")
    PHILIPPINES = ("philippines", "ph")
    POLAND = ("poland", "pl")
    PORTUGAL = ("portugal", "pt")
    QATAR = ("qatar", "qa")
    ROMANIA = ("romania", "ro")
    SAUDIARABIA = ("saudi arabia", "sa")
    SINGAPORE = ("singapore", "sg")
    SOUTHAFRICA = ("south africa", "za")
    SOUTHKOREA = ("south korea", "kr")
    SPAIN = ("spain", "es")
    SWEDEN = ("sweden", "se")
    SWITZERLAND = ("switzerland", "ch")
    TAIWAN = ("taiwan", "tw")
    THAILAND = ("thailand", "th")
    TURKEY = ("turkey", "tr")
    UKRAINE = ("ukraine", "ua")
    UNITEDARABEMIRATES = ("united arab emirates", "ae")
    UK = ("uk", "uk")
    USA = ("usa", "www")
    URUGUAY = ("uruguay", "uy")
    VENEZUELA = ("venezuela", "ve")
    VIETNAM = ("vietnam", "vn")

    # Internal placeholder used for ZipRecruiter.
    US_CANADA = ("usa/ca", "www")

    # Internal placeholder used for LinkedIn.
    WORLDWIDE = ("worldwide", "www")

    def __new__(cls, country, domain):
        """Create a member whose value is ``country`` alone.

        The declared ``(country, domain)`` tuple is unpacked here so that
        ``member.value`` is just the country name, while the domain stays
        reachable as ``member.domain``.
        """
        member = object.__new__(cls)
        member._value_ = country
        member.domain = domain
        return member

    @property
    def domain_value(self):
        """Return the domain string stored on this member."""
        return self.domain

    @classmethod
    def from_string(cls, country_str: str):
        """Look up the member whose value matches ``country_str``.

        The input is stripped of surrounding whitespace and lowercased
        before comparison, so ``" USA "`` resolves to ``Country.USA``.

        Raises:
            ValueError: if no member has that value; the message lists every
                member value, in declaration order.
        """
        wanted = country_str.strip().lower()
        found = next((member for member in cls if member.value == wanted), None)
        if found is not None:
            return found

        names = ", ".join(member.value for member in cls)
        raise ValueError(
            f"Invalid country string: '{wanted}'. Valid countries (only include this param for Indeed) are: {names}"
        )


class Location(BaseModel):
# Where a job is located: an optional city and state plus a Country member.
#
# NOTE(review): this span is a rendered diff. The first two field lines are
# the earlier declarations (country as a plain string defaulting to "USA",
# city as a bare str); the three after them are the later declarations, in
# which all fields default to None and country is a Country enum member.
country: str = "USA"
city: str = None
country: Country = None
city: Optional[str] = None
state: Optional[str] = None

def display_location(self) -> str:
# Build a human-readable "city, state, country" string, leaving out any
# part that is empty. Returns "" when nothing is set.
location_parts = []
if self.city:
location_parts.append(self.city)
if self.state:
location_parts.append(self.state)
# The internal placeholder members (US_CANADA, WORLDWIDE) do not name a
# single country, so they are left out of the display string.
if self.country and self.country not in (Country.US_CANADA, Country.WORLDWIDE):
# "usa" and "uk" are abbreviations, so they are upper-cased; every
# other country value is title-cased (e.g. "south korea" -> "South Korea").
if self.country.value in ("usa", "uk"):
location_parts.append(self.country.value.upper())
else:
location_parts.append(self.country.value.title())
return ", ".join(location_parts)


class CompensationInterval(Enum):
YEARLY = "yearly"
Expand All @@ -37,7 +173,7 @@ class Compensation(BaseModel):
interval: CompensationInterval
min_amount: int = None
max_amount: int = None
currency: str = "USD"
currency: Optional[str] = "USD"


class JobPost(BaseModel):
Expand Down
Loading

0 comments on commit 1c264b8

Please sign in to comment.