-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetvacancies.py
77 lines (66 loc) · 3.71 KB
/
getvacancies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Vacancy pages scraped when ``main`` is called without an explicit URL list.
_VACANCY_URLS = (
    "https://www.rabota.md/ro/locuri-de-munca/devops-engineer-createq/92222",
    "https://www.rabota.md/ro/locuri-de-munca/mid-senior-software-developer-3/36018",
    "https://www.rabota.md/ro/locuri-de-munca/android-developer-ook-group/85491",
    "https://www.rabota.md/ro/locuri-de-munca/1s-developer/86998",
    "https://www.rabota.md/ro/locuri-de-munca/senior-php-developer-balti/51308",
    "https://www.rabota.md/ro/locuri-de-munca/it-teacher/72552",
    "https://www.rabota.md/ro/locuri-de-munca/senior-java-developer/36688",
    "https://www.rabota.md/ro/locuri-de-munca/android-developer/93077",
    "https://www.rabota.md/ro/locuri-de-munca/business-intelligence-analyst-english/86554",
    "https://www.rabota.md/ro/locuri-de-munca/middle-ios-developer/79364",
    "https://www.rabota.md/ro/locuri-de-munca/middle-java-developer-it-solutions-for-financial-sector/91429",
    "https://www.rabota.md/ro/locuri-de-munca/c-developer-10-000-45-000-mdl/91559",
    "https://www.rabota.md/ro/locuri-de-munca/unity-developer-10-000-45-000-mdl/63617",
    "https://www.rabota.md/ro/locuri-de-munca/ios-developer-middle-20000-45000-mdl/38130",
    "https://www.rabota.md/ro/locuri-de-munca/sistemnyy-administrator/65471",
    "https://www.rabota.md/ro/locuri-de-munca/network-engineer-1000-1500-usd-motava/92882",
    "https://www.rabota.md/ro/locuri-de-munca/full-stack-php-developer-nuacom/73386",
    "https://www.rabota.md/ro/locuri-de-munca/lead-generator-limbile-germana-italiana-engleza/92343",
    "https://www.rabota.md/ro/locuri-de-munca/office-manager/47503",
    "https://www.rabota.md/ro/locuri-de-munca/english-speaking-travel-agent/68065",
    "https://www.rabota.md/ro/locuri-de-munca/travel-accounting-specialist-net-10-500-mdl-bonus/88828",
    "https://www.rabota.md/ro/locuri-de-munca/sales-for-online-school/82298",
)

# Topic -> keywords. Matching is case-sensitive on purpose: short keywords
# such as "IT" and "C" would otherwise match ordinary words.
_TOPIC_KEYWORDS = {
    "IT": ["Java", "C", "Python", "Developer", "Android", "IT", "API", "Web",
           "WEB", ".Net", "JavaScript", "HTML", "CSS", "PHP", "Network",
           "Engineer"],
    "Medicine": ["spital", "doctor", "pacienti", "clinica", "medic", "medical"],
    "Management": ["manager", "director", "administrare"],
    "Languages": ["engleza", "italiana", "germana", "spaniola", "japoneza",
                  "araba", "traduceri"],
}

# Exact ``class`` attribute value of the summary cells in the page sidebar.
_SIDEBAR_CLASS = "vip-vacancy-summary__col text-sm text-gray-700"

# Output keys assigned, by position, to the sidebar summary cells.
_SIDEBAR_FIELDS = (
    "location",
    "studies",
    "experience",
    "salary",
    "worktime",
    "workplace",
    "employer",
)

# Seconds to wait for the server before giving up on a single page.
_REQUEST_TIMEOUT = 30

# Punctuation peeled off a token before it is compared with the keywords.
# A leading "." is deliberately kept so that ".Net" can still match.
_TRAILING_PUNCTUATION = ",.;:!?)]\"'"
_LEADING_PUNCTUATION = "([\"'"


def _get_topic(text, keywords):
    """Return the topic of the first word in *text* that is a known keyword.

    Args:
        text: Vacancy description to classify.
        keywords: Mapping of topic name to a collection of keywords.

    Returns:
        The matching topic name, or ``None`` when no word matches.
    """
    # split() with no argument breaks on any whitespace run; page text is
    # full of newlines, which a plain split(" ") leaves glued to the words.
    for token in text.split():
        bare = token.rstrip(_TRAILING_PUNCTUATION).lstrip(_LEADING_PUNCTUATION)
        for topic, terms in keywords.items():
            if token in terms or bare in terms:
                return topic
    return None


def _parse_vacancy(soup):
    """Build the vacancy record from a parsed vacancy page.

    Args:
        soup: ``BeautifulSoup`` document of a single vacancy page.

    Returns:
        A dict with the keys ``vacancytxt``, the seven sidebar fields and
        ``topic``. Sidebar fields missing from the page are ``None``.

    Raises:
        ValueError: If the page has no ``vacancy-content`` block.
    """
    content = soup.find("div", {"class": "vacancy-content"})
    if content is None:
        raise ValueError("no 'vacancy-content' block found on the page")
    description = content.text.strip()

    cells = [
        cell.text.strip()
        for cell in soup.find_all("div", {"class": _SIDEBAR_CLASS})
    ]

    record = {"vacancytxt": description}
    for position, field in enumerate(_SIDEBAR_FIELDS):
        # A page with fewer summary cells yields None instead of IndexError.
        record[field] = cells[position] if position < len(cells) else None
    record["topic"] = _get_topic(description, _TOPIC_KEYWORDS)
    return record


def main(urls=None, output_path="vacancies.json"):
    """Scrape vacancy pages and save the extracted records as JSON.

    Each URL is downloaded, parsed and classified by topic. A URL that
    cannot be fetched, or whose page lacks the vacancy description, is
    reported on stderr and skipped so the remaining vacancies are still
    saved.

    Args:
        urls: Iterable of vacancy page URLs. Defaults to the built-in list.
        output_path: File the JSON array of records is written to.
    """
    # Imports stay function-local, as in the original script.
    import json
    import sys

    import requests
    from bs4 import BeautifulSoup

    if urls is None:
        urls = _VACANCY_URLS

    records = []
    for url in urls:
        try:
            # Without a timeout a stalled server would hang the run forever.
            response = requests.get(url, timeout=_REQUEST_TIMEOUT)
            # Do not parse an HTTP error page as if it were a vacancy.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            records.append(_parse_vacancy(soup))
        except (requests.RequestException, ValueError) as error:
            print(f"Skipping {url}: {error}", file=sys.stderr)

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=4)
# Run the scraper. The call is not guarded by ``if __name__ == "__main__"``,
# so it also executes whenever this module is imported.
main()