-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtwint_scraper.py
81 lines (59 loc) · 1.74 KB
/
twint_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import twint, json, os, time
from pprint import pprint
# Combined dump file: appendPartyToJson() appends every party-tagged tweet here.
TWEETS_DUMP_FILENAME = "abgeordnete_tweets.json"
# Presumably the matching dump for account metadata; nothing in the code
# visible here reads or writes it -- TODO confirm intended use.
META_DUMP_FILENAME = "abgeordnete_meta.json"
def searchUserTweets(username):
    """Run a twint tweet search for one account.

    twint is configured to store its results as JSON in a per-user file
    named ``abgeordnete_tweets_<username>.json``. Any exception raised by
    the search is reported on stdout and otherwise ignored, so one failing
    account does not abort a longer run.

    Args:
        username: Twitter handle to search for.

    Returns:
        The per-user output filename. It is returned even when the search
        failed, in which case the file may not exist.
    """
    out_filename = f"abgeordnete_tweets_{username}.json"

    config = twint.Config()
    config.Username = username
    config.Store_json = True
    config.Hide_output = True
    config.Retweets = True  # presumably includes retweets -- see twint docs
    # Optional date window, currently disabled:
    #   config.Since = '2021-05-24'
    #   config.Until = '2021-05-31'
    config.Output = out_filename

    try:
        twint.run.Search(config)
    except Exception as e:
        # Best effort: report the failure and move on to the next account.
        print(f"\n Skipped {username} because of:")
        print(f"\n\t {e}")

    return out_filename
def searchUserMeta(username):
    """Run a twint profile lookup for one account.

    twint is configured to store its result as JSON in a per-user file
    named ``abgeordnete_meta_<username>.json``. Any exception raised by the
    lookup is reported on stdout and otherwise ignored.

    Args:
        username: Twitter handle to look up.

    Returns:
        The per-user output filename. It is returned even when the lookup
        failed, in which case the file may not exist.
    """
    out_filename = f"abgeordnete_meta_{username}.json"

    config = twint.Config()
    config.Username = username
    config.Store_json = True
    config.Hide_output = True
    config.Output = out_filename

    try:
        twint.run.Lookup(config)
    except Exception as e:
        # Best effort: report the failure and move on to the next account.
        print(f"\n Skipped {username} because of:")
        print(f"\n\t {e}")

    return out_filename
def appendPartyToJson(filename, party, dump_filename=None):
    """Tag every tweet of a per-user file with its party and append it to a dump.

    ``filename`` is read as one JSON object per line. Each object gets a
    ``'partei'`` key set to ``party`` and is appended, pretty-printed with
    an indent of 4, to the dump file. ``filename`` is deleted afterwards.

    Args:
        filename: Path of the per-user file to consume.
        party: Value stored under the ``'partei'`` key of every tweet.
        dump_filename: File to append to. Defaults to
            ``TWEETS_DUMP_FILENAME`` when omitted or ``None``.

    Returns:
        None. When ``filename`` does not exist a notice is printed and
        nothing is written.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON. All
            lines are parsed before anything is written, so in that case
            the dump is left untouched and ``filename`` is kept.
    """
    if not os.path.isfile(filename):
        # No file at this path (e.g. the search produced no output).
        print(f"File {filename} is empty!")
        return

    if dump_filename is None:
        dump_filename = TWEETS_DUMP_FILENAME

    tweets_list = []
    # Read as UTF-8 explicitly: the dump is written as UTF-8 below, and the
    # platform default encoding is not guaranteed to match.
    with open(filename, encoding='utf-8') as rf:
        for line in rf:
            if not line.strip():
                # Tolerate blank lines instead of crashing in json.loads.
                continue
            tweet = json.loads(line)
            tweet['partei'] = party
            tweets_list.append(tweet)

    with open(dump_filename, 'a', encoding='utf-8') as wf:
        for tweet in tweets_list:
            json.dump(tweet, wf, ensure_ascii=False, indent=4)

    # The per-user file is only an intermediate artifact.
    os.remove(filename)
# Driver: for every party, scrape each listed account and append its
# party-tagged tweets to the combined dump.
# The account list is iterated as a mapping of party name -> list of Twitter
# usernames. It is opened as UTF-8 explicitly so that non-ASCII party names
# or usernames do not depend on the platform's default encoding.
with open('abgeordnete_usernamen_twitter.json', encoding='utf-8') as rf:
    partymemebers = json.load(rf)

for party, members in partymemebers.items():
    print(f"Processing party: {party} ...")
    for member in members:
        print(f"Current account: {member}")
        # searchUserTweets returns the per-user filename even on failure;
        # appendPartyToJson handles the case where that file is missing.
        result_filename = searchUserTweets(member)
        appendPartyToJson(result_filename, party)