forked from ReAGEnT-HTW-Berlin/Twint-API
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtwint_scraper_v2.py
60 lines (43 loc) · 1.65 KB
/
twint_scraper_v2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Created By: Schander, 572893
import twint, json, os, time, pandas
# The CSV contains the following parties:
# "CDU", "AfD", "Die_Gruenen", "SPD", "FDP", "Die Linke"
def getKeywordsFromCSV(csv, party):
    """Build the list of search keywords for one party.

    Args:
        csv: pandas DataFrame with at least the columns "party",
            "screen_name" and "full_name".
        party: Party label to select; it is compared for equality against
            the "party" column.

    Returns:
        A list that starts with ``party`` itself (so tweets naming the
        party, for example "CDU", are searched as well), followed by the
        screen name and full name of every matching row, in row order.
        Missing cells (NaN/None) are left out.
    """
    rowsOfParty = csv[csv["party"] == party]
    cells = rowsOfParty[["screen_name", "full_name"]].values.flatten().tolist()
    # An empty CSV cell is read as NaN, which is not a usable search keyword,
    # so it is dropped here instead of being passed on to the search.
    return [party, *(cell for cell in cells if pandas.notna(cell))]
# Search for all tweets that mention a representative or the party itself
# For example: "Christian Lindner" or "c_lindner" or "FDP"
def searchPoliticalTweets(csv, mentionedParty, *, since='2021-11-15',
                          until='2021-12-31', limit=None):
    """Run one twint search per keyword of a party and store the results.

    The keywords come from ``getKeywordsFromCSV(csv, mentionedParty)``.
    Every search is configured with the same JSON output file. A keyword
    whose search raises an exception is reported on stdout and skipped, so
    the remaining keywords are still processed.

    Args:
        csv: Data handed through unchanged to ``getKeywordsFromCSV``.
        mentionedParty: Party label (string); it selects the keywords and
            is part of the output file name.
        since: Value assigned to twint's ``Since`` option; a date string in
            the same form as the default.
        until: Value assigned to twint's ``Until`` option; a date string in
            the same form as the default.
        limit: Optional value for twint's ``Limit`` option. ``None`` (the
            default) leaves twint's own default untouched.

    Returns:
        The name of the JSON output file,
        ``"political_tweets_<mentionedParty>.json"``.
    """
    out_filename = "political_tweets_" + mentionedParty + ".json"
    keywords = getKeywordsFromCSV(csv, mentionedParty)
    print("List of keywords: ", keywords)

    for keyword in keywords:
        print("Searching tweets with keyword: ", keyword)

        # A fresh configuration per keyword keeps the searches independent.
        config = twint.Config()
        config.Search = keyword
        if limit is not None:
            config.Limit = limit
        config.Retweets = False
        config.Since = since
        config.Until = until
        config.Store_json = True
        config.Hide_output = True
        config.Count = True
        config.Output = out_filename

        try:
            twint.run.Search(config)
        except Exception as e:
            # Deliberate best effort: one failing keyword must not abort
            # the whole run, so report it and continue with the next one.
            print(f"\n Skipped {keyword} because of:")
            print(f"\n\t {e}")

    return out_filename
##### START PROGRAM #####
# Load the semicolon-separated table of Bundestag members. The helper
# functions in this file read its "party", "screen_name" and "full_name"
# columns.
csv_file = pandas.read_csv(
    "Bundestag_Namen_Usernamen_Fraktion.csv",
    sep=";",
    engine="python",
)

# Parties to process, in this order; each label must match the spelling
# used in the "party" column of the CSV.
parties = [
    "CDU",
    "SPD",
    "AfD",
    "FDP",
    "Die_Gruenen",
    "Die Linke",
]

print("Tweet extraction started")

for party in parties:
    # One scraping run per party; a banner on stdout marks its completion.
    searchPoliticalTweets(csv_file, party)
    print(f"################################## {party} completed ##################################")

print("Tweet extraction finished")