-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathjif_importer.py
71 lines (52 loc) · 1.99 KB
/
jif_importer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import settings
import os
import csv
from db import db
class JifImporter:
    """Import yearly ``JournalHomeGrid-<year>.csv`` files into the ``jif`` table.

    For every year from ``year_first`` to ``year_last`` (inclusive) the
    matching CSV file in ``data_dir`` is read, non-data rows are skipped, and
    one row per journal is inserted with its normalized title, ISSN, year and
    JIF value.
    """

    # Inclusive range of years for which a CSV file is read.
    year_first = 2001
    year_last = 2009
    # Directory that holds the per-year CSV files.
    data_dir = os.path.join(settings.DATA_DIR, 'thomsonreuters')

    def __init__(self):
        pass

    @staticmethod
    def _normalize_journal(journal):
        """Return *journal* lower-cased with spaces, colons and hyphens removed."""
        return journal.lower() \
            .replace(' ', '').replace(':', '').replace('-', '')

    def run(self):
        """Read every yearly CSV file and insert its journal rows.

        All inserts are issued on a single connection, which is committed
        once after the last file has been processed.

        Raises:
            IOError/OSError: if a yearly CSV file cannot be opened.
            IndexError: if a non-empty data row has fewer than six columns.
        """
        # Take the cursor and the commit from the same connection object, so
        # the commit applies to the inserts even if db() does not hand back
        # a shared connection on every call.
        conn = db()
        c = conn.cursor()
        for year in range(self.year_first, self.year_last + 1):
            path = os.path.join(self.data_dir,
                                'JournalHomeGrid-' + str(year) + '.csv')
            # Invariant for the whole file, so build it once per year.
            title_prefix = ('Journal Data Filtered By: ' +
                            'Selected JCR Year: ' + str(year))
            # The context manager closes the file even when a row fails.
            with open(path, 'r') as csv_file:
                for row in csv.reader(csv_file):
                    # csv.reader yields [] for blank lines; nothing to import.
                    if not row:
                        continue
                    first = row[0]
                    # Title row
                    if first.startswith(title_prefix):
                        continue
                    # Header row
                    if first == 'Rank':
                        continue
                    # Copyright row
                    if first.startswith(('Copyright', 'By exporting')):
                        continue
                    # Actual content
                    journal = row[2]
                    issn = row[3]
                    jif = row[5]
                    if jif == 'Not Available':
                        continue
                    journal = self._normalize_journal(journal)
                    print(journal, year, jif)
                    c.execute("""
                        INSERT INTO jif SET
                            journal = %(journal)s,
                            issn = %(issn)s,
                            year = %(year)s,
                            jif = %(jif)s""", {
                        'journal': journal,
                        'issn': issn,
                        'year': year,
                        'jif': jif})
        print("Committing...")
        conn.commit()
# Script entry point: run the full import when executed directly.
if __name__ == '__main__':
    JifImporter().run()