-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreporecap.py
82 lines (66 loc) · 2.96 KB
/
reporecap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
"""generate stats to summarize public repo growth across all Microsoft orgs
TO DO:
- automate the creation of the data file (import gitdata, etc.)
- automate the creation of the XLSX
"""
import collections
#-------------------------------------------------------------------------------
def get_totals(filename):
    """Read the specified data file and create a dictionary of the total public
    repos created for each month.

    The file is read as comma-separated text with at least four columns:
    owner login, repo name, creation timestamp and visibility. The year is
    taken from characters 0-3 of the timestamp and the month from characters
    5-6 (i.e., a value that starts with 'YYYY-MM'). Only rows whose fourth
    column is exactly 'public' are counted. The header row (first column
    'owner_login') and rows with fewer than four columns, such as blank
    lines, are skipped.

    Returns a dictionary with two types of keys: year+month (e.g., '201702')
    or year+month+org (e.g., '201702microsoft', org name lower-cased). The
    value is the number of public repos created in that year/month.

    Note that the data file was created with the following command:
    c:> gitdata repos -o* -amsftgits -sa -nmicrosoft-repos.csv
        -fowner.login/name/created_at/private -d -v
    """
    # Created without a default factory on purpose: the returned object keeps
    # its original type, and missing-key lookups still raise KeyError.
    ymtotals = collections.defaultdict()

    # The context manager guarantees the file is closed, and iterating the
    # handle avoids loading the whole file into memory.
    with open(filename, 'r') as fhandle:
        for line in fhandle:
            values = line.strip().split(',')  # create list of values
            if len(values) < 4:
                continue  # blank or truncated row: nothing to count
            if values[0] == 'owner_login' or values[3] != 'public':
                continue  # header row and non-public repos are ignored

            orgname = values[0].lower()
            year = values[2][:4]
            month = values[2][5:7]

            # Each repo counts once toward the overall monthly total and once
            # toward the monthly total of its owning org.
            for key in (year + month, year + month + orgname):
                ymtotals[key] = ymtotals.get(key, 0) + 1

    return ymtotals
#-------------------------------------------------------------------------------
def write_ymtotals(ymtotals, filename):
    """Write a CSV file summarizing cumulative totals for Azure, Microsoft, and
    other orgs.

    ymtotals = dictionary keyed by year+month (e.g., '201702') and by
               year+month+org (e.g., '201702microsoft'); values are counts
               of repos created in that month.
    filename = name of the CSV file to write (overwritten if it exists).

    One row is written for every calendar month from the earliest to the
    latest month found in the keys, including months with no entries, so the
    cumulative series has no gaps. Columns are year, month, microsoft, azure,
    other, where "other" is the overall cumulative total minus the microsoft
    and azure cumulative totals. Each row is also printed to stdout as
    year, month, total, azure, microsoft.

    If ymtotals is empty, only the header row is written.
    """
    # Keys all start with six YYYYMM characters, so a plain lexicographic
    # sort puts the earliest month first and the latest month last even
    # though org-suffixed keys are mixed in.
    yearmonths = sorted(ymtotals)

    # A single handle is used for the header and every row instead of
    # re-opening the file in append mode for each month.
    with open(filename, 'w') as fhandle:
        fhandle.write('year,month,microsoft,azure,other\n')
        if not yearmonths:
            return  # nothing to summarize; leave a header-only file

        year = int(yearmonths[0][:4])
        month = int(yearmonths[0][4:6])
        last = (int(yearmonths[-1][:4]), int(yearmonths[-1][4:6]))

        cumm_tot = 0
        cumm_az = 0
        cumm_ms = 0
        while (year, month) <= last:
            # Zero-padded strings reproduce the key format used in ymtotals.
            currentyear = str(year).zfill(4)
            currentmonth = str(month).zfill(2)
            yearmonth = currentyear + currentmonth

            cumm_tot += ymtotals.get(yearmonth, 0)
            cumm_az += ymtotals.get(yearmonth + 'azure', 0)
            cumm_ms += ymtotals.get(yearmonth + 'microsoft', 0)
            print(currentyear, currentmonth, cumm_tot, cumm_az, cumm_ms)

            fhandle.write(','.join([currentyear, currentmonth,
                                    str(cumm_ms), str(cumm_az),
                                    str(cumm_tot - cumm_ms - cumm_az)]) + '\n')

            # Advance to the next calendar month, rolling over the year.
            if month == 12:
                year += 1
                month = 1
            else:
                month += 1
#-------------------------------------------------------------------------------
if __name__ == '__main__':
    # Script entry point: tally the monthly public-repo counts from the
    # gitdata export, then write the cumulative summary CSV.
    write_ymtotals(get_totals('microsoft-repos.csv'), 'publicrepototals.csv')