-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataFunctions.py
166 lines (154 loc) · 6.22 KB
/
dataFunctions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import numpy as np
def getCVEIDs(cveJsonFile):
    """Collect the CVE identifier of every item in a parsed CVE feed.

    Args:
        cveJsonFile: Mapping with a 'CVE_Items' sequence; each item must
            provide item['cve']['CVE_data_meta']['ID'].

    Returns:
        A list of the ID values, in the same order as 'CVE_Items'.

    Raises:
        KeyError: If any of the expected keys is missing.
    """
    return [
        item['cve']['CVE_data_meta']['ID']
        for item in cveJsonFile['CVE_Items']
    ]
def getCVEpubDateAndScore(cveJsonFile):
    """Pair each CVE's publication date with its CVSS base score(s).

    Every item in cveJsonFile['CVE_Items'] is inspected for a
    'baseMetricV2' and a 'baseMetricV3' entry under item['impact'].
    One [publishedDate, baseScore] row is produced per metric found, so an
    item carrying both metrics contributes two rows (V2 first, then V3) and
    an item carrying neither contributes none.

    Args:
        cveJsonFile: Mapping with a 'CVE_Items' sequence; each item must
            provide 'impact' and, when a metric is present, 'publishedDate'.

    Returns:
        A list of two-element lists [publishedDate, baseScore].

    Raises:
        KeyError: If 'CVE_Items', 'impact', or a nested score key is missing.
    """
    # TODO (carried over from the original): discard entries after 2002.
    # (metric key under 'impact', nested key holding the score), in output order.
    metric_paths = (
        ('baseMetricV2', 'cvssV2'),
        ('baseMetricV3', 'cvssV3'),
    )
    dated_scores = []
    for item in cveJsonFile['CVE_Items']:
        impact = item['impact']
        for metric_key, cvss_key in metric_paths:
            if metric_key in impact:
                score = impact[metric_key][cvss_key]['baseScore']
                dated_scores.append([item['publishedDate'], score])
    return dated_scores
def getCVSSbyYear(lst_pubdateAndCvss, year):
    """Select the [pubdate, cvss] entries whose date string contains *year*.

    The year is converted with str() before matching, so both '2019' and
    2019 are accepted (the same convention getCVE_count_by_pubdateYear
    uses). Matching is a substring test against the entry's first element.

    Args:
        lst_pubdateAndCvss: Iterable of entries whose element 0 is the
            publication date string.
        year: Year to look for, as a string or an int.

    Returns:
        A new list holding the matching entries (the same objects, in their
        original order).
    """
    year_text = str(year)
    return [entry for entry in lst_pubdateAndCvss if year_text in entry[0]]
def getCVSSbyMultipleYears(lst_pubdateAndCvss, lst_years):
    """Select the [pubdate, cvss] entries published in any of *lst_years*.

    Each entry is returned at most once, even when lst_years repeats a year
    or several of its values match the same date string. Years are
    converted with str() before matching, so ints and strings both work.
    Matching is a substring test against the entry's first element.

    Args:
        lst_pubdateAndCvss: Iterable of entries whose element 0 is the
            publication date string.
        lst_years: Iterable of years (strings or ints).

    Returns:
        A new list holding the matching entries in their original order.
    """
    # Materialize once so a one-shot iterable of years is not exhausted
    # after the first entry.
    year_texts = [str(year) for year in lst_years]
    return [
        entry
        for entry in lst_pubdateAndCvss
        if any(year_text in entry[0] for year_text in year_texts)
    ]
def getCVSSavgByMonth(lst_pubdate_cvss_oneYear_oneMonth):
    """Return the arithmetic mean of the given scores, or 0 when there are none.

    Despite the parameter name, each element is added directly to a running
    sum, so the elements must be numeric scores rather than
    [pubdate, cvss] pairs.

    Args:
        lst_pubdate_cvss_oneYear_oneMonth: Iterable of numeric CVSS scores.

    Returns:
        The sum of the scores divided by their count, or 0 for empty input.
    """
    running_total = 0.0
    n_scores = 0
    # enumerate(start=1) leaves n_scores equal to the number of items seen.
    for n_scores, score in enumerate(lst_pubdate_cvss_oneYear_oneMonth, start=1):
        running_total += score
    # An empty month has no average; report 0 instead of dividing by zero.
    if n_scores == 0:
        return 0
    return running_total / n_scores
# We assume the JSON file has already been loaded and the necessary queries have been performed to reach this point.
# Instead of computing the average CVSS score per month for this year, we count the CVEs by their score.
# Data structure: lst_pubDateANDcvssBYyear = [[pubdate, cvss], [pubdate, cvss], ...]
# Score brackets: 0-1 1-2 2-3 3-4 4-5 5-6 6-7 7-8 8-9 9-10
def getCVSSCountBracketsByYear(lst_pubDateANDcvssBYyear):
    """Count entries per CVSS score bracket.

    Ten brackets are used. Bracket 0 covers scores from 0 to 1 inclusive;
    bracket k (k >= 1) covers scores greater than k and up to k + 1
    inclusive. Scores below 0 or above 10 are not counted.

    Args:
        lst_pubDateANDcvssBYyear: Iterable of entries whose element 1 is the
            numeric CVSS score.

    Returns:
        A list of ten integer counts, lowest bracket first.
    """
    n_brackets = 10
    bracket_counts = [0] * n_brackets
    for entry in lst_pubDateANDcvssBYyear:
        score = entry[1]
        # Out-of-range scores fall in no bracket.
        if not (score >= 0 and score <= n_brackets):
            continue
        # The first upper bound that is not exceeded identifies the bracket.
        for bracket in range(n_brackets):
            if score <= bracket + 1:
                bracket_counts[bracket] += 1
                break
    return bracket_counts
def getAverageCVSScountByMonth(lst_pubdate_cvss_oneYear):
    """Return twelve per-month CVSS averages, January first.

    The month is taken as the second '-'-separated field of each entry's
    date string. Scores are grouped by month and each group is averaged
    with getCVSSavgByMonth. Entries whose month field is not one of
    '01'..'12' trigger a printed message and are left out.

    Args:
        lst_pubdate_cvss_oneYear: Iterable of [pubdate, cvss] entries.

    Returns:
        A list of twelve values as returned by getCVSSavgByMonth.

    Raises:
        IndexError: If a date string contains no '-'.
    """
    month_codes = ['01', '02', '03', '04', '05', '06',
                   '07', '08', '09', '10', '11', '12']
    scores_by_month = {code: [] for code in month_codes}
    for entry in lst_pubdate_cvss_oneYear:
        month_code = entry[0].split('-')[1]
        month_scores = scores_by_month.get(month_code)
        if month_scores is None:
            print('wtf are you doing, wrong date format????')
            continue
        month_scores.append(entry[1])
    return [getCVSSavgByMonth(scores_by_month[code]) for code in month_codes]
def getCVEID_cnt_byYear(CVEIDs):
    """Count CVE identifiers per 'CVE-YYYY' prefix for the years 1990-2029.

    An identifier is counted under a key when that key occurs anywhere in
    the identifier (substring test). Only keys with at least one match
    appear in the result.

    Args:
        CVEIDs: Iterable of CVE identifier strings.

    Returns:
        A dict mapping 'CVE-YYYY' to the number of matching identifiers.
    """
    decade_prefixes = ('CVE-199', 'CVE-200', 'CVE-201', 'CVE-202')
    # Expand each decade prefix into its ten year keys, e.g. 'CVE-1990'..'CVE-1999'.
    year_keys = [
        prefix + str(digit)
        for prefix in decade_prefixes
        for digit in range(10)
    ]
    tally = {}
    for cve_id in CVEIDs:
        for key in year_keys:
            if key in cve_id:
                tally[key] = tally.get(key, 0) + 1
    return tally
def getCVE_count_by_pubdateYear(jsonCVEdata, lst_years):
    """Count CVE items per publication year.

    For every year in lst_years, the items in jsonCVEdata['CVE_Items']
    whose 'publishedDate' string contains str(year) are counted. Years
    with no matching item are left out of the result.

    Args:
        jsonCVEdata: Mapping with a 'CVE_Items' sequence of items that each
            provide 'publishedDate'.
        lst_years: Iterable of years (any value accepted by str()).

    Returns:
        A dict mapping str(year) to the number of matching items.
    """
    totals = {}
    for year in lst_years:
        year_label = str(year)
        n_matches = sum(
            1
            for item in jsonCVEdata['CVE_Items']
            if year_label in item['publishedDate']
        )
        # Skip years with no hits so they never become keys.
        if n_matches:
            totals[year_label] = totals.get(year_label, 0) + n_matches
    return totals
def sortIntStrings(lst_stringsThatAreInts):
    """Convert each element to int and return the values in ascending order.

    Args:
        lst_stringsThatAreInts: Iterable of values accepted by int().

    Returns:
        A new sorted list of ints.

    Raises:
        ValueError: If an element cannot be parsed as an integer.
    """
    return sorted(map(int, lst_stringsThatAreInts))