This repository has been archived by the owner on Jul 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathprocess_log.py
executable file
·164 lines (129 loc) · 6.07 KB
/
process_log.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
process_log.py
process log files generated by clever.py
Copyright (C) 2017-2018, IBM Corp.
Copyright (C) 2017, Lily Weng <[email protected]>
and Huan Zhang <[email protected]>
This program is licenced under the Apache 2.0 licence,
contained in the LICENCE file in this directory.
"""
import os, glob
import sys
import scipy
import scipy.io as sio
from scipy.stats import weibull_min
import numpy as np
import pandas as pd
import argparse
import matplotlib.pyplot as plt
def readDebugLog2array(filename):
    """Collect the ``[DEBUG][L1]`` records of a log file into columns.

    A record is a line of comma-separated ``key = value`` fields whose first
    field starts with ``[DEBUG][L1] id `` followed by ``=``. Every other line
    (comments, other verbosity levels, blank lines) is ignored.

    Args:
        filename: path of the log file to read.

    Returns:
        A dict mapping each known field name to the list of its values, one
        entry per matching line in file order. Values are kept as
        whitespace-stripped strings; numeric conversion is left to the caller.

    Raises:
        KeyError: a matching line contains a field name that is not one of
            the known columns.
        IndexError: a field of a matching line contains no ``=``.
    """
    data_arr = {"[DEBUG][L1] id": [], "true_label": [], "target_label": [], "info": [], "bnd_norm": [],
                "bnd": [], "ks": [], "pVal": [], "shape": [], "loc": [], "scale": [], "g_x0": []}
    # The context manager closes the file even when reading or parsing raises;
    # iterating the handle avoids loading the whole log into memory first.
    with open(filename) as f:
        for line in f:
            # split ',' into columns
            subline = line.strip().split(',')
            # only save the info when the line is [DEBUG][L1]
            if subline[0].split('=')[0] != '[DEBUG][L1] id ':
                continue
            for elems in subline:
                temp = elems.split('=')
                key = temp[0].strip()
                val = temp[1].strip()
                # save key and val to array
                data_arr[key].append(val)
    return data_arr
# Summary filled in by the script below and read by its LaTeX table printer:
# table_results[model][info label][norm] = [mean bound of usable rows,
#                                           percentage of usable rows].
table_results = {}

if __name__ == "__main__":
    # parse command line parameters
    parser = argparse.ArgumentParser(description='Process experiment data.')
    parser.add_argument('data_folder', nargs='+', help='log file(s) or directory')
    parser.add_argument('--save_pickle',
                        action='store_true',
                        help='save result to pickle')
    args = vars(parser.parse_args())
    print(args)
    # save result to pickle
    is_save2pickle = args['save_pickle']

    # values of the "info" and "bnd_norm" columns that are summarised
    info_labels = ['least', 'random', 'top2']
    norms = ['1', '2', 'i']
    # columns that arrive as strings from the log and are used numerically
    numeric_columns = ['bnd', 'g_x0', 'ks', 'loc', 'pVal', 'scale', 'shape']

    # the argument is a list of paths: a directory contributes all of its
    # *.log files, anything else is taken as a log file itself
    files = []
    for path in args['data_folder']:
        if os.path.isdir(path):
            files.extend(glob.glob(os.path.join(path, "*.log")))
        else:
            files.append(path)
    print(files)

    for log_path in files:
        # one dataframe row per [DEBUG][L1] record of the log
        df = pd.DataFrame.from_dict(readDebugLog2array(log_path))
        for column in numeric_columns:
            df[column] = pd.to_numeric(df[column])

        # The model name is taken from the '_'-separated fields of the file
        # name: field 1 alone, or for cifar/mnist field 1 plus field 2 up to
        # its first '.'. A name with fewer fields raises IndexError.
        name_fields = os.path.basename(log_path).split('_')
        tag1 = name_fields[1]
        if tag1 in ('cifar', 'mnist'):
            modelName = tag1 + '_' + name_fields[2].split('.')[0]
        else:
            modelName = tag1

        if modelName not in table_results:
            nan = float('nan')
            table_results[modelName] = {
                label: {norm: [nan, nan] for norm in norms}
                for label in info_labels
            }

        for label in info_labels:
            for bnd_norm in norms:
                df_out = df[(df["info"] == label) & (df["bnd_norm"] == bnd_norm)]
                if df_out.empty:
                    continue
                out_name = 'pickle_' + modelName + '_' + label + '_norm' + bnd_norm
                if is_save2pickle:
                    # save selected df to pickle files
                    df_out.to_pickle(out_name)
                # statistics over all rows (_0) and over the rows that pass
                # the pVal > 0.05 and shape < 1000 filter (_1)
                descrb_0 = df_out.describe()
                descrb_1 = df_out[(df_out["pVal"] > 0.05) & (df_out["shape"] < 1000)].describe()
                bnd_0 = descrb_0["bnd"]["mean"]
                count_0 = descrb_0["bnd"]["count"]
                bnd_1 = descrb_1["bnd"]["mean"]
                count_1 = descrb_1["bnd"]["count"]
                # computed once so the log line and the table report the
                # same number; count_0 > 0 because df_out is not empty
                usable_pct = count_1 * 100.0 / count_0
                table_results[modelName][label][bnd_norm][0] = bnd_1
                table_results[modelName][label][bnd_norm][1] = usable_pct
                # the trailing space after "gives" keeps the two adjacent
                # literals from running together as "givesNimg"
                print("[L0] model = {}, Nimg = {}, bnd_avg = {:.5g}, pVal>0.05 & shape<1000 gives "
                      "Nimg = {}, bnd_avg = {:.5g}, useable = {:.1f} %".format(
                          out_name, count_0, bnd_0, count_1, bnd_1, usable_pct))

    # print out table for easy pasting to LaTeX
    output = sys.stdout
    print('Generating LaTeX table...')
    order = ['mnist_2-layer', 'mnist_normal', 'mnist_distilled', 'mnist_brelu',
             'cifar_2-layer', 'cifar_normal', 'cifar_distilled', 'cifar_brelu',
             'inception', 'resnet', 'mobilenet']

    def gen_table(elem_index, precision=3):
        """Write one '&'-separated table section per info label to ``output``.

        Args:
            elem_index: which entry of each ``table_results`` cell to print
                (0 = mean bound, 1 = usable percentage).
            precision: number of digits after the decimal point.

        Only the '2' and 'i' norms are printed, and only for the models of
        ``order`` that are present in ``table_results``.
        """
        cell_format = "{:7." + str(precision) + "f} &\t"
        for label in ['least', 'random', 'top2']:
            output.write("{:15s} &\t{:7s} &\t{:7s} &\n".format(label, '2', 'i'))
            for model in order:
                if model not in table_results:
                    continue
                output.write("{:15s} &\t".format(model))
                for bnd_norm in ['2', 'i']:
                    output.write(cell_format.format(table_results[model][label][bnd_norm][elem_index]))
                output.write("\n")

    print('\n%%% Table for bounds %%%')
    gen_table(0)
    # element 1 holds the percentage of rows that passed the pVal/shape filter
    print('\n%%% Table for p-values %%%')
    gen_table(1, 1)