-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataViz.py
71 lines (44 loc) · 1.97 KB
/
dataViz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import matplotlib.pyplot as plt
from loadData import ImportData
import seaborn as sns
# Data processing: describe the UCI "Adult" data file and build the ImportData
# object at import time.
# Column names handed to ImportData, in the order listed here.
headers = ["age", "workclass", "fnlwgt", "education", "education-num", "marital-status",
"occupation", "relationship", "race", "sex", "capital-gain", "capital-loss",
"hours-per-week", "native-country", "goal"]
# Remote location of the raw data file.
csv = 'http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
# Marker handed to ImportData as the missing-value token (note the leading space).
missing_value = ' ?'
# NOTE(review): remove_goal and limiter_150 are interpreted by ImportData (defined in
# loadData, not visible here) - presumably they drop the "goal" column and cap the
# frame at 150 rows; confirm against loadData.
data = ImportData(headers=headers, csv=csv, missing_value=missing_value, remove_goal=True, limiter_150=True)
class Viz:
    """Plotting helpers for the frame exposed as ``data.df``.

    Every plotting method draws with seaborn/matplotlib and then calls
    ``plt.show()``. All methods read the frame from the ``data`` object given
    to the constructor.
    """

    def __init__(self, headers, csv, missing_value, data, limiter_150: bool = False):
        """Store the dataset description and apply seaborn's default theme.

        Args:
            headers: Column names of the dataset (stored; not used by the plots).
            csv: Location of the source data (stored; not used by the plots).
            missing_value: Missing-value marker (stored; not used by the plots).
            data: Object whose ``df`` attribute holds the frame to plot
                (presumably a pandas DataFrame - confirm against loadData).
            limiter_150: Must be truthy for ``scatterPlotTwoFeatures`` to run.
        """
        # Side effect: switches the process-wide matplotlib styling to seaborn's theme.
        sns.set()
        self.headers = headers
        self.csv = csv
        self.missing_value = missing_value
        self.data = data
        self.limiter_150 = limiter_150

    def heatMap(self) -> None:
        """Show a heat map of the pairwise correlations of ``self.data.df``."""
        # Read from self.data so the plot reflects the object given to this
        # instance, and compute the correlation matrix only once.
        corr = self.data.df.corr()
        labels = corr.columns.values
        sns.heatmap(corr, xticklabels=labels, yticklabels=labels)
        plt.show()

    def scatterPlotTwoFeatures(self, x_label: str, y_label: str) -> None:
        """Show a scatter plot of column ``x_label`` against column ``y_label``.

        Args:
            x_label: Name of the column plotted on the x axis.
            y_label: Name of the column plotted on the y axis.

        Raises:
            ValueError: If ``limiter_150`` is not enabled on this instance, or
                if either label is not a ``str``.
        """
        if not self.limiter_150:
            raise ValueError("Limiter_150 must be True")
        if not (isinstance(x_label, str) and isinstance(y_label, str)):
            raise ValueError("x_Label and y_Label must be of type String")

        frame = self.data.df
        # NOTE(review): matplotlib expects a colour sequence in `c` to match the
        # number of points, so this two-element list likely only works for a
        # two-row frame; the intended per-class colouring needs confirming.
        plt.scatter(frame[x_label], frame[y_label], c=["red", "blue"], label=["No", "Yes"])
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.show()

    def pairPlot(self) -> None:
        """Show seaborn's pairwise relationship grid for ``self.data.df``."""
        # Use the instance's frame rather than a module-level global.
        sns.pairplot(self.data.df)
        plt.show()

    def histo(self, col) -> None:
        """Show the distribution plot (seaborn ``distplot`` defaults) of column ``col``."""
        # NOTE(review): distplot is deprecated in recent seaborn releases; kept
        # because the installed seaborn version is not known from this file.
        sns.distplot(self.data.df[col])
        plt.show()

    def kernelDensity(self, col) -> None:
        """Show the density curve with a rug plot (no histogram) of column ``col``."""
        sns.distplot(self.data.df[col], hist=False, rug=True)
        plt.show()
# Module-level Viz instance built from the dataset values defined earlier in this
# file; it is created as a side effect of importing/running the module.
vizTest = Viz(headers=headers, csv=csv, missing_value=missing_value, data=data, limiter_150=True)
# Uncomment to display the correlation heat map.
#vizTest.heatMap()