# Sentiment score flow graph over time (per religion / topic)
from matplotlib import font_manager, rc
import matplotlib.pyplot as plt
import numpy as np
import csv, json

dirname = "tf"         # sub-directory used for graph / stats output
keyword = "모병제"      # topic keyword ("volunteer military system")
def append_dict(d1, d2):  # merge d1 into d2 (dictionaries nested one level deep)
    for d in d1.keys():
        if d in d2.keys():
            for i in d1[d]:
                d2[d][i] = d1[d][i]
        else:
            d2[d] = d1[d]
    return d2
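# Illustrative example of append_dict (not part of the original pipeline):
#   append_dict({"a": {"x": 1}}, {"a": {"y": 2}, "b": {"z": 3}})
#   -> {"a": {"y": 2, "x": 1}, "b": {"z": 3}}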
# Write the sentiment values to a CSV file (one value per row)
def makeCSV(tablename, y):
    global dirname
    with open("./ttest/"+dirname+"/"+tablename+'.csv', 'w', newline='') as f:
        wr = csv.writer(f)
        for e in y:
            wr.writerow([e])
# Plot the sentiment score over time and save it as a PNG
def make_graph_flow(tablename, x, y, fig, graph_title="Sentiment Graph"):
    global dirname
    plt.figure(fig, figsize=(18, 5))
    # use a Korean-capable font so Hangul labels render correctly
    font_name = font_manager.FontProperties(fname='./font/KoPubDotumMedium.ttf', size=20).get_name()
    rc('font', family=font_name)
    plt.title(graph_title, fontsize=25)
    nx = np.arange(len(x))   # x positions: one slot per date
    ny = np.array(y)
    plt.plot(nx, ny, 'o', color='#2E2EFE', label="Sentiments")
    # build month labels for the x axis: label the first day of each month,
    # prefixing the two-digit year whenever the year changes
    month_list, temp, year_temp = [], "", ""
    for date in x:
        month, year = date[4:6], date[2:4]
        if temp != month:
            temp = month
            month_list.append(month)
            if year_temp != year:
                month_list[-1] = year + "/" + month_list[-1]
                year_temp = year
        else:
            month_list.append("")
    # show only odd-numbered months; blank the rest so tick positions stay aligned
    mlist = list(month_list)
    month_list = []
    for mm in mlist:
        if mm and int(mm.split("/")[-1]) % 2 == 1:
            month_list.append(mm)
        else:
            month_list.append("")
    plt.ylim([0.0, 0.3])
    plt.xlabel('Date', fontsize=18)
    plt.ylabel('Sentiment', fontsize=18)
    plt.xticks(range(len(month_list)), month_list, rotation=40, fontsize=15)
    plt.yticks(fontsize=16)
    plt.legend()
    plt.savefig("./graph/"+dirname+"/"+tablename+'-emotion-flow.png', dpi=400)
    return 0
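# Usage sketch (illustrative values, not from the real data set): x is a list of
# "YYYYMMDD" date strings and y the matching daily sentiment scores; fig is the
# matplotlib figure number. The ./graph/tf/ directory and the font file
# ./font/KoPubDotumMedium.ttf must already exist.
#   sample_x = ["20190101", "20190115", "20190201"]
#   sample_y = [0.12, 0.15, 0.11]
#   make_graph_flow("sample", sample_x, sample_y, 1, graph_title="Sample Graph")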
# Build the (dates, sentiment scores) series from the loaded JSON data
def makeValue(data):
    result = []
    table_emo_count, table_emo_avg = 0, 0.0  # total comment count, overall weighted sentiment average
    for date in data.keys():
        for art in data[date].keys():
            table_emo_count += len(data[date][art]['emotions'])
    for data_date in data.keys():
        # per-date sentiment average, weighted by each article's share of all comments
        table_emo_avg += sum([(sum(data[data_date][art]['emotions']) / (len(data[data_date][art]['emotions']) + 1))
                              * (len(data[data_date][art]['emotions']) / table_emo_count)
                              for art in data[data_date].keys()])
    for date in data.keys():  # every date
        day_emo_count = sum([len(data[date][art]['emotions']) for art in data[date].keys()])  # total comments for the day
        if day_emo_count == 0:
            continue
        emotion, isInput = 0, True
        for article in data[date].keys():  # every article on that date
            if len(data[date]) == 1 and len(data[date][article]['emotions']) < 1:
                # a single article with no comments -> skip this date
                isInput = False
                break
            emotionList = data[date][article]['emotions']
            if emotionList == []:
                continue
            emotion_avg = sum(emotionList) / len(emotionList)            # average sentiment per article
            emotion += (len(emotionList) / day_emo_count) * emotion_avg  # comment-count-weighted average per day
        least = 108  # minimum comment count, set to the average number of comments per article
        # shrink sparse days toward the overall average
        # (formula reference: https://www.quora.com/How-does-IMDbs-rating-system-work)
        emotion_ = (day_emo_count/(day_emo_count+least))*emotion + (least/(day_emo_count+least))*table_emo_avg
        d_y, d_m, d_d = int(date[:4]), int(date[4:6]), int(date[6:])
        if isInput:
            result.append([date, round(emotion_, 6), d_y, d_m, d_d])
    result_sort_day = sorted(result, key=lambda v: (v[2], v[3], v[-1]))  # sort by year, then month, then day
    x, y = [], []
    for val in result_sort_day:
        x.append(val[0])
        y.append(val[1])
    return x, y
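# Assumed input shape for makeValue, inferred from the accesses above (the real
# JSON may carry additional fields per article):
#   {
#     "20190101": {                                      # date key, "YYYYMMDD"
#       "<article id 1>": {"emotions": [0.21, 0.08]},    # one score per comment
#       "<article id 2>": {"emotions": []}
#     },
#     ...
#   }
# makeValue returns (x, y): date strings sorted chronologically and one
# shrunken daily sentiment average per date.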
# Mean and standard deviation, rounded to 4 decimal places (numpy is imported at module level)
def calc_mean_std(data):
    return round(np.mean(data), 4), round(np.std(data), 4)
fig = 0
# path where the comment-data JSON file is stored
path = "./data/predict-data/"
# json_name = "tf_predict_volunteer_comment_data"     # TF model predictions (alternative input)
json_name = "kobert_predict_volunteer_comment_data"   # KoBERT model predictions
with open(path+json_name+'.json', encoding="utf-8") as json_file:
    data_ = json.load(json_file)
x, y = makeValue(data_)
print(x[:10], y[:10])

# write the overall sentiment mean and standard deviation to a text file
with open("./graph/"+dirname+"/"+json_name+"_stats.txt", "at", encoding="utf-8") as f:
    title = keyword + " 감성 통계"   # "<keyword> sentiment statistics"
    f.write(title+"\n")
    mean, std = calc_mean_std(y)
    f.write("avg: "+str(mean)+" std: "+str(std)+"\n")
    f.write("--"*10+"\n")

# draw and save the graph
make_graph_flow(json_name, x, y, fig, graph_title="KoBERT Sentiments Flow Graph")

# write per-period sentiment statistics to a text file
with open("./graph/"+dirname+"/"+json_name+"year_stats.txt", "at", encoding="utf-8") as f:
    title = keyword + " 구간별 감성 통계"   # "<keyword> sentiment statistics per period"
    f.write(title+"\n")
    year_dict = {2018: [], 2019: [], 2020: []}
    # split into yearly buckets offset by half a year:
    # 2018 = up to Jun 2019, 2019 = Jul 2019 - Jun 2020, 2020 = Jul 2020 onward
    for date, sents in zip(x, y):
        yr, m = int(date[:4]), int(date[4:6])
        if yr == 2018:
            year_dict[2018].append(sents)
        elif yr == 2019 and m <= 6:
            year_dict[2018].append(sents)
        elif yr == 2019 and m > 6:
            year_dict[2019].append(sents)
        elif yr == 2020 and m <= 6:
            year_dict[2019].append(sents)
        else:
            year_dict[2020].append(sents)
    for years in year_dict.keys():  # mean and standard deviation per bucket
        mean, std = calc_mean_std(year_dict[years])
        f.write(str(years)+" : avg: "+str(mean)+" std: "+str(std)+"\n")

# optionally dump the daily sentiment values to ./ttest/<dirname>/<keyword>.csv
# makeCSV(keyword, y)