-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathk_simplex2vec.py
186 lines (151 loc) · 5.63 KB
/
k_simplex2vec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import gudhi
import numpy as np
from numpy import matlib
import random
import gensim
from gensim.models import Word2Vec
import scipy.sparse
# Some function that will be useful for the rest of the code
def signed_faces(s):
## returns the faces of the simplex s
ret = []
for i in range(0, len(s)):
ret.append(((-1)**i, s[0:i] + s[i+1:len(s)]))
return ret
def signed_cofaces(s, cplx):
# returns all cofaces (of codim 1) of simplex s
return [(sign(s, x), x) for (x, eps) in cplx.get_cofaces(s, 1)]
def sign(s, t): # Sign of s in t.
if len(t) != len(s) + 1:
return None
for i in range(0, len(s)):
if s[i] != t[i]:
return (-1)**i
return (-1)**len(s)
def hacky_get_idx(s, cplx):
# Get index of the simplices
i = cplx.filtration(s)
assert(i.is_integer())
return int(i)
def assemble(cplx, k, scheme = "uniform", laziness = None):
## Assmeble the transition matrix
# We are using this incredibly ugly hack to store the indices of the simplices
# as filtration values since keys are not accessible
# through the GUDHI Python API.
assert(cplx.num_simplices() < 16777217)
assert(scheme in ["uniform", "uniform-lazy", "uniform-multicount"])
if scheme == "uniform-lazy":
assert(laziness is not None)
assert(laziness >= 0 and laziness <= 1)
simplices = [s for (s, eps) in cplx.get_filtration()]
ordering = []
N = 0
for s in simplices:
if len(s) == k + 1:
cplx.assign_filtration(s, float(N))
ordering.append(s)
N += 1
else:
cplx.assign_filtration(s, np.inf)
cplx.initialize_filtration()
row_inds = []
col_inds = []
data = []
for (s, i) in cplx.get_filtration():
if i >= N:
break
assert(i.is_integer())
i = int(i)
# uniform, uniform-lazy, uniform-multicount
if scheme.startswith("uniform"):
s_faces = signed_faces(s)
s_cofaces = signed_cofaces(s, cplx)
s_up = []
for (a, t) in s_cofaces:
s_up += [(-a*b, u) for (b, u) in signed_faces(t)]
s_down = []
for (a, t) in s_faces:
s_down += [(-a*b, u) for (b, u) in signed_cofaces(t, cplx)]
## We are not considering orientations so we set all signs to 1
s_up = [(1, t) for (foo, t) in s_up]
s_down = [(1, t) for (foo, t) in s_down]
if scheme == "uniform-multicount":
s_neigh_idxs = [(a, hacky_get_idx(t, cplx)) for (a,t) in s_down + s_up]
else:
s_neigh_idxs = list(set([(a, hacky_get_idx(t, cplx)) for (a,t) in s_down + s_up]))
if scheme == "uniform-lazy":
if len(s_neigh_idxs) == 1:
probs = 0.0
else:
num_self_neigh = 0
for (sgn, j) in s_neigh_idxs:
if j == i:
num_self_neigh += 1
probs = (1.0-laziness)/(len(s_neigh_idxs) - num_self_neigh)
else:
probs = 1.0/len(s_neigh_idxs)
for (sgn, j) in s_neigh_idxs:
row_inds.append(i)
col_inds.append(j)
if scheme == "uniform-lazy" and j == i:
data.append(laziness)
else:
data.append(probs)
return scipy.sparse.csr_matrix((data, (row_inds, col_inds)), shape=(N, N))
def walk(smplx, walk_length, P):
## Performs a single random walk of fixed length starting at smplx
# smplx = starting simplex of the random walk
# P = precomputed transition matrix on the complex containing smplx
# walk_length = length of the random walk
c= np.arange(P.shape[0])
RW= []
RW.append(smplx)
for i in range(walk_length):
smplx=np.random.choice(c,size =1, p=P[smplx])[0]
RW.append(smplx)
return(RW)
def RandomWalks(walk_length, number_walks, P, seed = None):
## Performs a fixed number of random walks at each $k$-simplex
Walks=[] ## List where we store all random walks of length walk_length
for i in range(number_walks):
for smplx in range(P.shape[0]):
Walks.append(walk(smplx, walk_length, P))
if seed != None:
np.random.seed(seed)
np.random.shuffle(Walks)
else:
np.random.shuffle(Walks)
return Walks
def save_random_walks(Walks, filename):
## Writes the walks in a .txt file
file = open(filename, "a")
for walk in Walks:
L = str(walk).replace(',', '')[1:-1]+"\n"
file.write(L)
file.close()
def load_walks(filename):
## Loads a file with precomputed random walks
file = open(filename, 'r')
lines = file.readlines()
walks= list()
for line in lines:
walk = list()
line = line[0:-1]
newline= line.split('], [')
for el in newline:
step = [int(s) for s in el.split(', ')]
walk.append(step)
walks.append(walk[0])
return walks
def Embedding(Walks, emb_dim, epochs =5 ,filename ='k-simplex2vec_embedding.model'):
## Performs the embedding of the $k$-simplices using the gensim word2vec package
walks_str=[]
for i in range(len(Walks)):
ls_temp =[]
for j in range(len(Walks[i])):
string = str(Walks[i][j]).replace(' ','')
ls_temp.append(string)
walks_str.append(ls_temp)
model = Word2Vec(walks_str, size=emb_dim, window = 3, min_count=0, sg=1, workers=1, iter=epochs)
model.save(filename)
return model