-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhw2.py
112 lines (103 loc) · 3.49 KB
/
hw2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import fileinput, random, time
def concat(*args):
result = ''
for arg in args:
result = _concat(result, arg)
return result
def _concat(a, b):
la = len(a)
lb = len(b)
for i in range(la):
j = i
k = 0
while j < la and k < lb and a[j] == b[k]:
j += 1
k += 1
if j == la:
n = k
break
else:
n = 0
return a + b[n:]
def _overlap(m, n):
result = {}
prefix = ""
suffix = ""
overlap = 0
for i in xrange(len(m)):
for j in xrange(len(n)):
current_overlap = 0
if(m[i] == n[j]):
current_overlap = current_overlap + 1
while((i + current_overlap) < len(m)-1) and ((j + current_overlap) < len(n)-1):
if(m[i + current_overlap] == n[j + current_overlap]):
current_overlap = current_overlap + 1
else:
break
if(current_overlap > overlap):
overlap = current_overlap
if(i < j):
prefix = n
suffix = m
else:
prefix = m
suffix = n
result['overlap'] = overlap
result['prefix'] = prefix
result['suffix'] = suffix
return result
def gen_strings(input):
strings = []
random.seed()
size = random.randint(2, len(input))
sample = random.randint(20, len(input)+100)
print("Fragment size: " + str(size) + ", " + str(sample) + " fragments")
print("Coverage: " + str(float((sample * size))/float(len(input))))
while sample > 0:
temp_index = random.randint(0, len(input))
if(temp_index + size) < len(input):
strings.append(input[temp_index:temp_index + size])
sample = sample - 1
return strings
def duplicates(frags):
unique = {}
for frag in frags:
keys = unique.keys()
if(frag in keys):
unique[str(frag)] = unique[str(frag)] + 1
else:
unique[str(frag)] = 1
return unique
if __name__ == '__main__':
for line in fileinput.input():
start_time = time.time()
random.seed()
strings = gen_strings(line)
result = duplicates(strings).keys()
print("Unique fragments(" + str(len(result)) + "): " + str(result))
while(len(result) > 1):
stringm = ""
stringn = ""
overlap = 0
for r in result:
for s in result:
if(r != s):
temp = _overlap(r, s)
if(temp['overlap'] >= overlap):
overlap = temp['overlap']
stringm = temp['prefix']
stringn = temp['suffix']
result.append(concat(stringm, stringn))
if(len(stringm) > 0):
result.remove(stringm)
if(len(stringn) > 0):
result.remove(stringn)
print("Superstring: " + str(result[0]) + ", Length: " + str(len(result[0])))
print("Time to execute: " + str(time.time() - start_time) + ", Length of input: " + str(len(line)))
accuracy = 0
superstring = result[0]
for i in xrange(0, len(superstring)):
if(i < len(line)):
if(superstring[i] == line[i]):
accuracy = accuracy + 1
print("Accuracy: " + str(float(accuracy)/float(len(line))))