-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstrings.py
More file actions
162 lines (114 loc) · 3.61 KB
/
strings.py
File metadata and controls
162 lines (114 loc) · 3.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import sys
sys.setrecursionlimit(30000)
from decorators import memoize
@memoize
def lcs_r(a, b):
# find the longest common subsequence of a and b
if not a or not b:
return 0
elif a[-1] == b[-1]:
return 1 + lcs_r(a[:-1], b[:-1])
else:
return max(lcs_r(a[:-1], b), lcs_r(a, b[:-1]))
# print(lcs_r("hellooaneihnoeatihhoaihnoahitnoaehtinhoaeioeathihnaoeihoeasihoeanihsoahsi", "mellowooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo"))
def pattern_match_exact_naive(text, pattern):
segments = len(text) - len(pattern) + 1
matches = []
for i in range(segments):
if text[i:i + len(pattern)] == pattern:
matches.append(i)
return matches
# print(pattern_match_exact_naive('bananas', 'an'))
def pattern_match_multi_exact_naive(text, patterns):
matches = {}
for p in patterns:
matches[p] = pattern_match_exact_naive(text, p)
return matches
# print(pattern_match_multi_exact_naive('bananas', ['an', 'na']))
def prefix_trie_from(patterns):
""" Form a Prefix Trie from Patterns """
trie = {}
for pattern in patterns: # insert each pattern into the trie
node = trie
for c in pattern:
if c not in node: # add char node to trie, otherwise reuse shared
node[c] = {}
node = node[c]
node['$'] = None # detects patterns that are prefixes of others
return trie
def trie_match(prefix, trie):
for c in prefix:
if trie:
if c in trie:
trie = trie[c]
else:
return False
else:
return True
return True
def trie_match_multi(text, patterns): # exact
trie = prefix_trie_from(patterns)
match = []
# run each prefix of text through the trie
for start in range(len(text)):
match.append(trie_match(text[start:], trie))
#print(text[start], sep=' ')
return match
# print(trie_match_multi('panamabananas', ['banana', 'pan', 'and', 'nab', 'antenna', 'bandana', 'ananas', 'nana']))
def suffix_trie_text(text):
from collections import deque
text = deque(text)
trie = {}
start_pos = 0
while text:
pos = trie
for i, c in enumerate(text):
if c not in pos:
pos[c] = {}
if i == len(text) - 1: # last character
pos[c]['.'] = start_pos
pos = pos[c]
text.popleft()
start_pos += 1
return trie
# print(suffix_trie_text('abc'))
def num_leaves(tree):
if type(tree) is not dict:
return 1
else:
return sum(map(num_leaves, tree.values()))
def flatten(matrix):
return [item for row in matrix for item in row]
def leaf_values(tree):
if type(tree) is not dict:
return [tree]
else:
return flatten([leaf_values(x) for x in tree.values()])
def suffix_trie_match(trie, pattern):
for c in pattern:
if c in trie:
trie = trie[c]
else:
return []
return leaf_values(trie)
def suffix_trie_match_patterns(text, patterns):
trie = suffix_trie_text(text)
matches = {}
for p in patterns:
matches[p] = suffix_trie_match(trie, p)
print(trie)
return matches
print(suffix_trie_match_patterns('banana', ['a']))
# Naive Burrows-Wheeler Transform
def bwt_naive(s):
s += '$'
rotations = []
for i in range(len(s)):
rotations.append(s[i:] + s[:i])
rotations.sort()
s_bwt = ''.join([s[-1] for s in rotations])
return s_bwt
print(bwt_naive('banana'))
def bwt_inverse_naive(s):
c_first, c_last = sorted(s), s
pass