-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdictionary.py
More file actions
123 lines (92 loc) · 3.54 KB
/
dictionary.py
File metadata and controls
123 lines (92 loc) · 3.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
class Dictionary(object):
    """Word dictionary indexed by single-letter wildcard patterns.

    Every inserted word is stored under each of its *morphed* forms, i.e.
    the word with exactly one letter replaced by the wildcard ``'_'``.
    The values are sets of the original words matching that pattern.

    Example:
        Word ``dog`` has 3 morphed forms: ``'_og'``, ``'d_g'`` and ``'do_'``.
        A Dictionary holding only that word looks as follows::

            {
                '_og': set(['dog']),
                'd_g': set(['dog']),
                'do_': set(['dog']),
            }

    Thanks to that layout, finding all words that are 'one letter away'
    from a given word only takes one lookup per letter of that word.

    All words are stored lowercase and every public method lowercases its
    argument first, so word comparisons are case insensitive.
    """

    def __init__(self):
        # Maps a morphed pattern (e.g. 'd_g') to the set of stored words
        # that match it.
        self._dictionary = {}

    def insert(self, word):
        """Insert a word into the Dictionary.

        The word is lowercased and then registered under each of its
        morphed forms. Inserting an empty string stores nothing, because an
        empty word has no morphed forms.

        :param word: Word to be inserted
        :type word: string
        """
        word = word.lower()
        for morphed_word in self.morph_word(word):
            self._dictionary.setdefault(morphed_word, set()).add(word)

    def is_real_word(self, word):
        """Check if the word is in the dictionary.

        No separate set of original words is kept; instead the word is
        looked up in the value set of each of its morphed forms.

        :param word: Word to be checked (case insensitive)
        :type word: string
        :returns: True if the word is found in the dictionary, False otherwise
        :rtype: bool
        """
        word = word.lower()
        if not word:
            # An empty string has no morphed forms, so the check below
            # would be vacuously true; it can never have been stored.
            return False
        return all(
            word in self._dictionary.get(morphed_word, ())
            for morphed_word in self.morph_word(word)
        )

    def get_corresponding_words(self, word):
        """Get all stored words that share a morphed form with ``word``.

        Collects the value sets of every morphed form of ``word``. Since a
        stored word is registered under its own morphed forms, the result
        also contains ``word`` itself (lowercased), next to the words that
        differ from it by exactly one letter.

        :param word: starting word (case insensitive)
        :type word: string
        :returns: set of matching words; empty if ``word`` is not in the
            dictionary
        :rtype: set
        """
        word = word.lower()
        result = set()
        # Checked once up front: the answer does not change per pattern.
        if not self.is_real_word(word):
            return result
        for morphed_word in self.morph_word(word):
            result.update(self._dictionary[morphed_word])
        return result

    @staticmethod
    def morph_word(word):
        """Generate morphed combinations of the word.

        For each position in the word, yield a copy of the word with the
        letter at that position replaced by the wildcard sign ``'_'``.

        For word 'marek' the values: _arek, m_rek, ma_ek, mar_k, mare_
        will be yielded. An empty word yields nothing.

        :param word: word to be morphed
        :type word: string
        :returns: a generator yielding morphed combinations
        :rtype: generator
        """
        for i in range(len(word)):
            yield word[:i] + '_' + word[i + 1:]

    @staticmethod
    def build(dict_file):
        """Build a dictionary from a file containing one word per line.

        Surrounding whitespace (including ``\\r`` from Windows line endings)
        is removed from every line, blank lines are skipped and words are
        stored lowercase.

        :param dict_file: name of the input file
        :type dict_file: string
        :returns: Dictionary with populated words
        :rtype: Dictionary
        :raises SystemExit: if the file cannot be opened or read
        """
        d = Dictionary()
        try:
            with open(dict_file, 'r') as f:
                for line in f:
                    word = line.strip()
                    if word:
                        d.insert(word)
        except IOError as e:
            # Kept from the original contract: an unreadable word list
            # terminates the program with the I/O error as the message.
            raise SystemExit(e)
        return d