-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbigrams.py
More file actions
30 lines (22 loc) · 798 Bytes
/
bigrams.py
File metadata and controls
30 lines (22 loc) · 798 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from dicts import DefaultDict
def bigrams(LIST):
    """Count adjacent pairs (bigrams) in a list of characters or words.

    ``None`` is used as a boundary marker on both ends of the sequence, so
    the first item is counted as following ``None`` and the last item as
    being followed by ``None``.  An empty list therefore contributes a
    single ``(None, None)`` pair.

    LIST must be a real ``list``: it is concatenated with list literals to
    build the padded sequence.

    Returns a dictionary of dictionaries in which ``result[first][second]``
    holds the occurrence count of ``second`` directly after ``first``.
    """
    # NOTE(review): assumes DefaultDict(default) gives every missing key its
    # own copy of `default` -- confirm against the dicts module.
    pair_counts = DefaultDict(DefaultDict(0))
    padded = [None] + LIST + [None]
    # Pair every element with its successor in the padded sequence.
    for first, second in zip(padded, padded[1:]):
        pair_counts[first][second] += 1
    return pair_counts
def unigrams(LIST):
    """Count how often each char/word (unigram) occurs in LIST.

    Returns a dictionary keyed by item whose values are occurrence counts.
    """
    # NOTE(review): assumes DefaultDict(0) yields 0 for keys not seen yet --
    # confirm against the dicts module.
    tallies = DefaultDict(0)
    for token in LIST:
        tallies[token] = tallies[token] + 1
    return tallies
def file2bigrams_letter(filename):
    """Return character-level bigram counts for the text in *filename*.

    The file is opened in default text mode, read in full, and split into
    individual characters (whitespace and newlines included) before being
    handed to ``bigrams``.
    """
    # A context manager closes the handle as soon as the read finishes
    # instead of leaving it open until the file object is garbage-collected.
    with open(filename) as source:
        text = source.read()
    return bigrams(list(text))
def file2unigrams_letter(filename):
    """Return character-level unigram counts for the text in *filename*.

    The file is opened in default text mode, read in full, and split into
    individual characters (whitespace and newlines included) before being
    handed to ``unigrams``.
    """
    # A context manager closes the handle as soon as the read finishes
    # instead of leaving it open until the file object is garbage-collected.
    with open(filename) as source:
        text = source.read()
    return unigrams(list(text))
def file2bigrams_word(filename):
    """Return word-level bigram counts for the text in *filename*.

    The file is opened in default text mode, read in full, and split on
    runs of whitespace (``str.split()`` with no arguments); the resulting
    word list is handed to ``bigrams``.
    """
    # A context manager closes the handle as soon as the read finishes
    # instead of leaving it open until the file object is garbage-collected.
    with open(filename) as source:
        text = source.read()
    return bigrams(text.split())
def file2unigrams_word(filename):
    """Return word-level unigram counts for the text in *filename*.

    The file is opened in default text mode, read in full, and split on
    runs of whitespace (``str.split()`` with no arguments); the resulting
    word list is handed to ``unigrams``.
    """
    # A context manager closes the handle as soon as the read finishes
    # instead of leaving it open until the file object is garbage-collected.
    with open(filename) as source:
        text = source.read()
    return unigrams(text.split())