-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathword_analysis.py
More file actions
201 lines (173 loc) · 7.22 KB
/
word_analysis.py
File metadata and controls
201 lines (173 loc) · 7.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# @Author : Edlison
# @Date : 11/16/20 15:24
import os
from compiler_exception import WordAnalyseException
# Token table
class WordToken:
    """A single row of the token table built by the lexical analyser."""

    def __init__(self, index: int, name: str, code: int, addr=0):
        """
        Create a token-table row.

        Args:
            index: sequence number of the token
            name: text of the token
            code: code of the token
            addr: position in the symbol table (0 when the token is
                neither an identifier nor a constant)
        """
        self.index, self.name = index, name
        self.code, self.addr = code, addr

    def __str__(self):
        # Attribute lookups inside the template keep the layout in one place.
        template = 'index: {0.index} \t name: {0.name} \t code: {0.code} \t addr: {0.addr}'
        return template.format(self)
# Symbol table
class WordSymbol:
    """A single row of the symbol table (an identifier or a constant)."""

    def __init__(self, index: int, name: str, type: int):
        """
        Create a symbol-table row.

        Args:
            index: sequence number of the symbol
            name: text of the symbol
            type: kind of symbol (0 = identifier, 1 = constant)
        """
        self.index, self.name, self.type = index, name, type

    def __str__(self):
        # Anything other than 0 is displayed as a constant.
        if self.type == 0:
            kind = '标识符'
        else:
            kind = '常数'
        return 'index: {} \t name: {} \t type: {}'.format(self.index, self.name, kind)
class WordAnalyzer:
    """
    Lexical analyser that splits a source text into tokens.

    A code table maps each reserved word, operator and delimiter (plus the
    special entries 'id', '整型' and '实型') to an integer code.  The code
    range decides the category: 0-24 keyword, 25-38 operator,
    44-54 delimiter.
    """

    def __init__(self, table, input):
        """
        Load the code table and the input text, then tokenise immediately.

        Args:
            table: path of the file holding the code table
            input: path of the source file to analyse
        Raises:
            WordAnalyseException: a file does not exist or the text
                contains a malformed constant or an illegal character
        """
        self.word_token = []   # WordToken rows, in order of appearance
        self.word_symbol = []  # WordSymbol rows: identifiers and constants
        self.text = ''
        self.table = {}
        self._load_table(table)
        self._load_input(input)
        self._analyse()

    @staticmethod
    def _read_file(path):
        """Return the content of ``path``; raise WordAnalyseException if it is missing."""
        if not os.path.exists(path):
            raise WordAnalyseException('文件不存在')
        with open(path) as f:
            return f.read()

    @staticmethod
    def _is_letter(ch) -> bool:
        """Return True when ``ch`` is an ASCII letter."""
        return 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'

    def _load_input(self, path):
        """Read the source text to analyse into ``self.text``."""
        self.text = self._read_file(path)

    def _load_table(self, path):
        """
        Read the code table into ``self.table``.

        The file must contain a Python expression evaluating to a mapping
        from token text to integer code.
        """
        table = self._read_file(path).strip()
        # NOTE(security): eval() executes arbitrary code found in the file.
        # Only load table files from a trusted source; if the table is a
        # plain dict literal, ast.literal_eval would be a safer replacement.
        self.table = eval(table)

    def _is_keyword(self, s) -> bool:
        """Return True when ``s`` is in the table with a keyword code (0-24)."""
        code = self.table.get(s)
        # Compare against None explicitly: code 0 is a valid keyword code.
        return code is not None and 0 <= code < 25

    def _is_op(self, s) -> bool:
        """Return True when ``s`` is in the table with an operator code (25-38)."""
        code = self.table.get(s)
        return code is not None and 24 < code < 39

    def _is_delimiter(self, s) -> bool:
        """Return True when ``s`` is in the table with a delimiter code (44-54)."""
        code = self.table.get(s)
        return code is not None and 43 < code < 55

    def _is_integer(self, s) -> bool:
        """
        Validate a numeric literal and tell integers from reals.

        Args:
            s: candidate literal made of digits, dots and letters
        Returns:
            True when ``s`` has no decimal point, False when it has one.
        Raises:
            WordAnalyseException: ``s`` contains a letter or a second
                decimal point
        """
        point_num = 0
        for ch in s:
            if ch == '.':
                point_num += 1
                if point_num == 2:
                    raise WordAnalyseException('出现两个小数点')
            elif self._is_letter(ch):
                if point_num == 0:
                    raise WordAnalyseException('常数中出现字母')
                raise WordAnalyseException('常数的小数部分出现字母')
        return point_num == 0

    def _get_code(self, s):
        """Return the code of ``s`` from the table, or None when absent."""
        return self.table.get(s)

    def _is_exist(self, s) -> bool:
        """Return True when the symbol table already holds the name ``s``."""
        return any(item.name == s for item in self.word_symbol)

    def _get_addr(self, s):
        """
        Return the symbol-table index of the name ``s``.

        Raises:
            WordAnalyseException: ``s`` is not in the symbol table
        """
        for item in self.word_symbol:
            if item.name == s:
                return item.index
        raise WordAnalyseException('未在常数表中出现')

    def _add_token(self, name, code, addr=0):
        """Append a token row numbered by its position in the token table."""
        self.word_token.append(WordToken(len(self.word_token), name, code, addr))

    def _add_symbol_token(self, word, symbol_type, code_key):
        """Register ``word`` in the symbol table if new, then emit its token."""
        if not self._is_exist(word):
            self.word_symbol.append(WordSymbol(len(self.word_symbol), word, symbol_type))
        self._add_token(word, self._get_code(code_key), self._get_addr(word))

    def _scan_word(self, line, start):
        """Consume a keyword or identifier at ``start``; return the next index."""
        end = start
        while end < len(line) and (self._is_letter(line[end]) or '0' <= line[end] <= '9'):
            end += 1
        word = line[start:end]
        if self._is_keyword(word):
            self._add_token(word, self._get_code(word))
        else:
            # Anything letter-led that is not a keyword is an identifier (type 0).
            self._add_symbol_token(word, 0, 'id')
        return end

    def _scan_number(self, line, start):
        """Consume a numeric constant at ``start``; return the next index."""
        end = start
        # Letters are consumed too so that _is_integer can report them as errors.
        while end < len(line) and ('0' <= line[end] <= '9' or line[end] == '.'
                                   or self._is_letter(line[end])):
            end += 1
        word = line[start:end]
        code_key = '整型' if self._is_integer(word) else '实型'
        # Constants are stored in the symbol table with type 1.
        self._add_symbol_token(word, 1, code_key)
        return end

    def _scan_symbol(self, line, start):
        """
        Consume an operator or delimiter at ``start``; return the next index.

        Raises:
            WordAnalyseException: neither the one- nor the two-character
                text at ``start`` is an operator or delimiter
        """
        single = line[start]
        double = line[start:start + 2]
        # Longest match first: otherwise a two-character token whose first
        # character is itself a token would never be recognised.
        if len(double) == 2 and (self._is_op(double) or self._is_delimiter(double)):
            self._add_token(double, self._get_code(double))
            return start + 2
        if self._is_op(single) or self._is_delimiter(single):
            self._add_token(single, self._get_code(single))
            return start + 1
        raise WordAnalyseException('非法字符')

    def _analyse(self):
        """
        Tokenise ``self.text`` line by line.

        Fills ``self.word_token`` and ``self.word_symbol``.  Whitespace
        (spaces, tabs, ...) separates tokens and is skipped.

        Raises:
            WordAnalyseException: malformed constant or illegal character
        """
        for line in self.text.split('\n'):
            i = 0
            while i < len(line):
                ch = line[i]
                if ch.isspace():
                    i += 1
                elif self._is_letter(ch):
                    i = self._scan_word(line, i)
                elif '0' <= ch <= '9':
                    i = self._scan_number(line, i)
                else:
                    i = self._scan_symbol(line, i)

    def show(self):
        """Print the input text, the token table and the symbol table."""
        print('Input text:\n', self.text)
        print('\nWord Token')
        for item in self.word_token:
            print(item)
        print('\nWord Symbol')
        for item in self.word_symbol:
            print(item)
if __name__ == '__main__':
    # Script entry: tokenise ./input_1.txt with the code table in
    # ./words_table.txt and print the resulting tables.
    analyzer = WordAnalyzer(input='./input_1.txt', table='./words_table.txt')
    analyzer.show()