# Compiler/word_analysis.py at main · Edlison/Compiler · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# @Author  : Edlison
# @Date    : 11/16/20 15:24
import os
from compiler_exception import WordAnalyseException


# Token表
class WordToken:
    """One row of the token table built by the lexer."""

    def __init__(self, index: int, name: str, code: int, addr=0):
        """
        Create a token entry.

        Args:
            index: sequence number of the token
            name: text of the token
            code: numeric code of the token
            addr: position in the symbol table (0 when the token is neither
                an identifier nor a constant)

        Returns:

        @Author  : Edlison
        @Date    : 11/16/20 20:13
        """
        self.index, self.name = index, name
        self.code, self.addr = code, addr

    def __str__(self):
        # Tab-separated, human-readable rendering of every field.
        template = 'index: {} \t name: {} \t code: {} \t addr: {}'
        values = (self.index, self.name, self.code, self.addr)
        return template.format(*values)


# 符号表
class WordSymbol(object):
    """One row of the symbol table (identifiers and constants)."""

    def __init__(self, index: int, name: str, type: int):
        """
        Create a symbol entry.

        Args:
            index: sequence number of the symbol
            name: text of the symbol
            type: kind of symbol (0 = identifier, 1 = constant)

        Returns:

        @Author  : Edlison
        @Date    : 11/16/20 20:14
        """
        self.index, self.name, self.type = index, name, type

    def __str__(self):
        # Anything other than type 0 is rendered as a constant.
        if self.type == 0:
            kind = '标识符'
        else:
            kind = '常数'
        template = 'index: {} \t name: {} \t type: {}'
        return template.format(self.index, self.name, kind)


class WordAnalyzer:
    """Lexical analyzer: splits a source text into tokens and a symbol table.

    The word -> code mapping is read from a table file.  Codes are grouped by
    numeric range: [0, 25) keywords, (24, 39) operators, (43, 55) delimiters.
    The table is also expected to contain the keys 'id', '整型' and '实型',
    whose codes are used for identifiers, integer constants and real
    constants respectively.

    Attributes:
        word_token: list of WordToken, in order of appearance.
        word_symbol: list of WordSymbol (identifiers and constants), each
            distinct name stored once.
        text: the raw input text.
        table: dict mapping a word to its numeric code.
    """

    def __init__(self, table, input):
        """Load the code table and the input text, then tokenize immediately.

        Args:
            table: path of the file holding the word -> code dict literal.
            input: path of the source text to analyse.

        Raises:
            WordAnalyseException: if a file is missing or the text contains
                a lexical error.
        """
        self.word_token = []
        self.word_symbol = []
        self.text = ''
        self.table = {}

        self._load_table(table)
        self._load_input(input)
        self._analyse()

    def _load_input(self, path):
        """Read the whole source text at *path* into ``self.text``."""
        if not os.path.exists(path):
            raise WordAnalyseException('文件不存在')
        with open(path) as f:
            self.text = f.read()

    def _load_table(self, path):
        """Read the word -> code mapping at *path* into ``self.table``."""
        if not os.path.exists(path):
            raise WordAnalyseException('文件不存在')
        with open(path) as f:
            table = f.read()
        # NOTE(security): eval() executes whatever the file contains.  Only
        # load table files you trust; if the table is always a plain dict
        # literal, ast.literal_eval would be the safe replacement.
        self.table = eval(table.strip())

    @staticmethod
    def _is_letter(ch):
        """Return True for an ASCII letter."""
        return 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'

    @staticmethod
    def _is_digit(ch):
        """Return True for an ASCII decimal digit."""
        return '0' <= ch <= '9'

    def _is_keyword(self, s) -> bool:
        """Return True if *s* is in the table with a keyword code (0..24)."""
        code = self.table.get(s)
        # Compare against None explicitly: code 0 is a valid keyword code.
        return code is not None and 0 <= code < 25

    def _is_op(self, s):
        """Return True if *s* is in the table with an operator code (25..38)."""
        code = self.table.get(s)
        return code is not None and 24 < code < 39

    def _is_delimiter(self, s):
        """Return True if *s* is in the table with a delimiter code (44..54)."""
        code = self.table.get(s)
        return code is not None and 43 < code < 55

    def _is_integer(self, s):
        """Classify a numeric literal.

        Args:
            s: the literal text (starts with a digit; may contain digits,
                '.' and letters as collected by the scanner).

        Returns:
            True if *s* has no decimal point (integer), False if it has
            exactly one (real).

        Raises:
            WordAnalyseException: if a letter occurs in the literal or a
                second decimal point is found.
        """
        point_num = 0
        for ch in s:
            if ch == '.':
                point_num += 1
            has_letter = self._is_letter(ch)
            if point_num == 0 and has_letter:
                raise WordAnalyseException('常数中出现字母')
            if point_num == 1 and has_letter:
                raise WordAnalyseException('常数的小数部分出现字母')
            if point_num == 2:
                raise WordAnalyseException('出现两个小数点')
        return point_num == 0

    def _get_code(self, s):
        """Return the code of *s* from the table, or None if it is absent."""
        return self.table.get(s)

    def _is_exist(self, s):
        """Return True if a symbol named *s* is already in the symbol table."""
        return any(item.name == s for item in self.word_symbol)

    def _get_addr(self, s):
        """Return the symbol-table index of the symbol named *s*.

        Raises:
            WordAnalyseException: if no such symbol has been recorded.
        """
        for item in self.word_symbol:
            if item.name == s:
                return item.index
        raise WordAnalyseException('未在常数表中出现')

    def _symbol_addr(self, name, kind):
        """Return the index of *name* in the symbol table, adding it if new.

        Args:
            name: identifier or constant text.
            kind: symbol type stored for a new entry (0 identifier,
                1 constant).
        """
        if not self._is_exist(name):
            self.word_symbol.append(WordSymbol(len(self.word_symbol), name, kind))
        return self._get_addr(name)

    def _add_token(self, name, code, addr=0):
        """Append a token to the token table, numbered by its position."""
        self.word_token.append(WordToken(len(self.word_token), name, code, addr))

    def _analyse(self):
        """Scan ``self.text`` line by line, filling the token and symbol tables.

        Raises:
            WordAnalyseException: on a malformed numeric literal or a
                character that is neither an operator nor a delimiter.
        """
        for line in self.text.split('\n'):
            i = 0
            end = len(line)
            while i < end:
                ch = line[i]
                if ch.isspace():
                    # Skip blanks; tabs and stray '\r' count as blanks too.
                    i += 1
                    continue

                start = i
                if self._is_letter(ch):
                    # Letter first: keyword or identifier (letters and digits).
                    while i < end and (self._is_letter(line[i]) or self._is_digit(line[i])):
                        i += 1
                    word = line[start:i]
                    if self._is_keyword(word):
                        self._add_token(word, self._get_code(word))
                    else:
                        self._add_token(word, self._get_code('id'), self._symbol_addr(word, 0))
                elif self._is_digit(ch):
                    # Digit first: numeric constant.  Letters are collected as
                    # well so that _is_integer can reject them with a clear
                    # error instead of silently splitting the literal.
                    while i < end and (self._is_digit(line[i]) or line[i] == '.'
                                       or self._is_letter(line[i])):
                        i += 1
                    word = line[start:i]
                    type_key = '整型' if self._is_integer(word) else '实型'
                    self._add_token(word, self._get_code(type_key), self._symbol_addr(word, 1))
                else:
                    # Operator or delimiter.  Try the two-character form first
                    # (longest match) so that e.g. '<=' is not split into
                    # '<' and '=' when '<' is an operator on its own.
                    pair = line[i:i + 2]
                    if len(pair) == 2 and (self._is_op(pair) or self._is_delimiter(pair)):
                        word = pair
                    elif self._is_op(ch) or self._is_delimiter(ch):
                        word = ch
                    else:
                        raise WordAnalyseException('非法字符')
                    self._add_token(word, self._get_code(word))
                    i += len(word)

    def show(self):
        """Print the input text, then the token table and the symbol table."""
        print('Input text:\n', self.text)
        print('\nWord Token')
        for item in self.word_token:
            print(item)
        print('\nWord Symbol')
        for item in self.word_symbol:
            print(item)


if __name__ == '__main__':
    # Script entry point: analyse the sample input with the default code
    # table and print the resulting token and symbol tables.
    analyzer = WordAnalyzer(table='./words_table.txt', input='./input_1.txt')
    analyzer.show()