"""vm.py - assembler, disassembler and interpreter for a small word-based VM.

The machine keeps its three registers (pointer, A, X) in the first memory
words and has sixteen instructions (see OPCODES).  Running this file as a
script assembles a source file, prints the resulting memory image in binary,
optionally disassembles it, and then executes it.
"""
import sys
import re
# Special memory addresses: the registers are memory-mapped into the lowest
# words, and PROGRAM_START is the program start address.
(
    P_REG,          # 0: pointer register (memory address)
    DATA_REG_A,     # 1: data register A
    DATA_REG_X,     # 2: data register X
    PROGRAM_START,  # 3: program start address
) = range(4)
# Instruction set.  The opcode of each mnemonic is its position in this
# tuple, so the order below defines the binary encoding - do not reorder.
_MNEMONICS = (
    "loada",   # 0x0: load the next word into the A register
    "a2x",     # 0x1: move A to X
    "x2a",     # 0x2: move X to A
    "a2p",     # 0x3: move A to the pointer register (address register)
    "read",    # 0x4: read word from address in pointer register to A
    "write",   # 0x5: write word in A to address in pointer register
    "add",     # 0x6: add A and X, store result in A
    "lshift",  # 0x7: left shift A
    "rshift",  # 0x8: right shift A
    "and",     # 0x9: binary AND: A = A & X
    "or",      # 0xa: binary OR: A = A | X
    "not",     # 0xb: binary NOT: invert all bits of A
    "jump",    # 0xc: jump to address in pointer register
    "jumpz",   # 0xd: jump to address in pointer register if A == 0
    "output",  # 0xe: output character
    "halt",    # 0xf: halt execution
)
# Mnemonic -> opcode
OPCODES = {mnemonic: code for code, mnemonic in enumerate(_MNEMONICS)}
# Opcode -> mnemonic, the inverse map used for debug output
OPCODE_NAMES = dict(enumerate(_MNEMONICS))
def _strip_comment(line):
    """Return *line* up to its first ';' that is not inside double quotes."""
    in_string = False
    for index, char in enumerate(line):
        if char == '"':
            in_string = not in_string
        elif char == ';' and not in_string:
            return line[:index]
    return line


def _emit_word(memory, addr, token, patches):
    """Store *token* at memory[addr]; queue non-integer tokens as label references."""
    try:
        memory[addr] = int(token)
    except ValueError:
        # Not a number: assume it names a label and resolve it after the
        # whole file has been read (forward references are allowed).
        patches.append((addr, token))


def _assemble_data(memory, addr, content, patches, line_num):
    """Emit the words of one `.data` directive and return the next free address."""
    if not content.startswith('"'):
        # Purely numeric / label data: comma-separated values
        for token in content.split(','):
            _emit_word(memory, addr, token.strip(), patches)
            addr += 1
        return addr

    # String data: everything between the first and the last double quote
    end_quote = content.rfind('"')
    if end_quote <= 0:
        print(f"Warning: Unterminated string in .data at line {line_num}")
        return addr
    for char in content[1:end_quote]:
        memory[addr] = ord(char)
        addr += 1

    # Optional comma-separated values after the string (e.g. a terminator)
    remaining = content[end_quote + 1:].strip()
    if remaining:
        if remaining.startswith(','):
            remaining = remaining[1:].strip()
        for token in remaining.split(','):
            _emit_word(memory, addr, token.strip(), patches)
            addr += 1
    return addr


def assemble(filename):
    """
    Assemble the source file *filename* into a memory image.

    Recognised syntax, one statement per line:
      * `; text`            comment (a ';' inside a quoted string is data)
      * `name:`             label bound to the current address; may be
                            followed by a statement on the same line
      * `.data "str", v..`  characters of the string followed by values
      * `.data v, v, ...`   comma-separated values
      * `loada <operand>`   opcode word followed by the operand word
      * any other mnemonic in OPCODES, emitted as a single word

    Values and operands that are not integers are treated as label names
    and patched once all labels are known.  Words are emitted starting at
    address 0.

    Returns a tuple `(memory, end_addr)`: the 1,000,000-word memory list and
    the address just past the last emitted word.  Prints a message and exits
    with status 1 if the file cannot be read or a label is undefined.
    """
    memory = [0] * 1000000
    labels = {}    # label name -> address
    patches = []   # (memory_address, label_name) pairs awaiting resolution

    try:
        with open(filename, 'r') as f:
            lines = f.readlines()
    except (OSError, UnicodeError) as e:
        print(f"Error opening file: {e}")
        sys.exit(1)

    current_addr = 0
    for line_num, raw_line in enumerate(lines, 1):
        line = _strip_comment(raw_line).strip()
        if not line:
            continue

        # A ':' only introduces a label when it comes before any quoted
        # string; otherwise `.data "a:b"` would be parsed as a label.
        head, colon, tail = line.partition(':')
        if colon and '"' not in head:
            labels[head.strip()] = current_addr
            line = tail.strip()
            if not line:
                continue

        if line.startswith('.data'):
            current_addr = _assemble_data(
                memory, current_addr, line[5:].strip(), patches, line_num
            )
        elif line.startswith('loada'):
            memory[current_addr] = OPCODES["loada"]
            current_addr += 1
            parts = line.split(maxsplit=1)
            if len(parts) > 1:
                _emit_word(memory, current_addr, parts[1].strip(), patches)
            else:
                print(f"Warning: loada without operand at line {line_num}")
            # The operand slot is reserved even when the operand is missing
            current_addr += 1
        elif line.startswith(tuple(OPCODES)):
            opcode = line.split(maxsplit=1)[0]
            if opcode in OPCODES:
                memory[current_addr] = OPCODES[opcode]
                current_addr += 1
            else:
                print(f"Warning: Unrecognized instruction '{opcode}' at line {line_num}")
        else:
            print(f"Warning: Unrecognized line format at line {line_num}: {line}")

    # Resolve label references now that every label address is known
    for addr, label in patches:
        if label not in labels:
            print(f"Error: Undefined label '{label}' referenced at address {addr}")
            sys.exit(1)
        memory[addr] = labels[label]

    return memory, current_addr
def disassemble(memory, start_addr=0, end_addr=None):
    """
    Print a listing of *memory* in a traditional disassembly format.

    memory:     list of integer words.
    start_addr: first address to list.  When 0, the three register words
                are printed first and the listing continues at
                PROGRAM_START.
    end_addr:   address just past the last word to list.  When None, it is
                the address after the last non-zero word (address 0 is not
                considered), or 100 if no such word exists.

    Words whose value is a known opcode are shown as instructions (`loada`
    together with its operand word); other words are shown as data, as a
    quoted character when in the printable ASCII range.  The listing cannot
    tell data from code, so small data values are shown as instructions.
    """
    if end_addr is None:
        last_used = next(
            (i for i in range(len(memory) - 1, 0, -1) if memory[i] != 0),
            None,
        )
        end_addr = 100 if last_used is None else last_used + 1
    # Never read past the end of memory (the 100-word fallback or a
    # caller-supplied end may exceed a short memory list).
    end_addr = min(end_addr, len(memory))

    if start_addr == 0:
        # The first 3 addresses hold the registers, not program words
        register_names = ('P_REG', 'DATA_REG_A', 'DATA_REG_X')
        for i, reg_name in enumerate(register_names[:len(memory)]):
            print(f"{i:08x} {memory[i]:08x} {reg_name}")
        pc = PROGRAM_START
    else:
        pc = start_addr

    while pc < end_addr:
        value = memory[pc]
        addr_str = f"{pc:08x}"
        if value in OPCODE_NAMES:
            mnemonic = OPCODE_NAMES[value]
            # loada is the only instruction followed by an operand word
            if value == OPCODES["loada"] and pc + 1 < end_addr:
                operand = memory[pc + 1]
                print(f"{addr_str} {value:08x} {operand:08x} {mnemonic} {operand}")
                pc += 2
                continue
            print(f"{addr_str} {value:08x} {mnemonic}")
        else:
            # Show printable ASCII as a character, anything else as a number
            data_str = f"'{chr(value)}'" if 32 <= value <= 126 else f"{value}"
            print(f"{addr_str} {value:08x} {data_str}")
        pc += 1
def execute_vm(memory, debug=False, max_instructions=10000):
    """
    Execute the program stored in *memory*, starting at PROGRAM_START.

    memory:           list of integer words; modified in place.  The
                      registers are the words at P_REG, DATA_REG_A and
                      DATA_REG_X.
    debug:            when true, print one trace line per executed
                      instruction showing the registers after it ran.
    max_instructions: safety limit on executed instructions, to stop
                      programs that loop forever.

    Execution ends on `halt`, on an unknown opcode, when the instruction
    limit is reached, or when an access raises (e.g. an address outside
    memory); the reason is printed.  Characters emitted by `output` are
    collected and printed at the end, followed by a summary of the
    instruction count and the final register values.  Returns None.
    """
    pc = PROGRAM_START
    halted = False
    instruction_count = 0
    output_chars = []

    if debug:
        print("\nDebug trace:")

    try:
        while not halted and instruction_count < max_instructions:
            # Fetch
            instruction = memory[pc]
            original_pc = pc  # address of this instruction, for messages
            pc += 1
            instruction_count += 1

            # Execute
            if instruction == OPCODES["loada"]:
                # The operand is the word following the opcode
                memory[DATA_REG_A] = memory[pc]
                pc += 1
            elif instruction == OPCODES["a2x"]:
                memory[DATA_REG_X] = memory[DATA_REG_A]
            elif instruction == OPCODES["x2a"]:
                memory[DATA_REG_A] = memory[DATA_REG_X]
            elif instruction == OPCODES["a2p"]:
                memory[P_REG] = memory[DATA_REG_A]
            elif instruction == OPCODES["read"]:
                memory[DATA_REG_A] = memory[memory[P_REG]]
            elif instruction == OPCODES["write"]:
                memory[memory[P_REG]] = memory[DATA_REG_A]
            elif instruction == OPCODES["add"]:
                # Arithmetic and bitwise results are truncated to 32 bits
                memory[DATA_REG_A] = (memory[DATA_REG_A] + memory[DATA_REG_X]) & 0xFFFFFFFF
            elif instruction == OPCODES["lshift"]:
                memory[DATA_REG_A] = (memory[DATA_REG_A] << 1) & 0xFFFFFFFF
            elif instruction == OPCODES["rshift"]:
                memory[DATA_REG_A] = (memory[DATA_REG_A] >> 1) & 0xFFFFFFFF
            elif instruction == OPCODES["and"]:
                memory[DATA_REG_A] = (memory[DATA_REG_A] & memory[DATA_REG_X]) & 0xFFFFFFFF
            elif instruction == OPCODES["or"]:
                memory[DATA_REG_A] = (memory[DATA_REG_A] | memory[DATA_REG_X]) & 0xFFFFFFFF
            elif instruction == OPCODES["not"]:
                memory[DATA_REG_A] = (~memory[DATA_REG_A]) & 0xFFFFFFFF
            elif instruction == OPCODES["jump"]:
                pc = memory[P_REG]
            elif instruction == OPCODES["jumpz"]:
                if memory[DATA_REG_A] == 0:
                    pc = memory[P_REG]
            elif instruction == OPCODES["output"]:
                # Only the lower 8 bits of A are emitted; no newline is added
                output_chars.append(chr(memory[DATA_REG_A] & 0xFF))
            elif instruction == OPCODES["halt"]:
                halted = True
            else:
                print(f"\nProgram terminated: Unknown instruction: 0x{instruction:02x} at address {original_pc}")
                break

            if debug:
                # Fixed-width fields keep the trace columns aligned
                inst_name = OPCODE_NAMES[instruction]
                print(f"0x{original_pc:08x} {inst_name:<14} p=0x{memory[P_REG]:08x} a=0x{memory[DATA_REG_A]:08x} x=0x{memory[DATA_REG_X]:08x}")
        else:
            # The loop ended without `break`: either the program halted or
            # the limit was hit.  Report the limit only in the latter case,
            # so a program halting on its last allowed instruction is not
            # misreported as a runaway.
            if not halted:
                print("\nExecution terminated: Maximum instruction count reached (possible infinite loop)")
    except (IndexError, TypeError, ValueError) as e:
        # Out-of-range addresses or non-integer words end the run gracefully
        print(f"\nProgram terminated with error: {e}")

    if debug:
        print("\nEnd of debug trace")

    print("\nProgram Output:")
    print("".join(output_chars))
    print("\nSummary:")
    print(f"Total instructions executed: {instruction_count}")
    print(f"Final register values: P_REG=0x{memory[P_REG]:08x}, DATA_REG_A=0x{memory[DATA_REG_A]:08x}, DATA_REG_X=0x{memory[DATA_REG_X]:08x}")
def format_binary(value, bit_width=8):
    """
    Return the low *bit_width* bits of *value* as a zero-padded binary string.

    Bits above *bit_width* are discarded, so negative numbers appear in
    their two's-complement form for that width.
    """
    low_bits = value & ((1 << bit_width) - 1)
    return f"{low_bits:0{bit_width}b}"
def format_memory_binary(memory, end_addr, bit_width=8):
    """
    Render the registers and program memory as one hyphen-joined binary string.

    The three register words come first, then the word at PROGRAM_START
    prefixed with "p", then every word up to (not including) *end_addr*,
    each prefixed with a space.  Every word is formatted with
    format_binary at *bit_width* bits.
    """
    register_fields = [
        format_binary(memory[reg], bit_width)
        for reg in (P_REG, DATA_REG_A, DATA_REG_X)
    ]
    # The first program word is always emitted and carries the "p" marker
    program_fields = ["p" + format_binary(memory[PROGRAM_START], bit_width)]
    program_fields.extend(
        " " + format_binary(memory[addr], bit_width)
        for addr in range(PROGRAM_START + 1, end_addr)
    )
    return "-".join(register_fields + program_fields)
def main(argv=None):
    """
    Command-line entry point: assemble, dump, optionally disassemble, run.

    argv: argument list in the shape of sys.argv (script name first);
          defaults to sys.argv.

    Usage: <script> <assembly_file> [--disassemble] [--debug] [--bits N]
      --disassemble  print a disassembly listing before execution
      --debug        print a per-instruction trace during execution
      --bits N       word width used for the binary memory dump (default 8)

    Exits with status 1 when no assembly file is given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        script = argv[0] if argv else "vm.py"
        print(f"Usage: python {script} <assembly_file> [--disassemble] [--debug] [--bits N]")
        sys.exit(1)

    filename = argv[1]
    disassemble_flag = "--disassemble" in argv
    debug_flag = "--debug" in argv

    # Bit width of the dump; when --bits is repeated the last valid one wins
    bit_width = 8
    for i, arg in enumerate(argv):
        if arg != "--bits":
            continue
        if i + 1 >= len(argv):
            print("Warning: --bits requires a value, using default (8)")
            continue
        raw_width = argv[i + 1]
        try:
            candidate = int(raw_width)
        except ValueError:
            candidate = 0
        if candidate >= 1:
            bit_width = candidate
        else:
            # Non-numeric, zero and negative widths cannot be formatted
            print(f"Warning: Invalid bit width '{raw_width}', using default (8)")

    memory, end_addr = assemble(filename)

    # Show the assembled image before execution modifies memory
    print(format_memory_binary(memory, end_addr, bit_width))

    if disassemble_flag:
        disassemble(memory, 0, end_addr)

    execute_vm(memory, debug_flag)


if __name__ == "__main__":
    main()