diff --git a/.gitignore b/.gitignore index 50a4fba6..f77e5af2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ .DS_Store release/* +build +runtime/cpp/build-* +.vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 429a413b..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "cmake.sourceDirectory": "${workspaceFolder}/runtime/cpp", - "cmake.configureArgs": [ - "-DCMAKE_TOOLCHAIN_FILE=${workspaceFolder}/runtime/cpp/build/conan_toolchain.cmake", - "-DCMAKE_PREFIX_PATH=${workspaceFolder}/runtime/cpp/build" - ] -} \ No newline at end of file diff --git a/CODING_STYLE.md b/CODING_STYLE.md new file mode 100644 index 00000000..83ac2b54 --- /dev/null +++ b/CODING_STYLE.md @@ -0,0 +1,270 @@ +# Coding Style + +This document describes the coding style rules for egg. All code is expected to +strictly follow them to maintain the homogeneity of the codebase. + +## General Philosophy + +We spend most of our time reading existing code. For that reason we aim to minimize +the amount of text to be read. Shorter code means less effort and more things fitting +on screen. + +- Favor simplicity and minimality over performance. +- Leave simple optimizations to the runtime system instead of writing them by hand. +- All code is autoformatted, unless there is a _very_ special and specific reason. +- Prefer words that are already part of the existing vocabulary in the code corpus. + +## Naming + +### Temporary Variables + +Temporary names consist of just **one word**, chosen by priority: +1. Usage — what it is used for. +2. Contents — what it holds. +3. Type — what kind of object it is. + +```smalltalk +"Good — named by usage" +done := Set new. + +"Acceptable — named by contents" +tasks := Set new. + +"Avoid — named by type" +set := Set new. +``` + +Do not reuse a temporary name for a different value within the same method. Each +temporary should be assigned once. 
+ +### Method Arguments + +Arguments are named after the **expected type**, prefixed by an article (`a`, `an`): + +```smalltalk +add: anObject +name: aString +at: anInteger put: anObject +``` + +When the type prefix is not possible or desirable, use temporary-naming rules instead. + +### Block Arguments + +Block arguments are short, typically one word (often a single letter when the block +is small and the meaning is clear from context): + +```smalltalk +self do: [:each | each process]. +self collect: [:x | x squared]. +self keysAndValuesDo: [:k :v | ...]. +``` + +### Selectors + +Method selectors should be **short and succinct**, one or two words if possible: + +```smalltalk +"Good" +#elements + +"Acceptable" +#elementsArray + +"Avoid" +#arrayOfElements +``` + +### Instance Variables and Class Names + +Keep them short and concise, following the same spirit as temporaries. + +### No Abbreviations + +Do not abbreviate words in names. Use the full word: + +```smalltalk +"Avoid" +cmdNew +reqCount +msg + +"Preferred" +commandNew +requestCount +message +``` + +## Methods + +### Keep Methods Short + +Avoid long methods. Ideally a method has one level of iteration and a clear +single purpose. Factor complex logic into helper methods and let the compiler +optimize. + +```smalltalk +"Avoid" +baz + collection do: [:elem | + "first do this" + ...bunch of code... + "then do that" + ...more code...] + +"Preferred" +baz + collection do: [:elem | + self doThis: elem; doThat: elem] +``` + +### No Nested Loops + +Nested loops are highly discouraged. Factor the inner loop into its own method: + +```smalltalk +"Avoid" +foo + [aCollection isEmpty] whileFalse: [ + last := aCollection removeLast. + aCollection do: [:other | last use: other]] + +"Preferred" +foo + [aCollection isEmpty] whileFalse: [ + last := aCollection removeLast. 
+ self useOthers: aCollection with: last] + +useOthers: aCollection with: anObject + aCollection do: [:other | anObject use: other] +``` + +### No Keyword Message as Argument of Keyword Message + +Use a temporary to break the nesting: + +```smalltalk +"Avoid" +self foo: (self bar: aBaz) + +"Preferred" +bar := self bar: aBaz. +self foo: bar +``` + +### No Comments + +Comments are **not allowed**, except: +- As headers in methods that define public APIs. +- For _very_ special reasons (which almost never exist). + +If you feel the need to write a comment in the middle of a method, refactor the +code into another method or create appropriate objects to make the code self-explanatory. + +## Formatting + +### File Structure (Tonel format) + +Each `.st` file starts with an optional copyright header as a string literal, +followed by the class definition and then methods. Class definitions use the +standard Tonel syntax: + +```smalltalk +" + Copyright (c) 2020 Aucerna. + See (MIT) license in root directory. +" + +Class { + #name : #MyClass, + #superclass : #Object, + #instVars : [ + 'first', + 'second' + ], + #category : #Kernel +} +``` + +### Method Definitions + +Each method is preceded by a category pragma and a blank line separates methods: + +```smalltalk +{ #category : #accessing } +MyClass >> selector [ + ^value +] + +{ #category : #accessing } +MyClass >> selector: anObject [ + value := anObject +] +``` + +### Indentation + +- Use **one tab** for indentation inside method bodies. +- Continuation lines in cascades and multi-keyword messages are indented with + one extra tab. + +### Brackets and Blocks + +- Opening brackets `[` stay on the same line as the expression that introduces them. +- Closing brackets `]` go on their own line only when the block spans multiple lines + and aligns with the opening expression. Short blocks stay on a single line. + +```smalltalk +"Single-line block" +self do: [:each | each process]. 
+ +"Multi-line block" +self do: [:element | + separate ifTrue: [separatorBlock value] ifFalse: [separate := true]. + aBlock evaluateWith: element] +``` + +### Return Statements + +Write `^` (caret) directly before the returned expression, with no space. Early returns are the +preferred pattern for guard clauses: + +```smalltalk +self isEmpty ifTrue: [^nil]. +``` + +### Cascades + +Cascades are preferred when sending multiple messages to the same receiver. For +more than two or three sends, break them across lines: + +```smalltalk +"Short cascade on one line" +^self new add: anObject; yourself + +"Long cascade across lines" +^self new + add: object1; + add: object2; + add: object3; + yourself +``` + +### Multi-keyword Selectors + +When a method selector with many keywords is too long for one line, put each keyword +on its own line: + +```smalltalk +Collection class >> with: object1 +with: object2 +with: object3 +with: object4 [ + ^self new + add: object1; + add: object2; + add: object3; + add: object4; + yourself +] +``` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d592f846..a78c8747 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,80 +7,11 @@ Please use github issue tracker to raise new issues. Take care of the following before committing: - Use short non-empty messages. - - Start the message with a <module>: (i.e. bootstrap: fix whatever, or js: great feature added) + - Start the message with a [<module>] (e.g. [bootstrap] fix whatever, or [js] great feature added) - Always commit to a branch (pushing to main is not allowed anyway). - Split commits in the branch to ease others reviewing and searching history. - Do not put unrelated things in a same branch. - - Follow the code style rules stated in the next section. + - Follow the code style rules in [CODING_STYLE.md](CODING_STYLE.md). - Keep the repo clean! Do not push blobs, autogenerated code, copy-pasted code that could be autogenerated, garbage, etc. 
-# Code Style - -This section explains the rules that need to be applied in order to get code accepted -for integration. - -## General philosophy - -As developers, we spend most of our time understanding existing code by reading it. For -that reason, in this project we aim to minimize the amount of text to be read. Shorter -code means less reading effort and more things fitting in the text editors, maximizing -screen space efficiency. - -The following general rules apply: - -- Short and concise names are expected for both variables, selectors and class names. -- We prefer words that are part of the existing vocabulary in the code corpus. -- All code is autoformatted, unless there is a _very_ special and specific reason. -- Simple optimizations should be left to be done dynamically by the runtime system, - instead of being written by the developer short and concise -- We favor simplicity and minimality to performance. -- Bee has been written using the rules explained here, giving it homogeneity - through all the code base. New code is expected to strictly follow them too. - - - -## Methods - -- Temporary variable names consist of just _one_ word. The name has to be related - primarily to the usage, secondarily to the contents and finally to the type. - As an example, `done := Set new` would be preferred to `tasks := Set new` which - would be preferred to `set := Set new` for a set that stores tasks that have been - done. -- Method arguments are named after the type expected, prefixed by an article. In - cases where it is not possible/desirable then temporary naming rules apply. -- Method selectors are preferred short and succinct, one or two words if possible. - As an example, `#elements` is preferred to `#elementsArray`, which is preferred - to `#arrayOfElements` - -- _Comments are not allowed_, except as headers in methods that define public APIs, - or for _very_ special reasons (which almost never exist). 
If you are writing a - comment in the middle of a method, then refactor the thing into another method - or create according objects to make the code self understandable. -- Nested loops are highly discouraged. Again, factor them into methods and let - the compiler optimize. Example: - - ``` - foo - [aCollection isEmpty] whileTrue: [ - last := aCollection removeLast. aCollection do: [:other | last use: other]] - - ==> - foo - [aCollection isEmpty] whileTrue: [ - last := aCollection removeLast. self useOthers: aCollection with: last] - - useOthers: aCollection with: anObject - aCollection do: [:other | anObject use: other] - ``` - -- Using keyword messages as argument of other keyword arguments is not allowed. - Example: - - ``` - self foo: (self bar: aBaz) - ==> - bar := self bar: aBaz. - self foo: bar - ``` - diff --git a/bin/epm b/bin/epm new file mode 100755 index 00000000..9b870d4e --- /dev/null +++ b/bin/epm @@ -0,0 +1,15 @@ +#!/bin/sh +# epm - Egg Package Manager +# Finds and runs the egg binary with the EPM module + +# Determine egg binary location +if [ -n "$EGG_HOME" ] && [ -x "$EGG_HOME/bin/egg" ]; then + EGG="$EGG_HOME/bin/egg" +elif command -v egg >/dev/null 2>&1; then + EGG=egg +else + echo "Error: egg binary not found. Set EGG_HOME or add egg to PATH." >&2 + exit 1 +fi + +exec "$EGG" EPM "$@" diff --git a/modules/ArgParser/ArgParser.st b/modules/ArgParser/ArgParser.st new file mode 100644 index 00000000..6197d59a --- /dev/null +++ b/modules/ArgParser/ArgParser.st @@ -0,0 +1,102 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. 
+" + +Class { + #name : #ArgParser, + #superclass : #Object, + #instVars : [ + #name, + #description, + #commands, + #globalOptions + ], + #category : #ArgParser +} + +{#category : #commands} +ArgParser >> addCommand: aCommand [ + commands at: aCommand name put: aCommand +] + +{#category : #options} +ArgParser >> addGlobalOption: anOption [ + globalOptions add: anOption +] + +{#category : #accessing} +ArgParser >> description [ + ^description +] + +{#category : #accessing} +ArgParser >> description: aString [ + description := aString +] + +{#category : #parsing} +ArgParser >> extractGlobalArgs: args [ + | remaining i | + remaining := OrderedCollection new. + i := 1. + [i <= args size] whileTrue: [ + | arg match rest | + arg := args at: i. + match := globalOptions detect: [:opt | opt matches: arg] ifNone: [nil]. + match ifNotNil: [ + match hasValue ifTrue: [i := i + 1] + ] ifNil: [ + rest := args copyFrom: i to: args size. + ^remaining addAll: rest; yourself]. + i := i + 1]. + ^remaining +] + +{#category : #initialization} +ArgParser >> initialize [ + commands := OrderedDictionary new. + globalOptions := OrderedCollection new +] + +{#category : #accessing} +ArgParser >> name [ + ^name +] + +{#category : #accessing} +ArgParser >> name: aString [ + name := aString +] + +{#category : #parsing} +ArgParser >> parse: args [ + | remaining key command result tail | + remaining := self extractGlobalArgs: args. + remaining isEmpty ifTrue: [self printHelp. ^nil]. + key := remaining first. + command := commands at: key ifAbsent: [ + self error: 'Unknown command: ', key, '. Run with no args for help.']. + result := ParseResult new. + result command: key. + tail := remaining copyFrom: 2 to: remaining size. + command parse: tail into: result. + command action ifNotNil: [:act | act value: result]. + ^result +] + +{#category : #printing} +ArgParser >> printHelp [ + | text | + text := name ifNil: ['command']. + description ifNotNil: [ + text := text , ' - ' , description]. 
+ text := text , String cr , String cr , 'Commands:' , String cr. + commands keysAndValuesDo: [:key :cmd | + text := text , ' ' , key. + cmd description ifNotNil: [:d | + text := text , ' - ' , d]. + text := text , String cr]. + Kernel log: text +] + diff --git a/modules/ArgParser/ArgParserModule.st b/modules/ArgParser/ArgParserModule.st new file mode 100644 index 00000000..0d3ef0da --- /dev/null +++ b/modules/ArgParser/ArgParserModule.st @@ -0,0 +1,18 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #ArgParserModule, + #superclass : #Module, + #category : #ArgParser +} + +{#category : #spec} +ArgParserModule >> imports [ + ^{ + #Kernel -> #(Error OrderedDictionary). + } +] + diff --git a/modules/ArgParser/Command.st b/modules/ArgParser/Command.st new file mode 100644 index 00000000..a98edf62 --- /dev/null +++ b/modules/ArgParser/Command.st @@ -0,0 +1,159 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #Command, + #superclass : #Object, + #instVars : [ + #name, + #description, + #options, + #positionalNames, + #action, + #subcommands + ], + #category : #ArgParser +} + +{#category : #accessing} +Command >> action [ + ^action +] + +{#category : #accessing} +Command >> action: aBlock [ + action := aBlock +] + +{#category : #configuration} +Command >> addOption: anOption [ + options add: anOption +] + +{#category : #configuration} +Command >> addPositional: aString [ + positionalNames add: aString +] + +{#category : #configuration} +Command >> addSubcommand: aCommand [ + subcommands at: aCommand name put: aCommand +] + +{#category : #private} +Command >> applyDefaults: aResult [ + options do: [:opt | + (aResult hasOption: opt name) ifFalse: [ + opt defaultValue ifNotNil: [:v | + aResult setOption: opt name to: v]]] +] + +{#category : #private} +Command >> checkRequired: aResult [ + options do: [:opt | + | label | + label := opt long ifNil: [opt short]. 
+ (opt required and: [(aResult hasOption: opt name) not]) + ifTrue: [self error: 'Required option missing: ', label]] +] + +{#category : #accessing} +Command >> description [ + ^description +] + +{#category : #accessing} +Command >> description: aString [ + description := aString +] + +{#category : #private} +Command >> findOption: aString [ + ^options detect: [:opt | opt matches: aString] ifNone: [nil] +] + +{#category : #testing} +Command >> hasSubcommands [ + ^subcommands notEmpty +] + +{#category : #initialization} +Command >> initialize [ + options := OrderedCollection new. + positionalNames := OrderedCollection new. + subcommands := OrderedDictionary new +] + +{#category : #accessing} +Command >> name [ + ^name +] + +{#category : #accessing} +Command >> name: aString [ + name := aString +] + +{#category : #accessing} +Command >> options [ + ^options +] + +{#category : #parsing} +Command >> parse: args into: result [ + | i | + i := 1. + [i <= args size] whileTrue: [ + | arg | + arg := args at: i. + (arg notEmpty and: [arg first = $-]) + ifTrue: [i := self parseOption: arg from: args at: i into: result] + ifFalse: [result addPositional: arg]. + i := i + 1]. + self checkRequired: result. + self applyDefaults: result +] + +{#category : #parsing} +Command >> parseOption: aString from: anArray at: anInteger into: aResult [ + | opt | + opt := self findOption: aString. + opt ifNil: [self error: 'Unknown option: ', aString]. + opt hasValue ifTrue: [ + | val next | + next := anInteger + 1. + next > anArray size ifTrue: [self error: 'Option ', aString, ' requires a value']. + val := anArray at: next. + aResult setOption: opt name to: val. + ^next] + ifFalse: [ + aResult setOption: opt name to: true]. + ^anInteger +] + +{#category : #accessing} +Command >> positionalNames [ + ^positionalNames +] + +{#category : #printing} +Command >> printUsageOn: aStream [ + aStream nextPutAll: name. + positionalNames do: [:pname | + aStream nextPutAll: ' <'; nextPutAll: pname; nextPut: $>]. 
+ options do: [:opt | + aStream nextPutAll: ' ['. + opt short ifNotNil: [:s | aStream nextPutAll: s]. + (opt short notNil and: [opt long notNil]) ifTrue: [aStream nextPutAll: '|']. + opt long ifNotNil: [:l | aStream nextPutAll: l]. + opt hasValue ifTrue: [aStream nextPutAll: ' ']. + aStream nextPut: $]] +] + +{#category : #accessing} +Command >> subcommands [ + ^subcommands +] + diff --git a/modules/ArgParser/Option.st b/modules/ArgParser/Option.st new file mode 100644 index 00000000..5289e244 --- /dev/null +++ b/modules/ArgParser/Option.st @@ -0,0 +1,98 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #Option, + #superclass : #Object, + #instVars : [ + #short, + #long, + #description, + #required, + #hasValue, + #defaultValue + ], + #category : #ArgParser +} + +{#category : #configuration} +Option >> beFlag [ + hasValue := false. + defaultValue := false +] + +{#category : #configuration} +Option >> beRequired [ + required := true +] + +{#category : #accessing} +Option >> defaultValue [ + ^defaultValue +] + +{#category : #accessing} +Option >> defaultValue: anObject [ + defaultValue := anObject +] + +{#category : #accessing} +Option >> description [ + ^description +] + +{#category : #accessing} +Option >> description: aString [ + description := aString +] + +{#category : #testing} +Option >> hasValue [ + ^hasValue +] + +{#category : #initialization} +Option >> initialize [ + required := false. + hasValue := true. + defaultValue := nil +] + +{#category : #accessing} +Option >> long [ + ^long +] + +{#category : #accessing} +Option >> long: aString [ + long := aString +] + +{#category : #testing} +Option >> matches: aString [ + ^aString = short or: [aString = long] +] + +{#category : #accessing} +Option >> name [ + long ifNotNil: [^long copyFrom: 3]. 
+ ^short copyFrom: 2 +] + +{#category : #testing} +Option >> required [ + ^required +] + +{#category : #accessing} +Option >> short [ + ^short +] + +{#category : #accessing} +Option >> short: aString [ + short := aString +] + diff --git a/modules/ArgParser/ParseResult.st b/modules/ArgParser/ParseResult.st new file mode 100644 index 00000000..b3596576 --- /dev/null +++ b/modules/ArgParser/ParseResult.st @@ -0,0 +1,72 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #ParseResult, + #superclass : #Object, + #instVars : [ + #command, + #positionals, + #options + ], + #category : #ArgParser +} + +{#category : #accessing} +ParseResult >> addPositional: aString [ + positionals add: aString +] + +{#category : #accessing} +ParseResult >> command [ + ^command +] + +{#category : #accessing} +ParseResult >> command: aString [ + command := aString +] + +{#category : #testing} +ParseResult >> hasOption: aString [ + ^options includesKey: aString +] + +{#category : #initialization} +ParseResult >> initialize [ + positionals := OrderedCollection new. + options := OrderedDictionary new +] + +{#category : #accessing} +ParseResult >> optionAt: aString [ + ^options at: aString ifAbsent: [nil] +] + +{#category : #accessing} +ParseResult >> optionAt: aString ifAbsent: aBlock [ + ^options at: aString ifAbsent: aBlock +] + +{#category : #accessing} +ParseResult >> options [ + ^options +] + +{#category : #accessing} +ParseResult >> positionalAt: anInteger [ + ^positionals at: anInteger +] + +{#category : #accessing} +ParseResult >> positionals [ + ^positionals +] + +{#category : #accessing} +ParseResult >> setOption: aString to: aValue [ + options at: aString put: aValue +] + diff --git a/modules/Compiler/MessageInliner.st b/modules/Compiler/MessageInliner.st index fbea34aa..5fa84db5 100644 --- a/modules/Compiler/MessageInliner.st +++ b/modules/Compiler/MessageInliner.st @@ -1,3 +1,8 @@ +" + Copyright (c) 2026, Javier Pimás. 
+ See (MIT) license in root directory. +" + Class { #name : #MessageInliner, #superclass : #Object, @@ -13,7 +18,7 @@ MessageInliner >> inline: aMessageNode [ message := aMessageNode. message receiver isSuper ifTrue: [ ^ self ]. - (message isCascadeMessage and: [ message receiver isBlockNode ]) + message isCascadeMessage ifTrue: [ ^ self ]. s := message selector value. s == #ifTrue: diff --git a/modules/Compiler/Tests/MessageInlinerTest.st b/modules/Compiler/Tests/MessageInlinerTest.st new file mode 100644 index 00000000..3c54ad72 --- /dev/null +++ b/modules/Compiler/Tests/MessageInlinerTest.st @@ -0,0 +1,71 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #MessageInlinerTest, + #superclass : #TestCase, + #category : #'Compiler.Tests' +} + +{ #category : #helpers } +MessageInlinerTest >> ast: source [ + ^SCompiler new parse: source +] + +{ #category : #helpers } +MessageInlinerTest >> cascadeMessagesIn: source [ + | ast cascadeMessages | + ast := self ast: source. + cascadeMessages := OrderedCollection new. + ast nodesDo: [:n | + (n isMessageNode and: [n isCascadeMessage]) + ifTrue: [cascadeMessages add: n]]. + ^cascadeMessages +] + +{ #category : #'tests - cascade' } +MessageInlinerTest >> testCascadeAndKeywordIsNotInlined [ + "Regression: a keyword cascade message such as `at:put:` must not be inlined." + | messages | + messages := self cascadeMessagesIn: 'foo + ^Dictionary new at: #a put: 1; at: #b put: 2; yourself'. + messages do: [:m | self deny: m isInlined] +] + +{ #category : #'tests - cascade' } +MessageInlinerTest >> testCascadeIfTrueIsNotInlined [ + "Regression: ifTrue: as the first message of a cascade must not be inlined, + because the cascade machinery needs the receiver (a Boolean) to receive + the subsequent messages. 
This is the pattern that the original + InternalReadStream>>peekFor: relied on: + ^self peek = token ifTrue: [position := position + 1]; yourself" + | messages | + messages := self cascadeMessagesIn: 'foo + ^true ifTrue: [42]; yourself'. + self assert: messages size = 2. + messages do: [:m | self deny: m isInlined] +] + +{ #category : #'tests - cascade' } +MessageInlinerTest >> testCascadeWhileTrueIsNotInlined [ + "Regression: whileTrue: in a cascade must not be inlined either." + | messages | + messages := self cascadeMessagesIn: 'foo + ^[true] whileTrue: [^1]; yourself'. + messages do: [:m | self deny: m isInlined] +] + +{ #category : #'tests - non-cascade' } +MessageInlinerTest >> testNonCascadeIfTrueIsInlined [ + "Sanity check: outside a cascade, ifTrue: is still inlined as expected." + | ast inlined | + ast := self ast: 'foo + ^true ifTrue: [42]'. + inlined := false. + ast nodesDo: [:n | + (n isMessageNode and: [n isInlined]) + ifTrue: [inlined := true]]. + self assert: inlined +] diff --git a/modules/Compiler/Tests/SmalltalkScannerTest.st b/modules/Compiler/Tests/SmalltalkScannerTest.st new file mode 100644 index 00000000..e7834889 --- /dev/null +++ b/modules/Compiler/Tests/SmalltalkScannerTest.st @@ -0,0 +1,392 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #SmalltalkScannerTest, + #superclass : #TestCase, + #instVars : [ + 'scanner' + ], + #category : #'Compiler.Tests' +} + +{ #category : #setup } +SmalltalkScannerTest >> setUp [ + super setUp. + scanner := SCompiler new smalltalkScanner +] + +{ #category : #numbers } +SmalltalkScannerTest >> test0xNotation [ + | node | + node := SCompiler new smalltalkParser parseExpression: '0x10'. + self assert: node statements first value = 16. + node := SCompiler new smalltalkParser parseExpression: '0X10'. + self assert: node statements first value = 16 +] + +{ #category : #arrays } +SmalltalkScannerTest >> testArrayStart [ + | node | + scanner on: '#()'. 
+ node := scanner next. + self + assert: (node is: #'#('); + assert: node source = '#('. + scanner on: '#['. + node := scanner next. + self + assert: (node is: #'#['); + assert: node source = '#['. + scanner on: '#''hello'''. + node := scanner next. + self + assert: node value = #hello; + assert: node source = '#''hello''' +] + +{ #category : #strings } +SmalltalkScannerTest >> testBinary [ + | node | + scanner on: '- + -- + --- + ==> + ~!|\/%&*+=><'. + node := scanner next. + self + assert: node isBinary; + assert: node value = #'-'; + assert: node source = '-'. + node := scanner next. + self + assert: node isBinary; + assert: node value = #'--'; + assert: node source = '--'. + node := scanner next. + self + assert: node isBinary; + assert: node value = #'---'; + assert: node source = '---'. + node := scanner next. + self + assert: node isBinary; + assert: node value = #'==>'; + assert: node source = '==>'. + node := scanner next. + self + assert: node isBinary; + assert: node value = #'~!|\/%&*+=><'; + assert: node source = '~!|\/%&*+=><' +] + +{ #category : #symbols } +SmalltalkScannerTest >> testBinarySymbol [ + | node | + scanner on: '#='. + node := scanner next. + self + assert: node isLiteral; + assert: node hasSymbol; + assert: node value = #'=' +] + +{ #category : #strings } +SmalltalkScannerTest >> testCharacter [ + | node | + scanner on: '$a$b'. + node := scanner next. + self + assert: node isLiteral; + assert: node value = $a; + assert: node source = '$a'. + node := scanner next. + self + assert: node isLiteral; + assert: node value = $b; + assert: node source = '$b' +] + +{ #category : #colons } +SmalltalkScannerTest >> testColon [ + | node | + scanner on: ':a'. + node := scanner next. + self + assert: (node is: $:); + assert: node source = ':'. + node := scanner next. + self + assert: node value = 'a'; + assert: node source = 'a'. + scanner on: ':='. + node := scanner next. 
+ self + assert: node isAssignment; + assert: node source = ':=' +] + +{ #category : #colons } +SmalltalkScannerTest >> testColon2 [ + | node | + scanner on: '::'. + node := scanner next. + self + assert: node class = SStringToken; + assert: node source = '::' +] + +{ #category : #comments } +SmalltalkScannerTest >> testComment [ + | node | + scanner on: '"comment"a'. + node := scanner next. + self + assert: node isComment; + assert: node value = 'comment'; + assert: node source = '"comment"'. + scanner on: '"comment"'. + node := scanner next. + self + assert: node isComment; + assert: node source = '"comment"'; + assert: node position = 1. + scanner on: 'a"comment"'. + node := scanner next. + self + assert: node isNameToken; + assert: node value = 'a'; + assert: node source = 'a'. + node := scanner next. + self + assert: node isComment; + assert: node position = ('a' size + 1) +] + +{ #category : #symbols } +SmalltalkScannerTest >> testDashStartingSymbol [ + | node | + scanner on: '#++'. + node := scanner next. + self + assert: node isLiteral; + assert: node value = #'++'; + assert: node source = '#++'. + scanner on: '#//'. + node := scanner next. + self + assert: node isLiteral; + assert: node value = #'//'; + assert: node source = '#//'. + scanner on: '#--'. + node := scanner next. + self + assert: node isLiteral; + assert: node value = #'--'; + assert: node source = '#--'. + scanner on: '#+-'. + node := scanner next. + self + assert: node isLiteral; + assert: node value = #'+-'; + assert: node source = '#+-' +] + +{ #category : #strings } +SmalltalkScannerTest >> testEmpty [ + scanner on: ''. + self assert: scanner next isEnd +] + +{ #category : #numbers } +SmalltalkScannerTest >> testFloat [ + | node | + scanner on: '0.1'. + node := scanner next. + self + assert: node isNumberNode; + assert: node value = 0.1; + assert: node source = '0.1'. + scanner on: '0.1e-35'. + node := scanner next. 
+ self + assert: node isNumberNode; + assert: node value = 0.1e-35; + assert: node source = '0.1e-35' +] + +{ #category : #identifiers } +SmalltalkScannerTest >> testIdentifier [ + | node | + scanner on: 'a'. + node := scanner next. + self + assert: node isNameToken; + assert: node value = 'a'; + assert: node source = 'a'. + scanner on: ' a1'. + node := scanner next. + self + assert: node isNameToken; + assert: node value = 'a1'; + assert: node source = 'a1'. + scanner on: '_a'. + node := scanner next. + self + assert: node isNameToken; + assert: node value = '_a'; + assert: node source = '_a'. + scanner on: 'a_1b'. + node := scanner next. + self + assert: node isNameToken; + assert: node value = 'a_1b'; + assert: node source = 'a_1b' +] + +{ #category : #numbers } +SmalltalkScannerTest >> testInteger [ + | node | + scanner on: '0 12 -35'. + node := scanner next. + self + assert: node isNumberNode; + assert: node value = 0; + assert: node source = '0'. + node := scanner next. + self + assert: node isNumberNode; + assert: node value = 12; + assert: node source = '12'. + node := scanner next. + self + deny: node isNumberNode; + assert: (node is: #'-'); + assert: node source = '-'. + node := scanner next. + self + assert: node isNumberNode; + assert: node value = 35; + assert: node source = '35' +] + +{ #category : #numbers } +SmalltalkScannerTest >> testIntegerRadix [ + | node | + scanner on: '16rFA2'. + node := scanner next. + self + assert: node isNumberNode; + assert: node value = 16rFA2; + assert: node source = '16rFA2' +] + +{ #category : #identifiers } +SmalltalkScannerTest >> testKeyword [ + | node node2 | + scanner on: 'a:'. + node := scanner next. + self + assert: node isKeyword; + assert: node value = 'a:'; + assert: node source = 'a:'. + scanner on: 'ab:cd:'. + node := scanner next. + node2 := scanner next. 
+ self + assert: node isKeyword; + assert: node value = 'ab:'; + assert: node source = 'ab:'; + assert: node2 isKeyword; + assert: node2 value = 'cd:'; + assert: node2 source = 'cd:' +] + +{ #category : #symbols } +SmalltalkScannerTest >> testNumericSymbol [ + scanner on: '#35'. + self assert: scanner next value = '35' asSymbol +] + +{ #category : #strings } +SmalltalkScannerTest >> testString [ + | node quote | + scanner on: '' storeString. + node := scanner next. + self + assert: node isLiteral; + assert: node value = ''; + assert: node source = (String with: $' with: $'). + scanner on: 'Hello World!' storeString. + node := scanner next. + self + assert: node isLiteral; + assert: node value = 'Hello World!'; + assert: node source = 'Hello World!' storeString. + quote := String with: $'. + scanner on: quote storeString. + node := scanner next. + self + assert: node isLiteral; + assert: node value = quote; + assert: node source = quote storeString +] + +{ #category : #symbols } +SmalltalkScannerTest >> testSymbol [ + | node | + scanner on: '#-'. + node := scanner next. + self + assert: node isLiteral; + assert: node value = #'-'; + assert: node source = '#-'. + scanner on: '#a:'. + node := scanner next. + self + assert: node isLiteral; + assert: node value = #a:; + assert: node source = '#a:'. + scanner on: '#-!'. + node := scanner next. + self + assert: node isLiteral; + assert: node value = #'-!'; + assert: node source = '#-!'. + scanner on: '#a:b:'. + node := scanner next. + self + assert: node isLiteral; + assert: node value = #a:b:; + assert: node source = '#a:b:'. + scanner on: '#a:b'. + node := scanner next. + self + assert: node isLiteral; + assert: node value = #a:; + assert: node source = '#a:'. + node := scanner next. 
+ self assert: node value = 'b'; assert: node source = 'b' +] + +{ #category : #unicode } +SmalltalkScannerTest >> testUnicodeScanning [ + "#knownIssue: depends on Character class>>value: path that resolves UnicodeLibrary, + which is not yet defined in the egg Kernel. Re-enable once UnicodeLibrary is ported." + | next three tensor | + #knownIssue. + self assert: false. + scanner on: '3 + 4' asWideString. + self assert: scanner next value = 3. + next := scanner next. + self + assert: next value == #'+'; + assert: scanner next value = 4. + scanner on: '3' asWideString , 8855 asCharacter asString , '4' asWideString. + three := scanner next. + self assert: three value = 3. + tensor := scanner next. + self assert: tensor value = 8855 asCharacter asString asSymbol +] diff --git a/modules/Compiler/Tests/TestsModule.st b/modules/Compiler/Tests/TestsModule.st new file mode 100644 index 00000000..9d7ac0d2 --- /dev/null +++ b/modules/Compiler/Tests/TestsModule.st @@ -0,0 +1,35 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #TestsModule, + #superclass : #Module, + #category : #'Compiler.Tests' +} + +{ #category : #spec } +TestsModule >> imports [ + ^{ + #Kernel -> #(Array Error Exception OrderedCollection UndefinedObject WideString WideSymbol). + #Compiler -> #(SCompiler SCompilationError SMethodNode SSelectorNode SMessageNode SStringToken InlinedArgEnvironment MethodScope). + #SUnit -> #(TestCase TestResult TestSuite) + } +] + +{ #category : #main } +TestsModule >> main: anArray [ + | suite result | + Kernel log: 'Running Compiler tests... +'. + suite := TestSuite forModule: self. + Kernel log: 'suite built with ', suite tests size printString, ' tests +'. + result := suite run. + Kernel log: 'Done. +'. + Kernel log: result printString. 
+ Kernel log: ' +' +] diff --git a/modules/EPM/Config.st b/modules/EPM/Config.st new file mode 100644 index 00000000..472fdb80 --- /dev/null +++ b/modules/EPM/Config.st @@ -0,0 +1,128 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #Config, + #superclass : #Object, + #instVars : [ + #project, + #user, + #merged + ], + #category : #EPM +} + +{#category : #'instance creation'} +Config class >> load [ + ^self new load +] + +{#category : #accessing} +Config >> at: aString [ + ^merged at: aString ifAbsent: [nil] +] + +{#category : #accessing} +Config >> at: aString ifAbsent: aBlock [ + ^merged at: aString ifAbsent: aBlock +] + +{#category : #accessing} +Config >> dependencies [ + ^self at: 'dependencies' ifAbsent: [OrderedDictionary new] +] + +{#category : #loading} +Config >> load [ + self loadUser. + self loadProject. + self merge +] + +{#category : #loading} +Config >> loadProject [ + | cwd path contents | + cwd := Kernel currentDirectory. + path := cwd, '/epm.toml'. + (Kernel pathExists: path) + ifTrue: [ + contents := Kernel readFile: path. + project := TOMLParser parse: contents] + ifFalse: [project := OrderedDictionary new] +] + +{#category : #loading} +Config >> loadUser [ + | home path contents | "Parses .egg/config.toml under the home directory into user; user is an empty dictionary when there is no home directory or no such file." + home := Kernel host homeDirectory. "USERPROFILE on Windows, HOME elsewhere; the same lookup the default search paths use" + home ifNil: [user := OrderedDictionary new. ^self]. + path := home, '/.egg/config.toml'. + (Kernel pathExists: path) + ifTrue: [ + contents := Kernel readFile: path. + user := TOMLParser parse: contents] + ifFalse: [user := OrderedDictionary new] +] + +{#category : #private} +Config >> merge [ + merged := OrderedDictionary new. + self mergeFrom: user into: merged. + self mergeFrom: project into: merged "project is merged last, so its values replace user values for the same key" +] + +{#category : #private} +Config >> mergeFrom: aSource into: aTarget [ + aSource keysAndValuesDo: [:k :v | + | existing | + existing := aTarget at: k ifAbsent: [nil]. 
+ (v isKindOf: OrderedDictionary) + ifTrue: [ + (existing isKindOf: OrderedDictionary) + ifTrue: [self mergeFrom: v into: existing] + ifFalse: [ + | copy | + copy := OrderedDictionary new. + self mergeFrom: v into: copy. + aTarget at: k put: copy]] + ifFalse: [aTarget at: k put: v]] +] + +{#category : #accessing} +Config >> modulePaths [ + | paths section | + paths := self at: 'paths'. + paths ifNil: [^#()]. + section := paths at: 'modules' ifAbsent: [nil]. + section ifNil: [^#()]. + ^section +] + +{#category : #accessing} +Config >> name [ + | section | + section := self at: 'project'. + section ifNil: [^nil]. + ^section at: 'name' ifAbsent: [nil] +] + +{#category : #accessing} +Config >> project [ + ^project +] + +{#category : #accessing} +Config >> user [ + ^user +] + +{#category : #accessing} +Config >> version [ + | section | + section := self at: 'project'. + section ifNil: [^nil]. + ^section at: 'version' ifAbsent: [nil] +] + diff --git a/modules/EPM/EPMModule.st b/modules/EPM/EPMModule.st index 56c70381..5f85514c 100644 --- a/modules/EPM/EPMModule.st +++ b/modules/EPM/EPMModule.st @@ -1,81 +1,179 @@ " - Copyright (c) 2022, Javier Pimás. + Copyright (c) 2026, Javier Pimás. See (MIT) license in root directory. " Class { - #name : #EPMModule, - #superclass : #Module, + #name : #EPMModule, + #superclass : #Module, #instVars : [ - 'args', - 'dir', - 'projectName' + #args, + #parser, + #config ], - #category : #'Powerlang-Core-LMR' + #category : #EPM } -{ #category : #spec } -EPMModule >> imports [ - ^{ - #Kernel -> #(Error ByteArray Module NumberParser Species OrderedDictionary). - #FileSystem. - #ArgParser -> ArgParser. - } +{#category : #private} +EPMModule >> addCommand: aString description: aDescription action: aSymbol [ + | command | + command := Command new + name: aString; + description: aDescription. + command action: [:result | self perform: aSymbol with: result]. + parser addCommand: command. 
+ ^command ] -{ #category : #initializing } -EPMModule >> main: aCollection [ - args := aCollection - parser := ArgParser for: self. - parser evaluate +{#category : #commands} +EPMModule >> commandDev: aResult [ + | cwd name dev module | + cwd := Kernel currentDirectory. + name := cwd copyAfterLast: $/. + dev := Kernel load: #Development. + module := Kernel load: name asSymbol. + dev main: args ] -{ #category : #private } -EPMModule >> createModuleClass [ - | classname filename | - classname := projectName, 'Module'. - filename := dir / classname , '.st'. - filename writeStreamDo: [:ws | - ws - cr; nextPutAll: 'Class {'; crtab; - nextPutAll: '#name : #Module'; crtab; - nextPutAll: '#superclass : #', supername; crtab; - nextPutAll: '#category : #', projectName; cr; - nextPut: $}; cr; cr; - nextPutAll: '{ #category: #main }'; cr; - nextPutAll: classname; nextPutAll: ' >> main: args ['; cr; - nextPut: $]; cr; cr - ] +{#category : #commands} +EPMModule >> commandInit: aResult [ + | cwd name generator | + cwd := Kernel currentDirectory. + name := cwd copyAfterLast: $/. + name isEmpty ifTrue: [name := 'myproject']. + generator := ProjectGenerator new. + generator name: name. + generator dir: cwd. + generator generateToml. + Kernel log: 'Initialized epm.toml for: ', name ] -{ #category : #commands } -EPMModule >> dev [ - | dev module | - dir := '.' asFilename asDirectory. - projectName := dir parent name. - dev := Kernel load: #Development. - module := Kernel load: projectName. - dev main: args +{#category : #commands} +EPMModule >> commandInstall: aResult [ + | dependencies | + config ifNil: [self loadConfig]. + dependencies := config dependencies. + dependencies isEmpty ifTrue: [ + Kernel log: 'No dependencies to install'. + ^self]. + Kernel log: 'Installing dependencies...'. + dependencies keysAndValuesDo: [:name :constraint | + Kernel log: ' ', name, ' (', constraint asString, ')']. 
+ Kernel log: 'Done' +] + +{#category : #commands} +EPMModule >> commandList: aResult [ + | dependencies | "Logs the project name followed by one line per dependency and its constraint; logs 'No dependencies' when there are none." + config ifNil: [self loadConfig]. + dependencies := config dependencies. + dependencies isEmpty ifTrue: [ + Kernel log: 'No dependencies'. + ^self]. + Kernel log: (config name ifNil: ['(unnamed project)']). "parenthesized: without it this parses as the keyword message #log:ifNil: sent to Kernel" + dependencies keysAndValuesDo: [:name :constraint | + Kernel log: ' ', name, ' ', constraint asString] ] -{ #category : #commands } -EPMModule >> new: aString [ - projectName := aString. - dir := projectName asFilename asDirectory. - dir exists - ifTrue: [ - dir isEmpty ifTrue: [ ^self error: 'the directory is not empty']] - ifFalse: [ dir create ]. - - self createModuleClass +{#category : #commands} +EPMModule >> commandNew: aResult [ + | name generator | + aResult positionals isEmpty ifTrue: [ + ^self error: 'Usage: epm new ']. + name := aResult positionalAt: 1. + generator := ProjectGenerator new. + generator name: name. + generator generate. + Kernel log: 'Created project: ', name ] -{ #category : #commands } -EPMModule >> start [ - | module | - dir := '.' asFilename asDirectory. - projectName := dir parent name. - module := Kernel load: projectName. +{#category : #commands} +EPMModule >> commandStart: aResult [ + | cwd name module | + cwd := Kernel currentDirectory. + name := cwd copyAfterLast: $/. + module := Kernel load: name asSymbol. module main: args ] +{#category : #commands} +EPMModule >> commandTest: aResult [ + | name module positionals | + positionals := aResult positionals. + name := positionals isEmpty + ifTrue: [self projectName] + ifFalse: [positionals first]. + name ifNil: [ + ^self error: 'Usage: epm test [module-name]']. + module := Kernel load: (name, '.Tests') asSymbol. + (aResult optionAt: 'debug') == true + ifTrue: [self debugTestModule: module] + ifFalse: [module main: args] +] + +{#category : #commands} +EPMModule >> debugTestModule: aModule [ + Kernel log: 'Running ', aModule name, ' tests in debug mode...'; log: String cr. 
+ (TestSuite forModule: aModule) runDebug. + Kernel log: 'Done.'; log: String cr +] + +{#category : #spec} +EPMModule >> imports [ + ^{ + #Kernel -> #(Error OrderedDictionary). + #ArgParser -> #(ArgParser Command Option). + #SUnit -> #(TestSuite). + #TOML -> #(TOMLParser TOMLWriter). + } +] + +{#category : #private} +EPMModule >> loadConfig [ + | host | + config := Config load. + host := Kernel host. + config modulePaths do: [:path | + host prependSearchPath: path type: #tonel. + host prependSearchPath: path type: #ems] +] + +{#category : #main} +EPMModule >> main: aCollection [ + | params | + args := aCollection. + self loadConfig. + self setupParser. + params := OrderedCollection new. + 3 to: args size do: [:i | + | arg | + arg := args at: i. + params add: arg]. + parser parse: params asArray +] + +{#category : #accessing} +EPMModule >> projectName [ + ^config ifNotNil: [config name] +] + +{#category : #private} +EPMModule >> setupParser [ + | create | "Builds the epm argument parser and registers one command per commandXxx: method." + parser := ArgParser new name: 'epm'; description: 'Egg Package Manager'; yourself. "yourself: keep the parser itself rather than whatever description: answers" + create := self addCommand: 'new' description: 'Create a new project' action: #commandNew:. + create addPositional: 'name'. + self addCommand: 'init' description: 'Initialize epm.toml in current directory' action: #commandInit:. + self addCommand: 'start' description: 'Run current project' action: #commandStart:. + self addCommand: 'dev' description: 'Start development environment for current project' action: #commandDev:. + self addCommand: 'install' description: 'Install dependencies from epm.toml' action: #commandInstall:. + self addCommand: 'list' description: 'List project dependencies' action: #commandList:. 
+ ((self addCommand: 'test' description: 'Run tests for a module' action: #commandTest:) + addPositional: 'module'; + addOption: (Option new + long: '--debug'; + description: 'Run tests without swallowing exceptions'; + beFlag; + yourself)) +] + diff --git a/modules/EPM/ProjectGenerator.st b/modules/EPM/ProjectGenerator.st new file mode 100644 index 00000000..61cbd01a --- /dev/null +++ b/modules/EPM/ProjectGenerator.st @@ -0,0 +1,90 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #ProjectGenerator, + #superclass : #Object, + #instVars : [ + #name, + #dir + ], + #category : #EPM +} + +{#category : #accessing} +ProjectGenerator >> dir: aString [ + dir := aString +] + +{#category : #private} +ProjectGenerator >> ensureDir [ + dir ifNil: [ + dir := Kernel currentDirectory, '/', name]. + Kernel createDirectory: dir +] + +{#category : #generating} +ProjectGenerator >> generate [ + | modules module | + self ensureDir. + modules := dir, '/modules'. + module := modules, '/', self moduleName. + Kernel createDirectory: modules. + Kernel createDirectory: module. + self generateToml. + self generateModule: module +] + +{#category : #generating} +ProjectGenerator >> generateModule: aString [ + | filename contents module | + module := self moduleName. + filename := aString, '/', module, 'Module.st'. + contents := '" + See (MIT) license in root directory. +" + +Class { + #name : #', module, 'Module, + #superclass : #Module, + #instVars : [], + #category : #', module, ' +} + +{ #category : #main } +', module, 'Module >> main: args [ + Kernel log: ''Hello from ', name, ''' +] +'. + Kernel writeFile: filename contents: contents +] + +{#category : #generating} +ProjectGenerator >> generateToml [ + | filename contents | + self ensureDir. + filename := dir, '/epm.toml'. + contents := '[project] +name = "', name, '" +version = "0.1.0" +description = "" + +[dependencies] +'. 
+ Kernel writeFile: filename contents: contents +] + +{#category : #accessing} +ProjectGenerator >> moduleName [ + | first | + first := name first asUppercase. + ^(String with: first), (name copyFrom: 2) +] + +{#category : #accessing} +ProjectGenerator >> name: aString [ + name := aString +] + diff --git a/modules/Kernel/BareTests/BareTestsModule.st b/modules/Kernel/BareTests/BareTestsModule.st index aacf0cce..59f1aab6 100644 --- a/modules/Kernel/BareTests/BareTestsModule.st +++ b/modules/Kernel/BareTests/BareTestsModule.st @@ -298,10 +298,147 @@ BareTestsModule >> test180FallbackToLargeNegativeInteger [ ^result = '-18446744073709551616' ] +{ #category : #tests } +BareTestsModule >> test182BitShiftRightFullWidth [ + "SmallInteger>>bitShift: with negative shift >= word width must yield 0 + (or -1 for negative receivers), not undefined behaviour from C++." + ^(1 bitShift: -64) == 0 + and: [(1 bitShift: -100) == 0 + and: [(-1 bitShift: -64) == -1 + and: [(16rFFFFFFFFFFFFFFFF + 1) = 16r10000000000000000]]] +] + +{ #category : #tests } +BareTestsModule >> test183FloatTimesTwoPower [ + ^(1.0 timesTwoPower: 0) = 1.0 + and: [(1.0 timesTwoPower: 10) = 1024.0 + and: [(1.0 timesTwoPower: -10) = (1.0 / 1024.0) + and: [(1.0 timesTwoPower: -107) > 0.0 + and: [(1 / 10000000000000000) asFloat > 0.0]]]] +] + { #category : #tests } BareTestsModule >> test181FallbackToLargePositiveInteger [ | result | result := 1. 18 timesRepeat: [result := result * 10]. 
^result printString = '1000000000000000000' +] + +{ #category : #tests } +BareTestsModule >> test200BitAndSmallIntegers [ + ^(16rAAAA bitAnd: 16r00FF) == 16r00AA +] + +{ #category : #tests } +BareTestsModule >> test201BitAndSmallIntWithLargeInt [ + ^(16rAA bitAnd: 16rFFFFFFFFFFFFFFFF) == 16rAA +] + +{ #category : #tests } +BareTestsModule >> test202BitAndLargeIntWithSmallInt [ + ^(16rFFFFFFFFFFFFFFFF bitAnd: 16rAA) == 16rAA +] + +{ #category : #tests } +BareTestsModule >> test210BitOrSmallIntegers [ + ^(16rAA00 bitOr: 16r00AA) == 16rAAAA +] + +{ #category : #tests } +BareTestsModule >> test211BitOrSmallIntWithLargeInt [ + ^(16rAA bitOr: 16rAA00000000) = 16rAA000000AA +] + +{ #category : #tests } +BareTestsModule >> test212BitOrLargeIntWithSmallInt [ + ^(16rAA00000000 bitOr: 16rAA) = 16rAA000000AA +] + +{ #category : #tests } +BareTestsModule >> test220BitXorSmallIntegers [ + ^(16rAAAA bitXor: 16r00FF) == 16rAA55 +] + +{ #category : #tests } +BareTestsModule >> test221BitXorSmallIntWithLargeInt [ + ^(16rAA bitXor: 16rAA000000AA) = 16rAA00000000 +] + +{ #category : #tests } +BareTestsModule >> test222BitXorLargeIntWithSmallInt [ + ^(16rAA000000AA bitXor: 16rAA) = 16rAA00000000 +] + +{ #category : #tests } +BareTestsModule >> test230BitShiftLeftOverflow [ + ^(1 bitShift: 64) = 16r10000000000000000 +] + +{ #category : #tests } +BareTestsModule >> test231BitShiftRightNegativeArg [ + ^(16rAA00 bitShift: -8) == 16rAA +] + +{ #category : #tests } +BareTestsModule >> test232BitShiftRightSelector [ + ^(16rAA00 bitShiftRight: 8) == 16rAA +] + +{ #category : #main } +BareTestsModule >> runTest: selector [ + | result | + result := self perform: selector. + result == true + ifTrue: [Kernel log: ' PASS: ', selector, ' +'] + ifFalse: [Kernel log: ' FAIL: ', selector, ' returned: ', result printString, ' +']. + ^result == true +] + +{ #category : #main } +BareTestsModule >> main: anArray [ + | passed failed tests | + Kernel log: 'Running Kernel.BareTests... +'. + passed := 0. 
failed := 0. + tests := #( + test001SendYourself test010SendEqualsEquals test020SendPlus + test025SendQuotient test0 + test030UseTemporary test031SendIfFalseToTrue test031UseManyTemporaries + test032SendIfTrueToTrue + test040SendIfNotNil test040UseArg + test041SendIfNotNilWithArgs test041UseArgsInOrder + test042SendIfNotNilWithNonEvaluableArg test042IntegerOperations + test043HashMultiply test049HashSymbol + test100EvaluateClosure test100SendWhileTrue + test110SendToDo test120SendTimesRepeat test121SendTimesRepeatInsideBlock + test130EvaluateClosureWithArgs test131EvaluateClosureWithArgsAndTemps + test132EvaluateClosureWithArgsAndMethodTemps + test133EvaluateClosureWithArgsAndMultipleTemps + test140EvaluateClosureNonLocalReturn + test150EvaluateNestedClosure test151EvaluateNestedClosureWithArgs + test152EvaluateNestedClosureWithTemps + test153EvaluateInlinedClosureInClosureWithMethodTemps + test160ConcatenateStrings test161CreateDictionary + test162AddAssociationToDictionary + test170SmallIntegerPrintString test171LargeIntegerPrintString + test172NegativeIntegerPrintString + test180FallbackToLargeNegativeInteger test181FallbackToLargePositiveInteger + test182BitShiftRightFullWidth test183FloatTimesTwoPower + test200BitAndSmallIntegers test201BitAndSmallIntWithLargeInt + test202BitAndLargeIntWithSmallInt + test210BitOrSmallIntegers test211BitOrSmallIntWithLargeInt + test212BitOrLargeIntWithSmallInt + test220BitXorSmallIntegers test221BitXorSmallIntWithLargeInt + test222BitXorLargeIntWithSmallInt + test230BitShiftLeftOverflow test231BitShiftRightNegativeArg + test232BitShiftRightSelector). + tests do: [:sel | + (self runTest: sel) + ifTrue: [passed := passed + 1] + ifFalse: [failed := failed + 1]]. 
+ Kernel log: tests size printString, ' run, ', passed printString, ' passed, ', failed printString, ' failed +' ] \ No newline at end of file diff --git a/modules/Kernel/BareTests/README.md b/modules/Kernel/BareTests/README.md new file mode 100644 index 00000000..4c2368d5 --- /dev/null +++ b/modules/Kernel/BareTests/README.md @@ -0,0 +1,22 @@ +# Kernel.BareTests + +Lightweight kernel tests that run without SUnit. Each test method returns `true` +on success. + +## Running + +```bash +cd runtime/cpp/build/Darwin-arm64-Debug # on macOS +./egg Kernel.BareTests +``` + +## Expected output + +``` +Running Kernel.BareTests... + PASS: test001SendYourself + ... + PASS: test232BitShiftRightSelector +X run, X passed, 0 failed +``` + diff --git a/modules/Kernel/HostSystem.st b/modules/Kernel/HostSystem.st index 7a61e24c..3c94a35e 100644 --- a/modules/Kernel/HostSystem.st +++ b/modules/Kernel/HostSystem.st @@ -6,9 +6,27 @@ Class { #name : #HostSystem, #superclass : #Object, + #instVars : [ + 'searchPaths' + ], #category : #Kernel } +{ #category : #'search paths' } +HostSystem >> addSearchPath: aString type: aSymbol [ + "Append a search path. aSymbol is #ems or #tonel." 
+ self searchPaths add: aString -> aSymbol +] + +{ #category : #services } +HostSystem >> createDirectory: aString [ + +] + +{ #category : #services } +HostSystem >> currentDirectory [ + +] HostSystem >> currentMilliseconds [ @@ -23,20 +41,51 @@ HostSystem >> fixOverrides [ ] +{ #category : #services } +HostSystem >> getEnv: aString [ + +] + +{ #category : #'search paths' } +HostSystem >> homeDirectory [ + ^self isWindows + ifTrue: [self getEnv: 'USERPROFILE'] + ifFalse: [self getEnv: 'HOME'] +] + { #category : #bootstrapping } HostSystem >> initializeFFI: anExternalLibrary symbolFinder: anFFIMethod [ ] -{ #category : #loading } -HostSystem >> primitiveLoad: aSymbol [ - +{ #category : #'search paths' } +HostSystem >> isWindows [ + ^(self platformName indexOfSubcollection: 'win32') > 0 ] { #category : #loading } HostSystem >> load: aSymbol [ + "Search configured paths for a module named aSymbol. Each search path is + an association `path -> type` where type is #ems (binary segment file) or + #tonel (sources directory)." + | relative | + relative := self pathFor: aSymbol. + searchPaths ifNil: [^nil]. + searchPaths do: [:assoc | | base type candidate | + base := assoc key. + type := assoc value. + candidate := type = #ems + ifTrue: [base , '/' , relative , '.ems'] + ifFalse: [base , '/' , relative]. + (self pathExists: candidate) ifTrue: [ + ^self loadModuleFromPath: candidate]]. + ^nil +] + +{ #category : #loading } +HostSystem >> loadModuleFromPath: aString [ | module | - module := self primitiveLoad: aSymbol. + module := self primitiveLoadModuleFromPath: aString. 
^module justLoaded ] @@ -46,13 +95,18 @@ HostSystem >> log: aString level: anInteger [ ] { #category : #logging } -HostSystem >> logTrace: aString [ - self log: aString level: 0 +HostSystem >> logDebug: aString [ + self log: aString level: 1 ] { #category : #logging } -HostSystem >> logDebug: aString [ - self log: aString level: 1 +HostSystem >> logError: aString [ + self log: aString level: 4 +] + +{ #category : #logging } +HostSystem >> logFatal: aString [ + self log: aString level: 5 ] { #category : #logging } @@ -60,32 +114,96 @@ HostSystem >> logInfo: aString [ self log: aString level: 2 ] +{ #category : #logging } +HostSystem >> logTrace: aString [ + self log: aString level: 0 +] + { #category : #logging } HostSystem >> logWarning: aString [ self log: aString level: 3 ] -{ #category : #logging } -HostSystem >> logError: aString [ - self log: aString level: 4 +{ #category : #services } +HostSystem >> pathExists: aString [ + ] -{ #category : #logging } -HostSystem >> logFatal: aString [ - self log: aString level: 5 +{ #category : #loading } +HostSystem >> pathFor: aSymbol [ + ^aSymbol asString replaceAll: '.' with: '/' +] +{ #category : #'search paths' } +HostSystem >> pathSeparator [ + "Separator used by the host OS in PATH-like environment variables." + ^self isWindows ifTrue: [$;] ifFalse: [$:] ] + { #category : #loading } HostSystem >> platformName [ ] +{ #category : #'search paths' } +HostSystem >> prependSearchPath: aString type: aSymbol [ + self searchPaths addFirst: aString -> aSymbol +] + +{ #category : #loading } +HostSystem >> primitiveLoad: aSymbol [ + +] + +{ #category : #loading } +HostSystem >> primitiveLoadModuleFromPath: aString [ + +] + +{ #category : #services } +HostSystem >> readFile: filename [ + +] + +{ #category : #'search paths' } +HostSystem >> searchPaths [ + "Ordered collection of associations `path -> type` (#ems or #tonel)." 
+ ^searchPaths ifNil: [searchPaths := OrderedCollection new] +] + +{ #category : #'search paths' } +HostSystem >> setupDefaultSearchPaths [ + "Configure default search paths. EGG_MODULES_PATH (paths separated by the + platform-native PATH separator) supplies extra source directories. We also + walk a few levels up from the current directory so the runtime works both + from the project root and from a build subdirectory." + | env home cwd prefixes | + env := self getEnv: 'EGG_MODULES_PATH'. + env ifNotNil: [ + (env substringsSplitBy: self pathSeparator) do: [:p | + self addSearchPath: p type: #tonel. + self addSearchPath: p type: #ems]]. + cwd := self currentDirectory. + prefixes := #('' '../' '../../' '../../../' '../../../../'). + prefixes do: [:p | | base | + base := cwd , '/' , p , 'modules'. + (self pathExists: base) ifTrue: [ + self addSearchPath: base type: #tonel. + self addSearchPath: base type: #ems]]. + home := self homeDirectory. + home ifNotNil: [ + self addSearchPath: home , '/.egg/cache/modules' type: #ems]. + self isWindows ifFalse: [ + self addSearchPath: '/usr/local/share/egg/modules' type: #tonel. + self addSearchPath: '/usr/local/share/egg/modules' type: #ems] +] + { #category : #private } HostSystem >> suspended: aProcess because: anException [ ] { #category : #services } -HostSystem >> readFile: filename [ - +HostSystem >> writeFile: aFilename contents: aString [ + ] diff --git a/modules/Kernel/InternalReadStream.st b/modules/Kernel/InternalReadStream.st index 9a652963..7957256b 100644 --- a/modules/Kernel/InternalReadStream.st +++ b/modules/Kernel/InternalReadStream.st @@ -199,7 +199,10 @@ InternalReadStream >> peek [ { #category : #peeking } InternalReadStream >> peekFor: token [ - ^self peek = token ifTrue: [position := position + 1]; yourself + | found | + found := self peek = token. + found ifTrue: [position := position + 1]. 
+ ^found ] { #category : #accessing } diff --git a/modules/Kernel/KernelModule.st b/modules/Kernel/KernelModule.st index 59654c54..88cc1b1e 100644 --- a/modules/Kernel/KernelModule.st +++ b/modules/Kernel/KernelModule.st @@ -162,6 +162,36 @@ KernelModule >> readFile: filename [ ^host readFile: filename ] +{ #category : #services } +KernelModule >> writeFile: aFilename contents: aString [ + ^host writeFile: aFilename contents: aString +] + +{ #category : #services } +KernelModule >> createDirectory: aString [ + ^host createDirectory: aString +] + +{ #category : #services } +KernelModule >> pathExists: aString [ + ^host pathExists: aString +] + +{ #category : #services } +KernelModule >> currentDirectory [ + ^host currentDirectory +] + +{ #category : #services } +KernelModule >> getEnv: aString [ + ^host getEnv: aString +] + +{ #category : #services } +KernelModule >> loadModuleFromPath: aString [ + ^host loadModuleFromPath: aString +] + { #category : #services } KernelModule >> load: aSymbol [ | filename | @@ -254,6 +284,7 @@ KernelModule >> unhandledExceptionHandler: aClosure [ { #category : #loading } KernelModule >> useHostModuleLoader [ + host setupDefaultSearchPaths. self addModuleLoader: host. 
] diff --git a/modules/Kernel/Object.st b/modules/Kernel/Object.st index 708ae04c..6c4ddb0f 100644 --- a/modules/Kernel/Object.st +++ b/modules/Kernel/Object.st @@ -200,7 +200,7 @@ Object >> disableCode: aBlock [ { #category : #private } Object >> ensurePropertyTable [ - ^Smalltalk ensurePropertyTableFor: self + ^Kernel ensurePropertyTableFor: self ] { #category : #evaluating } @@ -782,7 +782,7 @@ Object >> propertyAt: aSymbol put: anObject [ { #category : #private } Object >> propertyTable [ - ^(Smalltalk propertiesFor: self) ifNil: [IdentityDictionary new] + ^(Kernel propertiesFor: self) ifNil: [IdentityDictionary new] ] { #category : #'event handling' } @@ -867,7 +867,7 @@ Object >> removeProperty: aSymbol [ { #category : #'accessing-properties' } Object >> removeProperty: aSymbol ifAbsent: aBlock [ | t | - t := Smalltalk propertiesFor: self. + t := Kernel propertiesFor: self. t notNil ifTrue: [t removeKey: aSymbol ifAbsent: aBlock] ] diff --git a/modules/Kernel/ProtoObject.st b/modules/Kernel/ProtoObject.st index 81c9282f..d32b96cf 100644 --- a/modules/Kernel/ProtoObject.st +++ b/modules/Kernel/ProtoObject.st @@ -109,6 +109,17 @@ ProtoObject >> _size [ ^self errorVMSpecific ] +{ #category : #'raw access' } +ProtoObject >> _uLongAtOffset: offset [ + ^self _primitiveULongAtOffset: offset +] + +{ #category : #'raw access' } +ProtoObject >> _uLongAtValidOffset: offset put: anInteger [ + self _primitiveULongAtOffset: offset put: anInteger. + ^anInteger +] + { #category : #comparing } ProtoObject >> ~~ anObject [ ^self == anObject ifTrue: [false] ifFalse: [true] diff --git a/modules/Kernel/ReadStream.st b/modules/Kernel/ReadStream.st index a13f7309..907a1e25 100644 --- a/modules/Kernel/ReadStream.st +++ b/modules/Kernel/ReadStream.st @@ -339,16 +339,19 @@ ReadStream >> peek: n [ { #category : #peeking } ReadStream >> peekFor: token [ - ^self peek = token ifTrue: [self skip]; yourself + | found | + found := self peek = token. + found ifTrue: [self skip]. 
+ ^found ] { #category : #peeking } ReadStream >> peekForAll: aCollection [ - | n | + | n found | n := aCollection size. - ^((self peek: n) equalsTo: aCollection) - ifTrue: [self skip: n]; - yourself + found := (self peek: n) equalsTo: aCollection. + found ifTrue: [self skip: n]. + ^found ] { #category : #'reverse reading' } @@ -532,7 +535,7 @@ ReadStream >> upToClosing: open [ " '|things||some|-}blah' readStream next; upToClosing: $| " - | start end result | + | start end result opens | start := self position. [ self upTo: open. @@ -540,7 +543,9 @@ ReadStream >> upToClosing: open [ self position: start. ^nil]. self skip: 1. - self peek = open ifTrue: [self next]; yourself] whileTrue. + opens := self peek = open. + opens ifTrue: [self next]. + opens] whileTrue. end := self position. self position: start. result := self next: end - start - 1. diff --git a/modules/Kernel/VM/SmallInteger.st b/modules/Kernel/VM/SmallInteger.st index af13fd41..d1c9733b 100644 --- a/modules/Kernel/VM/SmallInteger.st +++ b/modules/Kernel/VM/SmallInteger.st @@ -8,6 +8,7 @@ Extension { #name : #SmallInteger } { #category : '*Primitives' } SmallInteger >> - aNumber [ + aNumber _isSmallInteger ifTrue: [^self asLargeInteger - aNumber]. ^(aNumber - self) negated ] @@ -20,6 +21,7 @@ SmallInteger >> != aNumber [ { #category : '*Primitives' } SmallInteger >> * aNumber [ + aNumber _isSmallInteger ifTrue: [^self asLargeInteger * aNumber]. ^aNumber * self ] @@ -36,6 +38,7 @@ SmallInteger >> \\ aNumber [ { #category : '*Primitives' } SmallInteger >> + aNumber [ + aNumber _isSmallInteger ifTrue: [^self asLargeInteger + aNumber]. ^aNumber + self ] @@ -59,22 +62,37 @@ SmallInteger >> >= aNumber [ { #category : '*Primitives' } SmallInteger >> bitAnd: anInteger [ + | sum | + anInteger isInteger ifFalse: [^anInteger bitAnd: self]. + (self < 0 and: [anInteger < 0]) ifTrue: [^anInteger bitAnd: self]. + sum := 0. + WordSize to: 1 by: -1 do: [:i | | ai si | + ai := anInteger at: i. + si := self at: i. 
+ sum := (sum bitShift: 8) + (si bitAnd: ai)]. + ^sum ] { #category : '*Primitives' } SmallInteger >> bitOr: anInteger [ + ^anInteger bitOr: self ] { #category : '*Primitives' } SmallInteger >> bitXor: anInteger [ + ^anInteger bitXor: self ] { #category : '*Primitives' } SmallInteger >> bitShift: anInteger [ + anInteger = 0 ifTrue: [^self]. + ^anInteger > 0 + ifTrue: [self bitShiftLeft: anInteger] + ifFalse: [self bitShiftRight: 0 - anInteger] ] { #category : '*Primitives' } diff --git a/modules/Kernel/WideSymbol.st b/modules/Kernel/WideSymbol.st index f3c9e290..e82b9865 100644 --- a/modules/Kernel/WideSymbol.st +++ b/modules/Kernel/WideSymbol.st @@ -22,7 +22,7 @@ WideSymbol class >> findInterned: aString [ { #category : #accessing } WideSymbol class >> intern: aString [ | symbol canonical | - self isByteCompliant ifTrue: [^self reduced asSymbol]. + aString isByteCompliant ifTrue: [^aString reduced asSymbol]. symbol := aString bytes changeClassTo: self. canonical := SymbolTable lookup: symbol. canonical == nil ifTrue: [^SymbolTable add: symbol]. diff --git a/modules/Random/Random.st b/modules/Random/Random.st new file mode 100644 index 00000000..c5305f8e --- /dev/null +++ b/modules/Random/Random.st @@ -0,0 +1,141 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. + + Based on Random from Pharo (MIT license). + PCG implementation: (c) 2014 M.E. O'Neill / pcg-random.org (Apache License 2.0). + + Implements a PCG XSH-RR random number generator with 64-bit internal state. + The generator state is held in a 1-element Array (`state`) which is mutated + in place by `primitiveRandomNumber:`. The `seed` ivar tracks the current + state value (updated before each step in `privateNextValue`), so that + `seed` reflects the generator's evolving position rather than just + the initial seed. Sending `seed:` resets both the `seed` ivar and + the internal state (via `setStateFromSeed`, which offsets the seed and steps once), + so re-seeding is reproducible but does not resume the saved position. 
+" + +Class { + #name : #Random, + #superclass : #Object, + #instVars : [ + 'state', + 'seed' + ], + #category : #Random +} + +{ #category : #'instance creation' } +Random class >> new [ + ^super new useClockBasedSeed +] + +{ #category : #'instance creation' } +Random class >> seed: anInteger [ + ^super new seed: anInteger +] + +{ #category : #private } +Random class >> primitiveRandomNumber: aState [ + "Answer a random integer in the interval [0, 16r7FFFFFFF]. + Implements the PCG XSH-RR algorithm on the 64-bit state." + | returnValue count xorshifted | + returnValue := aState first. + count := returnValue bitShiftRight: 59. + aState + at: 1 + put: (returnValue * 6364136223846793005 + + 1442695040888963407 + bitAnd: 16rFFFFFFFFFFFFFFFF). + returnValue := (returnValue bitShiftRight: 18) bitXor: returnValue. + xorshifted := (returnValue bitShiftRight: 27) bitAnd: 16rFFFFFFFF. + ^((xorshifted bitShiftRight: count) + bitOr: (xorshifted bitShift: (count negated bitAnd: 31))) + bitAnd: 16r7FFFFFFF +] + +{ #category : #accessing } +Random >> maxValue [ + ^16r7FFFFFFF +] + +{ #category : #accessing } +Random >> next [ + "Answer a random Float in the interval [0 to 1)." + ^self privateNextValue asFloat / (self maxValue + 1) +] + +{ #category : #accessing } +Random >> next: anInteger [ + ^self next: anInteger into: (Array new: anInteger) +] + +{ #category : #accessing } +Random >> next: anInteger into: anArray [ + 1 to: anInteger do: [:index | anArray at: index put: self next]. + ^anArray +] + +{ #category : #accessing } +Random >> nextBetween: lowerBound and: higherBound [ + "Answer a random float number from the range [lowerBound, higherBound)" + ^lowerBound + (self next * (higherBound - lowerBound)) +] + +{ #category : #accessing } +Random >> nextInteger: anInteger [ + "Answer a random integer in the interval [1, anInteger]." + anInteger strictlyPositive ifFalse: [ + self error: 'Range must be positive']. 
+ ^(self privateNextValue / (self maxValue + 1) * anInteger) truncated + 1 +] + +{ #category : #accessing } +Random >> nextIntegerBetween: lowerBound and: higherBound [ + "Answer a random integer number from the inclusive range [lowerBound, higherBound]" + ^lowerBound + (self nextInteger: higherBound - lowerBound + 1) - 1 +] + +{ #category : #accessing } +Random >> state [ + ^state +] + +{ #category : #initialization } +Random >> initialize [ + super initialize. + state := Array new: 1 +] + +{ #category : #initialization } +Random >> seed: aNumber [ + seed := aNumber asInteger. + self setStateFromSeed +] + +{ #category : #initialization } +Random >> setStateFromSeed [ + state at: 1 put: seed + 1442695040888963407. + self class primitiveRandomNumber: state +] + +{ #category : #initialization } +Random >> useClockBasedSeed [ + "Set seed based on the host millisecond clock." + [ + seed := (Kernel host currentMilliseconds bitAnd: 16rFFFFFFFFFFFFFFFF) + bitXor: self hash. + seed = 0] whileTrue. + self setStateFromSeed +] + +{ #category : #private } +Random >> privateNextValue [ + seed := state at: 1. + ^self class primitiveRandomNumber: state +] + +{ #category : #private } +Random >> seed [ + ^seed +] diff --git a/modules/Random/RandomModule.st b/modules/Random/RandomModule.st new file mode 100644 index 00000000..02a1025a --- /dev/null +++ b/modules/Random/RandomModule.st @@ -0,0 +1,23 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #RandomModule, + #superclass : #Module, + #instVars : [], + #category : #Random +} + +{ #category : #spec } +RandomModule >> imports [ + ^{ + #Kernel -> #(Float). + } +] + +{ #category : #spec } +RandomModule >> exports [ + ^#(Random) +] diff --git a/modules/Random/Tests/RandomTest.st b/modules/Random/Tests/RandomTest.st new file mode 100644 index 00000000..b918f833 --- /dev/null +++ b/modules/Random/Tests/RandomTest.st @@ -0,0 +1,156 @@ +" + Copyright (c) 2026, Javier Pimás. 
+ See (MIT) license in root directory. +" + +Class { + #name : #RandomTest, + #superclass : #TestCase, + #instVars : [ + 'gen' + ], + #category : #'Random.Tests' +} + +{ #category : #running } +RandomTest >> setUp [ + super setUp. + gen := Random seed: 112629 +] + +{ #category : #helpers } +RandomTest >> generateNewSequenceOfSize: aNumber withGenerator: aRandom [ + | results | + results := Array new: aNumber. + 1 to: aNumber do: [:i | + results at: i put: (aRandom nextIntegerBetween: 0 and: 16r7FFFFFFF)]. + ^results +] + +{ #category : #tests } +RandomTest >> testNext [ + 10000 timesRepeat: [ + | next | + next := gen next. + self assert: next >= 0. + self assert: next < 1] +] + +{ #category : #tests } +RandomTest >> testNextBetweenAnd [ + 10000 timesRepeat: [ + | next | + next := gen nextBetween: -10 and: 5. + self assert: next >= -10. + self assert: next < 5] +] + +{ #category : #tests } +RandomTest >> testNextInteger [ + | int | + int := gen nextInteger: 256. + self assert: int isInteger. + self assert: (int between: 1 and: 256) +] + +{ #category : #tests } +RandomTest >> testNextIntegerBetweenAnd [ + 10000 timesRepeat: [ + | next | + next := gen nextIntegerBetween: -3 and: 5. + self assert: next isInteger. + self assert: (next between: -3 and: 5)] +] + +{ #category : #tests } +RandomTest >> testNextInto [ + | array | + array := Array new: 8. + array := gen next: 8 into: array. + array do: [:each | + self assert: each isFloat. + self assert: each >= 0. 
+ self assert: each < 1] +] + +{ #category : #tests } +RandomTest >> testPrimitiveRandomGeneration1 [ + self + assert: + (self generateNewSequenceOfSize: 100 withGenerator: (gen seed: 42)) + equals: + #( 1123384278 1795671209 1924641435 1143034755 1974427309 + 1757328946 1271345452 1441777623 2062288904 2131966645 + 898244057 1731076225 106581468 2027215766 1693907025 251273835 + 872213210 736288324 449296931 1097054660 1174969416 1493913367 + 727525072 1895983019 2045651843 601924129 790240170 274078875 + 85460756 661429448 1551229168 1346782475 331806902 2001698278 + 1653883637 969082088 58484890 877451631 106813142 879896758 + 1465586082 23970619 222390255 1872327836 132911854 169678554 + 685616442 1905681248 1101299353 543443413 10498198 1567420440 + 8429346 246076037 463063522 1131711124 352105423 1100034716 + 1647334895 972870151 1415116872 419609241 810313841 1259894043 + 1073135744 969363312 1568528795 664453680 1680803242 460188684 + 1401049631 1589666895 324952468 2133811574 2125455444 1469931429 + 387789563 881643062 1751561152 16000064 56618491 1697760127 + 1756201022 566280575 1702381265 413278598 514120285 2138146072 + 1826857778 878121720 1338240606 1068671382 403284681 815313315 + 391109809 395500445 774308967 509549754 1714383213 1069569682 ) +] + +{ #category : #tests } +RandomTest >> testPrimitiveRandomGeneration2 [ + self + assert: + (self generateNewSequenceOfSize: 100 withGenerator: (gen seed: 4112000)) + equals: + #( 911034467 623064739 2069461119 1776170644 2133203577 1730457956 + 324747982 363675164 1904345172 1621255112 1297556496 23505356 + 1355834468 160610595 1617487803 1039549212 1846883319 2010354200 + 476369801 171757954 989848022 1603079100 1314026991 1454207784 + 2078473025 227444708 504950213 1345591439 600653439 449189537 + 324817986 914192558 1239256601 395373279 501565559 256583195 + 715973605 1269777629 314601761 1690741703 155672304 2037828346 + 1973102905 1150196189 546648478 1888933509 521133230 1757563876 + 1339955536 
431524952 704934916 1711974879 70559250 1982829667 + 1110020946 1707037755 1973351774 1942247750 1217323071 + 1125713023 1148575093 364025800 2112712187 1191251157 1293701689 + 1325582175 848888591 1835433750 198980106 1331838368 602369642 + 1749496722 1726468786 1537953647 395924263 190370818 949685575 + 1337449940 918067889 2043992072 374792107 865520841 798576892 + 554778780 755314185 616251642 1095303820 1762283268 207097256 + 1698265582 657252139 1166599601 1256604542 1336871508 98506035 + 1267254030 2056734050 1359495350 1088203006 1024528242 ) +] + +{ #category : #tests } +RandomTest >> testPrimitiveRandomGeneration3 [ + self + assert: + (self generateNewSequenceOfSize: 100 withGenerator: (gen seed: 123456)) + equals: + #( 566038743 1171143710 196292920 1669880209 1144641032 1992765256 + 659093549 14218717 2061950801 1678079829 1578110325 1879404830 + 1933870707 434085891 341950436 625432323 1134678563 859685437 + 1236225730 315560151 852048096 1922244605 925650437 504664098 + 1260282598 465193379 245828783 279261993 1902667837 5944093 + 2136271957 1094758864 329209476 105592850 1897929857 1913195112 + 821050896 643503696 776471402 1415619124 700430832 355145952 + 1353942448 1074383166 2140513849 159165778 932788109 673910915 + 1417469399 258047532 939982909 2086845843 1112252639 1003572104 + 1719096493 2029507176 685431625 1070730662 833670055 2041486110 + 968545889 1011430722 1287709220 885237954 1257267839 2003737035 + 935102566 1481687452 1302662897 1254412380 1759700629 1564192767 + 661511340 227189186 302694750 1035479791 876735686 1001817412 + 1647194895 440089346 331642580 1825295173 617063505 10834473 + 863891414 1393364432 1164413488 1487711579 717709665 981313536 + 1334721008 1303947308 627686644 2053289872 878240920 891827854 + 498333902 399158151 832415416 1705443437 ) +] + +{ #category : #tests } +RandomTest >> testWithFloatAsSeed [ + | r | + r := Random seed: 10.5. 
+ self assert: r seed equals: 10.5 asInteger +] diff --git a/modules/Random/Tests/TestsModule.st b/modules/Random/Tests/TestsModule.st new file mode 100644 index 00000000..9dfcab17 --- /dev/null +++ b/modules/Random/Tests/TestsModule.st @@ -0,0 +1,35 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #TestsModule, + #superclass : #Module, + #category : #'Random.Tests' +} + +{ #category : #spec } +TestsModule >> imports [ + ^{ + #Kernel -> #(Float). + #Random -> #(Random). + #SUnit -> #(TestCase TestResult) + } +] + +{ #category : #main } +TestsModule >> main: anArray [ + | suite result | + Kernel log: 'Running Random tests... +'. + suite := RandomTest buildTestSuite. + Kernel log: 'suite built with ', suite tests size printString, ' tests +'. + result := suite run. + Kernel log: 'Done. +'. + Kernel log: result printString. + Kernel log: ' +' +] diff --git a/modules/STON/STONModule.st b/modules/STON/STONModule.st index f08d047e..03fd91c9 100644 --- a/modules/STON/STONModule.st +++ b/modules/STON/STONModule.st @@ -18,3 +18,9 @@ STONModule >> imports [ #String. #Symbol. #Time. #Timestamp. #UndefinedObject. #IdentityDictionary. #MessageNotUnderstood}. } ] + +{ #category : #loading } +STONModule >> justLoaded [ + super justLoaded. + STONWriter initialize +] diff --git a/modules/SUnit/TestSuite.st b/modules/SUnit/TestSuite.st index cd1c6928..4efe9854 100644 --- a/modules/SUnit/TestSuite.st +++ b/modules/SUnit/TestSuite.st @@ -22,6 +22,16 @@ TestSuite class >> availableEvents [ ^super availableEvents add: #resultChanged:; yourself ] +{ #category : #'instance creation' } +TestSuite class >> forModule: aModule [ + | suite | + suite := self named: aModule name. + aModule classes do: [:cls | + (cls inheritsFrom: TestCase) ifTrue: [ + suite addTest: cls buildTestSuite]]. 
+ ^suite +] + { #category : #'instance creation' } TestSuite class >> fromString: aString [ | suite cm | @@ -195,6 +205,18 @@ TestSuite >> run [ ^result ] +{ #category : #running } +TestSuite >> runDebug [ + | res | + res := self resources. + res do: [:resource | resource beAvailableFor: self]. + [ + self allCases do: [:test | + Kernel log: 'running ', test printString; log: String cr. + test runCase]] + sunitEnsure: [res do: [:resource | resource reset]] +] + { #category : #running } TestSuite >> run: aTestResult [ | res | diff --git a/modules/TOML/TOMLModule.st b/modules/TOML/TOMLModule.st new file mode 100644 index 00000000..cc793030 --- /dev/null +++ b/modules/TOML/TOMLModule.st @@ -0,0 +1,18 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #TOMLModule, + #superclass : #Module, + #category : #TOML +} + +{#category : #spec} +TOMLModule >> imports [ + ^{ + #Kernel -> #(Character Error OrderedDictionary). + } +] + diff --git a/modules/TOML/TOMLParser.st b/modules/TOML/TOMLParser.st new file mode 100644 index 00000000..d093a2ea --- /dev/null +++ b/modules/TOML/TOMLParser.st @@ -0,0 +1,658 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #TOMLParser, + #superclass : #Object, + #instVars : [ + #stream, + #root, + #current + ], + #category : #TOML +} + +{#category : #'instance creation'} +TOMLParser class >> on: aReadStream [ + ^self new on: aReadStream +] + +{#category : #'instance creation'} +TOMLParser class >> parse: aString [ + ^(self on: aString readStream) parse +] + +{#category : #testing} +TOMLParser >> atTripleQuote: aCharacter [ + | position | + stream peek = aCharacter ifFalse: [^false]. + position := stream position. + stream next. + (stream peekFor: aCharacter) ifTrue: [ + (stream peekFor: aCharacter) ifTrue: [^true]]. + stream position: position. 
+ ^false +] + +{#category : #private} +TOMLParser >> error: aString [ + ^Error signal: 'TOML parse error at position ', + stream position asString, ': ', aString +] + +{#category : #private} +TOMLParser >> expect: aString [ + aString do: [:each | + (stream peekFor: each) ifFalse: [ + self error: 'Expected "', aString, '"']] +] + +{#category : #private} +TOMLParser >> expectChar: aCharacter [ + | found | + (stream peekFor: aCharacter) ifTrue: [^self]. + found := stream atEnd + ifTrue: ['end of input'] + ifFalse: [stream peek asString]. + self error: 'Expected ', aCharacter asString, ' but got ', found +] + +{#category : #private} +TOMLParser >> hexValue: aCharacter [ + | code | + code := aCharacter asInteger. + (code between: 48 and: 57) ifTrue: [^code - 48]. + (code between: 65 and: 70) ifTrue: [^code - 55]. + (code between: 97 and: 102) ifTrue: [^code - 87]. + self error: 'Invalid hex digit' +] + +{#category : #testing} +TOMLParser >> isBareKeyChar: aCharacter [ + ^aCharacter isAlphanumeric or: [aCharacter = $- or: [aCharacter = $_]] +] + +{#category : #testing} +TOMLParser >> isBinaryChar: aCharacter [ + ^aCharacter = $0 or: [aCharacter = $1 or: [aCharacter = $_]] +] + +{#category : #testing} +TOMLParser >> isHexChar: aCharacter [ + ^aCharacter isDigit or: [ + (aCharacter between: $a and: $f) or: [ + (aCharacter between: $A and: $F) or: [aCharacter = $_]]] +] + +{#category : #testing} +TOMLParser >> isNewline: aCharacter [ + ^aCharacter = Character cr or: [aCharacter = Character lf] +] + +{#category : #testing} +TOMLParser >> isOctalChar: aCharacter [ + ^(aCharacter between: $0 and: $7) or: [aCharacter = $_] +] + +{#category : #private} +TOMLParser >> matchChar: aCharacter [ + ^stream peekFor: aCharacter +] + +{#category : #initialization} +TOMLParser >> on: aReadStream [ + stream := aReadStream +] + +{#category : #parsing} +TOMLParser >> parse [ + root := OrderedDictionary new. + current := root. + [stream atEnd] whileFalse: [self parseLine]. 
+ ^root +] + +{#category : #parsing} +TOMLParser >> parseArray [ + | elements | + self expectChar: $[. + elements := OrderedCollection new. + self skipWhitespaceAndNewlines. + (self matchChar: $]) ifTrue: [^elements asArray]. + [ + self skipWhitespaceAndNewlines. + elements add: self parseValue. + self skipWhitespaceAndNewlines. + self matchChar: $,] whileTrue: [ + self skipWhitespaceAndNewlines. + (stream peek = $]) ifTrue: [ + self expectChar: $]. + ^elements asArray]]. + self skipWhitespaceAndNewlines. + self expectChar: $]. + ^elements asArray +] + +{#category : #parsing} +TOMLParser >> parseArrayTable [ + | key table array | + self skipWhitespace. + key := self parseDottedKey. + self + skipWhitespace; + expectChar: $]; + expectChar: $]; + skipToNewline. + table := self resolveTable: key allButLast create: true. + array := table at: key last ifAbsent: [nil]. + array ifNil: [ + array := OrderedCollection new. + table at: key last put: array]. + current := OrderedDictionary new. + array add: current +] + +{#category : #parsing} +TOMLParser >> parseBareKey [ + | start | + start := stream position. + [stream atEnd not and: [self isBareKeyChar: stream peek]] + whileTrue: [stream next]. + stream position = start ifTrue: [self error: 'Empty bare key']. + ^stream collection copyFrom: start + 1 to: stream position +] + +{#category : #parsing} +TOMLParser >> parseBasicString [ + self expectChar: $". + ^self parseBasicStringBody +] + +{#category : #parsing} +TOMLParser >> parseBasicStringBody [ + | result | + result := String streamContents: [:s | + [stream atEnd or: [stream peek = $"]] + whileFalse: [s nextPut: self parseCharacter]]. + self expectChar: $". + ^result +] + +{#category : #parsing} +TOMLParser >> parseBinaryInteger: aBoolean [ + | value count | + value := 0. + count := 0. + [stream atEnd not and: [self isBinaryChar: stream peek]] + whileTrue: [ + | digit | + digit := stream next. + digit = $_ ifFalse: [ + value := value * 2 + digit digitValue. 
+ count := count + 1]]. + count = 0 ifTrue: [self error: 'Expected binary digit']. + aBoolean ifTrue: [^value negated]. + ^value +] + +{#category : #parsing} +TOMLParser >> parseCharacter [ + | char | + char := stream next. + char = $\ ifTrue: [^self parseEscape]. + ^char +] + +{#category : #parsing} +TOMLParser >> parseDecimalNumber: aBoolean [ + | integer | + integer := self parseDigits. + (stream atEnd not and: [stream peek = $.]) ifTrue: [ + stream next. + ^self parseFloat: integer negated: aBoolean]. + (stream atEnd not and: [stream peek = $e or: [stream peek = $E]]) ifTrue: [ + stream next. + ^self parseExponent: integer asFloat negated: aBoolean]. + aBoolean ifTrue: [^integer negated]. + ^integer +] + +{#category : #parsing} +TOMLParser >> parseDigits [ + | value count | + value := 0. + count := 0. + [stream atEnd not and: [stream peek isDigit or: [stream peek = $_]]] + whileTrue: [ + | digit | + digit := stream next. + digit = $_ ifFalse: [ + value := value * 10 + digit digitValue. + count := count + 1]]. + count = 0 ifTrue: [self error: 'Expected digit']. + ^value +] + +{#category : #parsing} +TOMLParser >> parseDottedKey [ + | parts | + parts := OrderedCollection new. + parts add: self parseKey. + [self skipWhitespace. stream peekFor: $.] whileTrue: [ + self skipWhitespace. + parts add: self parseKey]. + ^parts asArray +] + +{#category : #parsing} +TOMLParser >> parseEscape [ + | char | + char := stream next. + char = $b ifTrue: [^Character backspace]. + char = $t ifTrue: [^Character tab]. + char = $n ifTrue: [^Character lf]. + char = $f ifTrue: [^Character newPage]. + char = $r ifTrue: [^Character cr]. + char = $" ifTrue: [^$"]. + char = $\ ifTrue: [^$\]. + char = $u ifTrue: [^self parseUnicode: 4]. + char = $U ifTrue: [^self parseUnicode: 8]. + self error: 'Invalid escape: \', char asString +] + +{#category : #parsing} +TOMLParser >> parseExponent: aNumber negated: aBoolean [ + | negative exponent result | + negative := stream peekFor: $-. 
+ negative ifFalse: [stream peekFor: $+]. + exponent := self parseDigits. + negative ifTrue: [exponent := exponent negated]. + result := aNumber * (10 raisedTo: exponent). + aBoolean ifTrue: [^result negated]. + ^result +] + +{#category : #parsing} +TOMLParser >> parseFalse [ + self expect: 'false'. + ^false +] + +{#category : #parsing} +TOMLParser >> parseFloat: anInteger negated: aBoolean [ + | fraction power result | + fraction := 0. + power := 1.0. + [stream atEnd not and: [stream peek isDigit or: [stream peek = $_]]] + whileTrue: [ + | digit | + digit := stream next. + digit = $_ ifFalse: [ + fraction := fraction * 10 + digit digitValue. + power := power * 10.0]]. + result := anInteger + (fraction / power). + (stream atEnd not and: [stream peek = $e or: [stream peek = $E]]) ifTrue: [ + stream next. + ^self parseExponent: result negated: aBoolean]. + aBoolean ifTrue: [^result negated]. + ^result +] + +{#category : #parsing} +TOMLParser >> parseHexDigit [ + | char code | + stream atEnd ifTrue: [self error: 'Expected hex digit']. + char := stream next. + code := char asInteger. + (code between: 48 and: 57) ifTrue: [^code - 48]. + (code between: 65 and: 70) ifTrue: [^code - 55]. + (code between: 97 and: 102) ifTrue: [^code - 87]. + self error: 'Invalid hex digit: ', char asString +] + +{#category : #parsing} +TOMLParser >> parseHexInteger: aBoolean [ + | value count | + value := 0. + count := 0. + [stream atEnd not and: [self isHexChar: stream peek]] + whileTrue: [ | digit | + digit := stream next. + digit = $_ ifFalse: [ + value := (value bitShift: 4) + (self hexValue: digit). + count := count + 1]]. + count = 0 ifTrue: [self error: 'Expected hex digit']. + aBoolean ifTrue: [^value negated]. + ^value +] + +{#category : #parsing} +TOMLParser >> parseInlineTable [ + | table | + self expectChar: ${. + table := OrderedDictionary new. + self skipWhitespace. + (self matchChar: $}) ifTrue: [^table]. + [ + self skipWhitespace. + self parseKeyValueInto: table. 
+ self skipWhitespace. + self matchChar: $,] whileTrue. + self skipWhitespace. + self expectChar: $}. + ^table +] + +{#category : #parsing} +TOMLParser >> parseKey [ + | char | + stream atEnd ifTrue: [self error: 'Expected key']. + char := stream peek. + char = $" ifTrue: [^self parseBasicString]. + char = $' ifTrue: [^self parseLiteralString]. + ^self parseBareKey +] + +{#category : #parsing} +TOMLParser >> parseKeyValue [ + | key value target last | + key := self parseDottedKey. + self + skipWhitespace; + expectChar: $=; + skipWhitespace. + value := self parseValue. + self skipToNewline. + target := self resolveIn: current path: key allButLast create: true. + last := key last. + (target includesKey: last) + ifTrue: [self error: 'Duplicate key: ', last]. + target at: last put: value +] + +{#category : #parsing} +TOMLParser >> parseKeyValueInto: aDictionary [ + | key value target | + key := self parseDottedKey. + self + skipWhitespace; + expectChar: $=; + skipWhitespace. + value := self parseValue. + target := self resolveIn: aDictionary path: key allButLast create: true. + target at: key last put: value +] + +{#category : #parsing} +TOMLParser >> parseLine [ + | char | + self skipWhitespace. + stream atEnd ifTrue: [^self]. + char := stream peek. + char = $# ifTrue: [^self skipComment]. + char = $[ ifTrue: [^self parseTableHeader]. + (char = Character cr or: [char = Character lf]) + ifTrue: [^self skipNewline]. + self parseKeyValue +] + +{#category : #parsing} +TOMLParser >> parseLiteralString [ + self expectChar: $'. + ^self parseLiteralStringBody +] + +{#category : #parsing} +TOMLParser >> parseLiteralStringBody [ + | result | + result := String streamContents: [:s | + [stream atEnd or: [stream peek = $']] + whileFalse: [s nextPut: stream next]]. + self expectChar: $'. + ^result +] + +{#category : #parsing} +TOMLParser >> parseLiteralStringValue [ + stream next. 
+ (stream peekFor: $') + ifTrue: [ + (stream peekFor: $') + ifTrue: [^self parseMultilineLiteralString] + ifFalse: [^'']] + ifFalse: [^self parseLiteralStringBody] +] + +{#category : #parsing} +TOMLParser >> parseMultilineBasicChar: aStream [ + | char | + char := stream next. + char = $\ ifFalse: [^aStream nextPut: char]. + (self isNewline: stream peek) + ifTrue: [self skipWhitespaceAndNewlines] + ifFalse: [stream back. aStream nextPut: self parseCharacter] +] + +{#category : #parsing} +TOMLParser >> parseMultilineBasicString [ + self skipNewlineIfPresent. + ^String streamContents: [:s | + [stream atEnd or: [self atTripleQuote: $"]] + whileFalse: [self parseMultilineBasicChar: s]] +] + +{#category : #parsing} +TOMLParser >> parseMultilineLiteralString [ + self skipNewlineIfPresent. + ^String streamContents: [:s | + [stream atEnd or: [self atTripleQuote: $']] + whileFalse: [s nextPut: stream next]] +] + +{#category : #parsing} +TOMLParser >> parseNumberValue [ + "Answer the number at the current position. An optional sign may precede digits, inf or nan; a leading minus negates the result." | start negated sign | + sign := stream peek. + negated := sign = $-. + (negated or: [sign = $+]) ifTrue: [stream next]. + (stream peek = $i or: [stream peek = $n]) ifTrue: [ + ^negated ifTrue: [self parseSpecialFloat negated] ifFalse: [self parseSpecialFloat]]. + start := stream position. + stream peek = $0 ifTrue: [ + ^self parsePrefixedNumber: negated from: start]. + ^self parseDecimalNumber: negated +] + +{#category : #parsing} +TOMLParser >> parseOctalInteger: aBoolean [ + | value count | + value := 0. + count := 0. + [stream atEnd not and: [self isOctalChar: stream peek]] + whileTrue: [ + | digit | + digit := stream next. + digit = $_ ifFalse: [ + value := value * 8 + digit digitValue. + count := count + 1]]. + count = 0 ifTrue: [self error: 'Expected octal digit']. + aBoolean ifTrue: [^value negated]. 
+ ^value +] + +{#category : #parsing} +TOMLParser >> parsePrefixedNumber: aBoolean from: anInteger [ + | prefix | + stream next. + stream atEnd ifTrue: [^0]. + prefix := stream peek. + prefix = $x ifTrue: [stream next. ^self parseHexInteger: aBoolean]. + prefix = $o ifTrue: [stream next.
^self parseOctalInteger: aBoolean]. + prefix = $b ifTrue: [stream next. ^self parseBinaryInteger: aBoolean]. + (prefix = $. or: [prefix = $e or: [prefix = $E]]) + ifTrue: [stream position: anInteger. ^self parseDecimalNumber: aBoolean]. + ^0 +] + +{#category : #parsing} +TOMLParser >> parseSpecialFloat [ + | position | + position := stream position. + (self tryMatch: 'inf') ifTrue: [^Float infinity]. + (self tryMatch: 'nan') ifTrue: [^Float nan]. + stream position: position. + self error: 'Unexpected character' +] + +{#category : #parsing} +TOMLParser >> parseStringValue [ + stream next. + (stream peekFor: $") + ifTrue: [ + (stream peekFor: $") + ifTrue: [^self parseMultilineBasicString] + ifFalse: [^'']] + ifFalse: [^self parseBasicStringBody] +] + +{#category : #parsing} +TOMLParser >> parseTable [ + | key | + self skipWhitespace. + key := self parseDottedKey. + self + skipWhitespace; + expectChar: $]; + skipToNewline. + current := self resolveTable: key create: true +] + +{#category : #parsing} +TOMLParser >> parseTableHeader [ + stream next. + (stream peekFor: $[) + ifTrue: [self parseArrayTable] + ifFalse: [self parseTable] +] + +{#category : #parsing} +TOMLParser >> parseTrue [ + self expect: 'true'. + ^true +] + +{#category : #parsing} +TOMLParser >> parseUnicode: anInteger [ + | value | + value := 0. + anInteger timesRepeat: [ + value := (value bitShift: 4) + self parseHexDigit]. + ^Character codePoint: value +] + +{#category : #parsing} +TOMLParser >> parseValue [ + | char | + stream atEnd ifTrue: [self error: 'Expected value']. + char := stream peek. + char = $" ifTrue: [^self parseStringValue]. + char = $' ifTrue: [^self parseLiteralStringValue]. + char = $t ifTrue: [^self parseTrue]. + char = $f ifTrue: [^self parseFalse]. + char = $[ ifTrue: [^self parseArray]. + char = ${ ifTrue: [^self parseInlineTable]. + (char = $- or: [char = $+ or: [char isDigit]]) + ifTrue: [^self parseNumberValue]. + char = $i ifTrue: [^self parseSpecialFloat]. 
+ char = $n ifTrue: [^self parseSpecialFloat]. + self error: 'Unexpected character: ', char asString +] + +{#category : #private} +TOMLParser >> resolveIn: aDictionary path: anArray create: aBoolean [ + | table | + table := aDictionary. + anArray do: [:key | + | next | + next := table at: key ifAbsent: [nil]. + next ifNil: [ + aBoolean ifFalse: [^nil]. + next := OrderedDictionary new. + table at: key put: next]. + (next isKindOf: OrderedCollection) + ifTrue: [table := next last] + ifFalse: [table := next]]. + ^table +] + +{#category : #private} +TOMLParser >> resolveTable: anArray create: aBoolean [ + ^self resolveIn: root path: anArray create: aBoolean +] + +{#category : #private} +TOMLParser >> skipComment [ + [stream atEnd not and: [(self isNewline: stream peek) not]] + whileTrue: [stream next] +] + +{#category : #private} +TOMLParser >> skipNewline [ + "Consume exactly one line terminator: CR LF, a lone CR, or a lone LF. Return right after the CR case so that an LF following CR LF is left for the caller." (stream peekFor: Character cr) ifTrue: [stream peekFor: Character lf. ^self]. + stream peekFor: Character lf +] + +{#category : #private} +TOMLParser >> skipNewlineIfPresent [ + (stream atEnd not and: [self isNewline: stream peek]) + ifTrue: [self skipNewline] +] + +{#category : #private} +TOMLParser >> skipToNewline [ + self skipWhitespace. + stream atEnd ifTrue: [^self]. + stream peek = $# ifTrue: [self skipComment]. + stream atEnd ifTrue: [^self]. + (self isNewline: stream peek) + ifTrue: [self skipNewline] + ifFalse: [self error: 'Expected newline, got: ', stream peek asString] +] + +{#category : #private} +TOMLParser >> skipWhitespace [ + [stream atEnd not and: [ + | char | + char := stream peek. + char = Character space or: [char = Character tab]]] + whileTrue: [stream next] +] + +{#category : #private} +TOMLParser >> skipWhitespaceAndNewlines [ + [stream atEnd not and: [ + | char | + char := stream peek.
+ char = Character space or: [ + char = Character tab or: [ + char = Character cr or: [ + char = Character lf or: [char = $#]]]]]] + whileTrue: [ + stream peek = $# + ifTrue: [self skipComment] + ifFalse: [stream next]] +] + +{#category : #private} +TOMLParser >> tryMatch: aString [ + | position | + position := stream position. + aString do: [:each | + (stream peekFor: each) ifFalse: [ + stream position: position. + ^false]]. + ^true +] + diff --git a/modules/TOML/TOMLWriter.st b/modules/TOML/TOMLWriter.st new file mode 100644 index 00000000..d43a0cad --- /dev/null +++ b/modules/TOML/TOMLWriter.st @@ -0,0 +1,179 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #TOMLWriter, + #superclass : #Object, + #instVars : [ + #stream, + #indent + ], + #category : #TOML +} + +{#category : #'instance creation'} +TOMLWriter class >> on: aWriteStream [ + ^self new on: aWriteStream +] + +{#category : #'instance creation'} +TOMLWriter class >> write: aDictionary [ + ^String streamContents: [:s | + (self on: s) write: aDictionary] +] + +{#category : #testing} +TOMLWriter >> isArrayOfTables: anObject [ + (anObject isKindOf: Array) ifFalse: [ + (anObject isKindOf: OrderedCollection) ifFalse: [^false]]. + anObject isEmpty ifTrue: [^false]. + ^anObject first isKindOf: OrderedDictionary +] + +{#category : #testing} +TOMLWriter >> isBareKey: aString [ + aString isEmpty ifTrue: [^false]. + ^aString allSatisfy: [:ch | + ch isAlphanumeric or: [ch = $- or: [ch = $_]]] +] + +{#category : #testing} +TOMLWriter >> isTable: anObject [ + ^(anObject isKindOf: OrderedDictionary) + or: [self isArrayOfTables: anObject] +] + +{#category : #initialization} +TOMLWriter >> on: aWriteStream [ + stream := aWriteStream. + indent := '' +] + +{#category : #writing} +TOMLWriter >> writeArray: anArray [ + stream nextPut: $[. + anArray withIndexDo: [:elem :i | + i > 1 ifTrue: [stream nextPutAll: ', ']. + self writeValue: elem]. 
+ stream nextPut: $] +] + +{#category : #writing} +TOMLWriter >> writeArrayOfTables: anArray path: aPath [ + anArray do: [:table | + stream nextPutAll: '[['. + self writePath: aPath. + stream nextPutAll: ']]'. + stream nextPut: Character lf. + self writeTopLevel: table path: aPath] +] + +{#category : #writing} +TOMLWriter >> write: aDictionary [ + self writeTopLevel: aDictionary path: #() +] + +{#category : #writing} +TOMLWriter >> writeBasicString: aString [ + "Write aString between double quotes, escaping the quote, backslash, LF, CR and tab characters. Control characters are replaced by their escape, never followed by the raw character." stream nextPut: $". + aString do: [:ch | + ch = $" ifTrue: [stream nextPut: $\]. + ch = $\ ifTrue: [stream nextPut: $\]. + ch = Character lf ifTrue: [stream nextPutAll: '\n']. + ch = Character cr ifTrue: [stream nextPutAll: '\r']. + ch = Character tab ifTrue: [stream nextPutAll: '\t']. + (ch = Character lf or: [ch = Character cr or: [ch = Character tab]]) ifFalse: [stream nextPut: ch]]. + stream nextPut: $" +] + +{#category : #writing} +TOMLWriter >> writeFloat: aFloat [ + aFloat isInfinite + ifTrue: [ + aFloat > 0 + ifTrue: [stream nextPutAll: 'inf'] + ifFalse: [stream nextPutAll: '-inf']] + ifFalse: [ + aFloat isNaN + ifTrue: [stream nextPutAll: 'nan'] + ifFalse: [stream nextPutAll: aFloat printString]] +] + +{#category : #writing} +TOMLWriter >> writeInlineTable: aDictionary [ + | first | + stream nextPutAll: '{'. + first := true. + aDictionary keysAndValuesDo: [:k :v | + first ifTrue: [first := false] ifFalse: [stream nextPutAll: ', ']. + self writeKeyName: k. + stream nextPutAll: ' = '. + self writeValue: v]. + stream nextPutAll: '}' +] + +{#category : #writing} +TOMLWriter >> writeKey: aString value: anObject [ + self writeKeyName: aString. + stream nextPutAll: ' = '. + self writeValue: anObject. 
+ stream nextPut: Character lf +] + +{#category : #writing} +TOMLWriter >> writeKeyName: aString [ + (self isBareKey: aString) + ifTrue: [stream nextPutAll: aString] + ifFalse: [self writeBasicString: aString] +] + +{#category : #writing} +TOMLWriter >> writePath: anArray [ + anArray withIndexDo: [:key :i | + i > 1 ifTrue: [stream nextPut: $.].
+ self writeKeyName: key] +] + +{#category : #writing} +TOMLWriter >> writeTableHeader: anArray [ + stream nextPut: $[. + self writePath: anArray. + stream nextPut: $]. + stream nextPut: Character lf +] + +{#category : #writing} +TOMLWriter >> writeTopLevel: aDictionary path: anArray [ + | tables | + tables := OrderedCollection new. + aDictionary keysAndValuesDo: [:k :v | + (self isTable: v) + ifTrue: [tables add: k -> v] + ifFalse: [self writeKey: k value: v]]. + tables do: [:assoc | + | key value path | + key := assoc key. + value := assoc value. + path := anArray copyWith: key. + (self isArrayOfTables: value) + ifTrue: [self writeArrayOfTables: value path: path] + ifFalse: [self writeTableHeader: path. + self writeTopLevel: value path: path]] +] + +{#category : #writing} +TOMLWriter >> writeValue: anObject [ + anObject isString ifTrue: [^self writeBasicString: anObject]. + anObject = true ifTrue: [^stream nextPutAll: 'true']. + anObject = false ifTrue: [^stream nextPutAll: 'false']. + anObject isInteger ifTrue: [^stream nextPutAll: anObject printString]. + anObject isFloat ifTrue: [^self writeFloat: anObject]. + (anObject isKindOf: Array) ifTrue: [^self writeArray: anObject]. + (anObject isKindOf: OrderedCollection) + ifTrue: [^self writeArray: anObject asArray]. + (anObject isKindOf: OrderedDictionary) ifTrue: [^self writeInlineTable: anObject]. + stream nextPutAll: anObject printString +] + diff --git a/modules/TOML/Tests/TOMLParserTest.st b/modules/TOML/Tests/TOMLParserTest.st new file mode 100644 index 00000000..27042003 --- /dev/null +++ b/modules/TOML/Tests/TOMLParserTest.st @@ -0,0 +1,349 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #TOMLParserTest, + #superclass : #TestCase, + #category : #'TOML.Tests' +} + +{ #category : #'testing - strings' } +TOMLParserTest >> testBasicString [ + | result | + result := TOMLParser parse: 'key = "hello"'. 
+ self assert: (result at: 'key') equals: 'hello' +] + +{ #category : #'testing - strings' } +TOMLParserTest >> testStringEscapes [ + | result | + result := TOMLParser parse: 'key = "hello\nworld"'. + self assert: (result at: 'key') equals: 'hello', Character lf asString, 'world' +] + +{ #category : #'testing - strings' } +TOMLParserTest >> testEmptyString [ + | result | + result := TOMLParser parse: 'key = ""'. + self assert: (result at: 'key') equals: '' +] + +{ #category : #'testing - strings' } +TOMLParserTest >> testLiteralString [ + | result | + result := TOMLParser parse: 'key = ''no \escapes'''. + self assert: (result at: 'key') equals: 'no \escapes' +] + +{ #category : #'testing - strings' } +TOMLParserTest >> testMultilineBasicString [ + | input result | + input := 'key = """ +hello +world"""'. + result := TOMLParser parse: input. + self assert: (result at: 'key') equals: 'hello', Character lf asString, 'world' +] + +{ #category : #'testing - strings' } +TOMLParserTest >> testMultilineLiteralString [ + | input result | + input := 'key = '''''' +first +second'''''''. + result := TOMLParser parse: input. + self assert: (result at: 'key') equals: 'first', Character lf asString, 'second' +] + +{ #category : #'testing - numbers' } +TOMLParserTest >> testInteger [ + | result | + result := TOMLParser parse: 'key = 42'. + self assert: (result at: 'key') equals: 42 +] + +{ #category : #'testing - numbers' } +TOMLParserTest >> testNegativeInteger [ + | result | + result := TOMLParser parse: 'key = -17'. + self assert: (result at: 'key') equals: -17 +] + +{ #category : #'testing - numbers' } +TOMLParserTest >> testPositiveInteger [ + | result | + result := TOMLParser parse: 'key = +99'. + self assert: (result at: 'key') equals: 99 +] + +{ #category : #'testing - numbers' } +TOMLParserTest >> testZero [ + | result | + result := TOMLParser parse: 'key = 0'. 
+ self assert: (result at: 'key') equals: 0 +] + +{ #category : #'testing - numbers' } +TOMLParserTest >> testUnderscoresInInteger [ + | result | + result := TOMLParser parse: 'key = 1_000_000'. + self assert: (result at: 'key') equals: 1000000 +] + +{ #category : #'testing - numbers' } +TOMLParserTest >> testHexInteger [ + | result | + result := TOMLParser parse: 'key = 0xff'. + self assert: (result at: 'key') equals: 255 +] + +{ #category : #'testing - numbers' } +TOMLParserTest >> testOctalInteger [ + | result | + result := TOMLParser parse: 'key = 0o77'. + self assert: (result at: 'key') equals: 63 +] + +{ #category : #'testing - numbers' } +TOMLParserTest >> testBinaryInteger [ + | result | + result := TOMLParser parse: 'key = 0b1010'. + self assert: (result at: 'key') equals: 10 +] + +{ #category : #'testing - numbers' } +TOMLParserTest >> testFloat [ + | result | + result := TOMLParser parse: 'key = 3.14'. + self assert: ((result at: 'key') - 3.14) abs < 0.001 +] + +{ #category : #'testing - numbers' } +TOMLParserTest >> testFloatWithExponent [ + | result | + result := TOMLParser parse: 'key = 1e10'. + self assert: (result at: 'key') equals: 10000000000.0 +] + +{ #category : #'testing - numbers' } +TOMLParserTest >> testNegativeFloat [ + | result | + result := TOMLParser parse: 'key = -0.5'. + self assert: ((result at: 'key') + 0.5) abs < 0.001 +] + +{ #category : #'testing - numbers' } +TOMLParserTest >> testInfinity [ + | result | + result := TOMLParser parse: 'key = inf'. + self assert: (result at: 'key') equals: Float infinity +] + +{ #category : #'testing - numbers' } +TOMLParserTest >> testNan [ + | result | + result := TOMLParser parse: 'key = nan'. + self assert: (result at: 'key') isNaN +] + +{ #category : #'testing - booleans' } +TOMLParserTest >> testTrue [ + | result | + result := TOMLParser parse: 'key = true'. 
+ self assert: (result at: 'key') equals: true +] + +{ #category : #'testing - booleans' } +TOMLParserTest >> testFalse [ + | result | + result := TOMLParser parse: 'key = false'. + self assert: (result at: 'key') equals: false +] + +{ #category : #'testing - arrays' } +TOMLParserTest >> testEmptyArray [ + | result | + result := TOMLParser parse: 'key = []'. + self assert: (result at: 'key') equals: #() +] + +{ #category : #'testing - arrays' } +TOMLParserTest >> testIntegerArray [ + | result | + result := TOMLParser parse: 'key = [1, 2, 3]'. + self assert: (result at: 'key') equals: #(1 2 3) +] + +{ #category : #'testing - arrays' } +TOMLParserTest >> testMixedArray [ + | result array | + result := TOMLParser parse: 'key = [1, "two", true]'. + array := result at: 'key'. + self + assert: array size equals: 3; + assert: (array at: 1) equals: 1; + assert: (array at: 2) equals: 'two'; + assert: (array at: 3) equals: true +] + +{ #category : #'testing - arrays' } +TOMLParserTest >> testMultilineArray [ + | input result | + input := 'key = [ + 1, + 2, + 3, +]'. + result := TOMLParser parse: input. + self assert: (result at: 'key') equals: #(1 2 3) +] + +{ #category : #'testing - tables' } +TOMLParserTest >> testSimpleTable [ + | input result | + input := '[server] +host = "localhost" +port = 8080'. + result := TOMLParser parse: input. + self + assert: ((result at: 'server') at: 'host') equals: 'localhost'; + assert: ((result at: 'server') at: 'port') equals: 8080 +] + +{ #category : #'testing - tables' } +TOMLParserTest >> testNestedTable [ + | input result | + input := '[a.b] +key = "value"'. + result := TOMLParser parse: input. + self assert: (((result at: 'a') at: 'b') at: 'key') equals: 'value' +] + +{ #category : #'testing - tables' } +TOMLParserTest >> testInlineTable [ + | result table | + result := TOMLParser parse: 'point = {x = 1, y = 2}'. + table := result at: 'point'. 
+ self + assert: (table at: 'x') equals: 1; + assert: (table at: 'y') equals: 2 +] + +{ #category : #'testing - tables' } +TOMLParserTest >> testArrayOfTables [ + | input result products | + input := '[[products]] +name = "Hammer" + +[[products]] +name = "Nail"'. + result := TOMLParser parse: input. + products := result at: 'products'. + self + assert: products size equals: 2; + assert: (products first at: 'name') equals: 'Hammer'; + assert: (products last at: 'name') equals: 'Nail' +] + +{ #category : #'testing - keys' } +TOMLParserTest >> testBareKey [ + | result | + result := TOMLParser parse: 'bare-key_123 = "value"'. + self assert: (result at: 'bare-key_123') equals: 'value' +] + +{ #category : #'testing - keys' } +TOMLParserTest >> testQuotedKey [ + | result | + result := TOMLParser parse: '"quoted key" = "value"'. + self assert: (result at: 'quoted key') equals: 'value' +] + +{ #category : #'testing - keys' } +TOMLParserTest >> testDottedKey [ + | result | + result := TOMLParser parse: 'a.b.c = "deep"'. + self assert: (((result at: 'a') at: 'b') at: 'c') equals: 'deep' +] + +{ #category : #'testing - comments' } +TOMLParserTest >> testComment [ + | input result | + input := '# this is a comment +key = "value" # inline comment'. + result := TOMLParser parse: input. + self assert: (result at: 'key') equals: 'value' +] + +{ #category : #'testing - keys' } +TOMLParserTest >> testMultipleKeyValues [ + | input result | + input := 'name = "test" +version = "1.0" +count = 5'. + result := TOMLParser parse: input. 
+ self + assert: (result at: 'name') equals: 'test'; + assert: (result at: 'version') equals: '1.0'; + assert: (result at: 'count') equals: 5 +] + +{ #category : #'testing - errors' } +TOMLParserTest >> testDuplicateKeyError [ + self should: [ + TOMLParser parse: 'key = 1 +key = 2'] raise: Error +] + +{ #category : #'testing - integration' } +TOMLParserTest >> testEpmToml [ + | input result project dependencies | + input := '[project] +name = "myapp" +version = "0.1.0" +description = "A test app" + +[dependencies] +JSON = ">=1.0" +STON = "~>2.0"'. + result := TOMLParser parse: input. + project := result at: 'project'. + dependencies := result at: 'dependencies'. + self + assert: (project at: 'name') equals: 'myapp'; + assert: (project at: 'version') equals: '0.1.0'; + assert: (dependencies at: 'JSON') equals: '>=1.0'; + assert: (dependencies at: 'STON') equals: '~>2.0' +] + +{ #category : #'testing - integration' } +TOMLParserTest >> testComplexDocument [ + | input result owner database servers | + input := 'title = "TOML Example" + +[owner] +name = "John" + +[database] +ports = [8001, 8001, 8002] +enabled = true + +[servers.alpha] +ip = "10.0.0.1" + +[servers.beta] +ip = "10.0.0.2"'. + result := TOMLParser parse: input. + owner := result at: 'owner'. + database := result at: 'database'. + servers := result at: 'servers'. + self + assert: (result at: 'title') equals: 'TOML Example'; + assert: (owner at: 'name') equals: 'John'; + assert: (database at: 'ports') equals: #(8001 8001 8002); + assert: (database at: 'enabled') equals: true; + assert: ((servers at: 'alpha') at: 'ip') equals: '10.0.0.1'; + assert: ((servers at: 'beta') at: 'ip') equals: '10.0.0.2' +] diff --git a/modules/TOML/Tests/TestsModule.st b/modules/TOML/Tests/TestsModule.st new file mode 100644 index 00000000..8a9ab2d5 --- /dev/null +++ b/modules/TOML/Tests/TestsModule.st @@ -0,0 +1,35 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. 
+"
+
+Class {
+	#name : #TestsModule,
+	#superclass : #Module,
+	#category : #'TOML.Tests'
+}
+
+{ #category : #spec }
+TestsModule >> imports [
+	^{
+		#Kernel -> #(Error OrderedDictionary).
+		#TOML -> #(TOMLParser).
+		#SUnit -> #(TestCase TestResult)
+	}
+]
+
+{ #category : #main }
+TestsModule >> main: anArray [
+	| suite result |
+	Kernel log: 'Running TOML tests...
+'.
+	suite := TOMLParserTest buildTestSuite.
+	Kernel log: 'suite built with ', suite tests size printString, ' tests
+'.
+	result := suite run.
+	Kernel log: 'Done.
+'.
+	Kernel log: result printString.
+	Kernel log: '
+'
+]
diff --git a/modules/Tonel/Module.st b/modules/Tonel/Module.st
new file mode 100644
index 00000000..237d7f50
--- /dev/null
+++ b/modules/Tonel/Module.st
@@ -0,0 +1,36 @@
+"
+	Copyright (c) 2026, Javier Pimás.
+	See (MIT) license in root directory.
+"
+
+Extension { #name : #Module }
+
+{#category : #'*Tonel'}
+Module >> writeSourceTo: aDirectoryPath [
+	"Write one Tonel .st file per class of the receiver into aDirectoryPath, then its extensions and its module class."
+	self classes do: [:cls |
+		| filename contents |
+		contents := TonelWriter writeClass: cls category: self name.
+		filename := aDirectoryPath, '/', cls name, '.st'.
+		Kernel writeFile: filename contents: contents].
+	self writeExtensionsTo: aDirectoryPath.
+	self writeModuleClassTo: aDirectoryPath
+]
+
+{#category : #'*Tonel'}
+Module >> writeExtensionsTo: aDirectoryPath [
+	extensions keysAndValuesDo: [:key :methods |
+		| filename contents |
+		contents := TonelWriter writeMethods: methods.
+		filename := aDirectoryPath, '/', (key copyReplacing: ' ' with: '.'), '.st'.
+		Kernel writeFile: filename contents: contents]
+]
+
+{#category : #'*Tonel'}
+Module >> writeModuleClassTo: aDirectoryPath [
+	| cls filename contents |
+	cls := self class.
+	contents := TonelWriter writeClass: cls category: self name.
+	filename := aDirectoryPath, '/', cls name, '.st'.
+ Kernel writeFile: filename contents: contents +] diff --git a/modules/Tonel/Tests/TestsModule.st b/modules/Tonel/Tests/TestsModule.st new file mode 100644 index 00000000..5d76cf91 --- /dev/null +++ b/modules/Tonel/Tests/TestsModule.st @@ -0,0 +1,30 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #TestsModule, + #superclass : #Module, + #category : #'Tonel.Tests' +} + +{ #category : #spec } +TestsModule >> imports [ + ^{ + #Kernel -> #(OrderedDictionary Error). + #Tonel -> #(TonelWriter TonelReader). + #SUnit -> #(TestCase TestSuite TestResult TestFailure) + } +] + +{ #category : #main } +TestsModule >> main: anArray [ + | result | + Kernel log: 'Running ', self name, ' tests...'; log: String cr. + result := (TestSuite forModule: self) run. + Kernel + log: 'Done: ', result passedCount printString, ' passed, ', + (result failureCount + result errorCount) printString, ' failed.'; + log: String cr +] diff --git a/modules/Tonel/Tests/TonelWriterTest.st b/modules/Tonel/Tests/TonelWriterTest.st new file mode 100644 index 00000000..b3b7bdf2 --- /dev/null +++ b/modules/Tonel/Tests/TonelWriterTest.st @@ -0,0 +1,192 @@ +" + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. +" + +Class { + #name : #TonelWriterTest, + #superclass : #TestCase, + #category : #'Tonel.Tests' +} + +{ #category : #testing } +TonelWriterTest >> testWriteClassProducesOutput [ + | output | + output := TonelWriter writeClass: TonelWriter. + self assert: output notEmpty +] + +{ #category : #testing } +TonelWriterTest >> testWriteClassStartsWithClassType [ + | output | + output := TonelWriter writeClass: TonelWriter. + self assert: (output includesString: 'Class {') +] + +{ #category : #testing } +TonelWriterTest >> testWriteClassContainsClassName [ + | output | + output := TonelWriter writeClass: TonelWriter. 
+ self assert: (output includesString: '#TonelWriter') +] + +{ #category : #testing } +TonelWriterTest >> testWriteClassContainsCategory [ + | output | + output := TonelWriter writeClass: TonelWriter category: 'Tonel'. + self assert: (output includesString: '#Tonel') +] + +{ #category : #testing } +TonelWriterTest >> testWriteClassContainsMethods [ + | output | + output := TonelWriter writeClass: TonelWriter. + self assert: (output includesString: 'TonelWriter >> ') +] + +{ #category : #testing } +TonelWriterTest >> testClassMethodsAppearBeforeInstanceMethods [ + | output classMethodPos instanceMethodPos | + output := TonelWriter writeClass: TonelWriter. + classMethodPos := output indexOfString: 'TonelWriter class >> '. + instanceMethodPos := output indexOfString: 'TonelWriter >> write ['. + self assert: classMethodPos > 0. + self assert: instanceMethodPos > 0. + self assert: classMethodPos < instanceMethodPos +] + +{ #category : #testing } +TonelWriterTest >> testMethodsAreSortedBySelector [ + | output lines methodLines selectors | + output := TonelWriter writeClass: TonelWriter. + lines := output lines. + methodLines := lines select: [:line | + (line includesString: 'TonelWriter >> ') and: [ + (line includesString: 'TonelWriter class >> ') not]]. + selectors := methodLines collect: [:line | + | start rest space | + start := line indexOfString: '>> '. + rest := line copyFrom: start + 3. + space := rest indexOf: $ . + space = 0 + ifTrue: [rest trimBlanks] + ifFalse: [(rest copyFrom: 1 to: space - 1) trimBlanks]]. + selectors size > 1 ifTrue: [ + 1 to: selectors size - 1 do: [:i | + self assert: (selectors at: i) <= (selectors at: i + 1)]] +] + +{ #category : #testing } +TonelWriterTest >> testRoundTrip [ + | output reader | + output := TonelWriter writeClass: TonelWriter category: 'Tonel'. + reader := TonelReader on: output readStream. + reader read. 
+ self assert: true +] + +{ #category : #testing } +TonelWriterTest >> testReadMethodWithDollarDollarLiteral [ + "A method body containing $$ followed by a special character + (like [ or ]) must not confuse the block terminator scanner." + | input reader | + input := 'Class { #name : #Foo, #superclass : #Object, #category : #X } + +{ #category : #m } +Foo >> m [ + | c | c := $$. ^c == $[ +] +'. + reader := TonelReader on: input readStream. + reader read. + self assert: reader methods size equals: 1 +] + +{ #category : #testing } +TonelWriterTest >> testReadMethodWithDollarDollarFollowedByBracket [ + "After $$ the next character is a real source character that must be + honored for nesting. E.g. $$[:x|x] contains a real opening [ that the + scanner must count." + | input reader method | + input := 'Class { #name : #Foo, #superclass : #Object, #category : #X } + +{ #category : #m } +Foo >> m [ + ^$$ , [:x | x] value: 1 +] +'. + reader := TonelReader on: input readStream. + reader read. + self assert: reader methods size equals: 1. + method := reader methods first. + self assert: ((method at: #body) includesString: 'value: 1') +] + +{ #category : #testing } +TonelWriterTest >> testReadMethodBodyEndingWithDollarDollar [ + "$$ is a complete character literal (the $ char). The next character + (in this case the closing ]) is a real source character, not escaped. + Scanners that blindly treat 'prev = $' as escaping the next position + will fail to terminate here." + | input reader method | + input := 'Class { #name : #Foo, #superclass : #Object, #category : #X } + +{ #category : #m } +Foo >> m [ + ^$$] +'. + reader := TonelReader on: input readStream. + reader read. + self assert: reader methods size equals: 1. + method := reader methods first. + self assert: ((method at: #body) includesString: '$$') +] + +{ #category : #testing } +TonelWriterTest >> testReadUnterminatedBlockRaises [ + "An unterminated method body must raise an error, not hang nor silently truncate." 
+ | input reader | + input := 'Class { #name : #Foo, #superclass : #Object, #category : #X } + +{ #category : #m } +Foo >> m [ + self foo. +'. + reader := TonelReader on: input readStream. + self should: [reader read] raise: Error +] + +{ #category : #testing } +TonelWriterTest >> testRoundTripWithUnicodeBody [ + "Non-ASCII characters in a method body must round-trip through the + reader without being lost or mangled." + | stream input reader method body nch | + nch := Character value: 241. + stream := String new writeStream. + stream nextPutAll: 'Class { #name : #Foo, #superclass : #Object, #category : #X }'; cr; cr. + stream nextPutAll: '{ #category : #m }'; cr. + stream nextPutAll: 'Foo >> m ['; cr. + stream tab; nextPutAll: '^'''; nextPut: nch; nextPutAll: ''''; cr. + stream nextPutAll: ']'; cr. + input := stream contents. + reader := TonelReader on: input readStream. + reader read. + self assert: reader methods size equals: 1. + method := reader methods first. + body := method at: #body. + self assert: (body includes: nch) +] + +{ #category : #testing } +TonelWriterTest >> testWriteMethodFirstLineHasNoExtraTabs [ + "Writer output for each method must start with `Class >> selector` at + column 0 (no leading tabs)." + | output lines headerLines tab | + output := TonelWriter writeClass: TonelWriter. + lines := output lines. + headerLines := lines select: [:line | line includesString: 'TonelWriter ']. + self assert: headerLines notEmpty. + tab := Character tab asString. + headerLines do: [:line | + self deny: (line beginsWith: tab)] +] diff --git a/modules/Tonel/TonelReader.st b/modules/Tonel/TonelReader.st index d494e9e2..3e793066 100644 --- a/modules/Tonel/TonelReader.st +++ b/modules/Tonel/TonelReader.st @@ -37,19 +37,23 @@ TonelReader >> methods [ {#category : #reading} TonelReader >> nextBlock [ - | nested char start eol prev | - nested := 0. - prev := nil. - char := $[. + | nested char start eol skipNext | + nested := 1. + skipNext := false. 
start := stream skipLine; position. - [ - (char == $[ andNot: [prev == $$]) ifTrue: [nested := nested + 1]. - (char = $] andNot: [prev == $$]) ifTrue: [nested := nested - 1]. - (char = $' andNot: [prev == $$]) ifTrue: [self skipString]. - (char = $" andNot: [prev == $$]) ifTrue: [self skipComment]. - prev := char. - nested = 0] - whileFalse: [char := stream next]. + [nested > 0] whileTrue: [ + stream atEnd ifTrue: [^self error: 'unterminated method body']. + char := stream next. + skipNext + ifTrue: [skipNext := false] + ifFalse: [ + char = $$ + ifTrue: [skipNext := true] + ifFalse: [ + char = $[ ifTrue: [nested := nested + 1]. + char = $] ifTrue: [nested := nested - 1]. + char = $' ifTrue: [self skipString]. + char = $" ifTrue: [self skipComment]]]]. eol := stream eol size. ^stream copyFrom: start + eol - 1 to: stream position - 1 ] @@ -65,10 +69,13 @@ TonelReader >> read [ {#category : #reading} TonelReader >> readComments [ + | start | stream skipSeparators. stream peek = $" ifTrue: [ stream next. - self skipComment] + start := stream position. + self skipComment. + class at: #comment put: (stream contents copyFrom: start + 1 to: stream position - 1)] ] {#category : #reading} diff --git a/modules/Tonel/TonelWriter.st b/modules/Tonel/TonelWriter.st index 440d981c..8194c74a 100644 --- a/modules/Tonel/TonelWriter.st +++ b/modules/Tonel/TonelWriter.st @@ -113,6 +113,20 @@ TonelWriter >> methods: aCollection [ meta notNil and: [classes anySatisfy: [:cls | cls = meta instanceClass]]]]]) ] +{#category : #accessing} +TonelWriter >> sortedMethods [ + | classMethods instanceMethods | + classMethods := OrderedCollection new. + instanceMethods := OrderedCollection new. + self methods do: [:m | + m classBinding isMetaclass + ifTrue: [classMethods add: m] + ifFalse: [instanceMethods add: m]]. + classMethods sortBy: [:a :b | a selector <= b selector]. + instanceMethods sortBy: [:a :b | a selector <= b selector]. 
+ ^classMethods asArray & instanceMethods asArray +] + {#category : #private} TonelWriter >> position: aStream afterSelector: aSymbol [ | c | @@ -120,11 +134,8 @@ TonelWriter >> position: aStream afterSelector: aSymbol [ ifTrue: [aStream skipSeparators; skip: aSymbol size] ifFalse: [ aSymbol keywords do: [:part | - aStream - skipSeparators; - skip: part size; - skipSeparators; - nextKeyword]]. + aStream skipSeparators; skip: part size; skipSeparators. + self skipIdentifierIn: aStream]]. [ aStream atEnd ifTrue: [^self]. c := aStream peek. @@ -133,6 +144,12 @@ TonelWriter >> position: aStream afterSelector: aSymbol [ c isEndOfLine ifTrue: [aStream next] ] +{#category : #private} +TonelWriter >> skipIdentifierIn: aStream [ + [aStream atEnd not and: [aStream peek isAlphaNumericOrUnderscore]] + whileTrue: [aStream next] +] + {#category : #accessing} TonelWriter >> stream: aWriteStream [ stream := aWriteStream @@ -168,7 +185,10 @@ TonelWriter >> writeClass: aClass category: aString [ {#category : #writing} TonelWriter >> writeComments [ - + | comment | + comment := class comment. + (comment isNil or: [comment isEmpty]) ifTrue: [^self]. + stream nextPut: $"; nextPutAll: comment; nextPut: $"; cr; cr ] {#category : #writing} @@ -180,7 +200,9 @@ TonelWriter >> writeDefinition [ {#category : #writing} TonelWriter >> writeMethod: method [ | source head body metadata | - source := method sourceCode readStream. + source := method sourceObject. + source isNil ifTrue: [^self]. + source := source readStream. self position: source afterSelector: method selector. head := source copyFrom: 1 to: source position. body := source upToEnd. @@ -189,12 +211,12 @@ TonelWriter >> writeMethod: method [ (STONWriter on: stream) writeMap: metadata. stream cr; print: method classBinding name; nextPutAll: ' >> '. head lines - do: [:line | stream nextPutAll: line utf8] + do: [:line | stream nextPutAll: line] separatedBy: [stream cr]. stream nextPutAll: ' ['. body := body readStream. 
[ - stream cr; nextPutAll: body nextLine utf8. + stream cr; nextPutAll: body nextLine. body atEnd] whileFalse. stream cr; @@ -205,7 +227,7 @@ TonelWriter >> writeMethod: method [ {#category : #writing} TonelWriter >> writeMethods [ - self methods do: [:m | self writeMethod: m] + self sortedMethods do: [:m | self writeMethod: m] ] {#category : #writing} diff --git a/runtime/cpp/Allocator/GCSpace.cpp b/runtime/cpp/Allocator/GCSpace.cpp index 4171ccca..20d70e4a 100644 --- a/runtime/cpp/Allocator/GCSpace.cpp +++ b/runtime/cpp/Allocator/GCSpace.cpp @@ -67,7 +67,7 @@ bool GCSpace::commitMemoryUpTo_(uintptr_t address) if (newLimit < address) return false; - CommitMemory(_committedLimit, newLimit - _base); + CommitMemory(_committedLimit, newLimit - _committedLimit); _committedLimit = newLimit; return true; } diff --git a/runtime/cpp/Bootstrap/Bootstrapper.cpp b/runtime/cpp/Bootstrap/Bootstrapper.cpp index a993f820..b73ea3f7 100644 --- a/runtime/cpp/Bootstrap/Bootstrapper.cpp +++ b/runtime/cpp/Bootstrap/Bootstrapper.cpp @@ -37,6 +37,7 @@ Bootstrapper::Bootstrapper(const std::string& kernelPath, Loader* loader) _stringClass(nullptr), _wideStringClass(nullptr), _arrayClass(nullptr), _methodDictionaryClass(nullptr) { _compiler = std::make_unique(); + _methodDictBuilder = std::make_unique(this); } Bootstrapper::~Bootstrapper() { @@ -460,6 +461,7 @@ void Bootstrapper::createRuntimeWithBootstrappedKernel() { kernel->addExport("Ephemeron", _classes.at("Ephemeron")); kernel->addExport("ProcessVMStack", _classes.at("ProcessVMStack")); kernel->addExport("OpenHashTable", _classes.at("OpenHashTable")); + kernel->addExport("Character", _classes.at("Character")); _loader->_runtime = new Runtime(_loader, kernel, _symbolProvider); _loader->_runtime->addSegmentSpace_(kernel); @@ -472,6 +474,11 @@ void Bootstrapper::createRuntimeWithBootstrappedKernel() { // Fill symbol table fillSymbolTable(); + + // Convert Array method dicts to proper MethodDictionary objects, then + // switch from 
array-based to Smalltalk message-based method installation + convertMethodDictionaries(); + _methodDictBuilder = std::make_unique(_loader->_runtime); } // ========================================================================= @@ -491,6 +498,50 @@ void Bootstrapper::fillSymbolTable() { _loader->_runtime->switchToDynamicSymbolProvider_(tableRef.get()->asHeapObject()); } +// ========================================================================= +// Phase 9: Convert Array method dicts to proper MethodDictionary objects +// ========================================================================= + +void Bootstrapper::convertMethodDictionaries() { + for (const auto& [className, cls] : _classes) { + convertBehaviorMethodDict_(cls); + convertBehaviorMethodDict_(_metaclasses.at(className)); + } +} + +void Bootstrapper::convertBehaviorMethodDict_(HeapObject* species) { + auto runtime = _loader->_runtime; + HeapObject* behavior = species->slot(Offsets::SpeciesInstanceBehavior)->asHeapObject(); + Object* mdObj = behavior->slot(Offsets::BehaviorMethodDictionary); + + HeapObject* md = mdObj->asHeapObject(); + ASSERT (runtime->speciesOf_((Object*)md) == _arrayClass); + + // Count actual methods in the array (stops at first nil selector) + uint32_t count = 0; + uint32_t size = md->size(); + for (uint32_t i = 0; i + 1 < size; i += 2) { + if (md->slot(i) == (Object*)_nilObj) break; + count++; + } + + // Create MethodDictionary new: count and populate via public API + auto mdClassObj = (Object*)_classes.at("MethodDictionary"); + auto sizeArg = (Object*)runtime->newInteger_(count); + auto newMd = runtime->sendLocal_to_with_("new:", mdClassObj, sizeArg); + GCedRef newMdRef(newMd); + + for (uint32_t i = 0; i + 1 < size; i += 2) { + Object* selector = md->slot(i); + if (selector == (Object*)_nilObj) break; + Object* method = md->slot(i + 1); + runtime->sendLocal_to_with_with_("at:put:", newMdRef.get(), selector, method); + } + + // Replace the behavior's method dict + 
behavior->slot(Offsets::BehaviorMethodDictionary) = newMdRef.get(); +} + // ========================================================================= // Kernel-specific hash table helpers // ========================================================================= @@ -583,16 +634,7 @@ void Bootstrapper::compileAndInstallMethod_(const Egg::string& source, HeapObjec } // Install in behavior's method dictionary - HeapObject* behavior = cls->slot(Offsets::SpeciesInstanceBehavior)->asHeapObject(); - Object* methodDictObj = behavior->slot(Offsets::BehaviorMethodDictionary); - HeapObject* methodArray; - if (methodDictObj == nullptr || methodDictObj == (Object*)_nilObj) { - methodArray = newMethodArray(); - behavior->slot(Offsets::BehaviorMethodDictionary) = (Object*)methodArray; - } else { - methodArray = methodDictObj->asHeapObject(); - } - addMethodToArray_(methodArray, internSymbol_(selector), (Object*)method); + _methodDictBuilder->installMethod((Object*)cls, internSymbol_(selector), (Object*)method); } Object* Bootstrapper::transferLiteral_(const LiteralValue& lit, HeapObject* method) { @@ -602,21 +644,23 @@ Object* Bootstrapper::transferLiteral_(const LiteralValue& lit, HeapObject* meth case LiteralValue::String: return (Object*)newString_(lit.asString()); case LiteralValue::Integer: - return (Object*)newSmallInteger_(lit.asInteger()); - case LiteralValue::Float: - return (Object*)_nilObj; + return (Object*)newSmallInteger_((intptr_t)lit.asInteger()); + case LiteralValue::LargeInteger: + return (Object*)newLargeInteger_(lit.asLargeIntegerBytes(), lit.isLargeIntegerNegative()); + case LiteralValue::Float: { + double value = lit.asFloat(); + return (Object*)newBytes_("Float", &value, sizeof(double)); + } case LiteralValue::Character: - return (Object*)newSmallInteger_((intptr_t)lit.asCharacter()); + return transferCharacter_(lit.asCharacter()); case LiteralValue::Boolean: return (Object*)(lit.asBoolean() ? 
_trueObj : _falseObj); case LiteralValue::Nil: return (Object*)_nilObj; case LiteralValue::Array: return (Object*)transferArray_(lit.asArray()); - case LiteralValue::ByteArray: { - HeapObject* ba = newByteArray_(lit.asByteArray()); - return (Object*)ba; - } + case LiteralValue::ByteArray: + return (Object*)newByteArray_(lit.asByteArray()); case LiteralValue::Block: return (Object*)transferBlock_(lit.asBlock(), method); default: @@ -625,6 +669,20 @@ Object* Bootstrapper::transferLiteral_(const LiteralValue& lit, HeapObject* meth } } +Object* Bootstrapper::transferCharacter_(uint32_t codePoint) { + auto it = _characterMap.find(codePoint); + if (it != _characterMap.end()) return (Object*)it->second; + HeapObject* character = newSlots_("Character"); + character->slot(0) = (Object*)newSmallInteger_((intptr_t)codePoint); + _characterMap[codePoint] = character; + return (Object*)character; +} + +HeapObject* Bootstrapper::newLargeInteger_(const std::vector& leBytes, bool negative) { + const char* className = negative ? 
"LargeNegativeInteger" : "LargePositiveInteger"; + return newBytes_(className, leBytes.data(), (uint32_t)leBytes.size()); +} + HeapObject* Bootstrapper::transferArray_(const std::vector& elements) { HeapObject* arr = newArray_(elements.size()); arr->behavior(_behaviors["Array"]); diff --git a/runtime/cpp/Bootstrap/Bootstrapper.h b/runtime/cpp/Bootstrap/Bootstrapper.h index 35c788c8..61395ef9 100644 --- a/runtime/cpp/Bootstrap/Bootstrapper.h +++ b/runtime/cpp/Bootstrap/Bootstrapper.h @@ -17,6 +17,7 @@ #include "../Allocator/GCSpace.h" #include "../SymbolProvider.h" #include "CodeSpecs.h" +#include "MethodDictBuilder.h" namespace Egg { @@ -47,6 +48,8 @@ namespace Egg std::map _metaclasses; std::map _behaviors; std::map _metaBehaviors; + // map to keep Character identity during bootstrap + std::map _characterMap; // Cached class pointers (kernel-specific) HeapObject *_undefinedObjectClass; @@ -60,6 +63,7 @@ namespace Egg HeapObject *_wideStringClass; HeapObject *_arrayClass; HeapObject *_methodDictionaryClass; + std::unique_ptr _methodDictBuilder; public: Bootstrapper(const std::string &kernelPath, Loader *loader); @@ -93,6 +97,10 @@ namespace Egg // Phase 8: Fill Smalltalk symbol table with bootstrap symbols void fillSymbolTable(); + // Phase 9: Convert Array method dicts to proper MethodDictionary objects + void convertMethodDictionaries(); + void convertBehaviorMethodDict_(HeapObject *species); + // Kernel-specific hash table helpers HeapObject *newOpenHashTable_(uint32_t indexedSize, HeapObject *owner); void insertInOpenHashTable_(HeapObject *table, uint32_t indexedSize, Object *key, HeapObject *assoc); @@ -108,6 +116,8 @@ namespace Egg Object *transferLiteral_(const LiteralValue &lit, HeapObject *method); HeapObject *transferBlock_(const LiteralValue::BlockInfo &blockInfo, HeapObject *method); HeapObject *transferArray_(const std::vector &elements); + Object *transferCharacter_(uint32_t codePoint); + HeapObject *newLargeInteger_(const std::vector &leBytes, bool 
negative); // Object creation helpers HeapObject *newBytes_(const Egg::string &className, const void *data, uint32_t byteCount); diff --git a/runtime/cpp/Bootstrap/CMakeLists.txt b/runtime/cpp/Bootstrap/CMakeLists.txt index a5896f39..18de2540 100644 --- a/runtime/cpp/Bootstrap/CMakeLists.txt +++ b/runtime/cpp/Bootstrap/CMakeLists.txt @@ -1,10 +1,12 @@ -cmake_minimum_required(VERSION 3.10) +cmake_minimum_required(VERSION 3.13) +cmake_policy(SET CMP0079 NEW) project(KernelBootstrapper) # Create bootstrapper library add_library(bootstrapper_lib CodeSpecs.cpp Bootstrapper.cpp + MethodDictBuilder.cpp SourceModuleLoader.cpp BootstrappedKernel.cpp TonelReader.cpp @@ -15,6 +17,14 @@ target_include_directories(bootstrapper_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/.. ) -# Link bootstrapper with runtime +# Link bootstrapper with runtime. egg_runtime's Loader.cpp / +# SymbolProvider.cpp call back into bootstrapper_lib, so the two are +# mutually dependent. Declare both edges so CMake emits +# --start-group/--end-group on GNU ld. 
target_link_libraries(bootstrapper_lib PUBLIC egg_runtime) +target_link_libraries(egg_runtime PUBLIC bootstrapper_lib) + +if(BUILD_TESTING) + add_subdirectory(tests) +endif() diff --git a/runtime/cpp/Bootstrap/CodeSpecs.h b/runtime/cpp/Bootstrap/CodeSpecs.h index bb77daa3..045cba19 100644 --- a/runtime/cpp/Bootstrap/CodeSpecs.h +++ b/runtime/cpp/Bootstrap/CodeSpecs.h @@ -31,8 +31,12 @@ class MethodSpec { const Egg::string& source() const { return _source; } void source(const Egg::string& s) { _source = s; } + const Egg::string& category() const { return _category; } + void category(const Egg::string& c) { _category = c; } + private: Egg::string _source; + Egg::string _category; }; // ---- SpeciesSpec (base for ClassSpec and MetaclassSpec) ---- @@ -97,7 +101,7 @@ class MetaclassSpec : public SpeciesSpec { class ClassSpec : public SpeciesSpec { public: - ClassSpec() : _metaclass(nullptr), _variable(false), _pointers(true) {} + ClassSpec() : _metaclass(nullptr), _variable(false), _pointers(true), _isExtension(false) {} const Egg::string& name() const override { return _name; } void name(const Egg::string& n) { _name = n; } @@ -115,16 +119,24 @@ class ClassSpec : public SpeciesSpec { bool isPointers() const { return _pointers; } void isPointers(bool p) { _pointers = p; } + bool isExtension() const { return _isExtension; } + void isExtension(bool e) { _isExtension = e; } + const std::vector& classVarNames() const { return _classVarNames; } void classVarNames(const std::vector& cvars) { _classVarNames = cvars; } + const Egg::string& comment() const { return _comment; } + void comment(const Egg::string& c) { _comment = c; } + private: Egg::string _name; Egg::string _supername; MetaclassSpec* _metaclass; bool _variable; bool _pointers; + bool _isExtension; std::vector _classVarNames; + Egg::string _comment; }; // ---- ModuleSpec (minimal) ---- diff --git a/runtime/cpp/Bootstrap/MethodDictBuilder.cpp b/runtime/cpp/Bootstrap/MethodDictBuilder.cpp new file mode 100644 index 
00000000..d7fe4e4d --- /dev/null +++ b/runtime/cpp/Bootstrap/MethodDictBuilder.cpp @@ -0,0 +1,44 @@ +/* + Copyright (c) 2024-2026, Javier Pimás. + See (MIT) license in root directory. + */ + +#include "MethodDictBuilder.h" +#include "Bootstrapper.h" +#include "../Evaluator/Runtime.h" +#include "../KnownConstants.h" + +namespace Egg { + +// ========================================================================= +// ArrayMethodDictBuilder +// ========================================================================= + +ArrayMethodDictBuilder::ArrayMethodDictBuilder(Bootstrapper* bootstrapper) + : _bootstrapper(bootstrapper) {} + +void ArrayMethodDictBuilder::installMethod(Object* species, Object* selector, Object* method) { + HeapObject* behavior = species->asHeapObject()->slot(Offsets::SpeciesInstanceBehavior)->asHeapObject(); + Object* mdObj = behavior->slot(Offsets::BehaviorMethodDictionary); + HeapObject* array; + if (mdObj == nullptr || mdObj == (Object*)_bootstrapper->_nilObj) { + array = _bootstrapper->newMethodArray(); + behavior->slot(Offsets::BehaviorMethodDictionary) = (Object*)array; + } else { + array = mdObj->asHeapObject(); + } + _bootstrapper->addMethodToArray_(array, selector, method); +} + +// ========================================================================= +// SmalltalkMethodDictBuilder +// ========================================================================= + +SmalltalkMethodDictBuilder::SmalltalkMethodDictBuilder(Runtime* runtime) + : _runtime(runtime) {} + +void SmalltalkMethodDictBuilder::installMethod(Object* species, Object* selector, Object* method) { + _runtime->sendLocal_to_with_with_("addSelector:withMethod:", species, selector, method); +} + +} // namespace Egg diff --git a/runtime/cpp/Bootstrap/MethodDictBuilder.h b/runtime/cpp/Bootstrap/MethodDictBuilder.h new file mode 100644 index 00000000..90120a35 --- /dev/null +++ b/runtime/cpp/Bootstrap/MethodDictBuilder.h @@ -0,0 +1,60 @@ +/* + Copyright (c) 2024-2026, Javier 
Pimás. + See (MIT) license in root directory. + */ + +#ifndef _METHOD_DICT_BUILDER_H_ +#define _METHOD_DICT_BUILDER_H_ + +#include "../HeapObject.h" + +namespace Egg { + +class Bootstrapper; +class Runtime; + +/** + * Abstract interface for method dictionary management. + * + * During early bootstrap, method dictionaries are plain Arrays (linear scan). + * After the Smalltalk-side bootstrap completes, they are converted to proper + * MethodDictionary objects and all subsequent method installation goes through + * Smalltalk messages. + * + * The builder is intended to be parameterizable, so that SmalltalkMethodDictBuilder + * could create different kinds of dictionaries (MethodDictionary, Namespace, etc.); + * as declared below, however, its constructor takes only a Runtime* and no target class name. + */ +class MethodDictBuilder { +public: + virtual ~MethodDictBuilder() = default; + virtual void installMethod(Object* species, Object* selector, Object* method) = 0; +}; + +/** + * Array-based method dictionary builder, used during C++ bootstrap phases. + * Method dictionaries are plain Arrays with [selector, method, selector, method, ...] layout. + * The C++ Runtime::methodFor_in_() handles this format via linear scan. + */ +class ArrayMethodDictBuilder : public MethodDictBuilder { + Bootstrapper* _bootstrapper; +public: + explicit ArrayMethodDictBuilder(Bootstrapper* bootstrapper); + void installMethod(Object* species, Object* selector, Object* method) override; +}; + +/** + * Smalltalk message-based method dictionary builder, used after bootstrap conversion. + * Installs methods by sending addSelector:withMethod: to the species, which operates + * on proper MethodDictionary objects via the standard Smalltalk protocol. 
+ */ +class SmalltalkMethodDictBuilder : public MethodDictBuilder { + Runtime* _runtime; +public: + explicit SmalltalkMethodDictBuilder(Runtime* runtime); + void installMethod(Object* species, Object* selector, Object* method) override; +}; + +} // namespace Egg + +#endif // _METHOD_DICT_BUILDER_H_ diff --git a/runtime/cpp/Bootstrap/README.md b/runtime/cpp/Bootstrap/README.md new file mode 100644 index 00000000..1d4de23b --- /dev/null +++ b/runtime/cpp/Bootstrap/README.md @@ -0,0 +1,124 @@ +# Kernel Bootstrap + +## Overview + +The Kernel Bootstrap enables bootstrapping the Egg Smalltalk kernel from `.st` source files +**without a prebuilt image file**. Previously we used the `runtime/pharo` platform to bootstrap an +egg image and save it to disk, but that was cumbersome. The C++ bootstrapper allows faster feedback +when working on bootstrap and core code from files. + +## Design + +### Two-Phase Bootstrap + +**Phase 1: C++ creates minimal kernel** +- Core objects: `nil`, `true`, `false` +- Core classes with simplified method dictionaries +- Symbol table with essential symbols +- Enough infrastructure to send a message + +**Phase 2: Smalltalk completes initialization** +- C++ sends `#bootstrap` to the system +- Smalltalk rehashes dictionaries, initializes globals +- System is fully operational after this + +### Simplified Method Dictionaries + +Method dictionaries during bootstrap are **plain arrays** (behavior is `Array`): + +``` +Method array (during bootstrap): + ┌───────────────────────────────────────────────────┐ + │ selector1 │ method1 │ selector2 │ method2 │ ... 
│ + └───────────────────────────────────────────────────┘ +``` + +The VM lookup code checks the method dictionary type: +- If array (behavior is `Array`) → linear scan through selector/method pairs +- If proper `MethodDictionary` → hashed lookup + +```cpp +Object* Runtime::methodFor_in_(Object* symbol, HeapObject* behavior) { + auto md = behaviorMethodDictionary_(behavior); + if (isArrayMethodDict_(md)) { + return linearMethodLookup_(symbol, md); + } + return hashedMethodLookup_(symbol, md); +} +``` + +During `#bootstrap`, method dictionaries are converted from arrays to proper hashed +`MethodDictionary` format by the Smalltalk side. + +### Bootstrap Sequence + +``` +1. Bootstrapper::bootstrap() + │ + ├── Phase 1: loadKernelSpecs() + │ └── Parse all .st files into ClassSpecs with MethodSpecs + │ + ├── Phase 2: createInitialObjects() + │ └── Create nil, true, false, kernel module, symbol table + │ + ├── Phase 3: instantiateMetaobjects() + │ └── Allocate all classes, metaclasses and behaviors + │ + ├── Phase 4: initializeMetaobjects() + │ └── Link superclass chains, set formats, bind behaviors + │ + ├── Phase 5: createKernelNamespace() + │ └── Create namespace so class names resolve at runtime + │ + ├── Phase 6: loadKernelMethods() + │ └── Compile methods, store in array-based method dicts + │ + └── Phase 7: createRuntimeWithBootstrappedKernel() + ├── Create BootstrappedKernel and Runtime + ├── Initialize evaluator + └── Send #bootstrap to KernelModule (Smalltalk initializes constants + and converts array method dicts to hashed MethodDictionary) +``` + +## What It Does + +The `Bootstrapper` performs a complete bootstrap of the Egg kernel: + +1. **Creates Initial Objects**: nil, true, false, and the kernel module +2. **Creates Core Classes**: Class, Metaclass, UndefinedObject, True, False, Symbol, String, Array, MethodDictionary, CompiledMethod +3. **Loads Class Definitions**: Reads all `.st` files from the Kernel module directory +4. 
**Parses Class Definitions**: Extracts class names, superclasses, and instance variables from Tonel format +5. **Compiles Methods**: Parses method source and generates Egg treecode bytecode +6. **Installs Methods**: Stores compiled methods in plain array method dictionaries + +## Usage + +```cpp +#include "Bootstrap/Bootstrapper.h" + +Bootstrapper bootstrapper("/path/to/modules/Kernel", loader); // loader: a Loader* +Runtime* runtime = bootstrapper.bootstrap(); +``` + +## Testing + +See [`runtime/cpp/README.md`](../README.md) for build and test instructions. +The bootstrap-side tests live in [`tests/`](tests/) and run as the +`BootstrapperParserTests` CTest test (executable `bootstrapper_parser_tests`). + +## Implementation Overview + +`Bootstrapper` drives the seven phases above. It owns a `BootstrappedKernel` +(an `ImageSegment` adapter that hands out memory for the metaobjects it +allocates) and uses a `TonelReader` to turn each `.st` file under the kernel +directory into `CodeSpecs` (`ClassSpec` + `MethodSpec` trees). + +For every class spec it instantiates the metaobjects, then asks the compiler +to produce a `CompiledMethod` per method spec. Those methods are inserted into +plain-array dictionaries through `MethodDictBuilder`, which later (after +`#bootstrap` runs on the Smalltalk side) is also used to rebuild them as +hashed `MethodDictionary` instances. + +Once the kernel is alive, `SourceModuleLoader` reuses the same `TonelReader` +and `MethodDictBuilder` to load additional modules on demand from Tonel +source. 
diff --git a/runtime/cpp/Bootstrap/SourceModuleLoader.cpp b/runtime/cpp/Bootstrap/SourceModuleLoader.cpp index 9cd0f3a0..0557da69 100644 --- a/runtime/cpp/Bootstrap/SourceModuleLoader.cpp +++ b/runtime/cpp/Bootstrap/SourceModuleLoader.cpp @@ -12,6 +12,7 @@ #include "../Evaluator/Runtime.h" #include "../KnownConstants.h" #include "../GCedRef.h" +#include #include #include #include @@ -46,6 +47,7 @@ HeapObject* SourceModuleLoader::loadModuleFromSource(const std::string& modulePa // Phase 1: Parse all .st files in the module directory std::vector newClassNames; + std::vector extensionNames; for (const auto& entry : fs::directory_iterator(modulePath)) { if (entry.path().extension() == ".st") { std::string filename = entry.path().filename().string(); @@ -59,7 +61,11 @@ HeapObject* SourceModuleLoader::loadModuleFromSource(const std::string& modulePa ClassSpec* spec = reader.parseFile(source); _moduleSpec.addClass(spec); - newClassNames.push_back(spec->name()); + if (spec->isExtension()) { + extensionNames.push_back(spec->name()); + } else { + newClassNames.push_back(spec->name()); + } } } @@ -91,6 +97,13 @@ HeapObject* SourceModuleLoader::loadModuleFromSource(const std::string& modulePa GCedRef moduleClassRef(moduleClass); createMethodsOf_(moduleClassRef.get(), moduleSpec); + + // Set module class comment if the .st file had a header comment + if (!moduleSpec->comment().empty()) { + auto commentObj = (Object*)_runtime->newString_(moduleSpec->comment().toUtf8()); + _runtime->sendLocal_to_with_("comment:", moduleClassRef.get(), commentObj); + } + auto moduleInstance = _runtime->sendLocal_to_("new", moduleClassRef.get()); GCedRef moduleRef(moduleInstance); std::string modName = fs::path(modulePath).filename().string(); @@ -101,7 +114,13 @@ HeapObject* SourceModuleLoader::loadModuleFromSource(const std::string& modulePa _runtime->sendLocal_to_with_("module:", moduleClassRef.get(), moduleRef.get()); _runtime->sendLocal_to_with_("addClass:", moduleRef.get(), 
moduleClassRef.get()); - // Phase 4: Create remaining classes (sorted so superclasses come first) + // Populate namespace with kernel classes so superclass lookups work in Phase 4 + _runtime->sendLocal_to_("bindKernelExports", moduleRef.get()); + + // Phase 4: Import required modules so their classes are available as superclasses + _runtime->sendLocal_to_("importRequiredModules", moduleRef.get()); + + // Phase 5: Create remaining classes (sorted so superclasses come first) // Simple topological sort: process classes whose superclass is already available std::vector remaining; // Collect module-defined class names for dependency checking @@ -146,13 +165,68 @@ HeapObject* SourceModuleLoader::loadModuleFromSource(const std::string& modulePa auto cls = createNewClassFrom_(spec, moduleRef.get()); GCedRef clsRef(cls); createMethodsOf_(clsRef.get(), spec); + + // Set class comment if the .st file had a header comment + if (!spec->comment().empty()) { + auto commentObj = (Object*)_runtime->newString_(spec->comment().toUtf8()); + _runtime->sendLocal_to_with_("comment:", clsRef.get(), commentObj); + } } - // Phase 5: Set up module namespace and finalize - _runtime->sendLocal_to_("bindKernelExports", moduleRef.get()); - _runtime->sendLocal_to_("importRequiredModules", moduleRef.get()); - _runtime->sendLocal_to_("justLoaded", moduleRef.get()); + // Phase 6: Finalize module + // (importRequiredModules was already called in Phase 4) + + // Phase 7: Compile extension methods and register them via addExtension: + // so that justLoaded -> bind properly creates extension method copies + // with the MethodIsExtension flag and module association literal. + for (const auto& extName : extensionNames) { + ClassSpec* spec = _moduleSpec.resolveClass(extName); + auto namespace_ = _runtime->sendLocal_to_("namespace", moduleRef.get()); + + // Strip " class" suffix from extension name to get base class name. 
+ // The TonelReader already classifies methods as instance/class side + // based on the method header (ClassName class >> vs ClassName >>), + // so we only need the base name for namespace lookup. + bool hasClassSuffix = extName.length() > 6 && + extName.substr(extName.length() - 6) == " class"; + Egg::string baseName = hasClassSuffix + ? extName.substr(0, extName.length() - 6) + : extName; + + auto symbol = (Object*)_runtime->addSymbol_(baseName.toUtf8()); + auto hasKey = _runtime->sendLocal_to_with_("includesKey:", namespace_, symbol); + if (hasKey != (Object*)_runtime->_trueObj) + continue; + + auto baseClass = _runtime->sendLocal_to_with_("at:", namespace_, symbol); + GCedRef baseRef(baseClass); + + // Compile instance-side methods (spec->methods()) and register as extensions. + // These always target the base class itself. + for (const auto& method : spec->methods()) { + auto cm = createExtensionMethod_(method.source(), baseRef.get()); + if (cm) { + GCedRef cmRef(cm); + _runtime->sendLocal_to_with_("addExtension:", moduleRef.get(), cmRef.get()); + } + } + // Compile class-side methods (spec->metaclass()->methods()) and register. + // These always target the metaclass of the base class. 
+ if (spec->metaclass()) { + auto metaclass = _runtime->sendLocal_to_("class", baseRef.get()); + GCedRef metaRef(metaclass); + for (const auto& method : spec->metaclass()->methods()) { + auto cm = createExtensionMethod_(method.source(), metaRef.get()); + if (cm) { + GCedRef cmRef(cm); + _runtime->sendLocal_to_with_("addExtension:", moduleRef.get(), cmRef.get()); + } + } + } + } + + // Don't call justLoaded here — HostSystem>>load: calls it after the primitive returns return moduleRef.get()->asHeapObject(); } @@ -177,15 +251,25 @@ Object* SourceModuleLoader::createNewClassFrom_(ClassSpec* spec, Object* module) auto nameStr = (Object*)_runtime->newString_(spec->name().toUtf8()); _runtime->sendLocal_to_with_("name:", classRef.get(), nameStr); - // Set instance variables + // Set instance variables directly at the heap level (bypasses Smalltalk instVarNames: + // which causes stack overflow when called from within SourceModuleLoader) const auto& ivarNames = spec->instVarNames(); if (!ivarNames.empty()) { std::vector ivars; for (const auto& ivar : ivarNames) { ivars.push_back((Object*)_runtime->addSymbol_(ivar.toUtf8())); } - auto ivarArray = (Object*)_runtime->newArray_(ivars); - _runtime->sendLocal_to_with_("instVarNames:", classRef.get(), ivarArray); + auto ivarArray = _runtime->newArray_(ivars); + classRef.get()->asHeapObject()->slot(Offsets::SpeciesInstanceVariables) = (Object*)ivarArray; + + // Update format to include inst var count (same as updateInstSize) + auto formatObj = classRef.get()->asHeapObject()->slot(Offsets::SpeciesFormat); + int32_t format = formatObj->asSmallInteger()->asNative(); + int32_t oldInstSize = format & 0x7F; + auto superFormatObj = superclass->asHeapObject()->slot(Offsets::SpeciesFormat); + int32_t superInstSize = superFormatObj->asSmallInteger()->asNative() & 0x7F; + int32_t newFormat = format - oldInstSize + superInstSize + (int32_t)ivarNames.size(); + classRef.get()->asHeapObject()->slot(Offsets::SpeciesFormat) = 
(Object*)SmallInteger::from(newFormat); } // Set bytes flag if needed @@ -220,18 +304,18 @@ void SourceModuleLoader::createMethodsOf_(Object* cls, ClassSpec* spec) { // Instance methods for (const auto& method : spec->methods()) { - createNewMethod_(method.source(), clsRef.get()); + createNewMethod_(method.source(), clsRef.get(), method.category()); } // Class methods auto metaclass = _runtime->sendLocal_to_("class", clsRef.get()); GCedRef metaRef(metaclass); for (const auto& method : spec->metaclass()->methods()) { - createNewMethod_(method.source(), metaRef.get()); + createNewMethod_(method.source(), metaRef.get(), method.category()); } } -void SourceModuleLoader::createNewMethod_(const Egg::string& source, Object* species) { +void SourceModuleLoader::createNewMethod_(const Egg::string& source, Object* species, const Egg::string& category) { CompilationResult* result = _compiler->compileMethod_(source); SCompiledMethod* smethod = static_cast(result->method()); if (!smethod) { @@ -276,8 +360,59 @@ void SourceModuleLoader::createNewMethod_(const Egg::string& source, Object* spe methodRef.get()->asHeapObject()->slot(Offsets::MethodInstSize + i) = literal; } - // Install method: species addSelector: selector withMethod: method + // Install method via Smalltalk protocol (all method dicts are proper + // MethodDictionary objects after bootstrap conversion) _runtime->sendLocal_to_with_with_("addSelector:withMethod:", speciesRef.get(), selector, methodRef.get()); + + // Set method category if provided + if (!category.empty()) { + auto catObj = (Object*)_runtime->addSymbol_(category.toUtf8()); + _runtime->sendLocal_to_with_("category:", methodRef.get(), catObj); + } +} + +Object* SourceModuleLoader::createExtensionMethod_(const Egg::string& source, Object* species) { + CompilationResult* result = _compiler->compileMethod_(source); + SCompiledMethod* smethod = static_cast(result->method()); + if (!smethod) { + std::cerr << "ERROR: Failed to compile extension method from 
source: '" + << source.substr(0, std::min(source.length(), size_t(60))) << "...'" << std::endl; + return nullptr; + } + + GCedRef speciesRef(species); + + const auto& treecodes = smethod->treecodes(); + const auto& literals = smethod->literals(); + uint32_t literalCount = literals.size(); + + auto cmClass = lookupClass_("CompiledMethod"); + auto size = (Object*)_runtime->newInteger_(literalCount); + auto method = _runtime->sendLocal_to_with_("new:", cmClass, size); + GCedRef methodRef(method); + + auto format = (Object*)_runtime->newInteger_(smethod->format()); + _runtime->sendLocal_to_with_("format:", methodRef.get(), format); + + auto selector = (Object*)_runtime->addSymbol_(smethod->selector().toUtf8()); + _runtime->sendLocal_to_with_("selector:", methodRef.get(), selector); + + auto baClass = _runtime->_kernel->_exports["ByteArray"]; + auto ba = _runtime->newBytes_size_(baClass, treecodes.size()); + std::memcpy((void*)ba, treecodes.data(), treecodes.size()); + _runtime->sendLocal_to_with_("treecodes:", methodRef.get(), (Object*)ba); + + _runtime->sendLocal_to_with_("classBinding:", methodRef.get(), speciesRef.get()); + + auto sourceObj = (Object*)_runtime->newString_(source.toUtf8()); + _runtime->sendLocal_to_with_("sourceObject:", methodRef.get(), sourceObj); + + for (uint32_t i = 0; i < literalCount; i++) { + auto literal = transferLiteral_(literals[i], methodRef.get()); + methodRef.get()->asHeapObject()->slot(Offsets::MethodInstSize + i) = literal; + } + + return methodRef.get(); } Object* SourceModuleLoader::transferLiteral_(const LiteralValue& lit, Object* method) { @@ -287,11 +422,13 @@ Object* SourceModuleLoader::transferLiteral_(const LiteralValue& lit, Object* me case LiteralValue::String: return (Object*)_runtime->newString_(lit.asString().toUtf8()); case LiteralValue::Integer: - return (Object*)_runtime->newInteger_(lit.asInteger()); + return (Object*)_runtime->newInteger_((intptr_t)lit.asInteger()); + case LiteralValue::LargeInteger: + return 
newLargeInteger_(lit.asLargeIntegerBytes(), lit.isLargeIntegerNegative()); case LiteralValue::Float: return (Object*)_runtime->newDouble_(lit.asFloat()); case LiteralValue::Character: - return (Object*)_runtime->newInteger_((intptr_t)lit.asCharacter()); + return transferCharacter_(lit.asCharacter()); case LiteralValue::Boolean: return (Object*)(lit.asBoolean() ? _runtime->_trueObj : _runtime->_falseObj); case LiteralValue::Nil: @@ -324,6 +461,24 @@ Object* SourceModuleLoader::transferArray_(const std::vector& elem return arrRef.get(); } +// Canonicalize the Character via Character class>>value:, which consults +// ByteCharacters for code points 0..255. The kernel is fully alive at +// runtime when SourceModuleLoader is used, so this is always safe. +Object* SourceModuleLoader::transferCharacter_(uint32_t codePoint) { + auto cp = (Object*)_runtime->newInteger_((intptr_t)codePoint); + return _runtime->sendLocal_to_with_("value:", (Object*)_runtime->_characterClass, cp); +} + +Object* SourceModuleLoader::newLargeInteger_(const std::vector& leBytes, bool negative) { + const char* className = negative ? 
"LargeNegativeInteger" : "LargePositiveInteger"; + auto largeClass = _runtime->_kernel->_exports[className]; + auto obj = _runtime->newBytes_size_(largeClass, (uint32_t)leBytes.size()); + auto behavior = _runtime->speciesInstanceBehavior_(largeClass); + obj->behavior(behavior); + std::memcpy((void*)obj, leBytes.data(), leBytes.size()); + return (Object*)obj; +} + Object* SourceModuleLoader::transferBlock_(const LiteralValue::BlockInfo& info, Object* method) { auto blockClass = _runtime->_kernel->_exports["CompiledBlock"]; auto block = _runtime->newSlotsOf_(blockClass); diff --git a/runtime/cpp/Bootstrap/SourceModuleLoader.h b/runtime/cpp/Bootstrap/SourceModuleLoader.h index 3d1c1b18..496410e9 100644 --- a/runtime/cpp/Bootstrap/SourceModuleLoader.h +++ b/runtime/cpp/Bootstrap/SourceModuleLoader.h @@ -34,12 +34,15 @@ class SourceModuleLoader { // Class creation via runtime messages Object* createNewClassFrom_(ClassSpec* spec, Object* module); void createMethodsOf_(Object* cls, ClassSpec* spec); - void createNewMethod_(const Egg::string& source, Object* species); + void createNewMethod_(const Egg::string& source, Object* species, const Egg::string& category = Egg::string("")); + Object* createExtensionMethod_(const Egg::string& source, Object* species); // Literal transfer helpers Object* transferLiteral_(const LiteralValue& lit, Object* method); Object* transferBlock_(const LiteralValue::BlockInfo& blockInfo, Object* method); Object* transferArray_(const std::vector& elements); + Object* transferCharacter_(uint32_t codePoint); + Object* newLargeInteger_(const std::vector& leBytes, bool negative); // Lookup helpers Object* lookupClass_(const Egg::string& name); diff --git a/runtime/cpp/Bootstrap/TonelReader.cpp b/runtime/cpp/Bootstrap/TonelReader.cpp index bd3d287f..0ed103e8 100644 --- a/runtime/cpp/Bootstrap/TonelReader.cpp +++ b/runtime/cpp/Bootstrap/TonelReader.cpp @@ -7,6 +7,7 @@ #include "TonelReader.h" #include "CodeSpecs.h" +#include "Egg.h" namespace Egg { @@ 
-41,7 +42,7 @@ ClassSpec* TonelReader::parseFile(const std::string& utf8Source) { _source = Egg::string(utf8Source); _pos = 0; - readComments(); + Egg::string comment = readComments(); Egg::string type = readType(); auto fields = readDefinition(); @@ -50,9 +51,16 @@ ClassSpec* TonelReader::parseFile(const std::string& utf8Source) { meta->instanceClass(spec); spec->metaclass(meta); + if (!comment.empty()) + spec->comment(comment); + spec->name(fields.count("name") ? fields["name"] : Egg::string("")); - if (type == "Class") { + if (type == "Extension") { + spec->isExtension(true); + } else if (type == "Class") { spec->supername(fields.count("superclass") ? fields["superclass"] : Egg::string("")); + } else { + error_("Unknown Tonel type: " + type.toUtf8()); } // #type field: #variable (pointer-indexed), #bytes (byte-indexed) @@ -95,12 +103,16 @@ ClassSpec* TonelReader::parseFile(const std::string& utf8Source) { } // Mirrors TonelReader >> readComments -void TonelReader::readComments() { +Egg::string TonelReader::readComments() { skipSeparators(); if (!atEnd() && peek() == U'"') { next(); // skip opening " + size_t start = _pos; skipComment(); + // _pos is now past the closing " + return _source.substr(start, _pos - 1 - start); } + return Egg::string(""); } // Mirrors TonelReader >> readType (via ReadStream >> nextWordOrNumber) @@ -133,7 +145,7 @@ void TonelReader::readMethods(ClassSpec* spec, MetaclassSpec* meta) { void TonelReader::readMethod(ClassSpec* spec, MetaclassSpec* meta) { // 1. STON metadata { #category : #accessing } skipSeparators(); - parseSTONMap(); // metadata — we don't need category during bootstrap + auto metadata = parseSTONMap(); // 2. ClassName [class] >> selector...signature [ skipSeparators(); @@ -180,10 +192,14 @@ void TonelReader::readMethod(ClassSpec* spec, MetaclassSpec* meta) { // 5. 
Build method source = signature \n\t body Egg::string methodSource = signature + "\n\t" + body; + MethodSpec ms(methodSource); + if (metadata.count("category")) + ms.category(metadata["category"]); + if (isClassSide) - meta->addMethod(MethodSpec(methodSource)); + meta->addMethod(ms); else - spec->addMethod(MethodSpec(methodSource)); + spec->addMethod(ms); } // ── nextBlock ──────────────────────────────────────────────────────── @@ -201,16 +217,24 @@ Egg::string TonelReader::nextBlock() { int nested = 1; char32_t prev = 0; - while (!atEnd() && nested > 0) { + while (nested > 0) { + if (atEnd()) + error_("unterminated method body"); char32_t ch = next(); - if (ch == U'[' && prev != U'$') { + if (prev == U'$') { + // This character is a character literal value (e.g. $[ $] $' $" $$) + // Skip it entirely — don't count brackets or enter strings/comments + prev = 0; + continue; + } + if (ch == U'[') { nested++; - } else if (ch == U']' && prev != U'$') { + } else if (ch == U']') { nested--; if (nested == 0) break; - } else if (ch == U'\'' && prev != U'$') { + } else if (ch == U'\'') { skipString(); - } else if (ch == U'"' && prev != U'$') { + } else if (ch == U'"') { skipComment(); } prev = ch; diff --git a/runtime/cpp/Bootstrap/TonelReader.h b/runtime/cpp/Bootstrap/TonelReader.h index ddee7b7a..2eb948df 100644 --- a/runtime/cpp/Bootstrap/TonelReader.h +++ b/runtime/cpp/Bootstrap/TonelReader.h @@ -32,7 +32,7 @@ class TonelReader { void skipLine(); // Tonel structure (mirrors TonelReader.st >> read) - void readComments(); + Egg::string readComments(); Egg::string readType(); std::map readDefinition(); void readMethods(ClassSpec* spec, MetaclassSpec* meta); diff --git a/runtime/cpp/Bootstrap/tests/CMakeLists.txt b/runtime/cpp/Bootstrap/tests/CMakeLists.txt new file mode 100644 index 00000000..ff0ff533 --- /dev/null +++ b/runtime/cpp/Bootstrap/tests/CMakeLists.txt @@ -0,0 +1,32 @@ +cmake_minimum_required(VERSION 3.10) + +find_package(Catch2 REQUIRED) + +# Create test 
executable (parser/compiler tests that run without a live runtime; bootstrapper_lib still links egg_runtime) +add_executable(bootstrapper_parser_tests + test_main.cpp + SimpleParserTest.cpp + SourceLoadingTest.cpp + MethodParserTest.cpp + IntegrationTest.cpp + CompilationTest.cpp +) + +# Set C++ standard +set_property(TARGET bootstrapper_parser_tests PROPERTY CXX_STANDARD 17) + +# Include directories +target_include_directories(bootstrapper_parser_tests PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../.. + ${CMAKE_CURRENT_SOURCE_DIR}/../../Compiler +) + +# Link libraries +target_link_libraries(bootstrapper_parser_tests + Catch2::Catch2 + egg_compiler + bootstrapper_lib +) + +# Add test to CTest +add_test(NAME BootstrapperParserTests COMMAND bootstrapper_parser_tests) diff --git a/runtime/cpp/Bootstrap/tests/CompilationTest.cpp b/runtime/cpp/Bootstrap/tests/CompilationTest.cpp new file mode 100644 index 00000000..d9227832 --- /dev/null +++ b/runtime/cpp/Bootstrap/tests/CompilationTest.cpp @@ -0,0 +1,45 @@ +/* + Tests for treecode bytecode compilation. 
+ */ + +#include +#include "../../Compiler/SSmalltalkCompiler.h" +#include "../../Compiler/CompilationResult.h" +#include "../../Compiler/Backend/SCompiledMethod.h" + +using namespace Egg; + +static const std::vector& compile(SSmalltalkCompiler& compiler, + const std::string& source) { + auto* result = compiler.compileMethod_(source); + REQUIRE(result != nullptr); + auto* method = static_cast(result->method()); + REQUIRE(method != nullptr); + return method->treecodes(); +} + +TEST_CASE("Compilation: yourself method produces valid treecode", "[compilation]") { + SSmalltalkCompiler compiler; + const auto& treecode = compile(compiler, "yourself\n\t^self"); + REQUIRE(!treecode.empty()); + + // First byte should be MethodId (101) + REQUIRE(treecode[0] == 101); + + // Should contain ReturnId (109) and IdentifierId (103) + bool hasReturn = false; + bool hasIdentifier = false; + for (auto byte : treecode) { + if (byte == 109) hasReturn = true; + if (byte == 103) hasIdentifier = true; + } + REQUIRE(hasReturn); + REQUIRE(hasIdentifier); +} + +TEST_CASE("Compilation: accessor method produces valid treecode", "[compilation]") { + SSmalltalkCompiler compiler; + const auto& treecode = compile(compiler, "x\n\t^x"); + REQUIRE(!treecode.empty()); + REQUIRE(treecode[0] == 101); // MethodId +} diff --git a/runtime/cpp/Bootstrap/tests/IntegrationTest.cpp b/runtime/cpp/Bootstrap/tests/IntegrationTest.cpp new file mode 100644 index 00000000..18079182 --- /dev/null +++ b/runtime/cpp/Bootstrap/tests/IntegrationTest.cpp @@ -0,0 +1,117 @@ +/* + Integration tests for the bootstrapper. + Tests class creation and method compilation without full heap. 
+ */ + +#include +#include "../TonelReader.h" +#include "../../Compiler/SSmalltalkCompiler.h" +#include "../../Compiler/Parser/SSmalltalkScanner.h" +#include "../../Compiler/Parser/SSmalltalkParser.h" +#include "../../Compiler/AST/SMethodNode.h" +#include "../../Compiler/AST/SSelectorNode.h" +#include "../../Compiler/TreecodeEncoder.h" + +using namespace Egg; + +TEST_CASE("Integration: Parse class and extract methods", "[integration]") { + TonelReader reader; + std::string source = R"( +Class { + #name : 'Point', + #superclass : 'Object', + #instVars : [ 'x', 'y' ] +} + +{ #category : 'accessing' } +Point >> x [ + ^x +] + +{ #category : 'accessing' } +Point >> y [ + ^y +] +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->name() == "Point"); + REQUIRE(spec->instVarNames().size() == 2); + + REQUIRE(spec->methods().size() == 2); +} + +TEST_CASE("Integration: Compile method to treecode", "[integration]") { + SSmalltalkCompiler compiler; + + std::string methodSource = "yourself\n\t^self"; + compiler.scanner()->on_(methodSource); + SMethodNode* node = compiler.parser()->parseMethod(); + REQUIRE(node != nullptr); + REQUIRE(node->selector() != nullptr); + + SSelectorNode* sel = dynamic_cast(node->selector()); + REQUIRE(sel != nullptr); + REQUIRE(sel->symbol() == "yourself"); + + TreecodeEncoder encoder; + auto treecode = encoder.encodeMethod(node); + REQUIRE(!treecode.empty()); +} + +TEST_CASE("Integration: Compile accessor method", "[integration]") { + SSmalltalkCompiler compiler; + + std::string methodSource = "x\n\t^x"; + compiler.scanner()->on_(methodSource); + SMethodNode* node = compiler.parser()->parseMethod(); + REQUIRE(node != nullptr); + + SSelectorNode* sel = dynamic_cast(node->selector()); + REQUIRE(sel != nullptr); + REQUIRE(sel->symbol() == "x"); +} + +TEST_CASE("Integration: Compile keyword method", "[integration]") { + SSmalltalkCompiler compiler; + + std::string methodSource = "x: aNumber\n\tx := aNumber"; + 
compiler.scanner()->on_(methodSource); + SMethodNode* node = compiler.parser()->parseMethod(); + REQUIRE(node != nullptr); + + SSelectorNode* sel = dynamic_cast(node->selector()); + REQUIRE(sel != nullptr); + REQUIRE(sel->symbol() == "x:"); +} + +TEST_CASE("Integration: Full pipeline - parse source then compile", "[integration]") { + TonelReader reader; + SSmalltalkCompiler compiler; + + std::string source = R"( +Class { + #name : 'Test', + #superclass : 'Object' +} + +{ #category : 'testing' } +Test >> yourself [ + ^self +] +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->name() == "Test"); + + REQUIRE(spec->methods().size() == 1); + + // Compile the method + compiler.scanner()->on_(spec->methods()[0].source()); + SMethodNode* node = compiler.parser()->parseMethod(); + REQUIRE(node != nullptr); + + TreecodeEncoder encoder; + auto treecode = encoder.encodeMethod(node); + REQUIRE(!treecode.empty()); +} diff --git a/runtime/cpp/Bootstrap/tests/MethodParserTest.cpp b/runtime/cpp/Bootstrap/tests/MethodParserTest.cpp new file mode 100644 index 00000000..7019b88a --- /dev/null +++ b/runtime/cpp/Bootstrap/tests/MethodParserTest.cpp @@ -0,0 +1,216 @@ +/* + Tests for method parsing from Tonel format. 
+ */ + +#include +#include "../TonelReader.h" + +using namespace Egg; + +TEST_CASE("MethodParser: Parse simple instance method", "[methods]") { + TonelReader reader; + std::string source = R"( +Class { + #name : 'Point', + #superclass : 'Object', + #instVars : [ 'x', 'y' ] +} + +{ #category : 'accessing' } +Point >> x [ + ^x +] +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->methods().size() == 1); + REQUIRE(spec->methods()[0].source().find(Egg::string("x")) != Egg::string::npos); + REQUIRE(spec->methods()[0].source().find(Egg::string("^x")) != Egg::string::npos); +} + +TEST_CASE("MethodParser: Parse multiple instance methods", "[methods]") { + TonelReader reader; + std::string source = R"( +Class { + #name : 'Point', + #superclass : 'Object', + #instVars : [ 'x', 'y' ] +} + +{ #category : 'accessing' } +Point >> x [ + ^x +] + +{ #category : 'accessing' } +Point >> y [ + ^y +] + +{ #category : 'accessing' } +Point >> x: aNumber [ + x := aNumber +] +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->methods().size() == 3); +} + +TEST_CASE("MethodParser: Parse class method", "[methods]") { + TonelReader reader; + std::string source = R"( +Class { + #name : 'Point', + #superclass : 'Object', + #instVars : [ 'x', 'y' ] +} + +{ #category : 'instance creation' } +Point class >> x: anX y: aY [ + ^self new x: anX; y: aY +] + +{ #category : 'accessing' } +Point >> x [ + ^x +] +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->metaclass()->methods().size() == 1); + REQUIRE(spec->methods().size() == 1); +} + +TEST_CASE("MethodParser: No methods returns empty vector", "[methods]") { + TonelReader reader; + std::string source = R"( +Class { + #name : 'Empty', + #superclass : 'Object' +} +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->methods().empty()); +} + +TEST_CASE("MethodParser: Method with nested brackets", "[methods]") { + TonelReader reader; + std::string source = R"( +Class { + #name : 'Test', + #superclass : 
'Object' +} + +{ #category : 'testing' } +Test >> testMethod [ + | x | + x := [1 + 2]. + ^x value +] +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->methods().size() == 1); + REQUIRE(spec->methods()[0].source().find(Egg::string("[1 + 2]")) != Egg::string::npos); +} + +TEST_CASE("MethodParser: Method with string containing brackets", "[methods]") { + TonelReader reader; + std::string source = R"( +Class { + #name : 'Test', + #superclass : 'Object' +} + +{ #category : 'testing' } +Test >> testString [ + ^'hello [world]' +] +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->methods().size() == 1); +} + +TEST_CASE("MethodParser: Method with keyword selector", "[methods]") { + TonelReader reader; + std::string source = R"( +Class { + #name : 'Test', + #superclass : 'Object' +} + +{ #category : 'accessing' } +Test >> at: index put: value [ + ^self basicAt: index put: value +] +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->methods().size() == 1); + REQUIRE(spec->methods()[0].source().find(Egg::string("at: index put: value")) != Egg::string::npos); +} + +TEST_CASE("MethodParser: Method with comment", "[methods]") { + TonelReader reader; + std::string source = R"( +Class { + #name : 'Test', + #superclass : 'Object' +} + +{ #category : 'testing' } +Test >> yourself [ + "Answer the receiver" + ^self +] +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->methods().size() == 1); +} + +TEST_CASE("MethodParser: Binary method", "[methods]") { + TonelReader reader; + std::string source = R"( +Class { + #name : 'Number', + #superclass : 'Magnitude' +} + +{ #category : 'arithmetic' } +Number >> + aNumber [ + ^self subclassResponsibility +] +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->methods().size() == 1); + REQUIRE(spec->methods()[0].source().find(Egg::string("+ aNumber")) != Egg::string::npos); +} + +TEST_CASE("MethodParser: FindMatchingBracket with nesting", "[methods]") { + TonelReader reader; + // Just test 
that multiple methods with nested brackets are handled correctly + std::string source = R"( +Class { + #name : 'Test', + #superclass : 'Object' +} + +{ #category : 'testing' } +Test >> one [ + [1 to: 10 do: [:i | i printString]] value +] + +{ #category : 'testing' } +Test >> two [ + ^2 +] +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->methods().size() == 2); +} diff --git a/runtime/cpp/Bootstrap/tests/SimpleParserTest.cpp b/runtime/cpp/Bootstrap/tests/SimpleParserTest.cpp new file mode 100644 index 00000000..ad9ab2e9 --- /dev/null +++ b/runtime/cpp/Bootstrap/tests/SimpleParserTest.cpp @@ -0,0 +1,89 @@ +/* + Tests for TonelReader class definition parsing. + These tests use inline strings, no file I/O. + */ + +#include +#include "../TonelReader.h" + +using namespace Egg; + +TEST_CASE("TonelReader: Parse simple class definition", "[parser]") { + TonelReader reader; + std::string source = R"( +Class { + #name : 'Point', + #superclass : 'Object', + #instVars : [ + 'x', + 'y' + ], + #category : 'Kernel', + #package : 'Kernel' +} +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->name() == "Point"); + REQUIRE(spec->supername() == "Object"); + REQUIRE(spec->instVarNames().size() == 2); + REQUIRE(spec->instVarNames()[0] == "x"); + REQUIRE(spec->instVarNames()[1] == "y"); +} + +TEST_CASE("TonelReader: Parse class with no instance variables", "[parser]") { + TonelReader reader; + std::string source = R"( +Class { + #name : 'UndefinedObject', + #superclass : 'Object', + #category : 'Kernel', + #package : 'Kernel' +} +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->name() == "UndefinedObject"); + REQUIRE(spec->supername() == "Object"); + REQUIRE(spec->instVarNames().empty()); +} + +TEST_CASE("TonelReader: Parse class with many instance variables", "[parser]") { + TonelReader reader; + std::string source = R"( +Class { + #name : 'CompiledMethod', + #superclass : 'Object', + #instVars : [ + 'format', + 'executableCode', + 'treecodes', + 
'classBinding', + 'selector', + 'source' + ], + #category : 'Kernel', + #package : 'Kernel' +} +)"; + + auto spec = reader.parseFile(source); + REQUIRE(spec->name() == "CompiledMethod"); + REQUIRE(spec->instVarNames().size() == 6); + REQUIRE(spec->instVarNames()[0] == "format"); + REQUIRE(spec->instVarNames()[5] == "source"); +} + +TEST_CASE("TonelReader: Extract class name", "[parser]") { + TonelReader reader; + std::string source = "Class {\n\t#name : 'Boolean',\n\t#superclass : 'Object'\n}"; + auto spec = reader.parseFile(source); + REQUIRE(spec->name() == "Boolean"); +} + +TEST_CASE("TonelReader: Extract superclass", "[parser]") { + TonelReader reader; + std::string source = "Class {\n\t#name : 'SmallInteger',\n\t#superclass : 'Integer'\n}"; + auto spec = reader.parseFile(source); + REQUIRE(spec->supername() == "Integer"); +} diff --git a/runtime/cpp/Bootstrap/tests/SourceLoadingTest.cpp b/runtime/cpp/Bootstrap/tests/SourceLoadingTest.cpp new file mode 100644 index 00000000..bc36f1aa --- /dev/null +++ b/runtime/cpp/Bootstrap/tests/SourceLoadingTest.cpp @@ -0,0 +1,77 @@ +/* + Tests for reading .st source files from disk. 
+ */ + +#include +#include "../TonelReader.h" +#include +#include +#include + +using namespace Egg; + +// Helper to find the Kernel module directory +static std::string findKernelPath() { + std::vector paths = { + "../../modules/Kernel", + "../../../modules/Kernel", + "../../../../modules/Kernel", + "../../../../../modules/Kernel", + "../../../../../../modules/Kernel", + "../../../../../../../modules/Kernel" + }; + for (const auto& path : paths) { + if (std::filesystem::exists(path + "/Object.st")) { + return path; + } + } + return ""; +} + +TEST_CASE("SourceLoading: Can find Kernel directory", "[source]") { + std::string path = findKernelPath(); + REQUIRE(!path.empty()); + REQUIRE(std::filesystem::exists(path)); +} + +TEST_CASE("SourceLoading: Can read Object.st", "[source]") { + std::string path = findKernelPath(); + REQUIRE(!path.empty()); + + std::ifstream file(path + "/Object.st"); + REQUIRE(file.is_open()); + + std::stringstream buffer; + buffer << file.rdbuf(); + std::string content = buffer.str(); + REQUIRE(!content.empty()); + REQUIRE(content.find("#name") != std::string::npos); +} + +TEST_CASE("SourceLoading: Can parse Object class definition", "[source]") { + std::string path = findKernelPath(); + REQUIRE(!path.empty()); + + std::ifstream file(path + "/Object.st"); + REQUIRE(file.is_open()); + + std::stringstream buffer; + buffer << file.rdbuf(); + + TonelReader reader; + auto spec = reader.parseFile(buffer.str()); + REQUIRE(spec->name() == "Object"); +} + +TEST_CASE("SourceLoading: Can list .st files in Kernel", "[source]") { + std::string path = findKernelPath(); + REQUIRE(!path.empty()); + + int count = 0; + for (const auto& entry : std::filesystem::directory_iterator(path)) { + if (entry.path().extension() == ".st") { + count++; + } + } + REQUIRE(count > 10); // Should have many .st files +} diff --git a/runtime/cpp/Bootstrap/tests/test_main.cpp b/runtime/cpp/Bootstrap/tests/test_main.cpp new file mode 100644 index 00000000..4ed06df1 --- /dev/null 
+++ b/runtime/cpp/Bootstrap/tests/test_main.cpp @@ -0,0 +1,2 @@ +#define CATCH_CONFIG_MAIN +#include diff --git a/runtime/cpp/CMakeLists.txt b/runtime/cpp/CMakeLists.txt index 5d059715..7986382a 100644 --- a/runtime/cpp/CMakeLists.txt +++ b/runtime/cpp/CMakeLists.txt @@ -4,6 +4,10 @@ set( CMAKE_VERBOSE_MAKEFILE on ) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) +# Default Debug builds to -O1 so the VM is usable while still debuggable. +set(CMAKE_C_FLAGS_DEBUG "-g -O1" CACHE STRING "Flags used by the C compiler during DEBUG builds.") +set(CMAKE_CXX_FLAGS_DEBUG "-g -O1" CACHE STRING "Flags used by the C++ compiler during DEBUG builds.") + set(CMAKE_TRY_COMPILE_TARGET_TYPE "STATIC_LIBRARY") set(BUILD_SHARED_LIBS OFF) diff --git a/runtime/cpp/CODING_STYLE.md b/runtime/cpp/CODING_STYLE.md new file mode 100644 index 00000000..ddb7c492 --- /dev/null +++ b/runtime/cpp/CODING_STYLE.md @@ -0,0 +1,165 @@ +# C++ Coding Style (runtime/cpp) + +This file complements the project-wide [CODING_STYLE.md](../../CODING_STYLE.md). +The general philosophy (favor simplicity, minimize text to read, keep methods +short, no comments unless they document a public API, no abbreviations) applies +verbatim. The rules below cover the C++-specific concerns that arise from +porting the Smalltalk runtime. + +See also [TRANSPILATION_RULES.md](TRANSPILATION_RULES.md) for the rules used +when porting individual Smalltalk methods to C++. + +## Naming + +### Methods + +Translated directly from Smalltalk: keyword colons become trailing underscores, +one underscore per keyword. + +```cpp +at_(index) // Smalltalk: at: +at_put_(index, value) // Smalltalk: at:put: +sendLocal_to_with_(s, r, a) // Smalltalk: sendLocal:to:with: +``` + +A method that takes **no arguments** has **no trailing underscore**: + +```cpp +yourself() +parseMethod() +literalArray() +``` + +A handful of legacy zero-arg methods still carry a trailing underscore where the +unsuffixed name collides with a C++ keyword (e.g. 
`return_`, `new_`). Keep the +underscore in those cases and document why. + +### Instance Variables + +Prefix with `_`: + +```cpp +HeapObject* _runtime; +std::map _classes; +``` + +### Class Names + +Transpiled classes keep their Smalltalk name verbatim (`SSmalltalkParser`, +`SMethodNode`). Library / collection types use the C++ standard library +(`std::vector`, `std::map`, `std::string`) — not the Smalltalk equivalents. + +### Locals and Arguments + +Same rules as the Smalltalk style guide: name by usage > contents > type, prefix +arguments with `a` / `an`, no abbreviations, no reusing a name for a different +value. + +## Methods + +### Keep Methods Short + +A method should do one thing and have one level of iteration. Extract helper +methods aggressively. Prefer many tiny methods over one long one. + +### Switch Statements + +If a `switch` `case` block has **more than a few lines** of body, extract the +body into a helper method. Long `case` bodies turn the `switch` into a giant +function and obscure the dispatch table. + +```cpp +// Avoid +switch (lit.tag) { + case LiteralValue::LargeInteger: { + const auto& bytes = lit.asLargeIntegerBytes(); + bool negative = lit.isLargeIntegerNegative(); + ... 15 more lines of byte juggling ... + return obj; + } + case LiteralValue::Character: { + ... 8 more lines ... + } +} + +// Preferred +switch (lit.tag) { + case LiteralValue::LargeInteger: + return newLargeInteger_(lit.asLargeIntegerBytes(), + lit.isLargeIntegerNegative()); + case LiteralValue::Character: + return transferCharacter_(lit.asCharacter()); +} +``` + +A `case` body of one or two statements is fine inline. Anything longer becomes a +helper named after what it produces. + +### No Silent Failures + +Do not fail silently. Every failure path must raise visibility: + +- Programmer errors / invariants → `ASSERT(...)`. +- Runtime conditions the caller should observe → `Egg::error("...")` (aborts) or + `warning("...")` (continues). 
+- Returning `nullptr`, `nil`, `0`, or a default-constructed value to indicate + "something went wrong" without logging is forbidden. + +```cpp +// Forbidden +int parseDigit(char c) { + if (c >= '0' && c <= '9') return c - '0'; + return 0; // silent: caller can't tell '0' from "invalid" +} + +// Preferred +int parseDigit(char c) { + if (c >= '0' && c <= '9') return c - '0'; + error("invalid digit"); + return 0; // unreachable +} +``` + +### No Defensive Checks + +Do not add null checks or sanity checks for conditions the caller is contracted +to never produce. Validate at the system boundary, then trust internal callers. +Use `assert(...)` to document invariants without paying for runtime checks in +release builds. + +## Formatting + +- Use the existing indentation style of each file. Do not reformat files you + are not changing. +- Headers go in `*.h`, definitions in `*.cpp`. Inline only short trivial + accessors and the `LiteralValue`-style headers that need to be visible to + templates. +- Prefer `auto` for obvious types (factory results, iterators), spell the type + out when it adds information. + +## Comments + +Same rule as the Smalltalk side: comments are not allowed except as headers on +public APIs or when something genuinely surprising needs to be flagged. If you +feel the need for a comment in the middle of a method, extract a helper whose +name explains the intent. + +## Memory and Pointers + +- Use raw pointers for VM objects (`HeapObject*`, `Object*`); they are managed + by the GC, not by C++ ownership. +- Use `GCedRef` to anchor objects across allocating calls (any + `_runtime->sendLocal*`, `new*` may move things). +- Use `std::unique_ptr` for owned C++ resources (parsers, scanners). Do not + `new`/`delete` manually. + +## Error Reporting Helpers + +The runtime exposes two free functions: + +- `error(message)` — prints and aborts. Use for unrecoverable conditions. +- `warning(message)` — prints and returns. 
Use for recoverable conditions + where a fallback exists. + +Always include enough context in the message to identify the culprit (class +name, selector, file path, etc.). diff --git a/runtime/cpp/Compat.h b/runtime/cpp/Compat.h new file mode 100644 index 00000000..88d80ee1 --- /dev/null +++ b/runtime/cpp/Compat.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +#if defined(_MSC_VER) +#include +#endif + +namespace Egg { + +// Portable signed multiplication with overflow detection on intptr_t. +// Returns true if the multiplication overflows; otherwise stores the +// product in *result and returns false. +static inline bool mul_overflow_iptr(intptr_t a, intptr_t b, intptr_t* result) { +#if defined(__GNUC__) || defined(__clang__) + return __builtin_mul_overflow(a, b, result); +#elif defined(_MSC_VER) && defined(_M_X64) + int64_t hi; + int64_t lo = _mul128(a, b, &hi); + *result = static_cast(lo); + return hi != (lo >> 63); +#else +#error "mul_overflow_iptr: unsupported compiler/architecture" +#endif +} + +} // namespace Egg diff --git a/runtime/cpp/Compiler/CMakeLists.txt b/runtime/cpp/Compiler/CMakeLists.txt index 9541726f..2297e20f 100644 --- a/runtime/cpp/Compiler/CMakeLists.txt +++ b/runtime/cpp/Compiler/CMakeLists.txt @@ -10,3 +10,7 @@ file(GLOB COMPILER_SRC add_library(egg_compiler STATIC ${COMPILER_SRC}) target_include_directories(egg_compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +if(BUILD_TESTING) + add_subdirectory(tests) +endif() + diff --git a/runtime/cpp/Compiler/LiteralValue.h b/runtime/cpp/Compiler/LiteralValue.h index d3f76e21..64674d27 100644 --- a/runtime/cpp/Compiler/LiteralValue.h +++ b/runtime/cpp/Compiler/LiteralValue.h @@ -10,6 +10,7 @@ #include #include #include +#include "Util.h" #include "Utils/egg_string.h" namespace Egg { @@ -23,6 +24,7 @@ struct LiteralValue { enum Tag { None, Integer, + LargeInteger, Float, String, Symbol, @@ -77,6 +79,59 @@ struct LiteralValue { return lit; } + // Construct a LargeInteger literal directly from little-endian 
bytes. + // The bytes must already be in canonical form (even length, no extra + // trailing zero pair). Used by the digit parser below. + static LiteralValue fromLargeInteger(std::vector leBytes, bool negative = false) { + LiteralValue lit; + lit.tag = LargeInteger; + lit.boolVal = negative; + lit.bytes = std::move(leBytes); + return lit; + } + + // Parse an unsigned digit string in the given base and return either an + // Integer (if it fits in a SmallInteger) or a LargeInteger. + // The base must be in [2, 36] because we accept digits 0-9 and A-Z + // (case-insensitive), giving 36 distinct symbols. + + static LiteralValue fromIntegerDigits(uint32_t base, const std::string& digits, bool negative = false) { + ASSERT(base >= 2 && base <= 36); + constexpr int64_t SMI_MAX = (int64_t)(INTPTR_MAX >> 1); + std::vector bytes; + bytes.push_back(0); + for (char c : digits) { + int d; + if (c >= '0' && c <= '9') d = c - '0'; + else if (c >= 'a' && c <= 'z') d = c - 'a' + 10; + else if (c >= 'A' && c <= 'Z') d = c - 'A' + 10; + else { ASSERT(!"invalid digit in integer literal"); d = 0; } + ASSERT((uint32_t)d < base && "digit out of range for given base"); + uint32_t carry = d; + for (auto& b : bytes) { + uint32_t prod = (uint32_t)b * base + carry; + b = prod & 0xFF; + carry = prod >> 8; + } + while (carry > 0) { + bytes.push_back(carry & 0xFF); + carry >>= 8; + } + } + while (bytes.size() > 1 && bytes.back() == 0) bytes.pop_back(); + // Try to fit into SmallInteger. + if (bytes.size() <= 8) { + uint64_t u = 0; + for (int i = (int)bytes.size() - 1; i >= 0; i--) + u = (u << 8) | bytes[i]; + uint64_t limit = negative ? (uint64_t)SMI_MAX + 1 : (uint64_t)SMI_MAX; + if (u <= limit) + return fromInteger(negative ? 
-(int64_t)u : (int64_t)u); + } + if (bytes.size() % 2 != 0) bytes.push_back(0); + return fromLargeInteger(std::move(bytes), negative); + } + static LiteralValue fromFloat(double v) { LiteralValue lit; lit.tag = Float; @@ -144,6 +199,7 @@ struct LiteralValue { bool isNone() const { return tag == None; } bool isInteger() const { return tag == Integer; } + bool isLargeInteger() const { return tag == LargeInteger; } bool isFloat() const { return tag == Float; } bool isString() const { return tag == String; } bool isSymbol() const { return tag == Symbol; } @@ -153,39 +209,37 @@ struct LiteralValue { bool isBoolean() const { return tag == Boolean; } bool isNil() const { return tag == Nil; } bool isBlock() const { return tag == Block; } - bool isNumber() const { return tag == Integer || tag == Float; } - - const BlockInfo& asBlock() const { assert(tag == Block); return blockInfo; } + bool isNumber() const { return tag == Integer || tag == LargeInteger || tag == Float; } - int64_t asInteger() const { assert(tag == Integer); return intVal; } - double asFloat() const { assert(tag == Float); return floatVal; } - uint32_t asCharacter() const { assert(tag == Character); return charVal; } - bool asBoolean() const { assert(tag == Boolean); return boolVal; } +const BlockInfo& asBlock() const { ASSERT(tag == Block); return blockInfo; } + + int64_t asInteger() const { ASSERT(tag == Integer); return intVal; } + const std::vector& asLargeIntegerBytes() const { ASSERT(tag == LargeInteger); return bytes; } + bool isLargeIntegerNegative() const { ASSERT(tag == LargeInteger); return boolVal; } + double asFloat() const { ASSERT(tag == Float); return floatVal; } + uint32_t asCharacter() const { ASSERT(tag == Character); return charVal; } + bool asBoolean() const { ASSERT(tag == Boolean); return boolVal; } const Egg::string& asString() const { - assert(tag == String || tag == Symbol); + ASSERT(tag == String || tag == Symbol); return strVal; } const std::vector& asArray() const { - assert(tag 
== Array); + ASSERT(tag == Array); return elements; } const std::vector& asByteArray() const { - assert(tag == ByteArray); + ASSERT(tag == ByteArray); return bytes; } - /** - * Returns a printable representation suitable for display / debugging. - * Also used as a backwards-compatible "value()" for code that - * previously relied on the string representation. - */ Egg::string printString() const { switch (tag) { case None: return ""; case Integer: return Egg::string(std::to_string(intVal)); + case LargeInteger: return printLargeIntegerString(); case Float: return Egg::string(std::to_string(floatVal)); case String: return strVal; case Symbol: return strVal; @@ -199,6 +253,27 @@ struct LiteralValue { return ""; } + // prints LargeIntegeras a Smalltalk radix-16 literal (e.g. 16rDEADBEEF or -16rFF) + Egg::string printLargeIntegerString() const { + ASSERT(tag == LargeInteger); + static const char hex[] = "0123456789ABCDEF"; + std::string s = boolVal ? "-16r" : "16r"; + bool started = false; + for (int i = (int)bytes.size() - 1; i >= 0; i--) { + uint8_t b = bytes[i]; + if (!started && b == 0) continue; + if (!started) { + if (b >= 0x10) s += hex[(b >> 4) & 0xF]; + } else { + s += hex[(b >> 4) & 0xF]; + } + s += hex[b & 0xF]; + started = true; + } + if (!started) s += "0"; + return Egg::string(s); + } + // UTF-8 printable string for debug output std::string printStringUtf8() const { return printString().toUtf8(); @@ -218,6 +293,7 @@ struct LiteralValue { case Array: return elements == other.elements; case ByteArray: return bytes == other.bytes; case Block: return blockInfo.id == other.blockInfo.id; + case LargeInteger: return bytes == other.bytes && boolVal == other.boolVal; } return false; } diff --git a/runtime/cpp/Compiler/MessageInliner.cpp b/runtime/cpp/Compiler/MessageInliner.cpp index dea31375..28368899 100644 --- a/runtime/cpp/Compiler/MessageInliner.cpp +++ b/runtime/cpp/Compiler/MessageInliner.cpp @@ -20,7 +20,7 @@ void MessageInliner::inline_(SMessageNode* 
aMessageNode) { return; } - if (_message->isCascadeMessage() && _message->receiver()->isBlock()) { + if (_message->isCascadeMessage()) { return; } diff --git a/runtime/cpp/Compiler/Parser/SSmalltalkParser.cpp b/runtime/cpp/Compiler/Parser/SSmalltalkParser.cpp index 9879c0af..c598b053 100644 --- a/runtime/cpp/Compiler/Parser/SSmalltalkParser.cpp +++ b/runtime/cpp/Compiler/Parser/SSmalltalkParser.cpp @@ -7,6 +7,7 @@ #include "../SSmalltalkCompiler.h" #include "../LiteralValue.h" #include "SSmalltalkScanner.h" +#include "../../Egg.h" #include #include #include @@ -21,15 +22,15 @@ SSmalltalkParser::SSmalltalkParser(SSmalltalkCompiler* compiler) SSmalltalkParser::~SSmalltalkParser() { } -SMethodNode* SSmalltalkParser::parseMethod_() { - return method_(); +SMethodNode* SSmalltalkParser::parseMethod() { + return method(); } -SMethodNode* SSmalltalkParser::parseExpression_() { - return headlessMethod_(); +SMethodNode* SSmalltalkParser::parseExpression() { + return headlessMethod(); } -SToken* SSmalltalkParser::next_() { +SToken* SSmalltalkParser::next() { if (_next) { _token = std::move(_next); _next.reset(); @@ -39,7 +40,7 @@ SToken* SSmalltalkParser::next_() { return _token.get(); } -SToken* SSmalltalkParser::peek_() { +SToken* SSmalltalkParser::peek() { if (_next) { return _next.get(); } @@ -60,13 +61,13 @@ SToken* SSmalltalkParser::peek_() { return _next.get(); } -SToken* SSmalltalkParser::step_() { +SToken* SSmalltalkParser::step() { SToken* save = _token.get(); - next_(); + next(); std::vector comments; while (_token && _token->isComment()) { comments.push_back(_token->value()); - next_(); + next(); } if (_token && !comments.empty()) { @@ -78,8 +79,8 @@ SToken* SSmalltalkParser::step_() { return save; } -void SSmalltalkParser::skipDots_() { - while (_token && _token->is('.')) step_(); +void SSmalltalkParser::skipDots() { + while (_token && _token->is('.')) step(); } void SSmalltalkParser::error_(const std::string& message) { @@ -96,17 +97,17 @@ void 
SSmalltalkParser::missingToken_(const std::string& expected) { error_("missing " + expected); } -void SSmalltalkParser::missingExpression_() { +void SSmalltalkParser::missingExpression() { error_("missing expression"); } -void SSmalltalkParser::missingArgument_() { +void SSmalltalkParser::missingArgument() { error_("argument missing"); } -SMethodNode* SSmalltalkParser::method_() { - step_(); - SMethodNode* method = methodSignature_(); +SMethodNode* SSmalltalkParser::method() { + step(); + SMethodNode* method = methodSignature(); if (!method) { return nullptr; } @@ -114,30 +115,30 @@ SMethodNode* SSmalltalkParser::method_() { return method; } -SMethodNode* SSmalltalkParser::headlessMethod_() { - step_(); +SMethodNode* SSmalltalkParser::headlessMethod() { + step(); SMethodNode* method = new SMethodNode(_compiler); _compiler->activeScript_(method); addBodyTo_(method); return method; } -SMethodNode* SSmalltalkParser::methodSignature_() { - SMethodNode* method = keywordSignature_(); +SMethodNode* SSmalltalkParser::methodSignature() { + SMethodNode* method = keywordSignature(); if (method) return method; - method = binarySignature_(); + method = binarySignature(); if (method) return method; - method = unarySignature_(); + method = unarySignature(); if (method) return method; error_("method signature expected"); return nullptr; } -SMethodNode* SSmalltalkParser::unarySignature_() { - if (!hasUnarySelector_()) { +SMethodNode* SSmalltalkParser::unarySignature() { + if (!hasUnarySelector()) { return nullptr; } @@ -145,31 +146,31 @@ SMethodNode* SSmalltalkParser::unarySignature_() { selectorNode->symbol_(_token->value()); selectorNode->position_(_token->position()); - step_(); + step(); std::vector emptyArgs; return buildMethodNode_(selectorNode, emptyArgs); } -SMethodNode* SSmalltalkParser::binarySignature_() { - if (!hasBinarySelector_()) { +SMethodNode* SSmalltalkParser::binarySignature() { + if (!hasBinarySelector()) { return nullptr; } SSelectorNode* selectorNode = new 
SSelectorNode(_compiler); selectorNode->symbol_(_token->value()); selectorNode->position_(_token->position()); - step_(); + step(); if (!_token || !_token->isName()) { - missingArgument_(); + missingArgument(); } SIdentifierNode* arg = new SIdentifierNode(_compiler); arg->name_(_token->value()); arg->position_(_token->position()); - step_(); + step(); std::vector args; args.push_back(arg); @@ -177,8 +178,8 @@ SMethodNode* SSmalltalkParser::binarySignature_() { return buildMethodNode_(selectorNode, args); } -SMethodNode* SSmalltalkParser::keywordSignature_() { - if (!hasKeywordSelector_()) { +SMethodNode* SSmalltalkParser::keywordSignature() { + if (!hasKeywordSelector()) { return nullptr; } @@ -188,10 +189,10 @@ SMethodNode* SSmalltalkParser::keywordSignature_() { while (_token && _token->isKeyword()) { selector += _token->value(); - step_(); + step(); if (!_token || !_token->isName()) { - missingArgument_(); + missingArgument(); } SIdentifierNode* arg = new SIdentifierNode(_compiler); @@ -199,7 +200,7 @@ SMethodNode* SSmalltalkParser::keywordSignature_() { arg->position_(_token->position()); arguments.push_back(arg); - step_(); + step(); } if (arguments.empty()) { @@ -219,12 +220,12 @@ void SSmalltalkParser::addBodyTo_(SMethodNode* method) { } void SSmalltalkParser::addTemporariesTo_(SMethodNode* method) { - method->temporaries_(temporaries_()); + method->temporaries_(temporaries()); } void SSmalltalkParser::addStatementsTo_(SMethodNode* method) { method->position_(_token->position()); - auto stmts = statements_(); + auto stmts = statements(); for (auto stmt : stmts) method->addStatement_(stmt); method->position_(Stretch(method->position().start(), _token->position().start())); if (_token && !_token->isEnd()) { @@ -232,18 +233,18 @@ void SSmalltalkParser::addStatementsTo_(SMethodNode* method) { } } -std::vector SSmalltalkParser::temporaries_() { +std::vector SSmalltalkParser::temporaries() { std::vector temps; if (!_token) return temps; if (_token->is("||")) { - 
step_(); + step(); return temps; } if (!_token->isBar()) { return temps; } while (true) { - step_(); + step(); if (!_token || !_token->isName()) break; SIdentifierNode* temp = new SIdentifierNode(_compiler); temp->name_(_token->value()); @@ -253,47 +254,47 @@ std::vector SSmalltalkParser::temporaries_() { if (!_token || !_token->isBar()) { missingToken_("|"); } - step_(); + step(); return temps; } -std::vector SSmalltalkParser::statements_() { +std::vector SSmalltalkParser::statements() { std::vector stmts; while (_token && !_token->endsExpression()) { - stmts.push_back(statement_()); - if (_token && _token->is('.')) skipDots_(); else break; + stmts.push_back(statement()); + if (_token && _token->is('.')) skipDots(); else break; } return stmts; } -SParseNode* SSmalltalkParser::statement_() { +SParseNode* SSmalltalkParser::statement() { if (_token && _token->is('^')) return return_(); - SParseNode* expr = expression_(); + SParseNode* expr = expression(); return expr; } SReturnNode* SSmalltalkParser::return_() { uint32_t returnPos = _token->position().start(); - step_(); - auto expr = expression_(); - if (!expr) missingExpression_(); + step(); + auto expr = expression(); + if (!expr) missingExpression(); uint32_t end = _token->position().start(); - skipDots_(); + skipDots(); auto node = buildNode_(returnPos); node->expression_(expr); node->position_(Stretch(returnPos, end)); return node; } -SParseNode* SSmalltalkParser::expression_() { - if (_token && _token->isName() && peek_() && peek_()->isAssignment()) { - return assignment_(); +SParseNode* SSmalltalkParser::expression() { + if (_token && _token->isName() && peek() && peek()->isAssignment()) { + return assignment(); } - SParseNode* prim = primary_(); + SParseNode* prim = primary(); if (!prim) { - missingExpression_(); + missingExpression(); } SParseNode* expr = unarySequence_(prim); @@ -313,80 +314,144 @@ SParseNode* SSmalltalkParser::expression_() { return expr; } -SAssignmentNode* 
SSmalltalkParser::assignment_() { +SAssignmentNode* SSmalltalkParser::assignment() { uint32_t position = _token->position().start(); auto variable = new SIdentifierNode(_compiler); variable->name_(_token->value()); variable->position_(_token->position()); - step_(); step_(); - auto expr = expression_(); - if (!expr) missingExpression_(); + step(); step(); + auto expr = expression(); + if (!expr) missingExpression(); auto assignment = buildNode_(position); assignment->assign_operator_(variable, nullptr); assignment->expression_(expr); return assignment; } -SParseNode* SSmalltalkParser::primary_() { +// ========================================================================= +// Number parsing helpers +// ========================================================================= + +// Parse a number literal lexeme. Detects integer vs float and dispatches. +// Radix-prefixed numbers (0x.. or NrDDD) are always treated as integers; +// '.' / 'e' / 'E' inside their digit body are hex/radix digits, not float +// markers. Float radix notation is intentionally unsupported. +LiteralValue SSmalltalkParser::parseNumberString(const std::string& v) { + bool isHexOrRadix = (v.size() > 2 && v[0] == '0' && (v[1] == 'x' || v[1] == 'X')) + || v.find('r') != std::string::npos + || v.find('R') != std::string::npos; + if (isHexOrRadix) + return parseIntegerString(v); + bool looksFloat = v.find('.') != std::string::npos + || v.find('e') != std::string::npos + || v.find('E') != std::string::npos; + if (looksFloat) + return parseFloatString(v); + return parseIntegerString(v); +} + +// Parse a float literal lexeme (decimal only). +LiteralValue SSmalltalkParser::parseFloatString(const std::string& v) { + return LiteralValue::fromFloat(std::stod(v)); +} + +// Parse an integer literal lexeme handling decimal, hex (0x), and radix +// (NrDDD) notation. The factory in LiteralValue decides Integer vs LargeInteger. 
+// Base is constrained to [2, 36] because we accept digits 0-9 and A-Z +// (case-insensitive), giving 36 distinct symbols. +LiteralValue SSmalltalkParser::parseIntegerString(const std::string& v) { + uint32_t base = 10; + std::string digits = v; + if (v.size() > 2 && v[0] == '0' && (v[1] == 'x' || v[1] == 'X')) { + base = 16; + digits = v.substr(2); + } else { + auto rpos = v.find('r'); + if (rpos == std::string::npos) rpos = v.find('R'); + if (rpos != std::string::npos) { + uint64_t parsed = std::stoull(v.substr(0, rpos)); + if (!(parsed >= 2 && parsed <= 36)) + Egg::error("integer literal radix out of range [2, 36]"); + base = (uint32_t)parsed; + digits = v.substr(rpos + 1); + } + } + return LiteralValue::fromIntegerDigits(base, digits, /*negative*/ false); +} + +// Convert current literal token to a LiteralValue +LiteralValue SSmalltalkParser::parseLiteralValue() { + auto* strTok = static_cast(_token.get()); + switch (strTok->literalKind()) { + case SStringToken::LitNumber: + return parseNumberString(_token->value().toUtf8()); + case SStringToken::LitCharacter: + return LiteralValue::fromCharacter(_token->value()[0]); + case SStringToken::LitSymbol: + return LiteralValue::fromSymbol(_token->value()); + case SStringToken::LitString: + default: + return LiteralValue::fromString(_token->value()); + } +} + +// Matches Smalltalk pseudoLiteralValue +LiteralValue SSmalltalkParser::pseudoLiteralValue() { + Egg::string val = _token->value(); + if (val == "nil") return LiteralValue::nil(); + if (val == "true") return LiteralValue::fromBoolean(true); + if (val == "false") return LiteralValue::fromBoolean(false); + return LiteralValue::fromSymbol(val); +} + +// Matches Smalltalk negativeNumberOrBinary +LiteralValue SSmalltalkParser::negativeNumberOrBinary() { + auto peekToken = peek(); + if (peekToken && peekToken->isLiteral()) { + auto* strTok = static_cast(peekToken); + if (strTok->literalKind() == SStringToken::LitNumber) { + step(); + LiteralValue val = 
parseLiteralValue(); + if (val.tag == LiteralValue::Integer) + return LiteralValue::fromInteger(-val.intVal); + if (val.tag == LiteralValue::LargeInteger) + return LiteralValue::fromLargeInteger( + std::vector(val.asLargeIntegerBytes()), true); + if (val.tag == LiteralValue::Float) + return LiteralValue::fromFloat(-val.floatVal); + } + } + return LiteralValue(); // None — signals no negative number found +} + +SParseNode* SSmalltalkParser::primary() { if (!_token) return nullptr; if (_token->isName()) { SIdentifierNode* id = new SIdentifierNode(_compiler); id->name_(_token->value()); id->position_(_token->position()); - step_(); + step(); return id; } if (_token->isLiteral()) { SLiteralNode* lit = new SLiteralNode(_compiler); - auto* strTok = static_cast(_token.get()); - switch (strTok->literalKind()) { - case SStringToken::LitNumber: { - std::string v = _token->value().toUtf8(); - if (v.find('.') != std::string::npos || v.find('e') != std::string::npos || v.find('E') != std::string::npos) { - lit->literalValue_(LiteralValue::fromFloat(std::stod(v))); - } else { - lit->literalValue_(LiteralValue::fromInteger(std::stoll(v, nullptr, 0))); - } - break; - } - case SStringToken::LitCharacter: - lit->literalValue_(LiteralValue::fromCharacter(_token->value()[0])); - break; - case SStringToken::LitSymbol: - lit->literalValue_(LiteralValue::fromSymbol(_token->value())); - break; - case SStringToken::LitString: - default: - lit->literalValue_(LiteralValue::fromString(_token->value())); - break; - } + lit->literalValue_(parseLiteralValue()); lit->position_(_token->position()); - step_(); + step(); return lit; } - if (_token->is('[')) return block_(); - if (_token->is('(')) return parenthesizedExpression_(); - if (_token->is("#(")) return literalArray_(); - if (_token->is("#[")) return literalByteArray_(); - if (_token->is('{')) return bracedArray_(); + if (_token->is('[')) return block(); + if (_token->is('(')) return parenthesizedExpression(); + if (_token->is("#(")) return 
literalArray(); + if (_token->is("#[")) return literalByteArray(); + if (_token->is('{')) return bracedArray(); if (_token->is('-')) { - auto peekToken = peek_(); - if (peekToken && peekToken->isLiteral()) { - step_(); + LiteralValue negVal = negativeNumberOrBinary(); + if (!negVal.isNone()) { SLiteralNode* lit = new SLiteralNode(_compiler); - auto* strTok = static_cast(_token.get()); - std::string v = _token->value().toUtf8(); - if (strTok->literalKind() == SStringToken::LitNumber) { - if (v.find('.') != std::string::npos || v.find('e') != std::string::npos || v.find('E') != std::string::npos) { - lit->literalValue_(LiteralValue::fromFloat(-std::stod(v))); - } else { - lit->literalValue_(LiteralValue::fromInteger(-std::stoll(v, nullptr, 0))); - } - } else { - lit->literalValue_(LiteralValue::fromString("-" + _token->value())); - } + lit->literalValue_(std::move(negVal)); lit->position_(Stretch(_token->position().start() - 1, _token->position().end())); - step_(); + step(); return lit; } return nullptr; @@ -394,27 +459,27 @@ SParseNode* SSmalltalkParser::primary_() { return nullptr; } -SBlockNode* SSmalltalkParser::block_() { +SBlockNode* SSmalltalkParser::block() { uint32_t position = _token->position().start(); SBlockNode* block = new SBlockNode(_compiler); block->position_(Stretch(position, _token->position().start())); block->parent_(_compiler->activeScript()); _compiler->activate_while_(block, [&]() { - step_(); - block->arguments_(blockArguments_()); - block->temporaries_(temporaries_()); - auto stmts = statements_(); + step(); + block->arguments_(blockArguments()); + block->temporaries_(temporaries()); + auto stmts = statements(); for (auto stmt : stmts) block->addStatement_(stmt); if (!_token || !_token->is(']')) { missingToken_("]"); } block->position_(Stretch(position, _token->position().end())); - step_(); + step(); }); return block; } -std::vector SSmalltalkParser::blockArguments_() { +std::vector SSmalltalkParser::blockArguments() { std::vector args; 
if (!_token || !_token->is(':')) { @@ -422,10 +487,10 @@ std::vector SSmalltalkParser::blockArguments_() { } while (_token && _token->is(':')) { - step_(); + step(); if (!_token || !_token->isName()) { - missingArgument_(); + missingArgument(); } SIdentifierNode* arg = new SIdentifierNode(_compiler); @@ -433,12 +498,12 @@ std::vector SSmalltalkParser::blockArguments_() { arg->position_(_token->position()); args.push_back(arg); - step_(); + step(); } if (_token && _token->isBar()) { - step_(); + step(); } else if (_token && _token->is("||")) { - step_(); // consume || as closing | for args + empty temps + step(); // consume || as closing | for args + empty temps } else { missingToken_("|"); } @@ -446,21 +511,21 @@ std::vector SSmalltalkParser::blockArguments_() { return args; } -SParseNode* SSmalltalkParser::parenthesizedExpression_() { +SParseNode* SSmalltalkParser::parenthesizedExpression() { uint32_t start = _token->position().start(); - step_(); - auto expr = expression_(); - if (!expr) missingExpression_(); + step(); + auto expr = expression(); + if (!expr) missingExpression(); if (!_token || !_token->is(')')) missingToken_(")"); uint32_t end = _token->position().end(); - step_(); + step(); if (!expr->isImmediate()) expr->position_(Stretch(start, end)); return expr; } SParseNode* SSmalltalkParser::unarySequence_(SParseNode* receiver) { auto node = receiver; - while (hasUnarySelector_()) { + while (hasUnarySelector()) { auto msg = buildMessageNode_(node); unaryMessage_(msg); node = msg; @@ -472,14 +537,14 @@ void SSmalltalkParser::unaryMessage_(SMessageNode* message) { auto selectorNode = new SSelectorNode(_compiler); selectorNode->symbol_(_token->value()); selectorNode->position_(_token->position()); - step_(); + step(); message->selector_(selectorNode); message->position_(Stretch(message->position().start(), selectorNode->position().end())); } SParseNode* SSmalltalkParser::binarySequence_(SParseNode* receiver) { auto node = receiver; - while 
(hasBinarySelector_()) { + while (hasBinarySelector()) { auto msg = buildMessageNode_(node); binaryMessage_(msg); node = msg; @@ -491,8 +556,8 @@ void SSmalltalkParser::binaryMessage_(SMessageNode* message) { auto selectorNode = new SSelectorNode(_compiler); selectorNode->symbol_(_token->value()); selectorNode->position_(_token->position()); - step_(); - auto prim = primary_(); + step(); + auto prim = primary(); if (!prim) error_("primary missing"); auto arg = unarySequence_(prim); message->selector_(selectorNode); @@ -501,7 +566,7 @@ void SSmalltalkParser::binaryMessage_(SMessageNode* message) { } SParseNode* SSmalltalkParser::keywordSequence_(SParseNode* receiver) { - if (!hasKeywordSelector_()) return receiver; + if (!hasKeywordSelector()) return receiver; auto message = buildMessageNode_(receiver); keywordMessage_(message); return message; @@ -513,9 +578,9 @@ void SSmalltalkParser::keywordMessage_(SMessageNode* message) { uint32_t start = _token->position().start(); while (_token && _token->isKeyword()) { selector += _token->value(); - step_(); - auto prim = primary_(); - if (!prim) missingArgument_(); + step(); + auto prim = primary(); + if (!prim) missingArgument(); auto arg = unarySequence_(prim); arg = binarySequence_(arg); arguments.push_back(arg); @@ -542,7 +607,7 @@ SParseNode* SSmalltalkParser::cascadeSequence_(SMessageNode* messageNode) { firstMsg->cascade_(cascade); cascade->addMessage_(firstMsg); while (_token && _token->is(';')) { - step_(); + step(); auto msg = buildCascadeMessageNode_(receiver); msg->cascade_(cascade); msg->position_(_token->position()); @@ -555,17 +620,17 @@ SParseNode* SSmalltalkParser::cascadeSequence_(SMessageNode* messageNode) { } void SSmalltalkParser::cascadeMessage_(SMessageNode* message) { - if (hasUnarySelector_()) unaryMessage_(message); - else if (hasBinarySelector_()) binaryMessage_(message); - else if (hasKeywordSelector_()) keywordMessage_(message); + if (hasUnarySelector()) unaryMessage_(message); + else if 
(hasBinarySelector()) binaryMessage_(message); + else if (hasKeywordSelector()) keywordMessage_(message); else error_("invalid cascade message"); } -bool SSmalltalkParser::hasUnarySelector_() const { +bool SSmalltalkParser::hasUnarySelector() const { return _token && _token->isName(); } -bool SSmalltalkParser::hasBinarySelector_() const { +bool SSmalltalkParser::hasBinarySelector() const { if (!_token) return false; // ST: (token isStringToken and: [token hasSymbol]) or: [token is: $^] or: [token is: $:] if (_token->isSymbolic() && _token->hasSymbol()) return true; @@ -574,113 +639,69 @@ bool SSmalltalkParser::hasBinarySelector_() const { return false; } -bool SSmalltalkParser::hasKeywordSelector_() const { +bool SSmalltalkParser::hasKeywordSelector() const { return _token && _token->isKeyword(); } -SParseNode* SSmalltalkParser::literalArray_() { +SParseNode* SSmalltalkParser::literalArray() { + // Matches Smalltalk literalArray → arrayBody → arrayElement uint32_t position = _token->position().start(); - step_(); std::vector elements; + + // Step past #( (or ( for nested arrays) + step(); while (_token && !_token->is(')') && !_token->isEnd()) { + // arrayElement if (_token->isLiteral()) { - auto* strTok = static_cast(_token.get()); - switch (strTok->literalKind()) { - case SStringToken::LitNumber: { - std::string v = _token->value().toUtf8(); - if (v.find('.') != std::string::npos) { - elements.push_back(LiteralValue::fromFloat(std::stod(v))); - } else { - elements.push_back(LiteralValue::fromInteger(std::stoll(v, nullptr, 0))); - } - break; - } - case SStringToken::LitCharacter: - elements.push_back(LiteralValue::fromCharacter(_token->value()[0])); - break; - case SStringToken::LitSymbol: - elements.push_back(LiteralValue::fromSymbol(_token->value())); - break; - case SStringToken::LitString: - default: - elements.push_back(LiteralValue::fromString(_token->value())); - break; - } + elements.push_back(parseLiteralValue()); } else if (_token->isName()) { - // 
pseudoLiteralValue: convert nil/true/false to actual values - Egg::string val = _token->value(); - if (val == "nil") { - elements.push_back(LiteralValue::nil()); - } else if (val == "true") { - elements.push_back(LiteralValue::fromBoolean(true)); - } else if (val == "false") { - elements.push_back(LiteralValue::fromBoolean(false)); - } else { - elements.push_back(LiteralValue::fromSymbol(val)); - } + elements.push_back(pseudoLiteralValue()); } else if (_token->isKeyword()) { - // literalKeyword: collect multi-part keyword symbol (e.g., at:put:) + // literalKeyword: collect multi-part keyword symbol Egg::string keyword = _token->value(); - step_(); + step(); while (_token && _token->isKeyword()) { keyword += _token->value(); - step_(); + step(); } elements.push_back(LiteralValue::fromSymbol(keyword)); continue; // already stepped past last keyword - } else if (_token->hasSymbol()) { - elements.push_back(LiteralValue::fromSymbol(_token->value())); } else if (_token->is('-')) { - // negative number in literal array - step_(); - if (_token && _token->isLiteral()) { - auto* strTok = static_cast(_token.get()); - if (strTok->literalKind() == SStringToken::LitNumber) { - std::string v = _token->value().toUtf8(); - if (v.find('.') != std::string::npos) { - elements.push_back(LiteralValue::fromFloat(-std::stod(v))); - } else { - elements.push_back(LiteralValue::fromInteger(-std::stoll(v, nullptr, 0))); - } - } else { - elements.push_back(LiteralValue::fromSymbol("-")); - continue; // don't step, re-process current token - } + LiteralValue neg = negativeNumberOrBinary(); + if (!neg.isNone()) { + elements.push_back(std::move(neg)); } else { elements.push_back(LiteralValue::fromSymbol("-")); - continue; // don't step, re-process current token } - } else if (_token->is('(')) { - // nested literal array (without #) - auto* nested = static_cast(literalArray_()); - elements.push_back(nested->literalValue()); - continue; - } else if (_token->is("#(")) { - auto* nested = 
static_cast(literalArray_()); + } else if (_token->hasSymbol()) { + elements.push_back(LiteralValue::fromSymbol(_token->value())); + } else if (_token->is('(') || _token->is("#(")) { + // nested literal array + auto* nested = static_cast(literalArray()); elements.push_back(nested->literalValue()); - continue; + continue; // literalArray already stepped past ) } else if (_token->is("#[")) { - auto* nested = static_cast(literalByteArray_()); + auto* nested = static_cast(literalByteArray()); elements.push_back(nested->literalValue()); - continue; + continue; // literalByteArray already stepped past ] } else { - error_("invalid literal entry"); + error_("invalid literal array element"); } - step_(); - } - if (!_token || !_token->is(')')) { - missingToken_(")"); + step(); } + + if (_token && _token->isEnd()) missingToken_(")"); + auto lit = new SLiteralNode(_compiler); lit->literalValue_(LiteralValue::fromArray(std::move(elements))); - lit->position_(Stretch(position, _token->position().end())); - step_(); + lit->position_(Stretch(position, _token ? 
_token->position().end() : position)); + step(); // past ) return lit; } -SParseNode* SSmalltalkParser::literalByteArray_() { +SParseNode* SSmalltalkParser::literalByteArray() { uint32_t position = _token->position().start(); - step_(); + step(); std::vector bytes; while (_token && !_token->is(']') && !_token->isEnd()) { if (_token->isLiteral()) { @@ -689,7 +710,7 @@ SParseNode* SSmalltalkParser::literalByteArray_() { int val = static_cast(std::stol(v, nullptr, 0)); bytes.push_back(static_cast(val)); } - step_(); + step(); } if (!_token || !_token->is(']')) { missingToken_("]"); @@ -697,35 +718,35 @@ SParseNode* SSmalltalkParser::literalByteArray_() { auto lit = new SLiteralNode(_compiler); lit->literalValue_(LiteralValue::fromByteArray(std::move(bytes))); lit->position_(Stretch(position, _token->position().end())); - step_(); + step(); return lit; } -SBraceNode* SSmalltalkParser::bracedArray_() { +SBraceNode* SSmalltalkParser::bracedArray() { uint32_t position = _token->position().start(); - step_(); + step(); SBraceNode* brace = new SBraceNode(_compiler); brace->position_(Stretch(position, _token->position().start())); while (_token && !_token->is('}') && !_token->isEnd()) { - SParseNode* expr = expression_(); + SParseNode* expr = expression(); if (expr) { brace->addElement_(expr); } if (_token && _token->is('.')) { - step_(); + step(); } } if (!_token || !_token->is('}')) { missingToken_("}"); } brace->position_(Stretch(position, _token->position().end())); - step_(); + step(); return brace; } void SSmalltalkParser::addPragmaTo_(SMethodNode* method) { if (attachPragmaTo_(method)) { - step_(); + step(); } } @@ -735,24 +756,24 @@ bool SSmalltalkParser::attachPragmaTo_(SMethodNode* method) { } uint32_t start = _token->position().start(); - step_(); + step(); - SPragmaNode* pragma = nullptr; + SPragmaNode* node = nullptr; if (_token && _token->isKeyword()) { Egg::string keyword = _token->value(); if (keyword == "primitive:") { - pragma = pragma_(); + node = 
pragma(); } else { - pragma = symbolicPragma_(); + node = symbolicPragma(); } } else { - pragma = symbolicPragma_(); + node = symbolicPragma(); } - if (pragma) { - pragma->position_(Stretch(start, _token->position().end())); - method->pragma_(pragma); + if (node) { + node->position_(Stretch(start, _token->position().end())); + method->pragma_(node); } if (!_token || !_token->is('>')) { @@ -762,24 +783,24 @@ bool SSmalltalkParser::attachPragmaTo_(SMethodNode* method) { return true; } -SPragmaNode* SSmalltalkParser::pragma_() { - step_(); +SPragmaNode* SSmalltalkParser::pragma() { + step(); if (!_token) { error_("missing pragma value"); } if (_token->isLiteral()) { - return numberedPrimitive_(); + return numberedPrimitive(); } else if (_token->isName()) { - return namedPrimitive_(); + return namedPrimitive(); } error_("invalid pragma format"); return nullptr; } -SPragmaNode* SSmalltalkParser::numberedPrimitive_() { +SPragmaNode* SSmalltalkParser::numberedPrimitive() { int number = 0; try { number = std::stoi(_token->value().toUtf8()); @@ -792,11 +813,11 @@ SPragmaNode* SSmalltalkParser::numberedPrimitive_() { pragma->bePrimitive_(number, ""); pragma->position_(Stretch(position, _token->position().end())); - step_(); + step(); return pragma; } -SPragmaNode* SSmalltalkParser::namedPrimitive_() { +SPragmaNode* SSmalltalkParser::namedPrimitive() { Egg::string name = _token->value(); uint32_t position = _token->position().start(); @@ -804,11 +825,11 @@ SPragmaNode* SSmalltalkParser::namedPrimitive_() { pragma->bePrimitive_(0, name); pragma->position_(Stretch(position, _token->position().end())); - step_(); + step(); return pragma; } -SPragmaNode* SSmalltalkParser::symbolicPragma_() { +SPragmaNode* SSmalltalkParser::symbolicPragma() { Egg::string symbol = _token->value(); uint32_t position = _token->position().start(); @@ -816,7 +837,7 @@ SPragmaNode* SSmalltalkParser::symbolicPragma_() { pragma->beSymbolic_(symbol); pragma->position_(Stretch(position, 
_token->position().end())); - step_(); + step(); return pragma; } diff --git a/runtime/cpp/Compiler/Parser/SSmalltalkParser.h b/runtime/cpp/Compiler/Parser/SSmalltalkParser.h index 7be81f7f..051b8ba0 100644 --- a/runtime/cpp/Compiler/Parser/SSmalltalkParser.h +++ b/runtime/cpp/Compiler/Parser/SSmalltalkParser.h @@ -11,6 +11,7 @@ #include #include "SToken.h" #include "../SSmalltalkCompiler.h" +#include "../LiteralValue.h" #include "../AST/SParseNode.h" #include "../AST/SIdentifierNode.h" #include "../AST/SLiteralNode.h" @@ -47,20 +48,20 @@ class SSmalltalkParser { SSmalltalkParser(SSmalltalkCompiler* compiler); ~SSmalltalkParser(); - SMethodNode* parseMethod_(); - SMethodNode* parseExpression_(); + SMethodNode* parseMethod(); + SMethodNode* parseExpression(); - SMethodNode* method_(); - SMethodNode* headlessMethod_(); - SMethodNode* methodSignature_(); - SMethodNode* unarySignature_(); - SMethodNode* binarySignature_(); - SMethodNode* keywordSignature_(); + SMethodNode* method(); + SMethodNode* headlessMethod(); + SMethodNode* methodSignature(); + SMethodNode* unarySignature(); + SMethodNode* binarySignature(); + SMethodNode* keywordSignature(); - SParseNode* expression_(); - SParseNode* primary_(); - SParseNode* statement_(); - std::vector statements_(); + SParseNode* expression(); + SParseNode* primary(); + SParseNode* statement(); + std::vector statements(); SParseNode* unarySequence_(SParseNode* receiver); SParseNode* binarySequence_(SParseNode* receiver); @@ -72,13 +73,13 @@ class SSmalltalkParser { void keywordMessage_(SMessageNode* message); void cascadeMessage_(SMessageNode* message); - SBlockNode* block_(); - std::vector blockArguments_(); + SBlockNode* block(); + std::vector blockArguments(); SReturnNode* return_(); - SAssignmentNode* assignment_(); + SAssignmentNode* assignment(); - std::vector temporaries_(); + std::vector temporaries(); void addBodyTo_(SMethodNode* method); void addTemporariesTo_(SMethodNode* method); @@ -86,30 +87,37 @@ class 
SSmalltalkParser { void addPragmaTo_(SMethodNode* method); bool attachPragmaTo_(SMethodNode* method); - SParseNode* literalArray_(); - SParseNode* literalByteArray_(); - SBraceNode* bracedArray_(); + SParseNode* literalArray(); + SParseNode* literalByteArray(); + SBraceNode* bracedArray(); - SPragmaNode* pragma_(); - SPragmaNode* numberedPrimitive_(); - SPragmaNode* namedPrimitive_(); - SPragmaNode* symbolicPragma_(); + LiteralValue parseLiteralValue(); + LiteralValue parseNumberString(const std::string& v); + LiteralValue parseFloatString(const std::string& v); + LiteralValue parseIntegerString(const std::string& v); + LiteralValue pseudoLiteralValue(); + LiteralValue negativeNumberOrBinary(); - SParseNode* parenthesizedExpression_(); - bool hasUnarySelector_() const; - bool hasBinarySelector_() const; - bool hasKeywordSelector_() const; + SPragmaNode* pragma(); + SPragmaNode* numberedPrimitive(); + SPragmaNode* namedPrimitive(); + SPragmaNode* symbolicPragma(); - SToken* step_(); - SToken* peek_(); - SToken* next_(); - void skipDots_(); + SParseNode* parenthesizedExpression(); + bool hasUnarySelector() const; + bool hasBinarySelector() const; + bool hasKeywordSelector() const; + + SToken* step(); + SToken* peek(); + SToken* next(); + void skipDots(); void error_(const std::string& message); void error_(const std::string& message, uint32_t position); void missingToken_(const std::string& expected); - void missingExpression_(); - void missingArgument_(); + void missingExpression(); + void missingArgument(); template T* buildNode_(uint32_t position) { diff --git a/runtime/cpp/Compiler/README.md b/runtime/cpp/Compiler/README.md new file mode 100644 index 00000000..70f70bc6 --- /dev/null +++ b/runtime/cpp/Compiler/README.md @@ -0,0 +1,81 @@ +# Egg Smalltalk Compiler — C++ Port + +A literal port of the Egg Smalltalk compiler from `modules/Compiler/` to C++, +enabling bootstrapping directly from C++, no other subsystem required. 
+ +## Status + +**All components ported and functional.** The compiler parses, analyzes, and +builds compiled methods for the full Egg Smalltalk language. + +- Kernel bootstrap: all methods compiled, no failures +- TinyBenchmarks: runs successfully +- All compiler and bootstrap tests pass + +See `../PORT_STATUS.md` for the full method-by-method porting table. +See `../TRANSPILATION_RULES.md` for the ST -> C++ translation rules. + +## Architecture + +``` +Source Code → Scanner → Tokens → Parser → AST → SemanticVisitor → SMethodNode::buildMethod() + ↓ + SCompiledMethod (literals, metadata) + ↓ + TreecodeEncoder (treecode bytecodes) +``` + +## Directory Structure + +``` +Compiler/ +├── *.h/cpp Pipeline entry points (SCompiler, SSmalltalkCompiler), +│ semantic analysis, message inlining, treecode encoding, +│ shared types (LiteralValue, Stretch, CompilationError, ...) +├── Parser/ Scanner, recursive-descent parser, token hierarchy and +│ character stream utilities +├── AST/ Parse-node class hierarchy and visitor interface +├── Binding/ Variable bindings (locals, args, fields, globals, pseudo-vars) +│ and the scope / environment chain used during semantic analysis +├── Backend/ Compiler output objects (SCompiledMethod / SCompiledBlock) +└── tests/ Catch2 unit tests for the scanner and parser +``` + +## Build + +See [`runtime/cpp/README.md`](../README.md) for build and test instructions. +The compiler is built into `egg_compiler` (a static library inside the `egg` +binary) and exercised by the `compiler_tests` CTest target under +[`tests/`](tests/). 
+ +## Usage + +```cpp +#include "Compiler/SSmalltalkCompiler.h" + +Egg::SSmalltalkCompiler compiler; + +// Compile a method (parse + semantic analysis + build) +auto result = compiler.compileMethod_( + "factorial\n" + " ^self > 1\n" + " ifTrue: [self * (self - 1) factorial]\n" + " ifFalse: [1]" +); +``` + +## Key Differences from Smalltalk + +| Aspect | Smalltalk | C++ | +|--------|-----------|-----| +| Memory | Garbage collected | `std::unique_ptr` / `new` with manual ownership | +| Collections | OrderedCollection, Dictionary | `std::vector`, `std::map` | +| Strings | Symbol, String | `Egg::string` (UTF-32 wrapper) | +| Literals | Polymorphic objects | `LiteralValue` tagged union | +| Comments | Scanner returns comment tokens | Scanner absorbs comments internally | +| Errors | Exception/signal system | `CompilationError` (std::runtime_error) | + +## Remaining Work + +1. **Error recovery** — no `protect:` equivalent for graceful failure +2. **FFI pragmas** — `` and `` not ported (not needed for bootstrap) diff --git a/runtime/cpp/Compiler/SSmalltalkCompiler.cpp b/runtime/cpp/Compiler/SSmalltalkCompiler.cpp index 8915a0e3..23c6fe81 100644 --- a/runtime/cpp/Compiler/SSmalltalkCompiler.cpp +++ b/runtime/cpp/Compiler/SSmalltalkCompiler.cpp @@ -190,7 +190,7 @@ void SSmalltalkCompiler::parseFragment() { reset(); scanner()->on_(_source); try { - _ast = parser()->parseMethod_(); + _ast = parser()->parseMethod(); } catch (...) 
{ } if (_result) { @@ -211,7 +211,7 @@ void SSmalltalkCompiler::parseMethod() { _headless = false; reset(); scanner()->on_(_source); - _ast = parser()->parseMethod_(); + _ast = parser()->parseMethod(); if (_result) _result->ast_(_ast); } diff --git a/runtime/cpp/Compiler/tests/CMakeLists.txt b/runtime/cpp/Compiler/tests/CMakeLists.txt new file mode 100644 index 00000000..8f9fe1fc --- /dev/null +++ b/runtime/cpp/Compiler/tests/CMakeLists.txt @@ -0,0 +1,36 @@ +# Compiler Tests CMake Configuration + +cmake_minimum_required(VERSION 3.10) +project(CompilerTests CXX) + +find_package(Catch2 REQUIRED) + +# Compiler test executable +add_executable(compiler_tests + test_main.cpp + ScannerTest.cpp + ParserTest.cpp + MessageInlinerTest.cpp +) + +target_link_libraries(compiler_tests PRIVATE + egg_compiler + bootstrapper_lib + egg_runtime + Catch2::Catch2 +) + +target_include_directories(compiler_tests PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/../.. +) + +target_link_libraries(compiler_tests PRIVATE Catch2::Catch2) + +# Enable testing +enable_testing() +add_test(NAME CompilerTests COMMAND compiler_tests) + +# Add individual test tags for selective running +add_test(NAME ScannerTests COMMAND compiler_tests "[scanner]") +add_test(NAME ParserTests COMMAND compiler_tests "[parser]") diff --git a/runtime/cpp/Compiler/tests/MessageInlinerTest.cpp b/runtime/cpp/Compiler/tests/MessageInlinerTest.cpp new file mode 100644 index 00000000..f1df705c --- /dev/null +++ b/runtime/cpp/Compiler/tests/MessageInlinerTest.cpp @@ -0,0 +1,75 @@ +/* + Copyright (c) 2026, Javier Pimás. + See (MIT) license in root directory. 
+ */ + +#include "catch2/catch.hpp" +#include "../SCompiler.h" +#include "../SSmalltalkCompiler.h" +#include "../MessageInliner.h" +#include "../Parser/SSmalltalkParser.h" +#include "../Parser/SSmalltalkScanner.h" +#include "../AST/SParseNode.h" +#include "../AST/SMethodNode.h" +#include "../AST/SMessageNode.h" +#include "../AST/SCascadeMessageNode.h" + +using namespace Egg; + +class SMessageInlinerTestFixture { +protected: + SSmalltalkCompiler compiler; + MessageInliner inliner; + + SMethodNode* parse(const std::string& source) { + compiler.scanner()->on_(source); + return compiler.parser()->parseMethod(); + } + + void runInlinerOver(SMethodNode* method) { + method->nodesDo_([this](SParseNode* n) { + if (n->isMessage()) { + inliner.inline_(static_cast(n)); + } + }); + } +}; + +TEST_CASE_METHOD(SMessageInlinerTestFixture, + "Inliner: cascade ifTrue: is not inlined", "[inliner]") { + // Regression: matches the original InternalReadStream>>peekFor: pattern + // ^self peek = token ifTrue: [position := position + 1]; yourself + SMethodNode* method = parse("foo ^true ifTrue: [42]; yourself"); + REQUIRE(method != nullptr); + + runInlinerOver(method); + + bool sawCascade = false; + method->nodesDo_([&](SParseNode* n) { + if (n->isMessage()) { + SMessageNode* m = static_cast(n); + if (m->isCascadeMessage()) { + sawCascade = true; + REQUIRE_FALSE(m->isInlined()); + } + } + }); + REQUIRE(sawCascade); +} + +TEST_CASE_METHOD(SMessageInlinerTestFixture, + "Inliner: non-cascade ifTrue: is still inlined", "[inliner]") { + SMethodNode* method = parse("foo ^true ifTrue: [42]"); + REQUIRE(method != nullptr); + + runInlinerOver(method); + + bool sawInlined = false; + method->nodesDo_([&](SParseNode* n) { + if (n->isMessage()) { + SMessageNode* m = static_cast(n); + if (m->isInlined()) sawInlined = true; + } + }); + REQUIRE(sawInlined); +} diff --git a/runtime/cpp/Compiler/tests/ParserTest.cpp b/runtime/cpp/Compiler/tests/ParserTest.cpp new file mode 100644 index 00000000..6a349d5f 
--- /dev/null
+++ b/runtime/cpp/Compiler/tests/ParserTest.cpp
@@ -0,0 +1,565 @@
+/*
+    Copyright (c) 2025-2026, Javier Pimás.
+    See (MIT) license in root directory.
+ */
+
+#include "catch2/catch.hpp"
+#include "../SCompiler.h"
+#include "../SSmalltalkCompiler.h"
+#include "../Parser/SSmalltalkParser.h"
+#include "../Parser/SSmalltalkScanner.h"
+#include "../AST/SParseNode.h"
+
+using namespace Egg;
+
+class SSmalltalkParserTestFixture { // shared state for the [parser] test cases
+protected:
+    SSmalltalkCompiler compiler;
+
+    void setUp() { // intentionally empty; kept so tests mirror the xUnit shape
+    }
+
+    void tearDown() { // intentionally empty, see setUp()
+    }
+
+    SMethodNode* parse(const std::string& source) { // parses `source` as a full method (signature + body)
+        compiler.scanner()->on_(source);
+        return compiler.parser()->parseMethod();
+    }
+
+    SMethodNode* parseExpression(const std::string& source) { // parses `source` as a bare expression; result is still a method node
+        compiler.scanner()->on_(source);
+        return compiler.parser()->parseExpression();
+    }
+};
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Unary method signature", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parse("unary ^true");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->isMethod());
+    REQUIRE(method->selector() != nullptr);
+    // Selector should be 'unary'
+    REQUIRE(method->arguments().empty());
+    REQUIRE(method->statements().size() == 1);
+    REQUIRE(method->statements()[0]->isReturn());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Binary method signature", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parse("+ arg ^self basicAdd: arg");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->isMethod());
+    REQUIRE(method->selector() != nullptr);
+    // Selector should be '+'
+    REQUIRE(method->arguments().size() == 1);
+    REQUIRE(method->arguments()[0]->name() == "arg");
+    REQUIRE(method->statements().size() == 1);
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Keyword method signature", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parse("at: index put: value ^self basicAt: index put: value");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->isMethod());
+    REQUIRE(method->selector() != nullptr);
+    // Selector should be 'at:put:'
+    REQUIRE(method->arguments().size() == 2);
+    REQUIRE(method->arguments()[0]->name() == "index");
+    REQUIRE(method->arguments()[1]->name() == "value");
+    REQUIRE(method->statements().size() == 1);
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Simple assignment", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("a := 3");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isAssignment());
+
+    SAssignmentNode* assignment = static_cast<SAssignmentNode*>(stmt); // safe: isAssignment() was just required
+    REQUIRE(assignment->assignees().size() == 1);
+    REQUIRE(assignment->assignees()[0]->name() == "a");
+    REQUIRE(assignment->expression() != nullptr);
+    REQUIRE(assignment->expression()->isLiteral());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Unary message", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("obj message");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isMessage());
+
+    SMessageNode* msg = static_cast<SMessageNode*>(stmt);
+    REQUIRE(msg->receiver() != nullptr);
+    REQUIRE(msg->receiver()->isIdentifier());
+    REQUIRE(msg->selector() != nullptr);
+    REQUIRE(msg->arguments().empty());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Binary message", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("3 + 4");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isMessage());
+
+    SMessageNode* msg = static_cast<SMessageNode*>(stmt);
+    REQUIRE(msg->receiver() != nullptr);
+    REQUIRE(msg->receiver()->isLiteral());
+    REQUIRE(msg->selector() != nullptr);
+    REQUIRE(msg->arguments().size() == 1);
+    REQUIRE(msg->arguments()[0]->isLiteral());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Keyword message", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("dict at: key put: value");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isMessage());
+
+    SMessageNode* msg = static_cast<SMessageNode*>(stmt);
+    REQUIRE(msg->receiver() != nullptr);
+    REQUIRE(msg->receiver()->isIdentifier());
+    REQUIRE(msg->selector() != nullptr);
+    REQUIRE(msg->arguments().size() == 2);
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Message precedence", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("a unary + b keyword: c"); // expected grouping: ((a unary) + b) keyword: c
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isMessage());
+
+    // Top level should be keyword message
+    SMessageNode* keyword = static_cast<SMessageNode*>(stmt);
+    REQUIRE(keyword->arguments().size() == 1);
+
+    // Receiver should be binary message
+    REQUIRE(keyword->receiver()->isMessage());
+    SMessageNode* binary = static_cast<SMessageNode*>(keyword->receiver());
+
+    // Binary receiver should be unary message
+    REQUIRE(binary->receiver()->isMessage());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Simple block", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("[123]");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isBlock());
+
+    SBlockNode* block = static_cast<SBlockNode*>(stmt); // safe: isBlock() was just required
+    REQUIRE(block->arguments().empty());
+    REQUIRE(block->temporaries().empty());
+    REQUIRE(block->statements().size() == 1);
+    REQUIRE(block->statements()[0]->isLiteral());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Block with arguments", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("[:a :b | a + b]");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isBlock());
+
+    SBlockNode* block = static_cast<SBlockNode*>(stmt);
+    REQUIRE(block->arguments().size() == 2);
+    REQUIRE(block->arguments()[0]->name() == "a");
+    REQUIRE(block->arguments()[1]->name() == "b");
+    REQUIRE(block->statements().size() == 1);
+    REQUIRE(block->statements()[0]->isMessage());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Block with temporaries", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("[:i | | a b | a := i. b := i. a + b]");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isBlock());
+
+    SBlockNode* block = static_cast<SBlockNode*>(stmt);
+    REQUIRE(block->arguments().size() == 1);
+    REQUIRE(block->temporaries().size() == 2);
+    REQUIRE(block->temporaries()[0]->name() == "a");
+    REQUIRE(block->temporaries()[1]->name() == "b");
+    REQUIRE(block->statements().size() == 3);
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Return statement", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parse("m ^42");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isReturn());
+
+    SReturnNode* ret = static_cast<SReturnNode*>(stmt); // safe: isReturn() was just required
+    REQUIRE(ret->expression() != nullptr);
+    REQUIRE(ret->expression()->isLiteral());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Cascade", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("obj msg1; msg2");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isCascade());
+
+    SCascadeNode* cascade = static_cast<SCascadeNode*>(stmt); // safe: isCascade() was just required
+    REQUIRE(cascade->receiver() != nullptr);
+    REQUIRE(cascade->receiver()->isIdentifier());
+    REQUIRE(cascade->messages().size() == 2);
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Cascade with different message types", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("obj unary; + 2; keyword: 3");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isCascade());
+
+    SCascadeNode* cascade = static_cast<SCascadeNode*>(stmt);
+    REQUIRE(cascade->messages().size() == 3);
+
+    // First message is unary
+    REQUIRE(cascade->messages()[0]->arguments().empty());
+
+    // Second message is binary
+    REQUIRE(cascade->messages()[1]->arguments().size() == 1);
+
+    // Third message is keyword
+    REQUIRE(cascade->messages()[2]->arguments().size() == 1);
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Method with temporaries", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parse("m | a b c | a := 1. b := 2. c := 3. ^a + b + c");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->temporaries().size() == 3);
+    REQUIRE(method->temporaries()[0]->name() == "a");
+    REQUIRE(method->temporaries()[1]->name() == "b");
+    REQUIRE(method->temporaries()[2]->name() == "c");
+    REQUIRE(method->statements().size() == 4);
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Empty temporaries", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parse("m || ^42"); // "||" must be read as an empty temporaries declaration
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->temporaries().empty());
+    REQUIRE(method->statements().size() == 1);
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Parenthesized expression", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("(3 + 4) * 5");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isMessage());
+
+    SMessageNode* mult = static_cast<SMessageNode*>(stmt);
+    // Receiver should be the addition (3 + 4)
+    REQUIRE(mult->receiver()->isMessage());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Multiple statements", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parse("m a := 1. b := 2. c := 3");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 3);
+    REQUIRE(method->statements()[0]->isAssignment());
+    REQUIRE(method->statements()[1]->isAssignment());
+    REQUIRE(method->statements()[2]->isAssignment());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Identifier", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("variable");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isIdentifier());
+
+    SIdentifierNode* id = static_cast<SIdentifierNode*>(stmt); // safe: isIdentifier() was just required
+    REQUIRE(id->name() == "variable");
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Literal number", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("42");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isLiteral());
+
+    SLiteralNode* lit = static_cast<SLiteralNode*>(stmt); // safe: isLiteral() was just required
+    REQUIRE(lit->value() == "42");
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Literal string", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("'hello world'");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isLiteral());
+
+    SLiteralNode* lit = static_cast<SLiteralNode*>(stmt);
+    REQUIRE(lit->value() == "hello world");
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Complex expression", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("dict at: (index + 1) put: (value * 2)");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isMessage());
+
+    SMessageNode* msg = static_cast<SMessageNode*>(stmt);
+    REQUIRE(msg->arguments().size() == 2);
+
+    // Both arguments should be binary messages (from parentheses)
+    REQUIRE(msg->arguments()[0]->isMessage());
+    REQUIRE(msg->arguments()[1]->isMessage());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Binary power operator", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("2 ^ 3"); // '^' in operand position must parse as a binary selector, not a return
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isMessage());
+
+    SMessageNode* msg = static_cast<SMessageNode*>(stmt);
+    REQUIRE(msg->receiver()->isLiteral());
+    REQUIRE(msg->arguments().size() == 1);
+    REQUIRE(msg->arguments()[0]->isLiteral());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Binary colon operator", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("3 : 4"); // a lone ':' between operands must parse as a binary selector
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isMessage());
+
+    SMessageNode* msg = static_cast<SMessageNode*>(stmt);
+    REQUIRE(msg->receiver()->isLiteral());
+    REQUIRE(msg->arguments().size() == 1);
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Chained unary messages", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("obj first second third");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isMessage());
+
+    // Should be nested messages: ((obj first) second) third
+    SMessageNode* third = static_cast<SMessageNode*>(stmt);
+    REQUIRE(third->receiver()->isMessage());
+
+    SMessageNode* second = static_cast<SMessageNode*>(third->receiver());
+    REQUIRE(second->receiver()->isMessage());
+
+    SMessageNode* first = static_cast<SMessageNode*>(second->receiver());
+    REQUIRE(first->receiver()->isIdentifier());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Chained binary messages", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("1 + 2 + 3 + 4");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isMessage());
+
+    // Should be nested: ((1 + 2) + 3) + 4
+    SMessageNode* plus4 = static_cast<SMessageNode*>(stmt);
+    REQUIRE(plus4->receiver()->isMessage());
+
+    SMessageNode* plus3 = static_cast<SMessageNode*>(plus4->receiver());
+    REQUIRE(plus3->receiver()->isMessage());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Empty block", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("[]");
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isBlock());
+
+    SBlockNode* block = static_cast<SBlockNode*>(stmt); // safe: isBlock() was just required
+    REQUIRE(block->arguments().empty());
+    REQUIRE(block->temporaries().empty());
+    REQUIRE(block->statements().empty());
+
+    tearDown();
+}
+
+TEST_CASE_METHOD(SSmalltalkParserTestFixture, "Parser: Empty block temporaries", "[parser]") {
+    setUp();
+
+    SMethodNode* method = parseExpression("[:i || i + 1]"); // "||" after the argument list declares zero temporaries
+
+    REQUIRE(method != nullptr);
+    REQUIRE(method->statements().size() == 1);
+
+    SParseNode* stmt = method->statements()[0];
+    REQUIRE(stmt->isBlock());
+
+    SBlockNode* block = static_cast<SBlockNode*>(stmt);
+    REQUIRE(block->arguments().size() == 1);
+    REQUIRE(block->temporaries().empty());
+    REQUIRE(block->statements().size() == 1);
+
+    tearDown();
+}
diff --git a/runtime/cpp/Compiler/tests/README.md b/runtime/cpp/Compiler/tests/README.md
new file mode 100644
index 00000000..e48a3da4
--- /dev/null
+++ b/runtime/cpp/Compiler/tests/README.md
@@ -0,0 +1,26 @@
+# Compiler Tests
+
+Catch2-based unit tests for the Egg Smalltalk compiler.
+
+See [`runtime/cpp/README.md`](../../README.md) for build & test instructions.
+This suite is registered with CTest as `compiler_tests` (with `[scanner]`
+and `[parser]` Catch2 tags also exposed as `ScannerTests` and `ParserTests`).
+
+## Test files
+
+- `ScannerTest.cpp` — lexical analyzer tests (ported from
+  `runtime/pharo/Powerlang-SCompiler-Tests/SmalltalkScannerTest.class.st`)
+- `ParserTest.cpp` — parser tests (ported from
+  `runtime/pharo/Powerlang-SCompiler-Tests/SmalltalkParserTest.class.st`)
+- `test_main.cpp` — Catch2 entry point
+
+## Adding a test
+
+```cpp
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Description", "[scanner]") {
+    setUp();
+    scan("source code");
+    auto token = next();
+    REQUIRE(token->isName());
+}
+```
diff --git a/runtime/cpp/Compiler/tests/ScannerTest.cpp b/runtime/cpp/Compiler/tests/ScannerTest.cpp
new file mode 100644
index 00000000..517dae6f
--- /dev/null
+++ b/runtime/cpp/Compiler/tests/ScannerTest.cpp
@@ -0,0 +1,429 @@
+/*
+    Copyright (c) 2025-2026, Javier Pimás.
+    See (MIT) license in root directory.
+ */
+
+#include "catch2/catch.hpp"
+#include "../Parser/SSmalltalkScanner.h"
+#include "../Parser/SToken.h"
+#include "../SCompiler.h"
+#include "../SSmalltalkCompiler.h"
+#include <memory>
+
+using namespace Egg;
+
+// Helper class for scanner tests. NOTE(review): the SToken element type of next() is inferred from the SToken.h include - confirm.
+class SSmalltalkScannerTestFixture {
+protected:
+    std::unique_ptr<SSmalltalkCompiler> compiler;
+    std::unique_ptr<SSmalltalkScanner> scanner;
+
+    void setUp() { // must run before scan(): both pointers are null until then
+        compiler = std::make_unique<SSmalltalkCompiler>();
+        scanner = std::make_unique<SSmalltalkScanner>(compiler.get());
+    }
+
+    void scan(const std::string& source) { // hands `source` to the scanner
+        scanner->sourceCode_(source);
+    }
+
+    std::unique_ptr<SToken> next() { // returns the scanner's next token
+        return scanner->nextToken();
+    }
+};
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Empty string", "[scanner]") {
+    setUp();
+    scan("");
+    auto token = next();
+    REQUIRE(token->isEnd());
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Simple identifier", "[scanner]") {
+    setUp();
+    scan("a");
+    auto token = next();
+    REQUIRE(token->isName());
+    REQUIRE(token->value() == "a");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Multiple identifiers", "[scanner]") {
+    setUp();
+    scan(" a1");
+    auto token = next();
+    REQUIRE(token->isName());
+    REQUIRE(token->value() == "a1");
+
+    setUp();
+    scan("_a");
+    token = next();
+    REQUIRE(token->isName());
+    REQUIRE(token->value() == "_a");
+
+    setUp();
+    scan("a_1b");
+    token = next();
+    REQUIRE(token->isName());
+    REQUIRE(token->value() == "a_1b");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Keywords", "[scanner]") {
+    setUp();
+    scan("a:");
+    auto token = next();
+    REQUIRE(token->isKeyword());
+    REQUIRE(token->value() == "a:");
+
+    setUp();
+    scan("ab:cd:");
+    auto token1 = next();
+    auto token2 = next();
+    REQUIRE(token1->isKeyword());
+    REQUIRE(token1->value() == "ab:");
+    REQUIRE(token2->isKeyword());
+    REQUIRE(token2->value() == "cd:");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Integer numbers", "[scanner]") {
+    setUp();
+    scan("0 12");
+    auto token = next();
+    REQUIRE(token->value() == "0");
+
+    token = next();
+    REQUIRE(token->value() == "12");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Negative numbers with binary", "[scanner]") {
+    setUp();
+    scan("-35");
+    auto token = next();
+    REQUIRE(token->isSymbolic());
+    REQUIRE(token->value() == "-");
+
+    token = next();
+    REQUIRE(token->value() == "35");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: String literals", "[scanner]") {
+    setUp();
+    scan("''");
+    auto token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->isString());
+    REQUIRE(token->value() == "");
+
+    setUp();
+    scan("'Hello World!'");
+    token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->value() == "Hello World!");
+
+    setUp();
+    scan("''''");  // Single quote escaped
+    token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->value() == "'");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Character literals", "[scanner]") {
+    setUp();
+    scan("$a$b");
+    auto token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->value() == "a");
+
+    token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->value() == "b");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Binary selectors", "[scanner]") {
+    setUp();
+    scan("-\n--\n---\n==>");
+
+    auto token = next();
+    REQUIRE(token->isSymbolic());
+    REQUIRE(token->value() == "-");
+
+    token = next();
+    REQUIRE(token->isSymbolic());
+    REQUIRE(token->value() == "--");
+
+    token = next();
+    REQUIRE(token->isSymbolic());
+    REQUIRE(token->value() == "---");
+
+    token = next();
+    REQUIRE(token->isSymbolic());
+    REQUIRE(token->value() == "==>");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Complex binary selector", "[scanner]") {
+    setUp();
+    scan("~!|\\/%&*+=><");
+    auto token = next();
+    REQUIRE(token->isSymbolic());
+    REQUIRE(token->value() == "~!|\\/%&*+=><");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Symbols", "[scanner]") {
+    setUp();
+    scan("#-");
+    auto token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->value() == "-");
+
+    setUp();
+    scan("#a:");
+    token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->value() == "a:");
+
+    setUp();
+    scan("#-!");
+    token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->value() == "-!");
+
+    setUp();
+    scan("#a:b:");
+    token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->value() == "a:b:");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Symbol followed by identifier", "[scanner]") {
+    setUp();
+    scan("#a:b");
+    auto token1 = next();
+    REQUIRE(token1->isLiteral());
+    REQUIRE(token1->value() == "a:");
+
+    auto token2 = next();
+    REQUIRE(token2->value() == "b");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Binary symbols", "[scanner]") {
+    setUp();
+    scan("#=");
+    auto token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->value() == "=");
+
+    setUp();
+    scan("#++");
+    token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->value() == "++");
+
+    setUp();
+    scan("#//");
+    token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->value() == "//");
+
+    setUp();
+    scan("#--");
+    token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->value() == "--");
+
+    setUp();
+    scan("#+-");
+    token = next();
+    REQUIRE(token->isLiteral());
+    REQUIRE(token->value() == "+-");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Array prefixes", "[scanner]") {
+    setUp();
+    scan("#()");
+    auto token = next();
+    REQUIRE(token->is_("#("));
+    REQUIRE(token->isDelimiter());
+
+    setUp();
+    scan("#[");
+    token = next();
+    REQUIRE(token->is_("#["));
+    REQUIRE(token->isDelimiter());
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Quoted symbol", "[scanner]") {
+    setUp();
+    scan("#'hello'");
+    auto token = next();
+    REQUIRE(token->value() == "hello");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Colon", "[scanner]") {
+    setUp();
+    scan(":a");
+    auto token = next();
+    REQUIRE(token->is_(':'));
+    REQUIRE(token->isDelimiter());
+
+    token = next();
+    REQUIRE(token->value() == "a");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Assignment operator", "[scanner]") {
+    setUp();
+    scan(":=");
+    auto token = next();
+    REQUIRE(token->isDelimiter());
+    REQUIRE(token->value() == ":=");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Double colon", "[scanner]") {
+    setUp();
+    scan("::");
+    auto token = next();
+    REQUIRE(token->isSymbolic());
+    REQUIRE(token->value() == "::");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Comments are consumed", "[scanner]") {
+    setUp();
+    scan("\"comment\"a");
+    auto token = next();
+    // Comment should be skipped, next token should be 'a'
+    REQUIRE(token->value() == "a");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Binary colon for message", "[scanner]") {
+    setUp();
+    scan("3:4");
+    auto token1 = next();
+    REQUIRE(token1->value() == "3");
+
+    auto token2 = next();
+    REQUIRE(token2->value() == ":");
+
+    auto token3 = next();
+    REQUIRE(token3->value() == "4");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Return operator", "[scanner]") {
+    setUp();
+    scan("^");
+    auto token = next();
+    REQUIRE(token->is_('^'));
+    REQUIRE(token->isDelimiter());
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Binary power operator", "[scanner]") {
+    setUp();
+    scan("2^3");
+    auto token1 = next();
+    REQUIRE(token1->value() == "2");
+
+    auto token2 = next();
+    REQUIRE(token2->value() == "^");
+
+    auto token3 = next();
+    REQUIRE(token3->value() == "3");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Parentheses and brackets", "[scanner]") {
+    setUp();
+    scan("()[]{}");
+
+    auto token = next();
+    REQUIRE(token->is_('('));
+    REQUIRE(token->isDelimiter());
+
+    token = next();
+    REQUIRE(token->is_(')'));
+
+    token = next();
+    REQUIRE(token->is_('['));
+
+    token = next();
+    REQUIRE(token->is_(']'));
+
+    token = next();
+    REQUIRE(token->is_('{'));
+
+    token = next();
+    REQUIRE(token->is_('}'));
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Period separator", "[scanner]") {
+    setUp();
+    scan("a.b");
+
+    auto token = next();
+    REQUIRE(token->value() == "a");
+
+    token = next();
+    REQUIRE(token->is_('.'));
+
+    token = next();
+    REQUIRE(token->value() == "b");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Semicolon cascade", "[scanner]") {
+    setUp();
+    scan("a;b");
+
+    auto token = next();
+    REQUIRE(token->value() == "a");
+
+    token = next();
+    REQUIRE(token->is_(';'));
+
+    token = next();
+    REQUIRE(token->value() == "b");
+}
+
+TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Assignment variants", "[scanner]") {
+    setUp();
+    scan("a := 1");
+
+    auto token = next();
+    REQUIRE(token->value() == "a");
+
+    token = next();
+    REQUIRE(token->value() == ":=");
+    REQUIRE(token->isDelimiter());
+
+    token = next();
+    REQUIRE(token->value() == "1");
+
+    // Test underscore
assignment + setUp(); + scan("a _ 1"); + + token = next(); + REQUIRE(token->value() == "a"); + + token = next(); + REQUIRE(token->value() == ":="); + + token = next(); + REQUIRE(token->value() == "1"); +} + +TEST_CASE_METHOD(SSmalltalkScannerTestFixture, "Scanner: Complex expression", "[scanner]") { + setUp(); + scan("self factorial: n - 1"); + + auto token = next(); + REQUIRE(token->value() == "self"); + + token = next(); + REQUIRE(token->value() == "factorial:"); + REQUIRE(token->isKeyword()); + + token = next(); + REQUIRE(token->value() == "n"); + + token = next(); + REQUIRE(token->value() == "-"); + REQUIRE(token->isSymbolic()); + + token = next(); + REQUIRE(token->value() == "1"); +} diff --git a/runtime/cpp/Compiler/tests/test_main.cpp b/runtime/cpp/Compiler/tests/test_main.cpp new file mode 100644 index 00000000..39d5553a --- /dev/null +++ b/runtime/cpp/Compiler/tests/test_main.cpp @@ -0,0 +1,10 @@ +/* + Copyright (c) 2025, Javier Pimás. + See (MIT) license in root directory. + */ + +#define CATCH_CONFIG_MAIN +#include "catch2/catch.hpp" + +// This file contains the main() function for Catch2 tests +// Individual test files will include catch2/catch.hpp without this define diff --git a/runtime/cpp/Evaluator/Evaluator.cpp b/runtime/cpp/Evaluator/Evaluator.cpp index b7016f46..dbee7d13 100644 --- a/runtime/cpp/Evaluator/Evaluator.cpp +++ b/runtime/cpp/Evaluator/Evaluator.cpp @@ -26,9 +26,13 @@ #include #include #include +#include #include +#include #include +#include "Compat.h" + using namespace Egg; // as libffi cannot directly call C++ lambdas, here is a plain C wrapper @@ -182,6 +186,12 @@ void Evaluator::initializePrimitives() this->addPrimitive("HostCurrentMilliseconds", &Evaluator::primitiveHostCurrentMilliseconds); this->addPrimitive("HostLog", &Evaluator::primitiveHostLog); this->addPrimitive("HostReadFile", &Evaluator::primitiveHostReadFile); + this->addPrimitive("HostWriteFile", &Evaluator::primitiveHostWriteFile); + 
this->addPrimitive("HostCreateDirectory", &Evaluator::primitiveHostCreateDirectory); + this->addPrimitive("HostPathExists", &Evaluator::primitiveHostPathExists); + this->addPrimitive("HostCurrentDirectory", &Evaluator::primitiveHostCurrentDirectory); + this->addPrimitive("HostGetEnv", &Evaluator::primitiveHostGetEnv); + this->addPrimitive("HostLoadModuleFromPath", &Evaluator::primitiveHostLoadModuleFromPath); //this->addPrimitive("PrepareForExecution", &Evaluator::primitivePrepareForExecution); @@ -810,11 +820,12 @@ Object* Evaluator::primitiveFloatSqrt() { Object* Evaluator::primitiveFloatTimesTwoPower() { auto arg = this->_context->firstArgument(); - if (_runtime->speciesOf_(arg) != _runtime->_floatClass) + if (!arg->isSmallInteger()) return this->failPrimitive(); auto self = this->_context->self(); - return this->boolObject(*(double*)self == *(double*)arg); + int exp = (int)arg->asSmallInteger()->asNative(); + return this->newDoubleObject(std::ldexp(*(double*)self, exp)); } Object* Evaluator::primitiveFloatTruncated() { @@ -906,6 +917,58 @@ Object* Evaluator::primitiveHostLoadModule() { return module; } +Object* Evaluator::primitiveHostWriteFile() { + auto filename = this->_context->firstArgument()->asHeapObject()->asLocalString(); + auto contents = this->_context->secondArgument()->asHeapObject()->asLocalString(); + std::ofstream file(filename, std::ios::binary); + if (!file) + return this->failPrimitive(); + file.write(contents.data(), contents.size()); + return (Object*)this->_context->self(); +} + +Object* Evaluator::primitiveHostCreateDirectory() { + namespace fs = std::filesystem; + auto path = this->_context->firstArgument()->asHeapObject()->asLocalString(); + std::error_code ec; + fs::create_directories(path, ec); + return (Object*)this->_runtime->booleanFor_(!ec); +} + +Object* Evaluator::primitiveHostPathExists() { + namespace fs = std::filesystem; + auto path = this->_context->firstArgument()->asHeapObject()->asLocalString(); + std::error_code ec; + 
bool exists = fs::exists(path, ec); + return (Object*)this->_runtime->booleanFor_(exists && !ec); +} + +Object* Evaluator::primitiveHostCurrentDirectory() { + namespace fs = std::filesystem; + std::error_code ec; + auto cwd = fs::current_path(ec); + if (ec) + return this->failPrimitive(); + return (Object*)this->_runtime->newString_(cwd.string()); +} + +Object* Evaluator::primitiveHostGetEnv() { + auto name = this->_context->firstArgument()->asHeapObject()->asLocalString(); + const char *value = std::getenv(name.c_str()); + if (value == nullptr) + return (Object*)this->_runtime->_nilObj; + return (Object*)this->_runtime->newString_(std::string(value)); +} + +Object* Evaluator::primitiveHostLoadModuleFromPath() { + auto guard = this->_runtime->_heap->atGCUnsafepoint(); + auto path = this->_context->firstArgument()->asHeapObject()->asLocalString(); + std::cout << "loading from " << path << "..." << std::endl; + auto module = (Object*)this->_runtime->loadModuleFromPath_(path); + std::cout << " done loading " << path << std::endl; + return module; +} + Object* Evaluator::primitiveNew() { auto guard = this->_runtime->_heap->atGCSafepoint(); return (Object*)this->_runtime->newSlotsOf_(this->_context->self()->asHeapObject()); @@ -963,11 +1026,15 @@ Object* Evaluator::primitivePrimeFor_(auto anInteger) { } Object* Evaluator::primitiveSMIBitAnd() { - return newIntObject((this->_context->self()->asSmallInteger()->asNative() & this->_context->firstArgument()->asSmallInteger()->asNative())); + auto arg = this->_context->firstArgument(); + if (!arg->isSmallInteger()) return failPrimitive(); + return newIntObject((this->_context->self()->asSmallInteger()->asNative() & arg->asSmallInteger()->asNative())); } Object* Evaluator::primitiveSMIBitOr() { - return newIntObject((this->_context->self()->asSmallInteger()->asNative() | this->_context->firstArgument()->asSmallInteger()->asNative())); + auto arg = this->_context->firstArgument(); + if (!arg->isSmallInteger()) return 
failPrimitive(); + return newIntObject((this->_context->self()->asSmallInteger()->asNative() | arg->asSmallInteger()->asNative())); } Object* Evaluator::primitiveSMIBitShift() { @@ -978,11 +1045,15 @@ Object* Evaluator::primitiveSMIBitShift() { return failPrimitive(); return newIntObject(self << firstArg); } - return newIntObject(self >> -firstArg); + auto rshift = -firstArg; + if (rshift >= 63) return newIntObject(self < 0 ? -1 : 0); + return newIntObject(self >> rshift); } Object* Evaluator::primitiveSMIBitXor() { - return newIntObject(this->_context->self()->asSmallInteger()->asNative() ^ (this->_context->firstArgument()->asSmallInteger()->asNative())); + auto arg = this->_context->firstArgument(); + if (!arg->isSmallInteger()) return failPrimitive(); + return newIntObject(this->_context->self()->asSmallInteger()->asNative() ^ arg->asSmallInteger()->asNative()); } Object* Evaluator::primitiveSMIEqual() { @@ -1036,7 +1107,11 @@ Object* Evaluator::primitiveSMIIntQuot() { } Object* Evaluator::primitiveSMIMinus() { - return newIntObject((this->_context->self()->asSmallInteger()->asNative() - this->_context->firstArgument()->asSmallInteger()->asNative())); + auto arg = this->_context->firstArgument(); + if (!arg->isSmallInteger()) return this->failPrimitive(); + auto result = this->_context->self()->asSmallInteger()->asNative() - arg->asSmallInteger()->asNative(); + if (result < SmallInteger::SMALLINT_MIN || result > SmallInteger::SMALLINT_MAX) return this->failPrimitive(); + return newIntObject(result); } Object* Evaluator::primitiveSMINotEqual() { @@ -1048,9 +1123,10 @@ Object* Evaluator::primitiveSMINotEqual() { Object* Evaluator::primitiveSMIPlus() { auto arg = this->_context->firstArgument(); - return arg->isSmallInteger() ? 
- newIntObject((this->_context->self()->asSmallInteger()->asNative() + this->_context->firstArgument()->asSmallInteger()->asNative())) : - this->failPrimitive(); + if (!arg->isSmallInteger()) return this->failPrimitive(); + auto result = this->_context->self()->asSmallInteger()->asNative() + arg->asSmallInteger()->asNative(); + if (result < SmallInteger::SMALLINT_MIN || result > SmallInteger::SMALLINT_MAX) return this->failPrimitive(); + return newIntObject(result); } Object* Evaluator::primitiveSMISize() { @@ -1063,9 +1139,13 @@ Object* Evaluator::primitiveSMISize() { Object* Evaluator::primitiveSMITimes() { auto arg = this->_context->firstArgument(); - return arg->isSmallInteger() ? - newIntObject((this->_context->self()->asSmallInteger()->asNative() * arg->asSmallInteger()->asNative())) : - this->failPrimitive(); + if (!arg->isSmallInteger()) return this->failPrimitive(); + intptr_t a = this->_context->self()->asSmallInteger()->asNative(); + intptr_t b = arg->asSmallInteger()->asNative(); + intptr_t result; + if (mul_overflow_iptr(a, b, &result) || result < SmallInteger::SMALLINT_MIN || result > SmallInteger::SMALLINT_MAX) + return this->failPrimitive(); + return newIntObject(result); } Object* Evaluator::primitiveSetBehavior() { @@ -1100,13 +1180,17 @@ Object* Evaluator::primitiveStringReplaceFromToWithStartingAt() { auto toint = to->asSmallInteger()->asNative(); auto startingint = starting->asSmallInteger()->asNative(); - if (toint > receiver->size()) + // Empty replacement (from > to) is a no-op. 
+ if (fromint > toint) + return (Object*)receiver; + + if (fromint < 1 || toint > (intptr_t)receiver->size()) return this->failPrimitive(); - auto len = to - from + 1; + auto len = toint - fromint + 1; auto last = startingint + len - 1; auto hsource = source->asHeapObject(); - if (last > hsource->size()) + if (startingint < 1 || last > (intptr_t)hsource->size()) return this->failPrimitive(); receiver->replaceBytesFrom_to_with_startingAt_( @@ -1393,7 +1477,10 @@ Object* Evaluator::underprimitiveSMIBitShiftLeft(Object *receiver, std::vector &args) { - return newIntObject((receiver->asSmallInteger()->asNative() >> args[0]->asSmallInteger()->asNative())); + auto value = receiver->asSmallInteger()->asNative(); + auto shift = args[0]->asSmallInteger()->asNative(); + if (shift >= 63) return newIntObject(value < 0 ? -1 : 0); + return newIntObject(value >> shift); } Object* Evaluator::underprimitiveSMIEquals(Object *receiver, std::vector &args) { @@ -1417,11 +1504,17 @@ Object* Evaluator::underprimitiveSMILowerThan(Object *receiver, std::vector &args) { - return newIntObject((receiver->asSmallInteger()->asNative() - args[0]->asSmallInteger()->asNative())); + auto result = receiver->asSmallInteger()->asNative() - args[0]->asSmallInteger()->asNative(); + if (result < SmallInteger::SMALLINT_MIN || result > SmallInteger::SMALLINT_MAX) + return (Object*)_nilObj; + return newIntObject(result); } Object* Evaluator::underprimitiveSMIPlus(Object *receiver, std::vector &args) { - return newIntObject((receiver->asSmallInteger()->asNative() + args[0]->asSmallInteger()->asNative())); + auto result = receiver->asSmallInteger()->asNative() + args[0]->asSmallInteger()->asNative(); + if (result < SmallInteger::SMALLINT_MIN || result > SmallInteger::SMALLINT_MAX) + return (Object*)_nilObj; + return newIntObject(result); } Object* Evaluator::underprimitiveSMIQuotientTowardZero(Object *receiver, std::vector &args) { @@ -1433,7 +1526,12 @@ Object* 
Evaluator::underprimitiveSMIRemainderTowardZero(Object *receiver, std::v } Object* Evaluator::underprimitiveSMITimes(Object *receiver, std::vector &args) { - return newIntObject((receiver->asSmallInteger()->asNative() * args[0]->asSmallInteger()->asNative())); + intptr_t a = receiver->asSmallInteger()->asNative(); + intptr_t b = args[0]->asSmallInteger()->asNative(); + intptr_t result; + if (mul_overflow_iptr(a, b, &result) || result < SmallInteger::SMALLINT_MIN || result > SmallInteger::SMALLINT_MAX) + return (Object*)_nilObj; + return newIntObject(result); } Object* Evaluator::underprimitiveSmallIntegerByteAt(Object *receiver, std::vector &args) { diff --git a/runtime/cpp/Evaluator/Evaluator.h b/runtime/cpp/Evaluator/Evaluator.h index bf28b90d..6b483236 100644 --- a/runtime/cpp/Evaluator/Evaluator.h +++ b/runtime/cpp/Evaluator/Evaluator.h @@ -221,10 +221,16 @@ class Evaluator : public SExpressionVisitor { Object* primitiveHostCurrentMilliseconds(); Object* primitiveHostFixOverrides(); Object* primitiveHostInitializeFFI(); + Object* primitiveHostCreateDirectory(); + Object* primitiveHostCurrentDirectory(); + Object* primitiveHostGetEnv(); Object* primitiveHostLoadModule(); + Object* primitiveHostLoadModuleFromPath(); Object* primitiveHostLog(); + Object* primitiveHostPathExists(); Object* primitiveHostPlatformName(); Object* primitiveHostReadFile(); + Object* primitiveHostWriteFile(); Object* primitiveNew(); Object* primitiveNewBytes(); Object* primitiveNewObjectHeap(); diff --git a/runtime/cpp/Evaluator/Runtime.cpp b/runtime/cpp/Evaluator/Runtime.cpp index a4be2095..634909a4 100644 --- a/runtime/cpp/Evaluator/Runtime.cpp +++ b/runtime/cpp/Evaluator/Runtime.cpp @@ -22,6 +22,7 @@ Runtime::Runtime(Loader* loader, ImageSegment* kernel, SymbolProvider* symbolPro _symbolProvider(symbolProvider), _lastHash(0) { + debugRuntime = this; this->initializeKernelObjects(); KnownObjects::initializeFrom(this); _heap = new GCHeap(this); @@ -177,10 +178,19 @@ HeapObject 
*Runtime::loadModule_(HeapObject *name) { return _loader->loadModule_(name->asLocalString()); } +HeapObject *Runtime::loadModuleFromPath_(const std::string &path) { + return _loader->loadModuleFromPath_(path); +} + void Runtime::addSegmentSpace_(ImageSegment* segment) { GCSpace *space = GCSpace::allocatedAt_limit_(segment->spaceStart(), segment->spaceEnd(), false); - space->_name = this->moduleName_(segment->header.module)->asLocalString(); + // Handle bootstrapped kernel which doesn't have a module object yet + if (segment->header.module != nullptr) { + space->_name = this->moduleName_(segment->header.module)->asLocalString(); + } else { + space->_name = "BootstrappedKernel"; + } this->_heap->addSpace_(space); } diff --git a/runtime/cpp/Evaluator/Runtime.h b/runtime/cpp/Evaluator/Runtime.h index 9869bdc6..7b67aced 100644 --- a/runtime/cpp/Evaluator/Runtime.h +++ b/runtime/cpp/Evaluator/Runtime.h @@ -105,6 +105,7 @@ class Runtime { HeapObject* addSymbol_(const std::string &str); void switchToDynamicSymbolProvider_(HeapObject* symbolTable); HeapObject* loadModule_(HeapObject *name); + HeapObject* loadModuleFromPath_(const std::string &path); void addSegmentSpace_(ImageSegment *segment); uintptr_t hashFor_(Object *anObject); @@ -509,6 +510,7 @@ class Runtime { this->_ephemeronClass = _kernel->_exports["Ephemeron"]; this->_processStackClass = _kernel->_exports["ProcessVMStack"]; this->_openHashTableClass = _kernel->_exports["OpenHashTable"]; + this->_characterClass = _kernel->_exports["Character"]; this->_smallIntegerBehavior = this->speciesInstanceBehavior_(_smallIntegerClass); } @@ -535,6 +537,7 @@ class Runtime { HeapObject *_ephemeronClass; HeapObject *_processStackClass; HeapObject *_openHashTableClass; + HeapObject *_characterClass; HeapObject *_closureReturnMethod; HeapObject *_smallIntegerBehavior; diff --git a/runtime/cpp/Evaluator/SExpressionLinearizer.cpp b/runtime/cpp/Evaluator/SExpressionLinearizer.cpp index 3c64a770..51f4e415 100644 --- 
a/runtime/cpp/Evaluator/SExpressionLinearizer.cpp +++ b/runtime/cpp/Evaluator/SExpressionLinearizer.cpp @@ -556,23 +556,23 @@ void SExpressionLinearizer::visitMethod(SMethod *anSMethod) { void SExpressionLinearizer::visitMethod(SMethod *anSMethod, HeapObject *method) { this->reset(); - auto primitive = anSMethod->pragma(); - if (primitive != nullptr) { + auto pragmaObj = anSMethod->pragma(); + if (pragmaObj != nullptr) { auto name = (_runtime->methodIsFFI_(method)) ? _runtime->existingSymbolFrom_("FFICall") : (Object*)anSMethod->primitive(); - PrimitivePointer primitive; auto it = this->_primitives.find(name); if (it == this->_primitives.end()) { - error_("primitive " + name->printString() + " not found"); + std::string symStr = name->asHeapObject()->printString(); + fprintf(stderr, "Warning: primitive not found: %s, falling through to Smalltalk code\n", symStr.c_str()); } else { - primitive = it->second; + PrimitivePointer prim = it->second; + this->primitive_(prim); + this->returnOp(); } - - this->primitive_(primitive); - this->returnOp(); } - this->_stackTop = _runtime->methodTempCount_(anSMethod->compiledCode()); + auto cc = anSMethod->compiledCode(); + this->_stackTop = _runtime->methodTempCount_(cc); auto statements = anSMethod->statements(); for (auto node : statements) { node->acceptVisitor_(this); diff --git a/runtime/cpp/Loader.cpp b/runtime/cpp/Loader.cpp index d12f582b..e21b9e43 100644 --- a/runtime/cpp/Loader.cpp +++ b/runtime/cpp/Loader.cpp @@ -7,6 +7,7 @@ #include "Bootstrap/Bootstrapper.h" #include "Bootstrap/SourceModuleLoader.h" #include "FileImageSegment.h" +#include #include namespace Egg { @@ -47,7 +48,7 @@ bool Loader::hasSourceDir_(const std::string& name) { namespace fs = std::filesystem; auto modulesRoot = findModulesDir_(); if (modulesRoot.empty()) return false; - auto moduleDir = fs::path(modulesRoot) / name; + auto moduleDir = fs::path(modulesRoot) / modulePath_(name); return fs::exists(moduleDir) && fs::is_directory(moduleDir); } @@ 
-75,8 +76,9 @@ Runtime* Loader::loadKernel() { HeapObject* Loader::loadModule_(const std::string& name) { // 1. Already loaded? auto it = _loadedModules.find(name); - if (it != _loadedModules.end()) + if (it != _loadedModules.end()) { return it->second; + } HeapObject* module = nullptr; @@ -89,9 +91,9 @@ HeapObject* Loader::loadModule_(const std::string& name) { else if (hasSourceDir_(name)) { namespace fs = std::filesystem; auto modulesRoot = findModulesDir_(); - auto modulePath = (fs::path(modulesRoot) / name).string(); + auto modPath = (fs::path(modulesRoot) / modulePath_(name)).string(); SourceModuleLoader sourceLoader(_runtime); - module = sourceLoader.loadModuleFromSource(modulePath); + module = sourceLoader.loadModuleFromSource(modPath); } else { error(("Module not found: " + name).c_str()); @@ -102,6 +104,53 @@ HeapObject* Loader::loadModule_(const std::string& name) { return module; } +std::string Loader::modulePath_(const std::string& name) { + std::string path = name; + std::replace(path.begin(), path.end(), '.', '/'); + return path; +} + +HeapObject* Loader::loadModuleFromPath_(const std::string& path) { + namespace fs = std::filesystem; + fs::path fsPath(path); + HeapObject* module = nullptr; + + // Determine the module name from the path + std::string name = fsPath.filename().string(); + // Strip .ems extension if present + if (name.size() > 4 && name.substr(name.size() - 4) == ".ems") + name = name.substr(0, name.size() - 4); + + // Already loaded? 
+ auto it = _loadedModules.find(name); + if (it != _loadedModules.end()) + return it->second; + + // If path ends with .ems, load as image segment + if (fsPath.extension() == ".ems" && fs::exists(fsPath)) { + auto stream = std::ifstream(fsPath, std::ios::binary); + auto imageSegment = new FileImageSegment(&stream); + _segments[name] = imageSegment; + std::vector imports; + this->bindModuleImports(imageSegment, imports); + imageSegment->fixPointerSlots(imports); + this->_runtime->addSegmentSpace_(imageSegment); + module = imageSegment->_exports["__module__"]; + } + // If path is a directory, load from source + else if (fs::is_directory(fsPath)) { + SourceModuleLoader sourceLoader(_runtime); + module = sourceLoader.loadModuleFromSource(path); + } + else { + error(("Module not found at path: " + path).c_str()); + return nullptr; + } + + _loadedModules[name] = module; + return module; +} + // .ems loading support methods FileImageSegment* Loader::loadModuleFromFile(const std::string &filename) { diff --git a/runtime/cpp/Loader.h b/runtime/cpp/Loader.h index dd420111..0bff07a0 100644 --- a/runtime/cpp/Loader.h +++ b/runtime/cpp/Loader.h @@ -32,6 +32,8 @@ class Loader { Runtime* loadKernel(); HeapObject* loadModule_(const std::string& name); + HeapObject* loadModuleFromPath_(const std::string& path); + std::string modulePath_(const std::string& name); // .ems loading support FileImageSegment* loadModuleFromFile(const std::string &filename); diff --git a/runtime/cpp/Makefile b/runtime/cpp/Makefile index a2b5db86..ae291f50 100644 --- a/runtime/cpp/Makefile +++ b/runtime/cpp/Makefile @@ -12,7 +12,7 @@ BUILD_TYPE ?= Debug BUILD_DIR := build/$(PLATFORM)-$(ARCH)-$(BUILD_TYPE) all: - conan install . --output-folder=$(BUILD_DIR) --build=missing -s compiler.cppstd=20 -s build_type=$(BUILD_TYPE) + conan install . 
--output-folder=$(BUILD_DIR) --build=missing -s build_type=$(BUILD_TYPE) cmake -B $(BUILD_DIR) -DCMAKE_TOOLCHAIN_FILE=conan_toolchain.cmake -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DCMAKE_PREFIX_PATH=$(BUILD_DIR) cmake -S . -B $(BUILD_DIR) && cmake --build $(BUILD_DIR) diff --git a/runtime/cpp/README.md b/runtime/cpp/README.md index a6a263c6..adc2d8cc 100644 --- a/runtime/cpp/README.md +++ b/runtime/cpp/README.md @@ -24,28 +24,50 @@ Hello, world! ## Building the VM -To compile the VM, be sure to install dependencies listed in next section and then just run from cli: +Dependencies (Ubuntu): `sudo apt install g++ cmake conan`. On macOS use Homebrew. +[Conan](https://conan.io) is a C++ package manager that fetches the few required +C++ dependencies (libffi, Catch2). We try to keep deps as minimal as possible. + +The simplest path is the top-level `Makefile`, which picks the right build dir +for your platform (`build/<platform>-<arch>-<build_type>`): + +``` +cd runtime/cpp +make # Debug build (default) +make BUILD_TYPE=Release # Release build +``` + +Or invoke conan + cmake by hand: ``` cd runtime/cpp -conan install . --output-folder=build --build=missing -s compiler.cppstd=20 -s build_type=Debug -cmake -B build -DCMAKE_TOOLCHAIN_FILE=conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_PREFIX_PATH=build -cmake --build build +BUILD_DIR=build/$(uname -s)-$(uname -m)-Debug # e.g. build/Darwin-arm64-Debug +conan install . --output-folder=$BUILD_DIR --build=missing -s build_type=Debug +cmake -B $BUILD_DIR -DCMAKE_TOOLCHAIN_FILE=conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_PREFIX_PATH=$BUILD_DIR +cmake --build $BUILD_DIR -j ``` -This will build you an executable, you still need to run modules. +The resulting executable is `$BUILD_DIR/egg`. + +## Running tests + +The C++ tree carries two CTest suites: -### Dependencies +- `compiler_tests` — scanner & parser unit tests (`runtime/cpp/Compiler/tests`). 
+- `bootstrapper_parser_tests` — Tonel parsing and bootstrap integration + (`runtime/cpp/Bootstrap/tests`). -We aim for minimality everywhere, however you'll need some tools to be -able to compile egg. In ubuntu, this should be all: +After building, run them through CTest from the build directory: ``` -sudo apt install g++ cmake conan +cd runtime/cpp/build/$(uname -s)-$(uname -m)-Debug +ctest --output-on-failure # everything +ctest -R CompilerTests --output-on-failure # one suite +ctest -R BootstrapperParserTests ``` -That should be all for compiling the VM. Conan is a C++ package manager that -knows how to fetch the few required C++ dependencies such as libffi. +The individual binaries also accept Catch2 tag filters, e.g. +`./Compiler/tests/compiler_tests "[scanner]"`. ## Building module snapshots diff --git a/runtime/cpp/SymbolProvider.cpp b/runtime/cpp/SymbolProvider.cpp index 33a99b38..22e0443d 100644 --- a/runtime/cpp/SymbolProvider.cpp +++ b/runtime/cpp/SymbolProvider.cpp @@ -19,7 +19,7 @@ Object* BootstrapSymbolProvider::existingSymbolFor_(const Egg::string& name) { } DynamicSymbolProvider::DynamicSymbolProvider(Runtime* runtime, HeapObject* symbolTable) - : _runtime(runtime), _symbolTable(symbolTable) {} + : _runtime(runtime), _symbolTable(new GCedRef((Object*)symbolTable)) {} Object* DynamicSymbolProvider::existingSymbolFor_(const Egg::string& name) { auto it = _cache.find(name); @@ -37,7 +37,7 @@ Object* DynamicSymbolProvider::existingSymbolFor_(const Egg::string& name) { std::string bytesStr(bytes, isLatin1 ? 
name.size() : name.size() * 4); // Linear scan of symbol table (HashTable with 'policy' ivar at slot 1, elements from slot 2) - HeapObject* table = _symbolTable->slotAt_(2)->asHeapObject(); + HeapObject* table = _symbolTable->get()->asHeapObject()->slotAt_(2)->asHeapObject(); for (int i = 2; i <= table->size(); i++) { auto symbol = table->slotAt_(i); if (symbol != (Object*)_runtime->_nilObj) { diff --git a/runtime/cpp/SymbolProvider.h b/runtime/cpp/SymbolProvider.h index fb21e168..13f14fcb 100644 --- a/runtime/cpp/SymbolProvider.h +++ b/runtime/cpp/SymbolProvider.h @@ -34,7 +34,7 @@ class BootstrapSymbolProvider : public SymbolProvider { class DynamicSymbolProvider : public SymbolProvider { Runtime* _runtime; - HeapObject* _symbolTable; + GCedRef* _symbolTable; std::map _cache; public: DynamicSymbolProvider(Runtime* runtime, HeapObject* symbolTable); @@ -42,7 +42,7 @@ class DynamicSymbolProvider : public SymbolProvider { Object* symbolFor_(const Egg::string& name) override; Object* existingSymbolFor_(const Egg::string& name) override; - void symbolTable_(HeapObject* table) { _symbolTable = table; } + void symbolTable_(HeapObject* table) { _symbolTable->set_((Object*)table); } std::map& cache() { return _cache; } }; diff --git a/runtime/cpp/conanfile.py b/runtime/cpp/conanfile.py new file mode 100644 index 00000000..80619026 --- /dev/null +++ b/runtime/cpp/conanfile.py @@ -0,0 +1,16 @@ +from conan import ConanFile + + +class EggConan(ConanFile): + settings = "os", "compiler", "build_type", "arch" + generators = "CMakeDeps", "CMakeToolchain" + requires = ( + "libffi/3.4.6", + "cxxopts/3.2.0", + "catch2/2.13.10", + ) + + def configure(self): + # Egg requires C++20; pin cppstd so users don't have to pass + # `-s compiler.cppstd=20` on the conan install command line. 
+ self.settings.compiler.cppstd = "20" diff --git a/runtime/cpp/conanfile.txt b/runtime/cpp/conanfile.txt deleted file mode 100644 index 46df666c..00000000 --- a/runtime/cpp/conanfile.txt +++ /dev/null @@ -1,7 +0,0 @@ -[requires] -libffi/3.4.6 -cxxopts/3.2.0 - -[generators] -CMakeDeps -CMakeToolchain diff --git a/runtime/pharo/Powerlang-Core/EggEvaluator.class.st b/runtime/pharo/Powerlang-Core/EggEvaluator.class.st index 06e98b6f..d6dd1857 100644 --- a/runtime/pharo/Powerlang-Core/EggEvaluator.class.st +++ b/runtime/pharo/Powerlang-Core/EggEvaluator.class.st @@ -203,11 +203,15 @@ EggEvaluator >> initializePrimitives [ at: #BootstrapDictAt put: self primitiveBootstrapDictAt; at: #BootstrapDictAtPut put: self primitiveBootstrapDictAtPut; at: #HostSuspendedBecause put: self primitiveHostSuspendedBecause; + at: #HostCurrentDirectory put: self primitiveHostCurrentDirectory; at: #HostCurrentMilliseconds put: self primitiveHostCurrentMilliseconds; + at: #HostFixOverrides put: self primitiveHostFixOverrides; + at: #HostGetEnv put: self primitiveHostGetEnv; at: #HostLoadModule put: self primitiveHostLoadModule; - at: #HostReadFile put: self primitiveHostReadFile; + at: #HostLoadModuleFromPath put: self primitiveHostLoadModuleFromPath; + at: #HostPathExists put: self primitiveHostPathExists; at: #HostPlatformName put: self primitiveHostPlatformName; - at: #HostFixOverrides put: self primitiveHostFixOverrides; + at: #HostReadFile put: self primitiveHostReadFile; at: #PrimeFor put: self primitivePrimeFor; at: #FlushFromCaches put: self primitiveFlushFromCaches; at: #PrepareForExecution put: self primitivePrepareForExecution; @@ -457,6 +461,11 @@ EggEvaluator >> primitiveHash [ ^ [ runtime newInteger: (runtime hashFor: context self) ] ] +{ #category : 'primitives' } +EggEvaluator >> primitiveHostCurrentDirectory [ + ^ [ runtime newString: FileLocator workingDirectory fullName ] +] + { #category : 'primitives' } EggEvaluator >> primitiveHostCurrentMilliseconds [ @@ -468,9 
+477,28 @@ EggEvaluator >> primitiveHostFixOverrides [ ^ [ runtime fixOverrides ] ] +{ #category : 'primitives' } +EggEvaluator >> primitiveHostGetEnv [ + ^ [ + (OSEnvironment current at: context firstArgument asLocalString ifAbsent: nil) + ifNil: [nilObj] + ifNotNil: [:value | runtime newString: value]] +] + { #category : 'primitives' } EggEvaluator >> primitiveHostLoadModule [ - ^ [ runtime loadModule: context firstArgument] + ^ [ runtime loadModule: context firstArgument ] +] + +{ #category : 'primitives' } +EggEvaluator >> primitiveHostLoadModuleFromPath [ + ^ [ runtime loadModuleFromPath: context firstArgument ] +] + +{ #category : 'primitives' } +EggEvaluator >> primitiveHostPathExists [ + + ^ [ runtime booleanFor: context firstArgument asLocalString asFileReference exists ] ] { #category : 'primitives' } diff --git a/runtime/pharo/Powerlang-Core/EggMetacircularRuntime.class.st b/runtime/pharo/Powerlang-Core/EggMetacircularRuntime.class.st index 93e09e6a..7f643110 100644 --- a/runtime/pharo/Powerlang-Core/EggMetacircularRuntime.class.st +++ b/runtime/pharo/Powerlang-Core/EggMetacircularRuntime.class.st @@ -436,6 +436,11 @@ EggMetacircularRuntime >> loadModule: symbol [ ^ bootstrapper loadModule: symbol asLocalString ] +{ #category : 'initialization' } +EggMetacircularRuntime >> loadModuleFromPath: symbol [ + ^ bootstrapper loadModuleFromPath: symbol asLocalString asFileReference +] + { #category : 'initialization' } EggMetacircularRuntime >> localMethodTreecodes: method [ @@ -854,6 +859,9 @@ EggMetacircularRuntime >> print: anLMRObject on: aStream [ nextPutAll: '>>#'; nextPutAll: selector; nextPutAll: '>' ]. + ((self isMetaclass: species)or: [name = 'Metaclass']) + ifTrue: [ ^aStream nextPut: $<; nextPutAll: (self speciesLocalName: anLMRObject); nextPut: $> ]. 
+ aStream nextPut: $<; nextPutAll: name withArticle; diff --git a/runtime/pharo/Powerlang-Core/Ring2MetacircularConverter.class.st b/runtime/pharo/Powerlang-Core/Ring2MetacircularConverter.class.st index 28840ca2..df9f6d98 100644 --- a/runtime/pharo/Powerlang-Core/Ring2MetacircularConverter.class.st +++ b/runtime/pharo/Powerlang-Core/Ring2MetacircularConverter.class.st @@ -548,9 +548,18 @@ Ring2MetacircularConverter >> instantiateMetaobjects [ { #category : 'initialization' } Ring2MetacircularConverter >> loadModule: symbol [ - | spec module extensions sorted classes | - spec := self readModuleSpec: symbol. - module := self createModuleClassAndInstance: symbol spec: spec. + | path | + path := self findModulePath: symbol. + ^self loadModuleFromPath: path +] + +{ #category : 'initialization' } +Ring2MetacircularConverter >> loadModuleFromPath: path [ + + | name spec module extensions sorted classes | + name := path basename. + spec := EggBootstrapModule readSpec: name at: path parent. + module := self createModuleClassAndInstance: name spec: spec. sorted := self classesSortedBySuperclass: spec classes. classes := sorted collect: [ :classSpec | self createNewClassFrom: classSpec in: module ]. @@ -647,15 +656,6 @@ Ring2MetacircularConverter >> readFile: pathname [ ^reference readStream contents ] -{ #category : 'initialization' } -Ring2MetacircularConverter >> readModuleSpec: aSymbol [ - | path | - path := self findModulePath: aSymbol. - ^EggBootstrapModule readSpec: aSymbol at: path parent - - -] - { #category : 'initialization' } Ring2MetacircularConverter >> removeBootstrapDictionaryNewOverride [ | spec species behavior selector |