From 707df58acd90f07f0653410ea3e83ab297b57e24 Mon Sep 17 00:00:00 2001 From: pseudodionysius <36469636+pseudodionysius@users.noreply.github.com> Date: Fri, 10 Apr 2026 09:53:31 +0100 Subject: [PATCH 1/2] feat: implement nlp engine --- src/engine/nlp/index.ts | 5 + src/engine/nlp/nlpEngine.ts | 89 ++++- src/engine/nlp/nlpTypes.ts | 171 ++++++++- .../propositional/evaluationEngine.ts | 167 ++++++++- .../syntax/propositional/syntaxEngine.ts | 303 +++++++++++++++- .../propositionalTheoryBuilder.ts | 53 ++- test/engine/nlp/nlpEngine.spec.ts | 143 +++++++- .../propositional/evaluationEngine.spec.ts | 189 +++++++++- .../syntax/propositional/syntaxEngine.spec.ts | 333 +++++++++++++++++- .../propositional/propositionalTheory.spec.ts | 19 +- tsconfig.json | 1 + 11 files changed, 1405 insertions(+), 68 deletions(-) diff --git a/src/engine/nlp/index.ts b/src/engine/nlp/index.ts index 811c67e..b29349b 100644 --- a/src/engine/nlp/index.ts +++ b/src/engine/nlp/index.ts @@ -1,2 +1,7 @@ export * from './nlpTypes'; export * from './nlpEngine'; +export * from './textSegmenter'; +export * from './sentenceClassifier'; +export * from './argumentAnalyser'; +export * from './formalAnnotator'; +export * from './formalTranslator'; diff --git a/src/engine/nlp/nlpEngine.ts b/src/engine/nlp/nlpEngine.ts index 60f1153..6dda9c8 100644 --- a/src/engine/nlp/nlpEngine.ts +++ b/src/engine/nlp/nlpEngine.ts @@ -1,25 +1,88 @@ import { NLPResult } from './nlpTypes'; +import { TextSegmenter } from './textSegmenter'; +import { SentenceClassifier } from './sentenceClassifier'; +import { ArgumentAnalyser } from './argumentAnalyser'; +import { FormalAnnotator } from './formalAnnotator'; +import { FormalTranslator } from './formalTranslator'; /** - * NLPEngine parses arbitrary input strings to identify alethic assertoric - * sentence candidates that can be handed off to a formal language engine. + * NLPEngine — the top-level orchestrator for the NLP pipeline. 
* - * TODO: Implement sentence segmentation, mood classification, and - * confidence scoring. Consider dependency on a POS tagger or - * lightweight grammar for declarative-mood detection. + * Pipeline: + * raw text → TextSegmenter → SentenceClassifier → ArgumentAnalyser + * → FormalAnnotator → FormalTranslator + * + * All processing is rule-based and zero-dependency. */ export class NLPEngine { + private readonly _segmenter = new TextSegmenter(); + private readonly _classifier = new SentenceClassifier(); + private readonly _analyser = new ArgumentAnalyser(); + private readonly _annotator = new FormalAnnotator(); + private readonly _translator = new FormalTranslator(); + + /** + * Parse an arbitrary input string and return the full NLP pipeline result. + * + * Steps: + * 1. Segment the text into sentence-candidate strings. + * 2. Classify each candidate — discard non-assertoric sentences. + * 3. Annotate each assertoric sentence with logical features. + * 4. Analyse argument structure across the sentence set. + * 5. Translate into propositional, quantificational, and modal formula strings. + * + * @param input - The natural language string to analyse. + * @returns A fully populated `NLPResult`. + */ + parse(input: string): NLPResult { + const segments = this._segmenter.segment(input); + const candidates = this._classifier.classifyAll(segments); + const sentenceSet = { sentences: candidates }; + + const annotated = this._annotator.annotateAll(candidates); + const argument = this._analyser.analyse(candidates); + const translations = this._translator.translate(sentenceSet, annotated); + + return { + input, + candidates, + sentenceSet, + annotated, + argument, + translations, + }; + } + /** - * Parse an arbitrary input string and return zero or more alethic - * assertoric sentence candidates found within it. + * Collect all chunks from an `AsyncIterable` source and run the + * full pipeline over the concatenated text. 
+ * + * Works with any async string source: Node.js `stream.Readable` (in text + * mode), web `ReadableStream` readers, `fs/promises` async iteration, etc. * - * @param _input - The natural language string to analyse. - * @returns An NLPResult containing the original input and any - * assertoric candidates identified. - * @throws Error Until the engine is implemented. + * @param source - An async iterable yielding string chunks. + * @returns A promise resolving to the full `NLPResult`. */ - parse(_input: string): NLPResult { - throw new Error('NLPEngine.parse is not yet implemented'); + async parseStream(source: AsyncIterable): Promise { + const sentences = await this._segmenter.segmentStream(source); + const candidates = this._classifier.classifyAll(sentences); + const sentenceSet = { sentences: candidates }; + + const annotated = this._annotator.annotateAll(candidates); + const argument = this._analyser.analyse(candidates); + const translations = this._translator.translate(sentenceSet, annotated); + + // Reconstruct input from collected sentences for the result record + const input = sentences.join(' '); + + return { + input, + candidates, + sentenceSet, + annotated, + argument, + translations, + }; } } diff --git a/src/engine/nlp/nlpTypes.ts b/src/engine/nlp/nlpTypes.ts index 2f60923..7d01014 100644 --- a/src/engine/nlp/nlpTypes.ts +++ b/src/engine/nlp/nlpTypes.ts @@ -1,19 +1,170 @@ -import { AlethicAssertoric } from '../../language/shared/types'; +import { AlethicAssertoric, SentenceSet } from '../../language/shared/types'; -/** - * Re-export AlethicAssertoric so callers can import it from the NLP module - * without needing to know where the canonical definition lives. - */ export { AlethicAssertoric }; -/** - * The result of processing an input string through NLPEngine.parse(). - * Carries the original input alongside any alethic assertoric sentence - * candidates identified within it. 
- */ +// --------------------------------------------------------------------------- +// Sentence mood +// --------------------------------------------------------------------------- + +export type MoodType = 'declarative' | 'interrogative' | 'imperative' | 'exclamatory'; + +// --------------------------------------------------------------------------- +// Feature annotations — individual logical features extracted from a sentence +// --------------------------------------------------------------------------- + +/** A binary connective detected in the sentence text. */ +export interface ConnectiveAnnotation { + /** The matched trigger text (e.g. "if … then", "and"). */ + text: string; + /** The corresponding formal binary operator. */ + operator: '&' | '|' | '->' | '<->'; + /** Character offsets [start, end) within the sentence's raw text. */ + span: [number, number]; +} + +/** A quantifier expression detected in the sentence text. */ +export interface QuantifierAnnotation { + /** The matched trigger text (e.g. "all", "there exists"). */ + text: string; + /** The corresponding formal quantifier (¬∃ encodes "no/none"). */ + quantifier: '∀' | '∃' | '¬∃'; + /** Character offsets [start, end) within the sentence's raw text. */ + span: [number, number]; +} + +/** A modal adverb or phrase detected in the sentence text. */ +export interface ModalAnnotation { + /** The matched trigger text (e.g. "necessarily", "it is possible that"). */ + text: string; + /** The corresponding modal operator. */ + operator: '□' | '◇'; + /** Character offsets [start, end) within the sentence's raw text. */ + span: [number, number]; +} + +/** A negation marker detected in the sentence text. */ +export interface NegationAnnotation { + /** The matched trigger text (e.g. "not", "it is not the case that"). */ + text: string; + /** Character offsets [start, end) within the sentence's raw text. */ + span: [number, number]; +} + +/** An atomic proposition candidate extracted from the sentence. 
*/ +export interface PropositionAnnotation { + /** The text fragment this proposition represents. */ + text: string; + /** The assigned logical label (p, q, r, …). */ + label: string; + /** Character offsets [start, end) within the sentence's raw text. */ + span: [number, number]; +} + +// --------------------------------------------------------------------------- +// SentenceFeatures — all features for one sentence +// --------------------------------------------------------------------------- + +export interface SentenceFeatures { + mood: MoodType; + connectives: ConnectiveAnnotation[]; + quantifiers: QuantifierAnnotation[]; + modalAdverbs: ModalAnnotation[]; + negations: NegationAnnotation[]; + propositions: PropositionAnnotation[]; +} + +// --------------------------------------------------------------------------- +// AnnotatedSentence — an assertoric sentence plus its extracted features +// --------------------------------------------------------------------------- + +export interface AnnotatedSentence { + source: AlethicAssertoric; + features: SentenceFeatures; +} + +// --------------------------------------------------------------------------- +// ArgumentAnalyser output +// --------------------------------------------------------------------------- + +export type ArgumentRelation = 'supports' | 'opposes' | 'independent'; + +export interface SentencePair { + from: AlethicAssertoric; + to: AlethicAssertoric; + relation: ArgumentRelation; +} + +export interface AnalysedArgument { + sentences: AlethicAssertoric[]; + premises: AlethicAssertoric[]; + conclusions: AlethicAssertoric[]; + relations: SentencePair[]; +} + +// --------------------------------------------------------------------------- +// FormalTranslationSet — one translation per supported formal language +// --------------------------------------------------------------------------- + +export interface PropositionalSentenceTranslation { + source: AlethicAssertoric; + /** Maps proposition label (p, 
q, …) to the text fragment it represents. */ + propositionMap: Record; + /** Human-readable formula string, e.g. "p -> q". */ + formulaString: string; +} + +export interface PropositionalTranslation { + sentences: PropositionalSentenceTranslation[]; +} + +export interface QuantificationalSentenceTranslation { + source: AlethicAssertoric; + propositionMap: Record; + /** Detected quantifier prefix, e.g. "∀x" or "∃x". Null if none detected. */ + quantifierPrefix: string | null; + /** Suggested predicate name derived from the proposition text. */ + suggestedPredicate: string | null; + formulaString: string; +} + +export interface QuantificationalTranslation { + sentences: QuantificationalSentenceTranslation[]; +} + +export interface ModalSentenceTranslation { + source: AlethicAssertoric; + propositionMap: Record; + /** Detected modal operator prefix, e.g. "□" or "◇". Null if none detected. */ + modalPrefix: string | null; + formulaString: string; +} + +export interface ModalTranslation { + sentences: ModalSentenceTranslation[]; +} + +export interface FormalTranslationSet { + source: SentenceSet; + propositional: PropositionalTranslation; + quantificational: QuantificationalTranslation; + modal: ModalTranslation; +} + +// --------------------------------------------------------------------------- +// NLPResult — the full output of NLPEngine.parse() +// --------------------------------------------------------------------------- + export interface NLPResult { /** The original input string passed to NLPEngine.parse(). */ input: string; /** Zero or more assertoric sentence candidates found in the input. */ candidates: AlethicAssertoric[]; + /** The same sentences as an ordered SentenceSet. */ + sentenceSet: SentenceSet; + /** Feature-annotated version of every assertoric sentence. */ + annotated: AnnotatedSentence[]; + /** Detected argument structure across the sentence set. */ + argument: AnalysedArgument; + /** Formal language translations for each sentence. 
*/ + translations: FormalTranslationSet; } diff --git a/src/engine/semantics/propositional/evaluationEngine.ts b/src/engine/semantics/propositional/evaluationEngine.ts index cdb668b..12fe58e 100644 --- a/src/engine/semantics/propositional/evaluationEngine.ts +++ b/src/engine/semantics/propositional/evaluationEngine.ts @@ -1,9 +1,164 @@ +import { WFF } from '../../../language/propositional/propositionalTypes'; +import { PropositionalVariable } from '../../../language/propositional/propositionalVariable'; +import { PropositionalSyntaxEngine } from '../../syntax/propositional/syntaxEngine'; + +// --------------------------------------------------------------------------- +// Public types +// --------------------------------------------------------------------------- + +/** The semantic classification of a propositional formula. */ +export type WFFClassification = 'tautology' | 'contradiction' | 'contingency'; + +/** A single row in a truth table. */ +export interface TruthTableRow { + /** The variable assignment for this row. */ + assignment: Record; + /** The formula's truth value under this assignment. */ + value: boolean; +} + +/** A complete truth table for a propositional formula. */ +export interface TruthTable { + /** The proposition letters, in evaluation order. */ + variables: string[]; + /** All 2^n rows, ordered by ascending bitmask over the variables. */ + rows: TruthTableRow[]; +} + +/** The full result of evaluating a propositional formula. */ +export interface EvaluationResult { + /** Tautology, contradiction, or contingency. */ + classification: WFFClassification; + /** The complete truth table. */ + truthTable: TruthTable; +} + +// --------------------------------------------------------------------------- +// PropositionalEvaluationEngine +// --------------------------------------------------------------------------- + /** - * Propositional evaluation engine — not yet implemented. + * Semantic evaluation of propositional WFFs. 
* - * Responsible for semantic evaluation of propositional WFFs beyond the - * consistency checking already provided by PropositionalTheory. Planned - * features include truth-table generation, tautology/contradiction/contingency - * classification, and extended proof support. + * Provides truth-table generation and tautology / contradiction / contingency + * classification via exhaustive enumeration over all 2^n variable assignments. + * + * Works with WFF instances constructed by hand or via PropositionalSyntaxEngine. */ -export {}; +export class PropositionalEvaluationEngine { + + private readonly _syntax = new PropositionalSyntaxEngine(); + + /** + * Evaluate a WFF over all possible variable assignments. + * + * @param formula - The formula to evaluate. + * @param variables - The PropositionalVariables the formula depends on. + * Passed in explicitly so the engine does not need to + * traverse the formula tree to discover them. + * @returns Classification and full truth table. + */ + evaluate( + formula: WFF, + variables: Map, + ): EvaluationResult { + const varNames = Array.from(variables.keys()); + const n = varNames.length; + const total = Math.pow(2, n); + const rows: TruthTableRow[] = []; + + let allTrue = true; + let allFalse = true; + + for (let mask = 0; mask < total; mask++) { + const assignment: Record = {}; + + varNames.forEach((name, i) => { + const v = Boolean((mask >> i) & 1); + variables.get(name)!.assign(v); + assignment[name] = v; + }); + + const value = formula.value(); + rows.push({ assignment, value }); + + if (!value) allTrue = false; + if (value) allFalse = false; + } + + const classification: WFFClassification = allTrue + ? 'tautology' + : allFalse + ? 'contradiction' + : 'contingency'; + + return { classification, truthTable: { variables: varNames, rows } }; + } + + /** + * Parse a formula string and evaluate it in one step. + * + * @param formulaString - A formula string (e.g. "p -> (p | q)"). 
+ * @returns Classification and full truth table. + * @throws SyntaxError if the formula string is malformed. + */ + evaluateString(formulaString: string): EvaluationResult { + const { formula, variables } = this._syntax.parse(formulaString); + return this.evaluate(formula, variables); + } + + /** + * Classify a formula string as tautology, contradiction, or contingency. + * + * Convenience wrapper around `evaluateString()`. + * + * @param formulaString - A formula string. + * @returns The classification. + * @throws SyntaxError if the formula string is malformed. + */ + classify(formulaString: string): WFFClassification { + return this.evaluateString(formulaString).classification; + } + + /** + * Generate a truth table for a formula string. + * + * Convenience wrapper around `evaluateString()`. + * + * @param formulaString - A formula string. + * @returns The truth table. + * @throws SyntaxError if the formula string is malformed. + */ + truthTable(formulaString: string): TruthTable { + return this.evaluateString(formulaString).truthTable; + } + + /** + * Print a formatted truth table to the console. + * + * @param formulaString - A formula string. + */ + printTruthTable(formulaString: string): void { + const { classification, truthTable } = this.evaluateString(formulaString); + const { variables, rows } = truthTable; + + console.log(`\nTRUTH TABLE — ${formulaString}`); + console.log('═'.repeat(Math.max(40, formulaString.length + 15))); + + // Header row + const header = [...variables.map(v => v.padEnd(5)), 'VALUE'].join('│ '); + console.log(header); + console.log('─'.repeat(header.length)); + + // Data rows + for (const row of rows) { + const cells = variables.map(v => (row.assignment[v] ? 'T' : 'F').padEnd(5)); + cells.push(row.value ? 
'T' : 'F'); + console.log(cells.join('│ ')); + } + + console.log('─'.repeat(header.length)); + console.log(`Classification: ${classification.toUpperCase()}`); + console.log(''); + } +} diff --git a/src/engine/syntax/propositional/syntaxEngine.ts b/src/engine/syntax/propositional/syntaxEngine.ts index 022db9e..25df117 100644 --- a/src/engine/syntax/propositional/syntaxEngine.ts +++ b/src/engine/syntax/propositional/syntaxEngine.ts @@ -1,9 +1,300 @@ +import { WFF } from '../../../language/propositional/propositionalTypes'; +import { AtomImpl } from '../../../language/propositional/atom'; +import { ComplexImpl } from '../../../language/propositional/complex'; +import { PropositionalVariable } from '../../../language/propositional/propositionalVariable'; + +// --------------------------------------------------------------------------- +// Tokens +// --------------------------------------------------------------------------- + +type TT = 'ATOM' | 'NOT' | 'AND' | 'OR' | 'IMPLIES' | 'IFF' | 'LPAREN' | 'RPAREN' | 'EOF'; + +interface Token { + type: TT; + value: string; + pos: number; +} + +// --------------------------------------------------------------------------- +// Lexer +// --------------------------------------------------------------------------- + +function tokenize(input: string): Token[] { + const tokens: Token[] = []; + let i = 0; + + while (i < input.length) { + // skip whitespace + if (/\s/.test(input[i])) { i++; continue; } + + // multi-char operators first + if (input.startsWith('<->', i)) { + tokens.push({ type: 'IFF', value: '<->', pos: i }); + i += 3; continue; + } + if (input.startsWith('->', i)) { + tokens.push({ type: 'IMPLIES', value: '->', pos: i }); + i += 2; continue; + } + + const ch = input[i]; + switch (ch) { + case '~': tokens.push({ type: 'NOT', value: ch, pos: i }); i++; break; + case '&': tokens.push({ type: 'AND', value: ch, pos: i }); i++; break; + case '|': tokens.push({ type: 'OR', value: ch, pos: i }); i++; break; + case '(': 
tokens.push({ type: 'LPAREN', value: ch, pos: i }); i++; break; + case ')': tokens.push({ type: 'RPAREN', value: ch, pos: i }); i++; break; + default: + if (/[a-z]/.test(ch)) { + // atom: one lowercase letter optionally followed by digits + let j = i + 1; + while (j < input.length && /[0-9]/.test(input[j])) j++; + tokens.push({ type: 'ATOM', value: input.slice(i, j), pos: i }); + i = j; + } else { + throw new SyntaxError( + `Unexpected character '${ch}' at position ${i} in formula: "${input}"`, + ); + } + } + } + + tokens.push({ type: 'EOF', value: '', pos: i }); + return tokens; +} + +// --------------------------------------------------------------------------- +// Negation helper — toggles the unary operator on a freshly constructed WFF +// --------------------------------------------------------------------------- + +function applyNegation(wff: WFF): WFF { + const atom = wff as AtomImpl; + const complex = wff as ComplexImpl; + + if (atom.proposition !== undefined) { + // It's an AtomImpl + const wasNegated = atom.unaryOperator === '~'; + return new AtomImpl(wasNegated ? undefined : '~', atom.proposition); + } + + // It's a ComplexImpl + const wasNegated = complex.unaryOperator === '~'; + return new ComplexImpl( + wasNegated ? 
undefined : '~', + complex.left, + complex.binaryOperator, + complex.right, + ); +} + +// --------------------------------------------------------------------------- +// Recursive-descent parser +// +// Precedence (lowest → highest): +// <-> right-associative +// -> right-associative +// | left-associative +// & left-associative +// ~ prefix (right-associative) +// atom / ( formula ) +// --------------------------------------------------------------------------- + +class Parser { + private readonly tokens: Token[]; + private pos = 0; + private readonly variables: Map; + + constructor(tokens: Token[], variables: Map) { + this.tokens = tokens; + this.variables = variables; + } + + parse(): WFF { + const wff = this.parseIff(); + if (this.peek().type !== 'EOF') { + const tok = this.peek(); + throw new SyntaxError( + `Unexpected token '${tok.value}' at position ${tok.pos}. ` + + `Expected end of formula.`, + ); + } + return wff; + } + + private peek(): Token { + return this.tokens[this.pos]; + } + + private consume(): Token { + return this.tokens[this.pos++]; + } + + private expect(type: TT): Token { + const tok = this.consume(); + if (tok.type !== type) { + throw new SyntaxError( + `Expected '${type}' but found '${tok.value}' at position ${tok.pos}.`, + ); + } + return tok; + } + + private parseIff(): WFF { + const left = this.parseImplies(); + if (this.peek().type === 'IFF') { + this.consume(); + const right = this.parseIff(); // right-associative + return new ComplexImpl(undefined, left, '<->', right); + } + return left; + } + + private parseImplies(): WFF { + const left = this.parseOr(); + if (this.peek().type === 'IMPLIES') { + this.consume(); + const right = this.parseImplies(); // right-associative + return new ComplexImpl(undefined, left, '->', right); + } + return left; + } + + private parseOr(): WFF { + let left = this.parseAnd(); + while (this.peek().type === 'OR') { + this.consume(); + const right = this.parseAnd(); + left = new ComplexImpl(undefined, left, 
'|', right); + } + return left; + } + + private parseAnd(): WFF { + let left = this.parseNot(); + while (this.peek().type === 'AND') { + this.consume(); + const right = this.parseNot(); + left = new ComplexImpl(undefined, left, '&', right); + } + return left; + } + + private parseNot(): WFF { + if (this.peek().type === 'NOT') { + this.consume(); + const inner = this.parseNot(); + return applyNegation(inner); + } + return this.parsePrimary(); + } + + private parsePrimary(): WFF { + const tok = this.peek(); + + if (tok.type === 'LPAREN') { + this.consume(); + const inner = this.parseIff(); + this.expect('RPAREN'); + return inner; + } + + if (tok.type === 'ATOM') { + this.consume(); + const name = tok.value; + if (!this.variables.has(name)) { + this.variables.set(name, new PropositionalVariable(name)); + } + return this.variables.get(name)!.atom(); + } + + throw new SyntaxError( + `Unexpected token '${tok.value}' at position ${tok.pos}. ` + + `Expected a proposition letter or '('.`, + ); + } +} + +// --------------------------------------------------------------------------- +// Public result type +// --------------------------------------------------------------------------- + +/** The result of parsing a propositional formula string. */ +export interface PropositionalParseResult { + /** The parsed WFF, ready for evaluation. */ + formula: WFF; + /** Named variables referenced in the formula, keyed by letter. */ + variables: Map; +} + +// --------------------------------------------------------------------------- +// PropositionalSyntaxEngine +// --------------------------------------------------------------------------- + /** - * Propositional syntax engine — not yet implemented. + * Parses propositional formula strings into typed WFF instances. * - * Responsible for parsing formula strings and JSON objects into typed WFF - * instances, validating syntactic correctness against the propositional - * grammar. 
Once implemented, this will power - * PropositionalTheoryBuilder.fromSentenceSet(). + * Formula syntax: + * - Proposition letters: single lowercase letter optionally followed by digits + * (p, q, r, p1, q2, …) + * - Negation: ~p ~(p & q) + * - Conjunction: p & q + * - Disjunction: p | q + * - Implication: p -> q (right-associative) + * - Biconditional: p <-> q (right-associative) + * - Parentheses: (p | q) -> r + * + * Operator precedence (tightest binding first): + * ~ > & > | > -> > <-> + * + * Double negation (~~p) is reduced to p during parsing. + * + * All proposition letters in a formula share PropositionalVariable instances, + * so assigning a variable updates every atom derived from it simultaneously — + * consistent with PropositionalTheory's evaluation contract. */ -export {}; +export class PropositionalSyntaxEngine { + + /** + * Parse a formula string, creating fresh PropositionalVariables for each + * distinct proposition letter encountered. + * + * @param input - A propositional formula string (e.g. "p -> (q & ~r)"). + * @returns The parsed WFF and the variable registry. + * @throws SyntaxError on malformed input. + */ + parse(input: string): PropositionalParseResult { + const variables = new Map(); + const formula = this._doParse(input, variables); + return { formula, variables }; + } + + /** + * Parse a formula string into an existing variable registry. + * + * Variables already present in the registry are reused, so the returned WFF + * participates in the same truth-value assignment as other formulas in the + * same theory. Variables not yet in the registry are created and inserted. + * + * Use this when building a PropositionalTheory with multiple sentences that + * share proposition letters. + * + * @param input - A propositional formula string. + * @param variables - The shared variable registry to parse into. + * @returns The parsed WFF. + * @throws SyntaxError on malformed input. 
+ */ + parseInto(input: string, variables: Map): WFF { + return this._doParse(input, variables); + } + + // ------------------------------------------------------------------------- + + private _doParse(input: string, variables: Map): WFF { + const trimmed = input.trim(); + if (trimmed.length === 0) { + throw new SyntaxError('Cannot parse an empty formula string.'); + } + const tokens = tokenize(trimmed); + return new Parser(tokens, variables).parse(); + } +} diff --git a/src/language/propositional/propositionalTheoryBuilder.ts b/src/language/propositional/propositionalTheoryBuilder.ts index a1ed408..6fcb8c0 100644 --- a/src/language/propositional/propositionalTheoryBuilder.ts +++ b/src/language/propositional/propositionalTheoryBuilder.ts @@ -2,6 +2,9 @@ import { WFF } from './propositionalTypes'; import { PropositionalVariable } from './propositionalVariable'; import { PropositionalFormalSentence, PropositionalTheory } from './propositionalTheory'; import { AlethicAssertoric, SentenceSet } from '../shared/types'; +import { FormalAnnotator } from '../../engine/nlp/formalAnnotator'; +import { FormalTranslator } from '../../engine/nlp/formalTranslator'; +import { PropositionalSyntaxEngine } from '../../engine/syntax/propositional/syntaxEngine'; /** * Fluent builder for constructing a PropositionalTheory. @@ -75,18 +78,46 @@ export class PropositionalTheoryBuilder { } /** - * Placeholder: future entry point for NLP-driven construction. - * Will accept SentenceSet output from NLPEngine and parse each - * AlethicAssertoric into a WFF using the propositional SyntaxEngine. + * Build a PropositionalTheory directly from a SentenceSet produced by + * NLPEngine or constructed manually. * - * @param _set - The sentence set to formalise. - * @throws Error until PropositionalSyntaxEngine is implemented. + * Pipeline: + * 1. FormalAnnotator extracts logical features from each sentence. + * 2. 
FormalTranslator produces a propositional formula string and + * proposition map for each sentence. + * 3. PropositionalSyntaxEngine parses each formula string into a WFF, + * sharing a single variable registry across all sentences so that + * the same proposition letter in different sentences refers to the + * same PropositionalVariable. + * + * Labels are assigned as φ1, φ2, … in sentence order. + * + * @param set - The sentence set to formalise. + * @returns A PropositionalTheory ready for consistency checking. */ - // eslint-disable-next-line @typescript-eslint/no-unused-vars - fromSentenceSet(_set: SentenceSet): PropositionalTheory { - throw new Error( - 'PropositionalTheoryBuilder.fromSentenceSet is not yet implemented. ' + - 'Awaiting PropositionalSyntaxEngine.', - ); + fromSentenceSet(set: SentenceSet): PropositionalTheory { + const annotator = new FormalAnnotator(); + const translator = new FormalTranslator(); + const syntax = new PropositionalSyntaxEngine(); + + const annotated = annotator.annotateAll(set.sentences); + const translations = translator.translate(set, annotated); + + // Single shared variable registry — same letter → same variable across sentences + const variables = new Map(); + let counter = 1; + + const sentences: PropositionalFormalSentence[] = translations.propositional.sentences.map(t => { + const formula = syntax.parseInto(t.formulaString, variables); + const variableNames = Object.keys(t.propositionMap); + return { + source: t.source, + formula, + variableNames, + label: `φ${counter++}`, + }; + }); + + return new PropositionalTheory(sentences, variables); } } diff --git a/test/engine/nlp/nlpEngine.spec.ts b/test/engine/nlp/nlpEngine.spec.ts index 41b9713..3ba37e1 100644 --- a/test/engine/nlp/nlpEngine.spec.ts +++ b/test/engine/nlp/nlpEngine.spec.ts @@ -1,13 +1,144 @@ +import { NLPEngine } from '../../../src/engine/nlp/nlpEngine'; + describe('NLPEngine', () => { - test.skip('TODO: should identify alethic assertoric candidates from a 
declarative sentence', () => { - fail(); + let engine: NLPEngine; + + beforeEach(() => { engine = new NLPEngine(); }); + + describe('parse() — basic pipeline', () => { + test('returns the original input string', () => { + const result = engine.parse('The cat is on the mat.'); + expect(result.input).toBe('The cat is on the mat.'); + }); + + test('identifies an alethic assertoric candidate from a declarative sentence', () => { + const result = engine.parse('All men are mortal.'); + expect(result.candidates).toHaveLength(1); + expect(result.candidates[0].raw).toBe('All men are mortal.'); + }); + + test('returns empty candidates for a question', () => { + const result = engine.parse('Is it raining?'); + expect(result.candidates).toHaveLength(0); + }); + + test('returns empty candidates for an imperative', () => { + const result = engine.parse('Go to the store.'); + expect(result.candidates).toHaveLength(0); + }); + + test('sentenceSet.sentences equals candidates', () => { + const result = engine.parse('All men are mortal.'); + expect(result.sentenceSet.sentences).toEqual(result.candidates); + }); + + test('annotated has one entry per candidate', () => { + const result = engine.parse('All men are mortal. Socrates is a man.'); + expect(result.annotated).toHaveLength(result.candidates.length); + }); + }); + + describe('parse() — multi-sentence input', () => { + test('segments and classifies multiple sentences', () => { + const text = 'All men are mortal. Socrates is a man. Therefore Socrates is mortal.'; + const result = engine.parse(text); + expect(result.candidates.length).toBeGreaterThanOrEqual(2); + }); + + test('drops interrogative from multi-sentence input', () => { + const text = 'All men are mortal. Is Socrates a man? 
Socrates is indeed a man.'; + const result = engine.parse(text); + const raws = result.candidates.map(c => c.raw); + expect(raws.some(r => r.includes('?'))).toBe(false); + }); + }); + + describe('parse() — argument structure', () => { + test('argument.sentences references the same objects as candidates', () => { + const result = engine.parse('All men are mortal. Socrates is a man. Therefore Socrates is mortal.'); + result.argument.sentences.forEach(s => { + expect(result.candidates).toContain(s); + }); + }); + + test('argument identifies at least one conclusion in syllogism', () => { + const text = 'All men are mortal. Socrates is a man. Therefore Socrates is mortal.'; + const result = engine.parse(text); + expect(result.argument.conclusions.length).toBeGreaterThanOrEqual(1); + }); + }); + + describe('parse() — translations', () => { + test('translations.source equals sentenceSet', () => { + const result = engine.parse('All men are mortal.'); + expect(result.translations.source).toBe(result.sentenceSet); + }); + + test('propositional translation has one sentence per candidate', () => { + const result = engine.parse('All men are mortal. 
Socrates is a man.'); + expect(result.translations.propositional.sentences).toHaveLength(result.candidates.length); + }); + + test('quantificational translation detects universal quantifier', () => { + const result = engine.parse('All men are mortal.'); + const qt = result.translations.quantificational.sentences[0]; + expect(qt.quantifierPrefix).toBe('∀x'); + }); + + test('modal translation detects necessity operator', () => { + const result = engine.parse('Necessarily all bachelors are unmarried.'); + const mt = result.translations.modal.sentences[0]; + expect(mt.modalPrefix).toBe('□'); + }); + + test('propositional formula string is non-empty', () => { + const result = engine.parse('Socrates is a man.'); + const f = result.translations.propositional.sentences[0].formulaString; + expect(f.trim().length).toBeGreaterThan(0); + }); }); - test.skip('TODO: should return empty candidates for non-declarative input (questions, commands)', () => { - fail(); + describe('parseStream()', () => { + async function* makeStream(chunks: string[]): AsyncIterable { + for (const chunk of chunks) yield chunk; + } + + test('produces same result as parse() for the same text', async () => { + const text = 'All men are mortal. 
Socrates is a man.'; + const parseResult = engine.parse(text); + const streamResult = await engine.parseStream(makeStream([text])); + expect(streamResult.candidates.map(c => c.raw)) + .toEqual(parseResult.candidates.map(c => c.raw)); + }); + + test('handles chunked input correctly', async () => { + const stream = makeStream(['All men are mortal.', ' Socrates is a man.']); + const result = await engine.parseStream(stream); + expect(result.candidates.length).toBeGreaterThanOrEqual(1); + }); + + test('returns empty candidates for empty stream', async () => { + const result = await engine.parseStream(makeStream([])); + expect(result.candidates).toHaveLength(0); + }); + + test('result contains translations', async () => { + const stream = makeStream(['All men are mortal.']); + const result = await engine.parseStream(stream); + expect(result.translations).toBeDefined(); + expect(result.translations.propositional.sentences).toHaveLength( + result.candidates.length, + ); + }); }); - test.skip('TODO: should assign confidence scores to candidates', () => { - fail(); + describe('parse() — confidence scoring', () => { + test('all candidates have confidence between 0 and 1', () => { + const result = engine.parse('The cat is on the mat. 
Socrates is a philosopher.'); + result.candidates.forEach(c => { + expect(c.confidence).toBeGreaterThanOrEqual(0); + expect(c.confidence).toBeLessThanOrEqual(1); + }); + }); }); }); diff --git a/test/engine/semantics/propositional/evaluationEngine.spec.ts b/test/engine/semantics/propositional/evaluationEngine.spec.ts index 55b4c3c..5ec5521 100644 --- a/test/engine/semantics/propositional/evaluationEngine.spec.ts +++ b/test/engine/semantics/propositional/evaluationEngine.spec.ts @@ -1,17 +1,188 @@ -describe('Propositional EvaluationEngine', () => { - test.skip('TODO: should classify a tautology', () => { - fail(); +import { PropositionalEvaluationEngine } from '../../../../src/engine/semantics/propositional/evaluationEngine'; +import { PropositionalSyntaxEngine } from '../../../../src/engine/syntax/propositional/syntaxEngine'; + +describe('PropositionalEvaluationEngine', () => { + let engine: PropositionalEvaluationEngine; + let syntax: PropositionalSyntaxEngine; + + beforeEach(() => { + engine = new PropositionalEvaluationEngine(); + syntax = new PropositionalSyntaxEngine(); }); - test.skip('TODO: should classify a contradiction', () => { - fail(); + // ── classify() ───────────────────────────────────────────────────────────── + + describe('classify() — tautologies', () => { + test('p | ~p is a tautology', () => { + expect(engine.classify('p | ~p')).toBe('tautology'); + }); + + test('p -> p is a tautology', () => { + expect(engine.classify('p -> p')).toBe('tautology'); + }); + + test('~(p & ~p) is a tautology', () => { + expect(engine.classify('~(p & ~p)')).toBe('tautology'); + }); + + test('(p -> q) -> (~q -> ~p) is a tautology (contraposition)', () => { + expect(engine.classify('(p -> q) -> (~q -> ~p)')).toBe('tautology'); + }); + + test('((p -> q) & (q -> r)) -> (p -> r) is a tautology (hypothetical syllogism)', () => { + expect(engine.classify('((p -> q) & (q -> r)) -> (p -> r)')).toBe('tautology'); + }); + + test('(p & q) -> p is a tautology 
(simplification)', () => { + expect(engine.classify('(p & q) -> p')).toBe('tautology'); + }); + + test('p -> (p | q) is a tautology (addition)', () => { + expect(engine.classify('p -> (p | q)')).toBe('tautology'); + }); + + test('(p <-> q) <-> (q <-> p) is a tautology (biconditional symmetry)', () => { + expect(engine.classify('(p <-> q) <-> (q <-> p)')).toBe('tautology'); + }); }); - test.skip('TODO: should classify a contingency', () => { - fail(); + describe('classify() — contradictions', () => { + test('p & ~p is a contradiction', () => { + expect(engine.classify('p & ~p')).toBe('contradiction'); + }); + + test('~(p | ~p) is a contradiction', () => { + expect(engine.classify('~(p | ~p)')).toBe('contradiction'); + }); + + test('(p -> q) & p & ~q is a contradiction (modus ponens violation)', () => { + expect(engine.classify('(p -> q) & p & ~q')).toBe('contradiction'); + }); }); - test.skip('TODO: should generate a truth table for a given WFF', () => { - fail(); + describe('classify() — contingencies', () => { + test('p is a contingency', () => { + expect(engine.classify('p')).toBe('contingency'); + }); + + test('p & q is a contingency', () => { + expect(engine.classify('p & q')).toBe('contingency'); + }); + + test('p | q is a contingency', () => { + expect(engine.classify('p | q')).toBe('contingency'); + }); + + test('p -> q is a contingency', () => { + expect(engine.classify('p -> q')).toBe('contingency'); + }); + + test('p <-> q is a contingency', () => { + expect(engine.classify('p <-> q')).toBe('contingency'); + }); + }); + + // ── truthTable() ─────────────────────────────────────────────────────────── + + describe('truthTable()', () => { + test('single variable produces 2 rows', () => { + const tt = engine.truthTable('p'); + expect(tt.rows).toHaveLength(2); + }); + + test('two variables produce 4 rows', () => { + const tt = engine.truthTable('p & q'); + expect(tt.rows).toHaveLength(4); + }); + + test('three variables produce 8 rows', () => { + const tt = 
engine.truthTable('p & q & r'); + expect(tt.rows).toHaveLength(8); + }); + + test('variables field lists exactly the letters in the formula', () => { + const tt = engine.truthTable('p & q'); + expect(tt.variables).toContain('p'); + expect(tt.variables).toContain('q'); + expect(tt.variables).toHaveLength(2); + }); + + test('tautology has all rows with value true', () => { + const tt = engine.truthTable('p | ~p'); + expect(tt.rows.every(r => r.value)).toBe(true); + }); + + test('contradiction has all rows with value false', () => { + const tt = engine.truthTable('p & ~p'); + expect(tt.rows.every(r => !r.value)).toBe(true); + }); + + test('contingency has both true and false rows', () => { + const tt = engine.truthTable('p'); + expect(tt.rows.some(r => r.value)).toBe(true); + expect(tt.rows.some(r => !r.value)).toBe(true); + }); + + test('p -> q: only false when p=T, q=F', () => { + const tt = engine.truthTable('p -> q'); + const falseRows = tt.rows.filter(r => !r.value); + expect(falseRows).toHaveLength(1); + expect(falseRows[0].assignment['p']).toBe(true); + expect(falseRows[0].assignment['q']).toBe(false); + }); + + test('each row assignment contains all variables', () => { + const tt = engine.truthTable('p & q'); + tt.rows.forEach(row => { + expect('p' in row.assignment).toBe(true); + expect('q' in row.assignment).toBe(true); + }); + }); + }); + + // ── evaluate() with WFF instance ────────────────────────────────────────── + + describe('evaluate() — with pre-parsed WFF', () => { + test('classifies a tautology correctly', () => { + const { formula, variables } = syntax.parse('p | ~p'); + const result = engine.evaluate(formula, variables); + expect(result.classification).toBe('tautology'); + }); + + test('classifies a contradiction correctly', () => { + const { formula, variables } = syntax.parse('p & ~p'); + const result = engine.evaluate(formula, variables); + expect(result.classification).toBe('contradiction'); + }); + + test('classifies a contingency 
correctly', () => { + const { formula, variables } = syntax.parse('p -> q'); + const result = engine.evaluate(formula, variables); + expect(result.classification).toBe('contingency'); + }); + + test('truth table rows match manual evaluation', () => { + const { formula, variables } = syntax.parse('p & q'); + const result = engine.evaluate(formula, variables); + // p=F, q=F → F; p=F, q=T → F; p=T, q=F → F; p=T, q=T → T + const trueRows = result.truthTable.rows.filter(r => r.value); + expect(trueRows).toHaveLength(1); + expect(trueRows[0].assignment['p']).toBe(true); + expect(trueRows[0].assignment['q']).toBe(true); + }); + }); + + // ── evaluateString() ─────────────────────────────────────────────────────── + + describe('evaluateString()', () => { + test('returns both classification and truth table', () => { + const result = engine.evaluateString('p -> p'); + expect(result.classification).toBe('tautology'); + expect(result.truthTable.rows).toHaveLength(2); + }); + + test('throws SyntaxError for malformed formula', () => { + expect(() => engine.evaluateString('p &')).toThrow(SyntaxError); + }); }); }); diff --git a/test/engine/syntax/propositional/syntaxEngine.spec.ts b/test/engine/syntax/propositional/syntaxEngine.spec.ts index 3cce17e..16b436e 100644 --- a/test/engine/syntax/propositional/syntaxEngine.spec.ts +++ b/test/engine/syntax/propositional/syntaxEngine.spec.ts @@ -1,9 +1,332 @@ -describe('Propositional SyntaxEngine', () => { - test.skip('TODO: should parse a valid propositional formula string into a WFF', () => { - fail(); +import { PropositionalSyntaxEngine } from '../../../../src/engine/syntax/propositional/syntaxEngine'; + +describe('PropositionalSyntaxEngine', () => { + let engine: PropositionalSyntaxEngine; + + beforeEach(() => { engine = new PropositionalSyntaxEngine(); }); + + // ── parse() — atoms ──────────────────────────────────────────────────────── + + describe('parse() — atoms', () => { + test('parses a single proposition letter', () => { + 
const { formula, variables } = engine.parse('p'); + variables.get('p')!.assign(true); + expect(formula.value()).toBe(true); + variables.get('p')!.assign(false); + expect(formula.value()).toBe(false); + }); + + test('parses a proposition letter with a digit suffix', () => { + const { formula, variables } = engine.parse('p1'); + variables.get('p1')!.assign(true); + expect(formula.value()).toBe(true); + }); + + test('variable registry contains exactly the letters used', () => { + const { variables } = engine.parse('p'); + expect(variables.has('p')).toBe(true); + expect(variables.size).toBe(1); + }); + }); + + // ── parse() — negation ───────────────────────────────────────────────────── + + describe('parse() — negation', () => { + test('~p is false when p is true', () => { + const { formula, variables } = engine.parse('~p'); + variables.get('p')!.assign(true); + expect(formula.value()).toBe(false); + }); + + test('~p is true when p is false', () => { + const { formula, variables } = engine.parse('~p'); + variables.get('p')!.assign(false); + expect(formula.value()).toBe(true); + }); + + test('double negation ~~p reduces to p', () => { + const { formula, variables } = engine.parse('~~p'); + variables.get('p')!.assign(true); + expect(formula.value()).toBe(true); + variables.get('p')!.assign(false); + expect(formula.value()).toBe(false); + }); + + test('triple negation ~~~p behaves as ~p', () => { + const { formula, variables } = engine.parse('~~~p'); + variables.get('p')!.assign(true); + expect(formula.value()).toBe(false); + variables.get('p')!.assign(false); + expect(formula.value()).toBe(true); + }); + }); + + // ── parse() — conjunction ────────────────────────────────────────────────── + + describe('parse() — conjunction (&)', () => { + test.each([ + [true, true, true ], + [true, false, false], + [false, true, false], + [false, false, false], + ])('p=%s & q=%s → %s', (p, q, expected) => { + const { formula, variables } = engine.parse('p & q'); + 
variables.get('p')!.assign(p); + variables.get('q')!.assign(q); + expect(formula.value()).toBe(expected); + }); + }); + + // ── parse() — disjunction ────────────────────────────────────────────────── + + describe('parse() — disjunction (|)', () => { + test.each([ + [true, true, true ], + [true, false, true ], + [false, true, true ], + [false, false, false], + ])('p=%s | q=%s → %s', (p, q, expected) => { + const { formula, variables } = engine.parse('p | q'); + variables.get('p')!.assign(p); + variables.get('q')!.assign(q); + expect(formula.value()).toBe(expected); + }); + }); + + // ── parse() — implication ────────────────────────────────────────────────── + + describe('parse() — implication (->)', () => { + test.each([ + [true, true, true ], + [true, false, false], + [false, true, true ], + [false, false, true ], + ])('p=%s -> q=%s → %s', (p, q, expected) => { + const { formula, variables } = engine.parse('p -> q'); + variables.get('p')!.assign(p); + variables.get('q')!.assign(q); + expect(formula.value()).toBe(expected); + }); + }); + + // ── parse() — biconditional ──────────────────────────────────────────────── + + describe('parse() — biconditional (<->)', () => { + test.each([ + [true, true, true ], + [true, false, false], + [false, true, false], + [false, false, true ], + ])('p=%s <-> q=%s → %s', (p, q, expected) => { + const { formula, variables } = engine.parse('p <-> q'); + variables.get('p')!.assign(p); + variables.get('q')!.assign(q); + expect(formula.value()).toBe(expected); + }); + }); + + // ── parse() — precedence ─────────────────────────────────────────────────── + + describe('parse() — operator precedence', () => { + test('~ binds tighter than &: ~p & q ≡ (~p) & q', () => { + const { formula, variables } = engine.parse('~p & q'); + variables.get('p')!.assign(true); + variables.get('q')!.assign(true); + expect(formula.value()).toBe(false); // ~T & T = F & T = F + }); + + test('& binds tighter than |: p & q | r ≡ (p & q) | r', () => { + const { 
formula, variables } = engine.parse('p & q | r'); + variables.get('p')!.assign(false); + variables.get('q')!.assign(false); + variables.get('r')!.assign(true); + expect(formula.value()).toBe(true); // (F & F) | T = F | T = T + }); + + test('| binds tighter than ->: p | q -> r ≡ (p | q) -> r', () => { + const { formula, variables } = engine.parse('p | q -> r'); + variables.get('p')!.assign(true); + variables.get('q')!.assign(false); + variables.get('r')!.assign(false); + expect(formula.value()).toBe(false); // (T | F) -> F = T -> F = F + }); + + test('-> is right-associative: p -> q -> r ≡ p -> (q -> r)', () => { + const { formula, variables } = engine.parse('p -> q -> r'); + variables.get('p')!.assign(true); + variables.get('q')!.assign(true); + variables.get('r')!.assign(false); + // p -> (q -> r) = T -> (T -> F) = T -> F = F + expect(formula.value()).toBe(false); + }); + + test('<-> has lowest precedence: p & q <-> q | r', () => { + const { formula, variables } = engine.parse('p & q <-> q | r'); + variables.get('p')!.assign(true); + variables.get('q')!.assign(true); + variables.get('r')!.assign(false); + // (T & T) <-> (T | F) = T <-> T = T + expect(formula.value()).toBe(true); + }); + }); + + // ── parse() — parentheses ────────────────────────────────────────────────── + + describe('parse() — parentheses', () => { + test('(p | q) -> r — parentheses override default precedence', () => { + const { formula, variables } = engine.parse('(p | q) -> r'); + variables.get('p')!.assign(false); + variables.get('q')!.assign(false); + variables.get('r')!.assign(false); + expect(formula.value()).toBe(true); // (F | F) -> F = F -> F = T + }); + + test('~(p & q) — negation of a parenthesised group', () => { + const { formula, variables } = engine.parse('~(p & q)'); + variables.get('p')!.assign(true); + variables.get('q')!.assign(true); + expect(formula.value()).toBe(false); // ~(T & T) = ~T = F + variables.get('p')!.assign(true); + variables.get('q')!.assign(false); + 
expect(formula.value()).toBe(true); // ~(T & F) = ~F = T + }); + + test('nested parentheses', () => { + const { formula, variables } = engine.parse('((p -> q) & (q -> r)) -> (p -> r)'); + // This is a tautology (hypothetical syllogism) + const v = variables; + let allTrue = true; + for (const pv of [true, false]) { + for (const qv of [true, false]) { + for (const rv of [true, false]) { + v.get('p')!.assign(pv); + v.get('q')!.assign(qv); + v.get('r')!.assign(rv); + if (!formula.value()) { allTrue = false; } + } + } + } + expect(allTrue).toBe(true); + }); + }); + + // ── parse() — variable sharing ───────────────────────────────────────────── + + describe('parse() — variable sharing', () => { + test('same letter in the same formula shares one variable', () => { + const { formula, variables } = engine.parse('p -> p'); + // This is a tautology + variables.get('p')!.assign(true); + expect(formula.value()).toBe(true); + variables.get('p')!.assign(false); + expect(formula.value()).toBe(true); + }); + + test('two letters produce two independent variables', () => { + const { variables } = engine.parse('p & q'); + expect(variables.size).toBe(2); + expect(variables.has('p')).toBe(true); + expect(variables.has('q')).toBe(true); + }); + }); + + // ── parseInto() ──────────────────────────────────────────────────────────── + + describe('parseInto()', () => { + test('reuses existing variables from the registry', () => { + const shared = new Map(); + const f1 = engine.parseInto('p & q', shared); + const f2 = engine.parseInto('p | r', shared); + // both f1 and f2 reference the same PropositionalVariable for 'p' + expect(shared.size).toBe(3); // p, q, r + shared.get('p')!.assign(true); + shared.get('q')!.assign(false); + shared.get('r')!.assign(false); + expect(f1.value()).toBe(false); // T & F = F + expect(f2.value()).toBe(true); // T | F = T + }); + + test('changing shared variable updates both formulas', () => { + const shared = new Map(); + const f1 = engine.parseInto('p', 
shared); + const f2 = engine.parseInto('~p', shared); + shared.get('p')!.assign(true); + expect(f1.value()).toBe(true); + expect(f2.value()).toBe(false); + shared.get('p')!.assign(false); + expect(f1.value()).toBe(false); + expect(f2.value()).toBe(true); + }); + }); + + // ── parse() — known tautologies ─────────────────────────────────────────── + + describe('parse() — known tautologies', () => { + function isTautology(formulaString: string): boolean { + const { formula, variables } = engine.parse(formulaString); + const varNames = Array.from(variables.keys()); + const n = varNames.length; + for (let mask = 0; mask < Math.pow(2, n); mask++) { + varNames.forEach((name, i) => variables.get(name)!.assign(Boolean((mask >> i) & 1))); + if (!formula.value()) return false; + } + return true; + } + + test('p | ~p (excluded middle)', () => { + expect(isTautology('p | ~p')).toBe(true); + }); + + test('p -> p (reflexivity)', () => { + expect(isTautology('p -> p')).toBe(true); + }); + + test('~(p & ~p) (non-contradiction)', () => { + expect(isTautology('~(p & ~p)')).toBe(true); + }); + + test('(p -> q) -> (~q -> ~p) (contraposition)', () => { + expect(isTautology('(p -> q) -> (~q -> ~p)')).toBe(true); + }); + + test('((p -> q) & (q -> r)) -> (p -> r) (hypothetical syllogism)', () => { + expect(isTautology('((p -> q) & (q -> r)) -> (p -> r)')).toBe(true); + }); + + test('(p & q) -> p (simplification)', () => { + expect(isTautology('(p & q) -> p')).toBe(true); + }); + + test('p -> (p | q) (addition)', () => { + expect(isTautology('p -> (p | q)')).toBe(true); + }); }); - test.skip('TODO: should reject malformed formula strings', () => { - fail(); + // ── parse() — error cases ────────────────────────────────────────────────── + + describe('parse() — error cases', () => { + test('throws SyntaxError for empty string', () => { + expect(() => engine.parse('')).toThrow(SyntaxError); + }); + + test('throws SyntaxError for unmatched opening parenthesis', () => { + expect(() => 
engine.parse('(p & q')).toThrow(SyntaxError); + }); + + test('throws SyntaxError for unmatched closing parenthesis', () => { + expect(() => engine.parse('p & q)')).toThrow(SyntaxError); + }); + + test('throws SyntaxError for dangling operator', () => { + expect(() => engine.parse('p &')).toThrow(SyntaxError); + }); + + test('throws SyntaxError for invalid character', () => { + expect(() => engine.parse('p @ q')).toThrow(SyntaxError); + }); + + test('throws SyntaxError for uppercase proposition letter', () => { + expect(() => engine.parse('P')).toThrow(SyntaxError); + }); }); }); diff --git a/test/language/propositional/propositionalTheory.spec.ts b/test/language/propositional/propositionalTheory.spec.ts index 3dc88a3..5a05377 100644 --- a/test/language/propositional/propositionalTheory.spec.ts +++ b/test/language/propositional/propositionalTheory.spec.ts @@ -62,9 +62,24 @@ describe('PropositionalTheoryBuilder', () => { expect(theory.sentences[0].label).toBe('P1'); }); - test('fromSentenceSet() throws (not yet implemented)', () => { + test('fromSentenceSet() builds a theory from a SentenceSet', () => { const builder = new PropositionalTheoryBuilder(); - expect(() => builder.fromSentenceSet({ sentences: [] })).toThrow(); + const set = { + sentences: [ + { raw: 'If it is raining then the streets are wet.', confidence: 1.0 }, + { raw: 'It is raining.', confidence: 1.0 }, + ], + }; + const theory = builder.fromSentenceSet(set); + expect(theory.sentences).toHaveLength(2); + expect(theory.sentences[0].label).toBe('φ1'); + expect(theory.sentences[1].label).toBe('φ2'); + }); + + test('fromSentenceSet() returns an empty theory for an empty SentenceSet', () => { + const builder = new PropositionalTheoryBuilder(); + const theory = builder.fromSentenceSet({ sentences: [] }); + expect(theory.sentences).toHaveLength(0); }); }); diff --git a/tsconfig.json b/tsconfig.json index b1b275b..95ef2a8 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,6 +1,7 @@ { "compilerOptions": { 
"target": "es2017", + "lib": ["es2019", "dom"], "module": "commonjs", "strict": true, "esModuleInterop": true, From f137dffa2d2568028a50d4b2567ffc47098ae94b Mon Sep 17 00:00:00 2001 From: pseudodionysius <36469636+pseudodionysius@users.noreply.github.com> Date: Fri, 10 Apr 2026 10:46:50 +0100 Subject: [PATCH 2/2] feat: natural language syntax parsing --- CLAUDE.md | 154 +++++- README.md | 160 +++++- docs/engine/nlp/DESIGN.md | 195 +++++++ src/engine/nlp/argumentAnalyser.ts | 164 ++++++ src/engine/nlp/formalAnnotator.ts | 294 ++++++++++ src/engine/nlp/formalTranslator.ts | 216 ++++++++ src/engine/nlp/nlpTypes.ts | 3 + src/engine/nlp/sentenceClassifier.ts | 108 ++++ src/engine/nlp/textSegmenter.ts | 130 +++++ src/engine/syntax/index.ts | 4 + .../syntax/naturalLanguageSyntaxParser.ts | 517 ++++++++++++++++++ src/engine/syntax/syntaxTreePrinter.ts | 154 ++++++ src/engine/syntax/syntaxTypes.ts | 168 ++++++ test/engine/nlp/argumentAnalyser.spec.ts | 115 ++++ test/engine/nlp/formalAnnotator.spec.ts | 174 ++++++ test/engine/nlp/formalTranslator.spec.ts | 156 ++++++ test/engine/nlp/sentenceClassifier.spec.ts | 96 ++++ test/engine/nlp/textSegmenter.spec.ts | 109 ++++ .../naturalLanguageSyntaxParser.spec.ts | 210 +++++++ test/engine/syntax/syntaxTreePrinter.spec.ts | 127 +++++ 20 files changed, 3234 insertions(+), 20 deletions(-) create mode 100644 docs/engine/nlp/DESIGN.md create mode 100644 src/engine/nlp/argumentAnalyser.ts create mode 100644 src/engine/nlp/formalAnnotator.ts create mode 100644 src/engine/nlp/formalTranslator.ts create mode 100644 src/engine/nlp/sentenceClassifier.ts create mode 100644 src/engine/nlp/textSegmenter.ts create mode 100644 src/engine/syntax/index.ts create mode 100644 src/engine/syntax/naturalLanguageSyntaxParser.ts create mode 100644 src/engine/syntax/syntaxTreePrinter.ts create mode 100644 src/engine/syntax/syntaxTypes.ts create mode 100644 test/engine/nlp/argumentAnalyser.spec.ts create mode 100644 test/engine/nlp/formalAnnotator.spec.ts 
create mode 100644 test/engine/nlp/formalTranslator.spec.ts create mode 100644 test/engine/nlp/sentenceClassifier.spec.ts create mode 100644 test/engine/nlp/textSegmenter.spec.ts create mode 100644 test/engine/syntax/naturalLanguageSyntaxParser.spec.ts create mode 100644 test/engine/syntax/syntaxTreePrinter.spec.ts diff --git a/CLAUDE.md b/CLAUDE.md index f370f79..82e7b36 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -78,13 +78,24 @@ src/ index.ts engine/ nlp/ - nlpTypes.ts # NLPResult (imports AlethicAssertoric from shared) - nlpEngine.ts # NLPEngine stub + nlpTypes.ts # NLPResult, AnnotatedSentence, SentenceFeatures, FormalTranslationSet, … + nlpEngine.ts # NLPEngine — top-level pipeline orchestrator + textSegmenter.ts # TextSegmenter — sentence boundary detection + AsyncIterable support + sentenceClassifier.ts # SentenceClassifier — mood detection + confidence scoring + argumentAnalyser.ts # ArgumentAnalyser — premise/conclusion detection, pairwise relations + formalAnnotator.ts # FormalAnnotator — feature extraction + syntax tree attachment + formalTranslator.ts # FormalTranslator — formula string generation for all three languages index.ts syntax/ - propositional/syntaxEngine.ts # TODO + syntaxTypes.ts # SyntaxTree DTOs — TaggedToken, TerminalNode, PhraseNode, SyntaxTree + naturalLanguageSyntaxParser.ts # NaturalLanguageSyntaxParser — constituency parse tree builder + syntaxTreePrinter.ts # SyntaxTreePrinter — box-drawing, token list, bracketed notation + index.ts + propositional/ + syntaxEngine.ts # PropositionalSyntaxEngine — formula string → WFF (recursive descent) semantics/ - propositional/evaluationEngine.ts # TODO + propositional/ + evaluationEngine.ts # PropositionalEvaluationEngine — truth tables, classification test/ language/ @@ -114,9 +125,18 @@ test/ meta-logic/ completeness.spec.ts # K axiom, modal duality, distribution, non-theorems engine/ - nlp/nlpEngine.spec.ts # Skipped placeholder - syntax/propositional/syntaxEngine.spec.ts # Skipped 
placeholder - semantics/propositional/evaluationEngine.spec.ts # Skipped placeholder + nlp/ + nlpEngine.spec.ts + textSegmenter.spec.ts + sentenceClassifier.spec.ts + argumentAnalyser.spec.ts + formalAnnotator.spec.ts + formalTranslator.spec.ts + syntax/ + naturalLanguageSyntaxParser.spec.ts + syntaxTreePrinter.spec.ts + propositional/syntaxEngine.spec.ts + semantics/propositional/evaluationEngine.spec.ts ``` ### TypeScript Config Split @@ -291,19 +311,125 @@ Five concrete `ModalSystemSpec` objects are provided: `docs/modal/design.md` covers Kripke semantics, system hierarchy, and the analogy table between quantificational and modal constructs (domain ↔ worlds, ∀/∃ ↔ □/◇, etc.). -## NLP Engine — Design Intent +## NLP Engine — What's Implemented + +The pipeline is fully implemented in `src/engine/nlp/`. All processing is rule-based and zero-dependency. + +### Pipeline stages + +**`TextSegmenter`** — splits raw text into sentence strings. + +- Paragraph breaks (double newlines) always act as boundaries. +- Dot-based boundaries require whitespace + uppercase following and respect common abbreviations (`Mr.`, `Dr.`, etc.) and decimal numbers. +- `segment(text: string): string[]` — eager, for string input. +- `segmentStream(source: AsyncIterable): Promise` — lazy, buffers chunks then segments. + +**`SentenceClassifier`** — filters to alethic assertoric sentences and scores confidence. + +- `classify(sentence): AlethicAssertoric | null` — returns `null` for interrogative, imperative, and exclamatory moods. +- Confidence scoring: base 0.5 ± rule adjustments (copula, epistemic markers, hedging language, sentence length, subject-verb pattern). Clamped to [0.05, 1.0]. +- `classifyAll(sentences): AlethicAssertoric[]` — batch variant, drops non-assertoric. + +**`FormalAnnotator`** — extracts logical features and attaches a constituency syntax tree. + +- Trigger tables for connectives, quantifiers, modal adverbs, and negations (span-based, non-overlapping). 
+- Proposition extraction: inverts the occupied spans to find free text fragments. +- `annotate(sentence): AnnotatedSentence` — populates `features` and `syntaxTree`. +- `annotateAll(sentences): AnnotatedSentence[]` — batch variant. + +**`ArgumentAnalyser`** — detects argument structure. + +- Conclusion markers: `therefore`, `thus`, `hence`, `consequently`, etc. +- Premise markers: `because`, `since`, `given that`, etc. +- When no explicit conclusion is found, the last sentence is promoted. +- Pairwise relations: `supports`, `opposes` (negation asymmetry + content-word overlap), or `independent`. +- `analyse(sentences): AnalysedArgument` + +**`FormalTranslator`** — generates formula strings for all three formal languages. + +- **Propositional**: interleaves proposition labels with connective operators → `"p -> q"`. +- **Quantificational**: prefixes with detected quantifier → `"∀x. p -> q"`. +- **Modal**: wraps with modal operator → `"□(p -> q)"`. +- `translate(source, annotated): FormalTranslationSet` + +**`NLPEngine`** — top-level orchestrator. + +- `parse(input: string): NLPResult` — runs the full pipeline over a string. +- `parseStream(source: AsyncIterable): Promise` — async variant for file/stream input. 
+ +### `NLPResult` shape + +```ts +interface NLPResult { + input: string; + candidates: AlethicAssertoric[]; + sentenceSet: SentenceSet; + annotated: AnnotatedSentence[]; // includes syntaxTree per sentence + argument: AnalysedArgument; + translations: FormalTranslationSet; +} +``` + +## Syntax Engine — What's Implemented + +### Constituency parse trees (`src/engine/syntax/`) + +**`syntaxTypes.ts`** — serialization-ready DTOs designed for future protobuf/JSON/CBOR support: + +| Type | Role | +| --- | --- | +| `PhraseLabel` | Constituent categories: `S`, `NP`, `VP`, `PP`, `AP`, `AdvP`, `CP`, `QP` | +| `POSTag` | POS tags: `DET`, `QUANT`, `N`, `PN`, `PRON`, `V`, `COP`, `AUX`, `MODAL`, `ADJ`, `ADV`, `PREP`, `CONJ`, `COMP`, `NEG`, `PART`, `PUNCT`, `UNKNOWN` | +| `TaggedToken` | `{ text, pos, index }` — the pre-terminal layer | +| `TerminalNode` | Leaf: `{ kind: 'terminal', pos, text, index }` | +| `PhraseNode` | Internal: `{ kind: 'phrase', label, children, startIndex, endIndex }` | +| `SyntaxNode` | Discriminated union: `TerminalNode \| PhraseNode` | +| `SyntaxTree` | Root DTO: `{ schemaVersion, source, tokens, root }` | + +The `kind` discriminator maps directly to a protobuf `oneof`. `PhraseLabel` and `POSTag` map to protobuf enums. Arrays (not Maps) are used throughout. `schemaVersion = '1'` enables forward-compatible evolution. + +**`NaturalLanguageSyntaxParser`** — rule-based constituency parser. + +- POS tagging via lexicon lookup with morphological heuristics (`-ly` → ADV, `-tion` → N, `-ous/-al/-ful/-ive/-able` → ADJ, `-ing` → V, etc.). +- Handles: simple declarative NP+VP, quantified NP (`QUANT N`), modal adverb opening (`AdvP`), conditional sentences (`if/then` → CP), copular constructions with PP, negation inside VP. 
+- `parse(sentence: string): SyntaxTree` + +**`SyntaxTreePrinter`** — three rendering modes: + +| Method | Output | +| --- | --- | +| `render(tree, header?)` | Box-drawing tree (`└─`, `├─`, `│`) | +| `renderTokens(tree)` | Flat indexed list: `[0] QUANT "All"` | +| `renderBracketed(tree)` | Inline bracketed notation: `[S [NP ...][VP ...]]` | +| `print/printTokens/printBracketed` | Console variants of each | + +### Propositional formula parser (`src/engine/syntax/propositional/syntaxEngine.ts`) + +**`PropositionalSyntaxEngine`** — recursive-descent parser for formula strings. + +- Operator precedence (lowest → highest): `<->` < `->` < `|` < `&` < `~` < atoms/parentheses. +- `<->` and `->` are right-associative; `|` and `&` are left-associative. +- Double negation (`~~p`) is eliminated during parse by toggling `unaryOperator`. +- `parse(input: string): PropositionalParseResult` — returns `{ formula: WFF, variables: Map }`. +- `parseInto(input, variables)` — parses into a shared variable registry (used by `fromSentenceSet`). + +**`PropositionalTheoryBuilder.fromSentenceSet(set: SentenceSet): PropositionalTheory`** — now implemented. Runs `FormalAnnotator` → `FormalTranslator` → `PropositionalSyntaxEngine` per sentence, sharing a single variable registry across all formulas. + +## Semantics Engine — What's Implemented + +**`PropositionalEvaluationEngine`** (`src/engine/semantics/propositional/evaluationEngine.ts`): -`NLPEngine.parse(input: string): NLPResult` accepts any string and returns zero or more `AlethicAssertoric` candidates. The output `SentenceSet` feeds directly into `PropositionalTheoryBuilder.fromSentenceSet()`. +- `evaluate(formula, variables): EvaluationResult` — exhaustive 2^n enumeration. +- `evaluateString(formulaString): EvaluationResult` — parse + evaluate in one step. +- `classify(formulaString): WFFClassification` — `'tautology'` | `'contradiction'` | `'contingency'`. 
+- `truthTable(formulaString): TruthTable` — `{ variables, rows: { assignment, value }[] }`. +- `printTruthTable(formulaString): void` — formatted console output with box-drawing separator. ## What Is Not Yet Implemented -- `NLPEngine` — sentence segmentation, mood classification, confidence scoring -- `PropositionalSyntaxEngine` — parsing formula strings into WFF instances -- `PropositionalEvaluationEngine` — truth tables, tautology/contradiction classification -- `PropositionalTheoryBuilder.fromSentenceSet()` — awaits SyntaxEngine - Quantificational function symbols (e.g., `f(x)`) - `QuantificationalSyntaxEngine` — parsing formula strings into QFF instances -- `QuantificationalTheoryBuilder.fromSentenceSet()` — awaits SyntaxEngine +- `QuantificationalTheoryBuilder.fromSentenceSet()` — awaits QuantificationalSyntaxEngine - `ModalSyntaxEngine` — parsing formula strings into MFF instances - `ModalTheoryBuilder.fromSentenceSet()` — awaits ModalSyntaxEngine - Quantified modal logic (combining QFF quantifiers with modal operators) diff --git a/README.md b/README.md index 5850899..dc65475 100644 --- a/README.md +++ b/README.md @@ -523,21 +523,169 @@ Shared propositions: ## NLP Engine -> In Progress +`NLPEngine.parse(input)` accepts any string and returns an `NLPResult`. All processing is rule-based — no external ML dependencies. -`NLPEngine.parse(input)` accepts any string and returns an `NLPResult` containing zero or more `AlethicAssertoric` candidates — declarative sentences that make a truth claim. The candidates can be passed as a `SentenceSet` to `PropositionalTheoryBuilder.fromSentenceSet()` once the syntax engine is implemented. +```ts +import { NLPEngine } from 'logic-engine'; + +const engine = new NLPEngine(); +const result = engine.parse( + 'All men are mortal. Socrates is a man. 
Therefore Socrates is mortal.', +); + +// result.candidates — AlethicAssertoric[] (assertoric sentences only) +// result.sentenceSet — SentenceSet ready for PropositionalTheoryBuilder.fromSentenceSet() +// result.annotated — features + constituency syntax tree per sentence +// result.argument — { premises, conclusions, relations } +// result.translations — propositional / quantificational / modal formula strings +``` + +**Stream support** — pass any `AsyncIterable<string>` for file or network input: + +```ts +const result = await engine.parseStream(fs.createReadStream('argument.txt', 'utf-8')); +``` + +### Pipeline stages + +| Stage | Class | Responsibility | +| --- | --- | --- | +| 1 | `TextSegmenter` | Sentence boundary detection — respects abbreviations and decimals | +| 2 | `SentenceClassifier` | Mood detection; confidence scoring (0.05–1.0) | +| 3 | `FormalAnnotator` | Extracts connectives, quantifiers, modals, negations, propositions; builds syntax tree | +| 4 | `ArgumentAnalyser` | Identifies premises and conclusions; computes pairwise relations | +| 5 | `FormalTranslator` | Generates formula strings for propositional, quantificational, and modal languages | + +### `AnnotatedSentence` + +Each sentence in `result.annotated` carries: + +```ts +interface AnnotatedSentence { + source: AlethicAssertoric; + features: SentenceFeatures; // connectives, quantifiers, modalAdverbs, negations, propositions, mood + syntaxTree: SyntaxTree; // constituency parse tree +} +``` + +### Formal translations + +`result.translations` contains formula strings for all three languages: + +```ts +// e.g. for "If it rains then the streets are wet" (no quantifier or modal detected, so no prefix is added) +propositional: { formulaString: 'p -> q', propositionMap: { p: 'it rains', q: 'the streets are wet' } } +quantificational: { formulaString: '
p -> q', quantifierPrefix: null, suggestedPredicate: null } +modal: { formulaString: 'p -> q', modalPrefix: null } +``` ## Syntax Engine -> In Progress +### Constituency syntax trees + +`NaturalLanguageSyntaxParser` builds a constituency parse tree for any sentence. Trees are serialization-ready DTOs — no methods, plain data, compatible with JSON/protobuf/CBOR. + +```ts +import { NaturalLanguageSyntaxParser, SyntaxTreePrinter } from 'logic-engine'; + +const parser = new NaturalLanguageSyntaxParser(); +const printer = new SyntaxTreePrinter(); + +const tree = parser.parse('All men are mortal.'); + +printer.print(tree); // box-drawing tree +printer.printTokens(tree); // flat POS-tagged token list +printer.printBracketed(tree); // [S [NP [QUANT All][N men]][VP [COP are][AP [ADJ mortal]]]] +``` + +Box-drawing output example: -`PropositionalSyntaxEngine` will parse formula strings and JSON into typed `WFF` instances, enabling `PropositionalTheoryBuilder.fromSentenceSet()` to accept NLP engine output directly. +```text +Syntax Tree — "All men are mortal." +══════════════════════════════════════ +S +├── NP +│ ├── QUANT "All" +│ └── N "men" +└── VP + ├── COP "are" + └── AP + └── ADJ "mortal" +``` + +**`SyntaxTree` DTO** — self-contained and schema-versioned: + +```ts +interface SyntaxTree { + schemaVersion: string; // '1' — bump on breaking shape changes + source: string; // original sentence + tokens: TaggedToken[]; // POS-tagged token sequence + root: PhraseNode; // root S node +} +``` + +Node types use a `kind` discriminator (`'terminal'` | `'phrase'`) that maps directly to a protobuf `oneof`. Phrase labels (`S`, `NP`, `VP`, `PP`, `AP`, `AdvP`, `CP`, `QP`) and POS tags map to protobuf enums. + +### Propositional formula parser + +`PropositionalSyntaxEngine` parses formula strings into typed `WFF` instances. 
+ +```ts +import { PropositionalSyntaxEngine } from 'logic-engine'; + +const engine = new PropositionalSyntaxEngine(); + +const { formula, variables } = engine.parse('p -> (q | ~p)'); +// formula — a WFF ready for value() evaluation +// variables — Map + +variables.get('p')!.assign(true); +variables.get('q')!.assign(false); +formula.value(); // → true +``` + +Operator precedence (lowest → highest): `<->` < `->` < `|` < `&` < `~`. Double negation (`~~p`) is eliminated during parse. Parentheses override precedence as expected. + +**`PropositionalTheoryBuilder.fromSentenceSet()`** uses the syntax engine internally — pass `NLPEngine` output directly into a propositional theory: + +```ts +const nlp = new NLPEngine(); +const result = nlp.parse('It is raining. If it rains the ground is wet. The ground is wet.'); + +const theory = new PropositionalTheoryBuilder().fromSentenceSet(result.sentenceSet); +theory.printProof(); +``` ## Evaluation Engine -> In Progress +`PropositionalEvaluationEngine` provides truth-table generation and semantic classification. + +```ts +import { PropositionalEvaluationEngine } from 'logic-engine'; + +const engine = new PropositionalEvaluationEngine(); + +engine.classify('p | ~p'); // → 'tautology' +engine.classify('p & ~p'); // → 'contradiction' +engine.classify('p -> q'); // → 'contingency' + +engine.printTruthTable('p -> q'); +``` + +```text +TRUTH TABLE — p -> q +═════════════════════════ +p │ q │ VALUE +───────────────────────── +F │ F │ T +F │ T │ T +T │ F │ F +T │ T │ T +``` + +**Classification:** `'tautology'` (true under all assignments), `'contradiction'` (false under all), or `'contingency'` (both). -`PropositionalEvaluationEngine` will provide truth table generation, tautology / contradiction / contingency classification, and proof support beyond the truth-table consistency check already available in `PropositionalTheory`. 
+**`evaluate(formula, variables)`** — works directly with WFF instances when you need the full `EvaluationResult` including the `TruthTable` data structure. ## License diff --git a/docs/engine/nlp/DESIGN.md b/docs/engine/nlp/DESIGN.md new file mode 100644 index 0000000..a8856ce --- /dev/null +++ b/docs/engine/nlp/DESIGN.md @@ -0,0 +1,195 @@ +# NLP Engine — Design + +## Overview + +The NLP engine is a zero-dependency, rule-based pipeline that turns raw text into +data structures usable by the formal language engines (propositional, quantificational, modal). + +``` +raw text / stream + │ + ▼ TextSegmenter +sentence strings + │ + ▼ SentenceClassifier +AlethicAssertoric[] (non-assertoric sentences dropped) + │ + ├─► ArgumentAnalyser → AnalysedArgument + │ (premise/conclusion roles, support/oppose relations) + │ + └─► FormalAnnotator → AnnotatedSentence[] + │ (connectives, quantifiers, modal adverbs, propositions) + ▼ + FormalTranslator → FormalTranslationSet + ├─ propositional (WFF-ready formula strings + proposition map) + ├─ quantificational (QFF annotations + formula strings) + └─ modal (MFF annotations + formula strings) +``` + +`NLPEngine.parse()` runs the full pipeline and returns an `NLPResult` containing +the `SentenceSet`, `AnnotatedSentence[]`, `AnalysedArgument`, and `FormalTranslationSet`. + +`NLPEngine.parseStream()` accepts any `AsyncIterable` (Node.js Readable, +file reader, web stream, etc.), collects all chunks, then runs the same pipeline. + +--- + +## TextSegmenter + +Splits raw text into sentence-candidate strings using a regex-based boundary detector. + +**Rules (in priority order):** +1. Split on double newline `\n\n` — paragraph boundary, always a sentence break. +2. Split on `.` / `!` / `?` followed by whitespace + uppercase — standard EOS marker. +3. **Do not** split on `.` preceded by a known abbreviation (`Mr`, `Dr`, `St`, `vs`, `etc`, `e.g`, `i.e`, `approx`, single letters). +4. **Do not** split on `.` inside a decimal number (`3.14`). +5. 
Trim and discard blank segments. + +--- + +## SentenceClassifier + +Decides whether a sentence string is an *alethic assertoric* sentence and scores +its confidence. Non-assertoric sentences are silently dropped. + +### Mood classification + +| Signal | Mood | Assertoric? | +|--------|------|------------| +| Ends with `?` | Interrogative | No | +| Ends with `!` | Exclamatory | No | +| Starts with imperative verb (see list below) | Imperative | No | +| Otherwise | Declarative | Candidate | + +**Imperative verb starters:** `Go`, `Stop`, `Run`, `Please`, `Do`, `Don't`, `Make`, +`Let`, `Be`, `Come`, `Take`, `Get`, `Give`, `Look`, `Show`, `Tell`, `Try`, `Use`, +`Find`, `Put`, `Keep`, `Turn`, `Open`, `Close`, `Start`, `Wait`, `Help`, `Move`. + +### Confidence scoring + +Base confidence for any declarative sentence: **0.5** + +| Signal | Δ confidence | +|--------|-------------| +| Contains copula (`is`, `are`, `was`, `were`, `will be`, `has been`) | +0.15 | +| Contains epistemic/modal marker (`necessarily`, `must`, `certainly`, `it is certain`) | +0.15 | +| Contains hedging language (`maybe`, `perhaps`, `I think`, `I believe`, `probably`) | −0.15 | +| Subject-verb structure detected (starts with noun phrase + verb) | +0.10 | +| Very short sentence (< 4 words) | −0.10 | +| Sentence begins with `It is` / `There is` / `There are` | +0.10 | + +Confidence is clamped to `[0.05, 1.0]`. + +--- + +## ArgumentAnalyser + +Detects argument structure within a `SentenceSet` — which sentences are premises, +which are conclusions, and what relations hold between pairs. + +### Conclusion markers (sentence-initial) +`therefore`, `thus`, `hence`, `so`, `consequently`, `it follows that`, +`we can conclude`, `this means`, `in conclusion`, `as a result`. + +### Premise markers (sentence-initial) +`because`, `since`, `given that`, `assuming`, `for`, `as we know`, +`it is known that`, `suppose`, `if we assume`. 
+ +### Relations + +| Relation | Detection | +|----------|-----------| +| `supports` | Premise marker on sentence A, no explicit negation relative to B | +| `opposes` | Negation of a key proposition present in B detected in A | +| `independent` | No structural relation detected | + +--- + +## FormalAnnotator + +Extracts logical features from each `AlethicAssertoric` sentence, returning an +`AnnotatedSentence` with the following sub-structures. + +### Connective triggers → BinaryOperator + +| Natural language triggers | Operator | +|--------------------------|----------| +| `if … then`, `only if`, `implies`, `entails` | `->` | +| `if and only if`, `iff`, `just in case` | `<->` | +| `and`, `but`, `moreover`, `furthermore`, `both … and` | `&` | +| `or`, `either … or`, `unless` | `\|` | + +### Negation triggers + +`not`, `no`, `never`, `it is not the case that`, `it is false that`. + +### Quantifier triggers → Quantifier + +| Triggers | Quantifier | +|---------- |-----------| +| `all`, `every`, `each`, `any`, `for all`, `for every` | `∀` | +| `some`, `there is`, `there are`, `there exists`, `at least one` | `∃` | +| `no`, `none`, `nothing`, `no one`, `nobody`, `never` | `¬∃` | + +### Modal adverb triggers → ModalOperator + +| Triggers | Operator | +|---------|---------| +| `necessarily`, `must`, `it is necessary that`, `it is certain that`, `certainly` | `□` | +| `possibly`, `might`, `may`, `could`, `it is possible that`, `perhaps`, `maybe` | `◇` | + +### Proposition extraction + +Atomic proposition candidates are the maximal text spans that remain after removing +all detected connectives, quantifiers, modal adverbs, and negations from the sentence. +Each candidate is assigned a label (`p`, `q`, `r`, `s`, …) in left-to-right order +of appearance. The mapping from label → text fragment is stored in `propositionMap`. + +--- + +## FormalTranslator + +Produces a `FormalTranslationSet` from an `AnnotatedSentence[]`. 
+ +### Propositional translation + +Uses the detected `ConnectiveAnnotation[]` and `NegationAnnotation[]` to build a +formula string. The algorithm: + +1. If no connective detected → whole sentence maps to a single atom `p`. +2. If one connective detected → split sentence into left/right halves, assign atoms, emit `left OP right`. +3. If negation wraps a sub-formula → prefix with `~`. +4. Multiple connectives → processed left-to-right (flat, no nesting at this stage). + +Returns a `formulaString` (e.g. `"p -> q"`) and a `propositionMap` recording which +text fragment each label refers to. **Does not construct WFF objects** — that is the +job of `PropositionalSyntaxEngine`, which parses the formula string into a WFF when +`PropositionalTheoryBuilder.fromSentenceSet()` is used; the string can also guide a human or tool populating a builder by hand. + +### Quantificational translation + +Wraps the propositional translation with detected quantifier prefixes: +`∀x`, `∃x`, `¬∃x`. Suggests predicate names derived from the proposition text. +Returns annotation metadata + formula string. Full QFF construction awaits +`QuantificationalSyntaxEngine`. + +### Modal translation + +Wraps the propositional translation with detected modal operator prefixes: +`□`, `◇`. Returns annotation metadata + formula string. Full MFF construction +awaits `ModalSyntaxEngine`.
+ +--- + +## NLPResult (extended) + +```ts +interface NLPResult { + input: string; + candidates: AlethicAssertoric[]; // backward-compatible + sentenceSet: SentenceSet; // the same sentences as `candidates`, wrapped as a SentenceSet + annotated: AnnotatedSentence[]; // features per sentence + argument: AnalysedArgument; // premise/conclusion structure + translations: FormalTranslationSet; // formal language translations +} +``` diff --git a/src/engine/nlp/argumentAnalyser.ts b/src/engine/nlp/argumentAnalyser.ts new file mode 100644 index 0000000..4cc9160 --- /dev/null +++ b/src/engine/nlp/argumentAnalyser.ts @@ -0,0 +1,164 @@ +import { AlethicAssertoric } from '../../language/shared/types'; +import { AnalysedArgument, ArgumentRelation, SentencePair } from './nlpTypes'; + +// --------------------------------------------------------------------------- +// Discourse markers +// --------------------------------------------------------------------------- + +const CONCLUSION_MARKERS = [ + 'therefore', 'thus', 'hence', 'consequently', 'it follows that', + 'we can conclude', 'this means', 'in conclusion', 'as a result', + 'so,', 'so ', 'which shows', 'which proves', 'which means', + 'this demonstrates', 'this establishes', +]; + +const PREMISE_MARKERS = [ + 'because', 'since', 'given that', 'assuming', 'as we know', + 'it is known that', 'suppose', 'if we assume', 'we know that', + 'it has been shown that', 'the evidence shows', 'we observe that', + 'for the reason that', 'in view of the fact that', +]; + +// Negation words used when detecting opposing relations between sentences +const NEGATION_WORDS = ['not', 'never', 'no ', "isn't", "aren't", "wasn't", + "weren't", "doesn't", "don't", "didn't", 'neither', 'nor']; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function startsWithMarker(text: string, markers: string[]): boolean { + const lower =
text.toLowerCase().trimStart(); + return markers.some(m => lower.startsWith(m)); +} + +function containsMarker(text: string, markers: string[]): boolean { + const lower = text.toLowerCase(); + return markers.some(m => lower.includes(m)); +} + +/** + * Extract the key content words from a sentence for overlap comparison. + * Strips stop-words and returns the remaining lowercase tokens. + * Characters other than a-z, 0-9 and whitespace are removed first, and + * tokens of two characters or fewer are dropped. + * + * @param text - The raw sentence text. + * @returns The set of distinct content words found in `text`. + */ +function contentWords(text: string): Set<string> { + const STOP = new Set([ + 'a', 'an', 'the', 'is', 'are', 'was', 'were', 'be', 'been', + 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', + 'would', 'could', 'should', 'may', 'might', 'must', 'shall', + 'can', 'and', 'or', 'but', 'if', 'then', 'so', 'that', 'this', + 'it', 'its', 'of', 'in', 'on', 'at', 'to', 'for', 'with', + 'by', 'from', 'not', 'no', 'nor', 'yet', 'both', 'either', + 'neither', 'also', 'therefore', 'thus', 'hence', 'since', + 'because', 'although', 'though', 'however', 'moreover', + ]); + return new Set( + text.toLowerCase() + .replace(/[^a-z0-9\s]/g, '') + .split(/\s+/) + .filter(w => w.length > 2 && !STOP.has(w)), + ); +} + +/** + * Detect whether sentence A appears to oppose sentence B: + * - B contains a negation of a key content word from A, or vice-versa.
+ */ +function detectOpposition(a: AlethicAssertoric, b: AlethicAssertoric): boolean { + const aWords = contentWords(a.raw); + const bWords = contentWords(b.raw); + + // Any key word overlap with a negation in the other sentence + const overlap = [...aWords].some(w => bWords.has(w)); + if (!overlap) return false; + + const aNegated = NEGATION_WORDS.some(n => a.raw.toLowerCase().includes(n)); + const bNegated = NEGATION_WORDS.some(n => b.raw.toLowerCase().includes(n)); + + // One negated, one not — with shared content words → opposition + return aNegated !== bNegated; +} + +// --------------------------------------------------------------------------- +// ArgumentAnalyser +// --------------------------------------------------------------------------- + +/** + * Detects argument structure within a set of assertoric sentences. + * + * Identifies which sentences play the role of premises and which conclusions, + * and records pairwise support/oppose/independent relations. + */ +export class ArgumentAnalyser { + + /** + * Analyse the argument structure of an ordered set of assertoric sentences. + * + * @param sentences - The assertoric candidates to analyse. + * @returns A fully populated `AnalysedArgument`. + */ + analyse(sentences: AlethicAssertoric[]): AnalysedArgument { + const conclusions: AlethicAssertoric[] = []; + const premises: AlethicAssertoric[] = []; + + for (const s of sentences) { + if (startsWithMarker(s.raw, CONCLUSION_MARKERS) || + containsMarker(s.raw, CONCLUSION_MARKERS.filter(m => m.length > 8))) { + conclusions.push(s); + } else if (startsWithMarker(s.raw, PREMISE_MARKERS) || + containsMarker(s.raw, PREMISE_MARKERS.filter(m => m.length > 8))) { + premises.push(s); + } else { + // No explicit marker — treat as a premise by default + premises.push(s); + } + } + + // If everything was classified as a premise (no conclusions found), + // treat the last sentence as the conclusion — the most common pattern + // in natural argument text. 
+ if (conclusions.length === 0 && premises.length >= 1) { + const last = premises.pop()!; + conclusions.push(last); + } + + const relations = this._computeRelations(sentences); + + return { sentences, premises, conclusions, relations }; + } + + // ------------------------------------------------------------------------- + // Private helpers + // ------------------------------------------------------------------------- + + private _computeRelations(sentences: AlethicAssertoric[]): SentencePair[] { + const pairs: SentencePair[] = []; + + for (let i = 0; i < sentences.length; i++) { + for (let j = i + 1; j < sentences.length; j++) { + const a = sentences[i]; + const b = sentences[j]; + const relation: ArgumentRelation = detectOpposition(a, b) + ? 'opposes' + : this._detectSupport(a, b) + ? 'supports' + : 'independent'; + pairs.push({ from: a, to: b, relation }); + } + } + + return pairs; + } + + private _detectSupport(a: AlethicAssertoric, b: AlethicAssertoric): boolean { + // A supports B if B begins with a conclusion marker AND A and B share + // content words, OR if A begins with a premise marker. 
+ if (startsWithMarker(a.raw, PREMISE_MARKERS)) return true; + if (startsWithMarker(b.raw, CONCLUSION_MARKERS)) { + const aWords = contentWords(a.raw); + const bWords = contentWords(b.raw); + return [...aWords].some(w => bWords.has(w)); + } + return false; + } +} diff --git a/src/engine/nlp/formalAnnotator.ts b/src/engine/nlp/formalAnnotator.ts new file mode 100644 index 0000000..c9f894b --- /dev/null +++ b/src/engine/nlp/formalAnnotator.ts @@ -0,0 +1,294 @@ +import { AlethicAssertoric } from '../../language/shared/types'; +import { + AnnotatedSentence, + ConnectiveAnnotation, + ModalAnnotation, + MoodType, + NegationAnnotation, + PropositionAnnotation, + QuantifierAnnotation, + SentenceFeatures, +} from './nlpTypes'; +import { NaturalLanguageSyntaxParser } from '../syntax/naturalLanguageSyntaxParser'; + +// --------------------------------------------------------------------------- +// Trigger tables +// --------------------------------------------------------------------------- + +/** Each entry: [trigger text, formal operator]. + * Sorted longest-first so longer phrases match before shorter prefixes. */ +const CONNECTIVE_TRIGGERS: Array<[string, '&' | '|' | '->' | '<->']> = [ + ['if and only if', '<->'], + ['iff', '<->'], + ['just in case', '<->'], + ['if … then', '->'], + ['if...then', '->'], + ['only if', '->'], + ['implies', '->'], + ['entails', '->'], + ['if ', '->'], // bare "if" — low specificity, kept last among -> + ['then ', '->'], + ['both … and', '&'], + ['both ... and', '&'], + ['furthermore', '&'], + ['moreover', '&'], + ['but', '&'], + ['and', '&'], + ['either … or', '|'], + ['either ... 
or', '|'], + ['unless', '|'], + ['or', '|'], +]; + +const QUANTIFIER_TRIGGERS: Array<[string, '∀' | '∃' | '¬∃']> = [ + ['for all', '∀'], + ['for every', '∀'], + ['every', '∀'], + ['each', '∀'], + ['any ', '∀'], + ['all ', '∀'], + ['there exists', '∃'], + ['there is a', '∃'], + ['there are', '∃'], + ['there is', '∃'], + ['at least one', '∃'], + ['some ', '∃'], + ['no one', '¬∃'], + ['nobody', '¬∃'], + ['nothing', '¬∃'], + ['none', '¬∃'], + ['no ', '¬∃'], + ['never', '¬∃'], +]; + +const MODAL_TRIGGERS: Array<[string, '□' | '◇']> = [ + ['it is necessary that', '□'], + ['it is certain that', '□'], + ['necessarily', '□'], + ['certainly', '□'], + ['must ', '□'], + ['it is possible that', '◇'], + ['it is conceivable that', '◇'], + ['possibly', '◇'], + ['perhaps', '◇'], + ['maybe', '◇'], + ['might ', '◇'], + ['may ', '◇'], + ['could ', '◇'], +]; + +const NEGATION_TRIGGERS: string[] = [ + 'it is not the case that', + 'it is false that', + 'not', + 'never', + 'no ', +]; + +// --------------------------------------------------------------------------- +// Label generator +// --------------------------------------------------------------------------- + +const PROP_LABELS = 'pqrstuvwxyz'.split(''); + +function labelFor(index: number): string { + if (index < PROP_LABELS.length) return PROP_LABELS[index]; + // fallback: p0, p1, … + return `p${index}`; +} + +// --------------------------------------------------------------------------- +// Span search +// --------------------------------------------------------------------------- + +/** + * Find all non-overlapping occurrences of `trigger` in `text` (case-insensitive). + * Returns an array of [start, end) spans. 
+ */ +function findSpans(text: string, trigger: string): Array<[number, number]> { + const spans: Array<[number, number]> = []; + const lower = text.toLowerCase(); + const tLower = trigger.toLowerCase(); + // An empty trigger would match at every position without advancing `pos`. + if (tLower.length === 0) return spans; + + // Only whole-word matches count: a trigger edge that is a letter or digit + // must not touch another letter or digit, so 'or' does not fire inside + // "mortal" and 'and' does not fire inside "understand". Consequence: fused + // forms such as "cannot" are not matched by the bare 'not' trigger. + const isWordChar = (ch: string | undefined): boolean => + ch !== undefined && /[a-z0-9]/.test(ch); + const guardLeft = isWordChar(tLower[0]); + const guardRight = isWordChar(tLower[tLower.length - 1]); + + let pos = 0; + while (pos < lower.length) { + const idx = lower.indexOf(tLower, pos); + if (idx === -1) break; + const end = idx + tLower.length; + const leftOk = !guardLeft || !isWordChar(lower[idx - 1]); + const rightOk = !guardRight || !isWordChar(lower[end]); + if (leftOk && rightOk) { + spans.push([idx, end]); + pos = end; + } else { + // Embedded in a longer word — keep scanning from the next character. + pos = idx + 1; + } + } + return spans; +} + +/** + * Check whether `span` overlaps any span in `occupied`. + */ +function overlaps(span: [number, number], occupied: Array<[number, number]>): boolean { + return occupied.some(([s, e]) => span[0] < e && span[1] > s); +} + +// --------------------------------------------------------------------------- +// FormalAnnotator +// --------------------------------------------------------------------------- + +/** + * Extracts logical features (connectives, quantifiers, modal adverbs, + * negations, atomic proposition candidates) from assertoric sentences. + */ +export class FormalAnnotator { + + private readonly _nlParser = new NaturalLanguageSyntaxParser(); + + /** + * Annotate a single assertoric sentence. + * + * @param sentence - The assertoric sentence to annotate. + * @returns An `AnnotatedSentence` with fully populated features and syntax tree. + */ + annotate(sentence: AlethicAssertoric): AnnotatedSentence { + const features = this._extractFeatures(sentence.raw); + const syntaxTree = this._nlParser.parse(sentence.raw); + return { source: sentence, features, syntaxTree }; + } + + /** + * Annotate a batch of assertoric sentences. + * + * @param sentences - The sentences to annotate. + * @returns An `AnnotatedSentence` for each input, in order.
+ */ + annotateAll(sentences: AlethicAssertoric[]): AnnotatedSentence[] { + return sentences.map(s => this.annotate(s)); + } + + // ------------------------------------------------------------------------- + // Private helpers + // ------------------------------------------------------------------------- + + private _extractFeatures(raw: string): SentenceFeatures { + const occupied: Array<[number, number]> = []; + + const connectives = this._findConnectives(raw, occupied); + const quantifiers = this._findQuantifiers(raw, occupied); + const modalAdverbs = this._findModalAdverbs(raw, occupied); + const negations = this._findNegations(raw, occupied); + const propositions = this._extractPropositions(raw, occupied); + const mood = this._detectMood(raw); + + return { mood, connectives, quantifiers, modalAdverbs, negations, propositions }; + } + + private _detectMood(raw: string): MoodType { + if (raw.trimEnd().endsWith('?')) return 'interrogative'; + if (raw.trimEnd().endsWith('!')) return 'exclamatory'; + const first = raw.trim().split(/\s+/)[0]?.toLowerCase() ?? 
''; + const imperativeStarters = new Set([ + 'go', 'stop', 'run', 'please', 'do', 'make', 'let', 'be', 'come', + 'take', 'get', 'give', 'look', 'show', 'tell', 'try', 'use', 'find', + 'put', 'keep', 'turn', 'open', 'close', 'start', 'wait', 'help', 'move', + ]); + if (imperativeStarters.has(first)) return 'imperative'; + return 'declarative'; + } + + private _findConnectives( + raw: string, + occupied: Array<[number, number]>, + ): ConnectiveAnnotation[] { + const results: ConnectiveAnnotation[] = []; + for (const [trigger, operator] of CONNECTIVE_TRIGGERS) { + for (const span of findSpans(raw, trigger)) { + if (!overlaps(span, occupied)) { + results.push({ text: raw.slice(span[0], span[1]), operator, span }); + occupied.push(span); + } + } + } + return results.sort((a, b) => a.span[0] - b.span[0]); + } + + private _findQuantifiers( + raw: string, + occupied: Array<[number, number]>, + ): QuantifierAnnotation[] { + const results: QuantifierAnnotation[] = []; + for (const [trigger, quantifier] of QUANTIFIER_TRIGGERS) { + for (const span of findSpans(raw, trigger)) { + if (!overlaps(span, occupied)) { + results.push({ text: raw.slice(span[0], span[1]), quantifier, span }); + occupied.push(span); + } + } + } + return results.sort((a, b) => a.span[0] - b.span[0]); + } + + private _findModalAdverbs( + raw: string, + occupied: Array<[number, number]>, + ): ModalAnnotation[] { + const results: ModalAnnotation[] = []; + for (const [trigger, operator] of MODAL_TRIGGERS) { + for (const span of findSpans(raw, trigger)) { + if (!overlaps(span, occupied)) { + results.push({ text: raw.slice(span[0], span[1]), operator, span }); + occupied.push(span); + } + } + } + return results.sort((a, b) => a.span[0] - b.span[0]); + } + + private _findNegations( + raw: string, + occupied: Array<[number, number]>, + ): NegationAnnotation[] { + const results: NegationAnnotation[] = []; + for (const trigger of NEGATION_TRIGGERS) { + for (const span of findSpans(raw, trigger)) { + if 
(!overlaps(span, occupied)) { + results.push({ text: raw.slice(span[0], span[1]), span }); + occupied.push(span); + } + } + } + return results.sort((a, b) => a.span[0] - b.span[0]); + } + + /** + * Extract atomic proposition candidates — the maximal text spans that remain + * after all detected logical markers have been removed. + * + * The occupied array contains the spans of all already-detected markers. + * We invert it to find the "free" text spans, then trim and discard blanks. + */ + private _extractPropositions( + raw: string, + occupied: Array<[number, number]>, + ): PropositionAnnotation[] { + // Sort occupied spans by start position + const sorted = [...occupied].sort((a, b) => a[0] - b[0]); + + // Compute free spans (gaps between occupied spans) + const free: Array<[number, number]> = []; + let cursor = 0; + for (const [s, e] of sorted) { + if (cursor < s) free.push([cursor, s]); + cursor = Math.max(cursor, e); + } + if (cursor < raw.length) free.push([cursor, raw.length]); + + // Build PropositionAnnotation for each non-trivial free span + const props: PropositionAnnotation[] = []; + let labelIndex = 0; + for (const [s, e] of free) { + const text = raw.slice(s, e).replace(/[,;:.!?"'()\[\]{}]/g, '').trim(); + if (text.length < 2) continue; + props.push({ + text, + label: labelFor(labelIndex++), + span: [s, e], + }); + } + return props; + } +} diff --git a/src/engine/nlp/formalTranslator.ts b/src/engine/nlp/formalTranslator.ts new file mode 100644 index 0000000..f18a14d --- /dev/null +++ b/src/engine/nlp/formalTranslator.ts @@ -0,0 +1,216 @@ +import { SentenceSet } from '../../language/shared/types'; +import { + AnnotatedSentence, + ConnectiveAnnotation, + FormalTranslationSet, + ModalSentenceTranslation, + ModalTranslation, + PropositionalSentenceTranslation, + PropositionalTranslation, + QuantificationalSentenceTranslation, + QuantificationalTranslation, +} from './nlpTypes'; + +// 
// ---------------------------------------------------------------------------
// Propositional formula builder
// ---------------------------------------------------------------------------

/**
 * Build a propositional formula string from an annotated sentence.
 *
 * Algorithm:
 *   - no propositions → placeholder atom `p` mapped to the raw sentence.
 *   - 0 connectives   → the label of the first proposition.
 *   - N connectives   → proposition labels and connective operators are
 *                       interleaved left-to-right: p1 OP1 p2 OP2 p3 …
 *
 * A sentence-level negation (see `_isNegated`) prefixes the result with `~`,
 * parenthesising it when the formula is a chain.
 *
 * NOTE(review): when there are more propositions than `connectives.length + 1`
 * the surplus labels are appended without an operator (e.g. "p & q r"), and
 * connectives beyond `propositions.length - 1` are ignored. Both behaviours
 * are inherited unchanged — confirm whether downstream parsers expect this.
 *
 * @param annotated - The feature-annotated sentence.
 * @returns The formula string and the label → text proposition map.
 */
function buildPropositionalFormula(
  annotated: AnnotatedSentence,
): { formulaString: string; propositionMap: Record<string, string> } {
  const { propositions: props, connectives } = annotated.features;

  if (props.length === 0) {
    return { formulaString: 'p', propositionMap: { p: annotated.source.raw } };
  }

  const propMap: Record<string, string> = {};
  for (const p of props) {
    propMap[p.label] = p.text;
  }

  const negated = _isNegated(annotated);

  if (connectives.length === 0) {
    // Single atom — the first detected proposition stands for the sentence.
    const label = props[0].label;
    return {
      formulaString: negated ? `~${label}` : label,
      propositionMap: propMap,
    };
  }

  // Interleave proposition labels with the operators that fit between them.
  const parts: string[] = [];
  props.forEach((prop, i) => {
    if (i > 0 && i - 1 < connectives.length) {
      parts.push(connectives[i - 1].operator);
    }
    parts.push(prop.label);
  });
  const chain = parts.join(' ');

  return {
    // Wrap with an outer negation if the whole sentence is negated.
    formulaString: negated ? `~(${chain})` : chain,
    propositionMap: propMap,
  };
}

/**
 * Return true if the sentence has a sentence-level negation, i.e. at least
 * one negation span that starts before the first proposition and before the
 * first connective.
 */
function _isNegated(annotated: AnnotatedSentence): boolean {
  const { negations, propositions, connectives } = annotated.features;
  if (negations.length === 0) return false;

  const firstPropStart = propositions[0]?.span[0] ?? Infinity;
  const firstConnStart = connectives[0]?.span[0] ?? Infinity;
  const firstLogicalStart = Math.min(firstPropStart, firstConnStart);

  // A negation that appears before the first logical element is sentence-level.
  return negations.some(n => n.span[0] < firstLogicalStart);
}

// ---------------------------------------------------------------------------
// Quantifier prefix builder
// ---------------------------------------------------------------------------

/**
 * Build the quantifier prefix (quantifier symbol followed by the variable
 * `x`) from the first detected quantifier, or null when there is none.
 */
function buildQuantifierPrefix(annotated: AnnotatedSentence): string | null {
  const { quantifiers } = annotated.features;
  if (quantifiers.length === 0) return null;
  // Only the first detected quantifier is used.
  return `${quantifiers[0].quantifier}x`;
}

/**
 * Derive a predicate name from the first proposition text: take the first
 * word longer than two characters (falling back to the first word),
 * capitalise it, and truncate it to 8 characters.
 *
 * @returns The suggested predicate name, or null when no word is available.
 */
function suggestPredicate(annotated: AnnotatedSentence): string | null {
  const firstProp = annotated.features.propositions[0];
  if (!firstProp) return null;
  const words = firstProp.text.trim().split(/\s+/);
  const word = words.find(w => w.length > 2) ?? words[0];
  if (!word) return null;
  return word.charAt(0).toUpperCase() + word.slice(1, 8).toLowerCase();
}

// ---------------------------------------------------------------------------
// Modal prefix builder
// ---------------------------------------------------------------------------

/** Return the operator of the first detected modal adverb, or null. */
function buildModalPrefix(annotated: AnnotatedSentence): string | null {
  const { modalAdverbs } = annotated.features;
  if (modalAdverbs.length === 0) return null;
  return modalAdverbs[0].operator;
}

// ---------------------------------------------------------------------------
// FormalTranslator
// ---------------------------------------------------------------------------

/**
 * Produces a `FormalTranslationSet` from a list of annotated sentences.
 *
 * Each translation layer (propositional, quantificational, modal) provides
 * formula strings and annotation metadata that guide population of the
 * corresponding theory builder. Full WFF/QFF/MFF object construction
 * is deferred to the SyntaxEngine layer (not yet implemented).
 */
export class FormalTranslator {

  /**
   * Translate an annotated sentence set into all three formal languages.
   *
   * @param source    - The original `SentenceSet`.
   * @param annotated - Feature-annotated sentences (same order as source.sentences).
   * @returns A `FormalTranslationSet` with propositional, quantificational,
   *          and modal translations.
   */
  translate(source: SentenceSet, annotated: AnnotatedSentence[]): FormalTranslationSet {
    return {
      source,
      propositional: this._translatePropositional(annotated),
      quantificational: this._translateQuantificational(annotated),
      modal: this._translateModal(annotated),
    };
  }

  // -------------------------------------------------------------------------
  // Propositional
  // -------------------------------------------------------------------------

  /** One propositional formula (plus its proposition map) per sentence. */
  private _translatePropositional(annotated: AnnotatedSentence[]): PropositionalTranslation {
    const sentences: PropositionalSentenceTranslation[] = annotated.map(a => {
      const { formulaString, propositionMap } = buildPropositionalFormula(a);
      return { source: a.source, propositionMap, formulaString };
    });
    return { sentences };
  }

  // -------------------------------------------------------------------------
  // Quantificational
  // -------------------------------------------------------------------------

  /**
   * Propositional base formula, prefixed with "<quantifier>x. " when a
   * quantifier was detected.
   */
  private _translateQuantificational(
    annotated: AnnotatedSentence[],
  ): QuantificationalTranslation {
    const sentences: QuantificationalSentenceTranslation[] = annotated.map(a => {
      const { formulaString: baseFormula, propositionMap } = buildPropositionalFormula(a);
      const quantifierPrefix = buildQuantifierPrefix(a);
      const suggestedPredicate = suggestPredicate(a);

      // Prefix the formula with the quantifier if one was detected.
      const formulaString = quantifierPrefix
        ? `${quantifierPrefix}. ${baseFormula}`
        : baseFormula;

      return { source: a.source, propositionMap, quantifierPrefix, suggestedPredicate, formulaString };
    });
    return { sentences };
  }

  // -------------------------------------------------------------------------
  // Modal
  // -------------------------------------------------------------------------

  /**
   * Propositional base formula, wrapped as "<operator>(…)" when a modal
   * adverb was detected.
   */
  private _translateModal(annotated: AnnotatedSentence[]): ModalTranslation {
    const sentences: ModalSentenceTranslation[] = annotated.map(a => {
      const { formulaString: baseFormula, propositionMap } = buildPropositionalFormula(a);
      const modalPrefix = buildModalPrefix(a);

      const formulaString = modalPrefix
        ? `${modalPrefix}(${baseFormula})`
        : baseFormula;

      return { source: a.source, propositionMap, modalPrefix, formulaString };
    });
    return { sentences };
  }
}

// ---- patch metadata and nlpTypes.ts hunks (kept verbatim) ------------------
// diff --git a/src/engine/nlp/nlpTypes.ts b/src/engine/nlp/nlpTypes.ts
// index 7d01014..475ab9b 100644
// --- a/src/engine/nlp/nlpTypes.ts
// +++ b/src/engine/nlp/nlpTypes.ts
// @@ -1,4 +1,5 @@
//  import { AlethicAssertoric, SentenceSet } from '../../language/shared/types';
// +import { SyntaxTree } from '../syntax/syntaxTypes';
//
//  export { AlethicAssertoric };
//
// @@ -80,6 +81,8 @@ export interface SentenceFeatures {
//  export interface AnnotatedSentence {
//    source: AlethicAssertoric;
//    features: SentenceFeatures;
// +  /** Constituency syntax tree for this sentence. */
// +  syntaxTree: SyntaxTree;
//  }
//
//  // ---------------------------------------------------------------------------
// diff --git a/src/engine/nlp/sentenceClassifier.ts b/src/engine/nlp/sentenceClassifier.ts
// new file mode 100644
// index 0000000..fbf779f
// --- /dev/null
// +++ b/src/engine/nlp/sentenceClassifier.ts
// @@ -0,0 +1,108 @@
import { AlethicAssertoric } from '../../language/shared/types';
import { MoodType } from './nlpTypes';

// ---------------------------------------------------------------------------
// Imperative verb starters — sentences beginning with these words are treated
// as commands and excluded from the assertoric candidate set.
// ---------------------------------------------------------------------------
const IMPERATIVE_STARTERS = new Set([
  'go', 'stop', 'run', 'please', 'do', "don't", 'make', 'let', 'be',
  'come', 'take', 'get', 'give', 'look', 'show', 'tell', 'try', 'use',
  'find', 'put', 'keep', 'turn', 'open', 'close', 'start', 'wait',
  'help', 'move', 'consider', 'note', 'remember', 'imagine', 'suppose',
  'assume', 'define', 'write', 'read', 'listen', 'watch',
]);

// ---------------------------------------------------------------------------
// Confidence signal patterns
// ---------------------------------------------------------------------------

const COPULA_RE =
  /\b(is|are|was|were|will be|has been|have been|had been)\b/i;

const EPISTEMIC_MARKER_RE =
  /\b(necessarily|must|certainly|it is certain|it is a fact|it is clear)\b/i;

const HEDGING_RE =
  /\b(maybe|perhaps|i think|i believe|probably|i suppose|i guess|seemingly|apparently)\b/i;

const IT_IS_THERE_RE = /^(it is|there is|there are)\b/i;

// A rough subject-verb check: starts with a capitalised noun phrase then a verb
const SUBJ_VERB_RE = /^[A-Z][a-z]+\s+(?:\w+\s+)?(is|are|was|were|has|have|had|will|can|could|should|would|does|did|makes|takes|gives|shows|proves|implies|entails|follows)\b/;

//
// ---------------------------------------------------------------------------
// Classifier
// ---------------------------------------------------------------------------

/**
 * Classifies a sentence string as alethic assertoric or not, and scores
 * its confidence.
 *
 * All classification is rule-based — no external dependencies.
 */
export class SentenceClassifier {

  /**
   * Attempt to classify a single sentence string.
   *
   * @param sentence - The trimmed sentence string to classify.
   * @returns An `AlethicAssertoric` if the sentence is assertoric, or `null`
   *          if it is interrogative, imperative, or exclamatory.
   */
  classify(sentence: string): AlethicAssertoric | null {
    if (this._detectMood(sentence) !== 'declarative') return null;
    return { raw: sentence, confidence: this._scoreConfidence(sentence) };
  }

  /**
   * Classify a batch of sentence strings, discarding non-assertoric ones.
   *
   * @param sentences - The sentence strings to classify.
   * @returns Only the assertoric candidates, in input order.
   */
  classifyAll(sentences: string[]): AlethicAssertoric[] {
    const results: AlethicAssertoric[] = [];
    for (const s of sentences) {
      const candidate = this.classify(s);
      if (candidate !== null) results.push(candidate);
    }
    return results;
  }

  // -------------------------------------------------------------------------
  // Private helpers
  // -------------------------------------------------------------------------

  /**
   * Mood heuristic: trailing "?" → interrogative, trailing "!" → exclamatory,
   * first word in IMPERATIVE_STARTERS → imperative, otherwise declarative.
   */
  private _detectMood(sentence: string): MoodType {
    const trimmed = sentence.trim();

    if (trimmed.endsWith('?')) return 'interrogative';
    if (trimmed.endsWith('!')) return 'exclamatory';

    // Imperative: first word (lowercased) is a known imperative starter
    const firstWord = trimmed.split(/\s+/)[0]?.toLowerCase() ?? '';
    if (IMPERATIVE_STARTERS.has(firstWord)) return 'imperative';

    return 'declarative';
  }

  /**
   * Score in [0.05, 1.0]: starts at 0.5 and is adjusted by the copula,
   * epistemic-marker, hedging, "it is/there is" and subject-verb patterns,
   * with a penalty for sentences shorter than four words.
   */
  private _scoreConfidence(sentence: string): number {
    let score = 0.5;

    if (COPULA_RE.test(sentence)) score += 0.15;
    if (EPISTEMIC_MARKER_RE.test(sentence)) score += 0.15;
    if (HEDGING_RE.test(sentence)) score -= 0.15;
    if (IT_IS_THERE_RE.test(sentence)) score += 0.10;
    if (SUBJ_VERB_RE.test(sentence)) score += 0.10;

    const wordCount = sentence.trim().split(/\s+/).length;
    if (wordCount < 4) score -= 0.10;

    // Clamp to [0.05, 1.0]
    return Math.min(1.0, Math.max(0.05, score));
  }
}

// ---- patch metadata (kept verbatim) ----------------------------------------
// diff --git a/src/engine/nlp/textSegmenter.ts b/src/engine/nlp/textSegmenter.ts
// new file mode 100644
// index 0000000..a2fa74b
// --- /dev/null
// +++ b/src/engine/nlp/textSegmenter.ts
// @@ -0,0 +1,130 @@

/**
 * TextSegmenter — splits raw text into sentence-candidate strings.
 *
 * Uses a regex-based boundary detector that respects common abbreviations
 * and decimal numbers. Supports both eager (string) and lazy (AsyncIterable)
 * input.
 */

/** Known abbreviations that contain a dot but do not end a sentence. */
const ABBREVIATIONS = new Set([
  'mr', 'mrs', 'ms', 'dr', 'prof', 'sr', 'jr', 'st', 'vs', 'etc',
  'e.g', 'i.e', 'approx', 'dept', 'est', 'govt', 'inc', 'ltd', 'jan',
  'feb', 'mar', 'apr', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
]);

/**
 * Return true if the word immediately before the dot is a known abbreviation
 * or a single letter (initials).
 */
function isAbbreviation(text: string, dotIndex: number): boolean {
  // Grab the run of letters/dots that ends right before this dot.
  const before = text.slice(0, dotIndex).trimEnd();
  const match = before.match(/([A-Za-z.]+)$/);
  if (!match) return false;
  const word = match[1].toLowerCase();
  if (ABBREVIATIONS.has(word)) return true;
  // Single letter — likely an initial.
  return /^[a-z]$/.test(word);
}

/**
 * Return true if the dot is part of a decimal number (e.g. "3.14"), i.e. it
 * has a digit on both sides.
 */
function isDecimal(text: string, dotIndex: number): boolean {
  // charAt yields '' outside the string, so a dot at either end is never decimal.
  return /\d/.test(text.charAt(dotIndex - 1)) && /\d/.test(text.charAt(dotIndex + 1));
}

/**
 * Split a single contiguous text block into sentence strings.
 * Each returned string is trimmed and non-empty.
 *
 * A run of "?"/"!" characters (e.g. "?!", "!!!") ends a sentence and stays
 * attached to it. A "." ends a sentence only when it is neither a decimal
 * point nor an abbreviation dot and is followed by whitespace plus an
 * uppercase letter or quote, or by whitespace up to the end of the text.
 */
function splitBlock(text: string): string[] {
  const results: string[] = [];
  let start = 0;

  const pushSegment = (end: number): void => {
    const segment = text.slice(start, end).trim();
    if (segment.length > 0) results.push(segment);
  };

  for (let i = 0; i < text.length; i++) {
    const ch = text[i];

    if (ch === '?' || ch === '!') {
      // Keep consecutive terminators together instead of emitting "!" alone.
      let end = i + 1;
      while (end < text.length && (text[end] === '?' || text[end] === '!')) end++;
      pushSegment(end);
      start = end;
      // Skip any trailing whitespace so the next segment starts cleanly.
      while (start < text.length && /\s/.test(text[start])) start++;
      i = start - 1;
      continue;
    }

    if (ch !== '.') continue;
    if (isDecimal(text, i) || isAbbreviation(text, i)) continue;

    // What follows must be whitespace then an uppercase letter/quote, or
    // whitespace running to the end of the string.
    const follow = text.slice(i + 1).match(/^(\s+)([A-Z"']|$)/);
    if (follow) {
      pushSegment(i + 1);
      start = i + 1 + follow[1].length;
      i = start - 1;
    }
  }

  // Remaining text after the last boundary.
  pushSegment(text.length);

  return results;
}

export class TextSegmenter {

  /**
   * Split a raw text string into sentence-candidate strings.
   *
   * Paragraph breaks (double newlines) are always treated as sentence
   * boundaries. Within each paragraph, boundary detection uses punctuation
   * heuristics that respect abbreviations and decimal numbers.
   *
   * @param text - The raw input text.
   * @returns An array of trimmed, non-empty sentence strings.
   */
  segment(text: string): string[] {
    const sentences: string[] = [];
    // Split on paragraph breaks first, then apply per-block splitting.
    for (const para of text.split(/\n\s*\n/)) {
      const trimmed = para.trim();
      if (trimmed.length === 0) continue;
      sentences.push(...splitBlock(trimmed));
    }
    return sentences;
  }

  /**
   * Asynchronously collect chunks from an AsyncIterable source and segment them.
   *
   * The whole stream is buffered before segmentation, so the source must be
   * finite.
   *
   * @param source - An async iterable yielding string chunks.
   * @returns A promise resolving to an array of sentence strings.
   */
  async segmentStream(source: AsyncIterable<string>): Promise<string[]> {
    let buffer = '';
    for await (const chunk of source) {
      buffer += chunk;
    }
    return this.segment(buffer);
  }
}

// ---- patch metadata and syntax/index.ts hunk (kept verbatim) ---------------
// diff --git a/src/engine/syntax/index.ts b/src/engine/syntax/index.ts
// new file mode 100644
// index 0000000..92dea51
// --- /dev/null
// +++ b/src/engine/syntax/index.ts
// @@ -0,0 +1,4 @@
// +export * from './syntaxTypes';
// +export * from './syntaxTreePrinter';
// +export * from './naturalLanguageSyntaxParser';
// +export * from './propositional/syntaxEngine';
// diff --git a/src/engine/syntax/naturalLanguageSyntaxParser.ts b/src/engine/syntax/naturalLanguageSyntaxParser.ts
// new file mode 100644
// index 0000000..b32e60e
// --- /dev/null
// +++ b/src/engine/syntax/naturalLanguageSyntaxParser.ts
// @@ -0,0 +1,517 @@
import {
  PhraseLabel,
  PhraseNode,
  POSTag,
  SYNTAX_SCHEMA_VERSION,
  SyntaxNode,
  SyntaxTree,
  TaggedToken,
  TerminalNode,
} from './syntaxTypes';

// ---------------------------------------------------------------------------
// POS tag lexicons
//
// NOTE(review): some words appear in more than one lexicon ('for' in
// PREPOSITIONS and CONJUNCTIONS, 'that' in DETERMINERS and COMPLEMENTIZERS).
// Which tag wins depends on the lookup order used by the tagger — confirm
// that the shadowed entries are intentional.
// ---------------------------------------------------------------------------

const DETERMINERS = new Set([
  'the', 'a', 'an', 'this', 'that', 'these', 'those',
]);

const QUANTIFIERS = new Set([
  'all', 'every', 'each', 'any', 'some', 'most', 'few', 'many',
  'both', 'either', 'neither', 'much', 'several', 'enough',
]);

const PRONOUNS = new Set([
  'i', 'me', 'my', 'mine', 'myself',
  'you', 'your', 'yours', 'yourself',
  'he', 'him', 'his', 'himself',
  'she', 'her', 'hers', 'herself',
  'it', 'its', 'itself',
  'we', 'us', 'our', 'ours', 'ourselves',
  'they', 'them', 'their', 'theirs', 'themselves',
  'one', 'ones',
]);

const COPULAS = new Set([
  'is', 'are', 'was', 'were', 'am', 'be', 'been', 'being',
]);

// Non-modal auxiliaries
const AUXILIARIES = new Set([
  'have', 'has', 'had', 'do', 'does', 'did',
]);

const MODALS = new Set([
  'must', 'can', 'could', 'should', 'would', 'may', 'might',
  'shall', 'will', 'ought', 'need', 'dare',
]);

// Sentence-adverb / modal adverbs (ADV tag — signals modal force)
const MODAL_ADVERBS = new Set([
  'necessarily', 'possibly', 'certainly', 'probably',
  'contingently', 'actually',
]);

const NEGATIONS = new Set(['not', 'never', "n't"]);

const PREPOSITIONS = new Set([
  'in', 'on', 'at', 'of', 'by', 'for', 'with', 'about', 'from',
  'to', 'into', 'onto', 'under', 'over', 'through', 'between',
  'among', 'during', 'after', 'before', 'above', 'below', 'beside',
  'behind', 'beyond', 'within', 'without', 'against', 'along',
  'around', 'near', 'off', 'out', 'up', 'down', 'across',
  'throughout', 'toward', 'towards',
]);

const CONJUNCTIONS = new Set(['and', 'or', 'but', 'nor', 'yet', 'so', 'for']);

// Complementizers (subordinating conjunctions)
const COMPLEMENTIZERS = new Set([
  'that', 'if', 'whether', 'because', 'since', 'although',
  'though', 'unless', 'until', 'when', 'where', 'while',
  'whereas', 'as', 'than',
]);

// Words that appear as particles in structural positions (e.g.
"if…then") +const PARTICLES = new Set(['then', 'hence', 'therefore', 'thus']); + +const PUNCTUATION = new Set(['.', ',', ';', ':', '!', '?', '"', "'", '(', ')']); + +// --------------------------------------------------------------------------- +// Morphological heuristics (fallback POS assignment) +// --------------------------------------------------------------------------- + +const ADJ_SUFFIXES = [ + 'al', 'ous', 'ive', 'ic', 'ful', 'less', 'able', 'ible', + 'ary', 'ory', 'ish', 'like', 'some', 'ward', 'wise', +]; +const ADV_SUFFIXES = ['ly']; +const NOUN_SUFFIXES = [ + 'tion', 'sion', 'ment', 'ness', 'ity', 'ism', 'ist', + 'ance', 'ence', 'hood', 'ship', 'age', 'ure', +]; +const VERB_SUFFIXES = ['ing', 'ed', 'ize', 'ise', 'ify', 'en']; + +function morphTag(word: string, isFirstToken: boolean): POSTag { + const lower = word.toLowerCase(); + + for (const suf of ADV_SUFFIXES) if (lower.endsWith(suf) && lower.length > suf.length + 1) return 'ADV'; + for (const suf of ADJ_SUFFIXES) if (lower.endsWith(suf) && lower.length > suf.length + 1) return 'ADJ'; + for (const suf of NOUN_SUFFIXES) if (lower.endsWith(suf) && lower.length > suf.length + 1) return 'N'; + for (const suf of VERB_SUFFIXES) if (lower.endsWith(suf) && lower.length > suf.length + 1) return 'V'; + + // Capitalized mid-sentence → proper noun + if (!isFirstToken && /^[A-Z]/.test(word)) return 'PN'; + + return 'N'; // default +} + +// --------------------------------------------------------------------------- +// POS tagger +// --------------------------------------------------------------------------- + +function tagToken(text: string, index: number, total: number): POSTag { + const lower = text.toLowerCase().replace(/[.,;:!?"'()]+$/, ''); + const isFirst = index === 0; + + if (PUNCTUATION.has(text)) return 'PUNCT'; + if (lower === 'no' && index === 0) return 'QUANT'; // "No man is..." 
+ if (lower === 'no') return 'NEG'; + if (NEGATIONS.has(lower)) return 'NEG'; + if (PARTICLES.has(lower)) return 'PART'; + if (COMPLEMENTIZERS.has(lower)) return 'COMP'; + if (MODAL_ADVERBS.has(lower)) return 'ADV'; + if (CONJUNCTIONS.has(lower)) return 'CONJ'; + if (PREPOSITIONS.has(lower)) return 'PREP'; + if (MODALS.has(lower)) return 'MODAL'; + if (COPULAS.has(lower)) return 'COP'; + if (AUXILIARIES.has(lower)) return 'AUX'; + if (PRONOUNS.has(lower)) return 'PRON'; + if (QUANTIFIERS.has(lower)) return 'QUANT'; + if (DETERMINERS.has(lower)) return 'DET'; + + return morphTag(text, isFirst); +} + +function tagAll(tokens: string[]): TaggedToken[] { + return tokens.map((text, i) => ({ + text, + pos: tagToken(text, i, tokens.length), + index: i, + })); +} + +// --------------------------------------------------------------------------- +// Tokenizer +// --------------------------------------------------------------------------- + +function tokenize(sentence: string): string[] { + // Normalise smart quotes; split contractions (can't → can n't) + const normalized = sentence + .replace(/['']/g, "'") + .replace(/n't\b/g, " n't") + .replace(/,/g, ' ,') + .replace(/;/g, ' ;') + .replace(/:/g, ' :') + .replace(/\./g, ' .') + .replace(/!/g, ' !') + .replace(/\?/g, ' ?'); + return normalized.trim().split(/\s+/).filter(t => t.length > 0); +} + +// --------------------------------------------------------------------------- +// Parse helpers — tree construction utilities +// --------------------------------------------------------------------------- + +function terminal(tok: TaggedToken): TerminalNode { + return { kind: 'terminal', pos: tok.pos, text: tok.text, index: tok.index }; +} + +function phrase(label: PhraseLabel, children: SyntaxNode[]): PhraseNode { + const leaves = flatLeaves(children); + const startIndex = leaves.length > 0 ? leaves[0].index : 0; + const endIndex = leaves.length > 0 ? 
leaves[leaves.length - 1].index + 1 : 0; + return { kind: 'phrase', label, children, startIndex, endIndex }; +} + +function flatLeaves(nodes: SyntaxNode[]): TerminalNode[] { + const out: TerminalNode[] = []; + for (const n of nodes) { + if (n.kind === 'terminal') { + out.push(n); + } else { + out.push(...flatLeaves(n.children)); + } + } + return out; +} + +// --------------------------------------------------------------------------- +// Parser cursor +// --------------------------------------------------------------------------- + +class Cursor { + constructor( + private readonly tokens: TaggedToken[], + private pos: number = 0, + ) {} + + get position(): number { return this.pos; } + get remaining(): number { return this.tokens.length - this.pos; } + done(): boolean { return this.pos >= this.tokens.length; } + + peek(offset = 0): TaggedToken | null { + return this.tokens[this.pos + offset] ?? null; + } + + is(...tags: POSTag[]): boolean { + const tok = this.peek(); + return tok !== null && tags.includes(tok.pos); + } + + consume(): TaggedToken { + if (this.done()) throw new Error('Unexpected end of token stream.'); + return this.tokens[this.pos++]; + } + + save(): number { return this.pos; } + restore(saved: number): void { this.pos = saved; } +} + +// --------------------------------------------------------------------------- +// Recursive-descent phrase parser +// --------------------------------------------------------------------------- + +/** + * Parse an Adjective Phrase: ADJ+ + * Returns null if no adjective at the current position. + */ +function parseAP(cur: Cursor): PhraseNode | null { + if (!cur.is('ADJ')) return null; + const children: SyntaxNode[] = []; + while (cur.is('ADJ')) { + children.push(terminal(cur.consume())); + } + return phrase('AP', children); +} + +/** + * Parse an Adverb Phrase: ADV+ + * Returns null if no adverb at the current position. 
+ */ +function parseAdvP(cur: Cursor): PhraseNode | null { + if (!cur.is('ADV')) return null; + const children: SyntaxNode[] = []; + while (cur.is('ADV')) { + children.push(terminal(cur.consume())); + } + return phrase('AdvP', children); +} + +/** + * Parse a Noun Phrase: (DET | QUANT)? ADJ* (N | PN | PRON)+ + * Returns null if no NP can be formed. + */ +function parseNP(cur: Cursor): PhraseNode | null { + const saved = cur.save(); + const children: SyntaxNode[] = []; + + // Optional determiner or quantifier + if (cur.is('DET', 'QUANT')) { + children.push(terminal(cur.consume())); + } + + // Optional adjectives + while (cur.is('ADJ')) { + children.push(terminal(cur.consume())); + } + + // At least one noun head + if (!cur.is('N', 'PN', 'PRON')) { + cur.restore(saved); + return null; + } + while (cur.is('N', 'PN')) { + children.push(terminal(cur.consume())); + } + + return phrase('NP', children); +} + +/** + * Parse a Prepositional Phrase: PREP NP + * Returns null if no PP can be formed. + */ +function parsePP(cur: Cursor): PhraseNode | null { + if (!cur.is('PREP')) return null; + const saved = cur.save(); + const children: SyntaxNode[] = []; + + children.push(terminal(cur.consume())); // PREP + const np = parseNP(cur); + if (np === null) { + cur.restore(saved); + return null; + } + children.push(np); + return phrase('PP', children); +} + +/** + * Parse a VP complement: AP | NP (PP)* | PP + * The complement follows the head verb/copula/modal. + */ +function parseVPComplement(cur: Cursor): SyntaxNode[] { + const children: SyntaxNode[] = []; + + // Try AP + const ap = parseAP(cur); + if (ap) { + children.push(ap); + } else { + // Try NP + const np = parseNP(cur); + if (np) { + children.push(np); + } + } + + // Optional PP modifiers + let pp = parsePP(cur); + while (pp) { + children.push(pp); + pp = parsePP(cur); + } + + return children; +} + +/** + * Parse a Verb Phrase. + * + * Patterns handled: + * COP (NEG)? (AP | NP | PP)* + * MODAL (NEG)? COP? 
(AP | NP | PP)* + * MODAL (NEG)? V (NP)? + * AUX (NEG)? (COP | V) (NP | AP)* + * V (NEG)? (NP | PP)* + * NEG VP + * + * Returns null if nothing resembling a VP is found. + */ +function parseVP(cur: Cursor): PhraseNode | null { + if (!cur.is('COP', 'MODAL', 'AUX', 'V', 'NEG')) return null; + + const children: SyntaxNode[] = []; + + // Leading NEG ("not mortal" / "never true") + if (cur.is('NEG') && !cur.is('COP', 'MODAL', 'V', 'AUX')) { + children.push(terminal(cur.consume())); + } + + // MODAL head + if (cur.is('MODAL')) { + children.push(terminal(cur.consume())); + if (cur.is('NEG')) children.push(terminal(cur.consume())); + if (cur.is('COP')) children.push(terminal(cur.consume())); + else if (cur.is('AUX')) children.push(terminal(cur.consume())); + if (cur.is('V')) children.push(terminal(cur.consume())); + children.push(...parseVPComplement(cur)); + return phrase('VP', children); + } + + // AUX head (have been, do not, etc.) + if (cur.is('AUX')) { + children.push(terminal(cur.consume())); + if (cur.is('NEG')) children.push(terminal(cur.consume())); + if (cur.is('COP', 'V')) children.push(terminal(cur.consume())); + children.push(...parseVPComplement(cur)); + return phrase('VP', children); + } + + // COP head (is, are, was, were) + if (cur.is('COP')) { + children.push(terminal(cur.consume())); + if (cur.is('NEG')) children.push(terminal(cur.consume())); + children.push(...parseVPComplement(cur)); + return phrase('VP', children); + } + + // Main V head + if (cur.is('V')) { + children.push(terminal(cur.consume())); + if (cur.is('NEG')) children.push(terminal(cur.consume())); + const np = parseNP(cur); + if (np) children.push(np); + let pp = parsePP(cur); + while (pp) { children.push(pp); pp = parsePP(cur); } + return phrase('VP', children); + } + + return null; +} + +/** + * Parse a Complementizer Phrase (subordinate clause) for "if…then" structures. + * Consumes: COMP [S-body] (PART "then")? 
+ */ +function parseCP(cur: Cursor, parseS: (c: Cursor) => PhraseNode): PhraseNode | null { + if (!cur.is('COMP')) return null; + const children: SyntaxNode[] = []; + + children.push(terminal(cur.consume())); // COMP "if" + + // Parse the embedded S + const embedded = parseS(cur); + if (embedded) children.push(embedded); + + // Optional "then" particle + const tok = cur.peek(); + if (tok && tok.pos === 'PART' && tok.text.toLowerCase() === 'then') { + children.push(terminal(cur.consume())); + } + + return phrase('CP', children); +} + +/** + * Parse an S body: NP VP (PP)* + * Does not consume leading AdvP or COMP — those are handled at the S level. + */ +function parseSBody(cur: Cursor): PhraseNode { + const children: SyntaxNode[] = []; + + const np = parseNP(cur); + if (np) children.push(np); + + const vp = parseVP(cur); + if (vp) children.push(vp); + + // Any trailing PP not consumed by VP + let pp = parsePP(cur); + while (pp) { children.push(pp); pp = parsePP(cur); } + + // Absorb any leftover tokens as UNKNOWN terminals + while (!cur.done()) { + const tok = cur.peek()!; + if (tok.pos === 'PUNCT') { cur.consume(); break; } + children.push(terminal(cur.consume())); + } + + return phrase('S', children); +} + +/** + * Parse the root S node, including leading AdvPs and conditional CPs. 
+ */ +function parseSRoot(cur: Cursor): PhraseNode { + const children: SyntaxNode[] = []; + + // Leading sentence-adverb (Necessarily, Possibly, …) + const advp = parseAdvP(cur); + if (advp) children.push(advp); + + // Comma after adverb + if (cur.peek()?.text === ',') cur.consume(); + + // Conditional "If … then …" + if (cur.is('COMP')) { + const cp = parseCP(cur, c => parseSBody(c)); + if (cp) { + children.push(cp); + // Consequent S + const consequent = parseSBody(cur); + children.push(consequent); + return phrase('S', children); + } + } + + // Standard S body + const body = parseSBody(cur); + // Merge body children into the root S (avoid double S-wrapping) + return phrase('S', [...children, ...body.children]); +} + +// --------------------------------------------------------------------------- +// NaturalLanguageSyntaxParser +// --------------------------------------------------------------------------- + +/** + * Produces constituency syntax trees from natural language sentence strings. + * + * The parser is rule-based (no external dependencies or statistical models). + * It handles common declarative English patterns: + * + * - Simple declarative: [NP] [VP] + * - Quantified: [QUANT NP] [COP AP/NP] + * - With modal adverb: [AdvP] [S] + * - Conditional: [CP if [S]] [S consequent] + * - Copular with PP: [NP] [COP [PP]] + * - With negation: [NP] [COP NEG AP] + * + * The parse tree is a `SyntaxTree` DTO — a plain serializable record. + * Use `SyntaxTreePrinter` to display it. + */ +export class NaturalLanguageSyntaxParser { + + /** + * Parse a sentence string into a constituency syntax tree. + * + * @param sentence - A natural language sentence string. + * @returns A `SyntaxTree` DTO. 
+ */ + parse(sentence: string): SyntaxTree { + const rawTokens = tokenize(sentence); + const tagged = tagAll(rawTokens); + const cur = new Cursor(tagged); + const root = parseSRoot(cur); + + return { + schemaVersion: SYNTAX_SCHEMA_VERSION, + source: sentence, + tokens: tagged, + root, + }; + } +} diff --git a/src/engine/syntax/syntaxTreePrinter.ts b/src/engine/syntax/syntaxTreePrinter.ts new file mode 100644 index 0000000..63747f7 --- /dev/null +++ b/src/engine/syntax/syntaxTreePrinter.ts @@ -0,0 +1,154 @@ +import { PhraseNode, SyntaxNode, SyntaxTree, TerminalNode } from './syntaxTypes'; + +// --------------------------------------------------------------------------- +// Internal rendering helpers +// --------------------------------------------------------------------------- + +function renderNode(node: SyntaxNode, prefix: string, isLast: boolean): string[] { + const connector = isLast ? '└── ' : '├── '; + const childPfx = prefix + (isLast ? '    ' : '│   '); // same width as the connector so nested levels line up + const lines: string[] = []; + + if (node.kind === 'terminal') { + lines.push(`${prefix}${connector}${node.pos} "${node.text}" [${node.index}]`); + } else { + lines.push( + `${prefix}${connector}${node.label} [${node.startIndex}..${node.endIndex})`, + ); + node.children.forEach((child, i) => { + lines.push(...renderNode(child, childPfx, i === node.children.length - 1)); + }); + } + + return lines; +} + +function renderRoot(root: PhraseNode): string[] { + const lines: string[] = []; + lines.push(`${root.label} [${root.startIndex}..${root.endIndex})`); + root.children.forEach((child, i) => { + lines.push(...renderNode(child, '', i === root.children.length - 1)); + }); + return lines; +} + +// --------------------------------------------------------------------------- +// SyntaxTreePrinter +// --------------------------------------------------------------------------- + +/** + * Renders `SyntaxTree` DTOs as readable text — either to the console or as a + * plain string.
+ * + * Output format (box-drawing tree): + * + * ``` + * S [0..4) + * ├── NP [0..2) + * │ ├── QUANT "All" [0] + * │ └── N "men" [1] + * └── VP [2..4) + * ├── COP "are" [2] + * └── AP [3..4) + * └── ADJ "mortal" [3] + * ``` + * + * The printer is a stateless utility — instantiate once and reuse freely. + */ +export class SyntaxTreePrinter { + + /** + * Render a `SyntaxTree` as a multi-line string. + * + * @param tree - The syntax tree to render. + * @param header - When true (default), includes the source sentence above + * the tree diagram. + * @returns A multi-line string ready for display or storage. + */ + render(tree: SyntaxTree, header = true): string { + const parts: string[] = []; + + if (header) { + parts.push(`Syntax Tree — "${tree.source}"`); + parts.push('─'.repeat(Math.min(72, tree.source.length + 16))); + } + + parts.push(...renderRoot(tree.root)); + return parts.join('\n'); + } + + /** + * Print a `SyntaxTree` to the console. + * + * @param tree - The syntax tree to print. + * @param header - When true (default), prints the source sentence header. + */ + print(tree: SyntaxTree, header = true): void { + console.log('\n' + this.render(tree, header) + '\n'); + } + + /** + * Render the tagged-token layer only (no phrase structure). + * + * Useful for quick inspection of POS tagging results before reviewing the + * full tree. + * + * Format: `[index] POS "text"` + * + * @param tree - The syntax tree whose tokens to render. + * @returns A multi-line string of tagged tokens. + */ + renderTokens(tree: SyntaxTree): string { + const lines = tree.tokens.map(t => + `[${String(t.index).padStart(2)}] ${t.pos.padEnd(7)} "${t.text}"`, + ); + return lines.join('\n'); + } + + /** + * Print the tagged-token layer to the console. + * + * @param tree - The syntax tree whose tokens to print. 
+ */ + printTokens(tree: SyntaxTree): void { + console.log('\nTokens:'); + console.log(this.renderTokens(tree)); + console.log(''); + } + + /** + * Render a bracketed notation string for the syntax tree. + * + * Example: `[S [NP [QUANT All][N men]][VP [COP are][AP [ADJ mortal]]]]` + * + * This is the standard linguistics bracket notation, useful for compact + * representation and copy-pasting into analysis tools. + * + * @param tree - The syntax tree to bracket-encode. + * @returns A single-line bracketed string. + */ + renderBracketed(tree: SyntaxTree): string { + return bracketNode(tree.root); + } + + /** + * Print the bracketed notation to the console. + * + * @param tree - The syntax tree to print. + */ + printBracketed(tree: SyntaxTree): void { + console.log('\n' + this.renderBracketed(tree) + '\n'); + } +} + +// --------------------------------------------------------------------------- +// Bracketed notation helper +// --------------------------------------------------------------------------- + +function bracketNode(node: SyntaxNode): string { + if (node.kind === 'terminal') { + return `[${node.pos} ${node.text}]`; + } + const inner = node.children.map(bracketNode).join(''); + return `[${node.label} ${inner}]`; +} diff --git a/src/engine/syntax/syntaxTypes.ts b/src/engine/syntax/syntaxTypes.ts new file mode 100644 index 0000000..bbeb520 --- /dev/null +++ b/src/engine/syntax/syntaxTypes.ts @@ -0,0 +1,168 @@ +/** + * Syntax DTOs — constituency parse trees and related types. + * + * All types are plain data interfaces (no methods, no classes) designed for + * easy serialization. The discriminated-union pattern used for `SyntaxNode` + * maps cleanly to protobuf `oneof` fields. Arrays are used throughout in + * preference to `Map` for the same reason. A `schemaVersion` field on + * top-level DTOs enables forward-compatible schema evolution. 
+ * + * Intended serialization paths (current and future): + * - JSON : natively supported — all field types are JSON-primitive + * - Protobuf : `kind` → oneof discriminator; `PhraseLabel`/`POSTag` → enum; + * `SyntaxNode[]` → repeated oneof message + * - MessagePack / CBOR : same shape as JSON + */ + +// --------------------------------------------------------------------------- +// Schema versioning +// --------------------------------------------------------------------------- + +/** Bumped when the shape of any DTO changes in a breaking way. */ +export const SYNTAX_SCHEMA_VERSION = '1' as const; + +// --------------------------------------------------------------------------- +// Vocabulary types +// --------------------------------------------------------------------------- + +/** + * Constituent phrase categories (non-terminal labels). + * + * Maps to a protobuf enum `PhraseLabel`. + */ +export type PhraseLabel = + | 'S' // Sentence (root) + | 'NP' // Noun Phrase + | 'VP' // Verb Phrase + | 'PP' // Prepositional Phrase + | 'AP' // Adjective Phrase + | 'AdvP' // Adverb Phrase (includes sentence-initial modal adverbs) + | 'CP' // Complementizer Phrase (subordinate / conditional clauses) + | 'QP'; // Quantifier Phrase + +/** + * Part-of-speech tags for terminal (leaf) nodes. + * + * Maps to a protobuf enum `POSTag`. 
+ */ +export type POSTag = + | 'DET' // Determiner: the, a, an, this, that + | 'QUANT' // Quantifier: all, every, each, any, some, no, most, few + | 'N' // Common noun + | 'PN' // Proper noun (capitalized, not sentence-initial) + | 'PRON' // Pronoun: he, she, it, they, I, we + | 'V' // Main verb (including gerunds used predicatively) + | 'COP' // Copula: is, are, was, were, am, be, been + | 'AUX' // Non-modal auxiliary: have, has, had, do, does, did + | 'MODAL' // Modal verb: must, can, could, should, would, may, might, shall, will + | 'ADJ' // Adjective + | 'ADV' // Adverb (including modal adverbs: necessarily, possibly, certainly) + | 'PREP' // Preposition: in, on, at, of, by, for, with, to, from + | 'CONJ' // Coordinating conjunction: and, or, but, nor, yet, so + | 'COMP' // Complementizer: that, if, whether, because, since, although, unless + | 'NEG' // Negation: not, never, no (negation context) + | 'PART' // Particle: then (in if-then), to (infinitive marker) + | 'PUNCT' // Punctuation: . , ; : ! ? + | 'UNKNOWN'; // Unrecognized token + +// --------------------------------------------------------------------------- +// Token layer +// --------------------------------------------------------------------------- + +/** + * A single tokenized word with its POS tag. + * The intermediate representation between the raw sentence and the tree. + * + * Protobuf field notes: + * text → string (field 1) + * pos → POSTag enum (field 2) + * index → int32 (field 3) + */ +export interface TaggedToken { + /** Surface form of the token, as it appears in the source text. */ + text: string; + /** Assigned part-of-speech tag. */ + pos: POSTag; + /** Zero-based position in the sentence's token sequence. 
*/ + index: number; +} + +// --------------------------------------------------------------------------- +// Tree node layer — discriminated union +// --------------------------------------------------------------------------- + +/** + * A leaf node: a single word/token positioned in the sentence. + * + * Protobuf field notes: + * kind → oneof discriminator string, always "terminal" (field 1) + * pos → POSTag enum (field 2) + * text → string (field 3) + * index → int32 (field 4) + */ +export interface TerminalNode { + /** Discriminator. Always `'terminal'`. */ + kind: 'terminal'; + /** Part-of-speech category of this word. */ + pos: POSTag; + /** Surface form of the word. */ + text: string; + /** Zero-based token index in the sentence. */ + index: number; +} + +/** + * An internal node: a labelled phrase spanning one or more tokens. + * + * Protobuf field notes: + * kind → oneof discriminator string, always "phrase" (field 1) + * label → PhraseLabel enum (field 2) + * children → repeated SyntaxNode oneof (field 3) + * startIndex → int32, inclusive (field 4) + * endIndex → int32, exclusive (field 5) + */ +export interface PhraseNode { + /** Discriminator. Always `'phrase'`. */ + kind: 'phrase'; + /** Constituent category of this phrase. */ + label: PhraseLabel; + /** Ordered child nodes — phrases or terminals. */ + children: SyntaxNode[]; + /** Token index of the first terminal in this span (inclusive). */ + startIndex: number; + /** Token index past the last terminal in this span (exclusive). */ + endIndex: number; +} + +/** + * A node in a constituency syntax tree. + * Use `node.kind` to discriminate between `TerminalNode` and `PhraseNode`. + */ +export type SyntaxNode = TerminalNode | PhraseNode; + +// --------------------------------------------------------------------------- +// Top-level document DTO +// --------------------------------------------------------------------------- + +/** + * The full constituency parse of a single sentence. 
+ * + * Designed as a self-contained, serializable record. All information needed + * to reconstruct or display the parse is present — no external references. + * + * Protobuf message: `SyntaxTree` + * schemaVersion → string (field 1) + * source → string (field 2) + * tokens → repeated TaggedToken (field 3) + * root → PhraseNode (field 4) + */ +export interface SyntaxTree { + /** Schema version. Bump when DTO shape changes. Currently `'1'`. */ + schemaVersion: string; + /** The original sentence string that was parsed. */ + source: string; + /** Ordered token sequence with POS tags (the pre-terminal layer). */ + tokens: TaggedToken[]; + /** The root `S` node of the constituency tree. */ + root: PhraseNode; +} diff --git a/test/engine/nlp/argumentAnalyser.spec.ts b/test/engine/nlp/argumentAnalyser.spec.ts new file mode 100644 index 0000000..c77eed3 --- /dev/null +++ b/test/engine/nlp/argumentAnalyser.spec.ts @@ -0,0 +1,115 @@ +import { ArgumentAnalyser } from '../../../src/engine/nlp/argumentAnalyser'; +import { AlethicAssertoric } from '../../../src/language/shared/types'; + +function s(raw: string): AlethicAssertoric { + return { raw, confidence: 1.0 }; +} + +describe('ArgumentAnalyser', () => { + let analyser: ArgumentAnalyser; + + beforeEach(() => { analyser = new ArgumentAnalyser(); }); + + describe('analyse() — structural identification', () => { + test('returns all input sentences in the sentences field', () => { + const inputs = [s('All men are mortal.'), s('Socrates is a man.'), s('Therefore Socrates is mortal.')]; + const result = analyser.analyse(inputs); + expect(result.sentences).toHaveLength(3); + }); + + test('identifies a sentence starting with "therefore" as a conclusion', () => { + const inputs = [ + s('All men are mortal.'), + s('Socrates is a man.'), + s('Therefore Socrates is mortal.'), + ]; + const result = analyser.analyse(inputs); + expect(result.conclusions.some(c => c.raw.startsWith('Therefore'))).toBe(true); + }); + + test('identifies a 
sentence starting with "thus" as a conclusion', () => { + const inputs = [s('All bachelors are unmarried.'), s('Thus John is unmarried.')]; + const result = analyser.analyse(inputs); + expect(result.conclusions.some(c => c.raw.startsWith('Thus'))).toBe(true); + }); + + test('identifies a sentence starting with "hence" as a conclusion', () => { + const inputs = [s('The premises hold.'), s('Hence the conclusion follows.')]; + const result = analyser.analyse(inputs); + expect(result.conclusions.some(c => c.raw.startsWith('Hence'))).toBe(true); + }); + + test('treats non-marked sentences as premises', () => { + const inputs = [s('All men are mortal.'), s('Socrates is a man.'), s('Therefore Socrates is mortal.')]; + const result = analyser.analyse(inputs); + expect(result.premises.some(p => p.raw === 'All men are mortal.')).toBe(true); + expect(result.premises.some(p => p.raw === 'Socrates is a man.')).toBe(true); + }); + + test('when no conclusion marker found, treats last sentence as conclusion', () => { + const inputs = [s('P is true.'), s('Q is true.'), s('R is true.')]; + const result = analyser.analyse(inputs); + expect(result.conclusions).toHaveLength(1); + expect(result.conclusions[0].raw).toBe('R is true.'); + }); + + test('single sentence returns one conclusion and no premises', () => { + const result = analyser.analyse([s('It is raining.')]); + expect(result.conclusions).toHaveLength(1); + expect(result.premises).toHaveLength(0); + }); + + test('returns empty arrays for empty input', () => { + const result = analyser.analyse([]); + expect(result.sentences).toEqual([]); + expect(result.premises).toEqual([]); + expect(result.conclusions).toEqual([]); + expect(result.relations).toEqual([]); + }); + }); + + describe('analyse() — pairwise relations', () => { + test('produces n*(n-1)/2 pairs for n sentences', () => { + const inputs = [s('A is true.'), s('B is true.'), s('C is true.')]; + const result = analyser.analyse(inputs); + 
expect(result.relations).toHaveLength(3); + }); + + test('detects opposition when one sentence negates key word of another', () => { + const inputs = [ + s('The cat is on the mat.'), + s('The cat is not on the mat.'), + ]; + const result = analyser.analyse(inputs); + const pair = result.relations[0]; + expect(pair.relation).toBe('opposes'); + }); + + test('detects support when sentence begins with a premise marker', () => { + const inputs = [ + s('Since all men are mortal, we should accept it.'), + s('Therefore John is mortal.'), + ]; + const result = analyser.analyse(inputs); + const pair = result.relations[0]; + expect(pair.relation).toBe('supports'); + }); + + test('marks independent relation when no structural link detected', () => { + const inputs = [ + s('The sky is blue.'), + s('Mathematics is abstract.'), + ]; + const result = analyser.analyse(inputs); + expect(result.relations[0].relation).toBe('independent'); + }); + + test('from/to fields reference the correct sentences', () => { + const a = s('First sentence.'); + const b = s('Second sentence.'); + const result = analyser.analyse([a, b]); + expect(result.relations[0].from).toBe(a); + expect(result.relations[0].to).toBe(b); + }); + }); +}); diff --git a/test/engine/nlp/formalAnnotator.spec.ts b/test/engine/nlp/formalAnnotator.spec.ts new file mode 100644 index 0000000..d5193e7 --- /dev/null +++ b/test/engine/nlp/formalAnnotator.spec.ts @@ -0,0 +1,174 @@ +import { FormalAnnotator } from '../../../src/engine/nlp/formalAnnotator'; +import { AlethicAssertoric } from '../../../src/language/shared/types'; + +function s(raw: string): AlethicAssertoric { + return { raw, confidence: 1.0 }; +} + +describe('FormalAnnotator', () => { + let annotator: FormalAnnotator; + + beforeEach(() => { annotator = new FormalAnnotator(); }); + + describe('annotate() — connective detection', () => { + test('detects "and" as & operator', () => { + const result = annotator.annotate(s('The cat is on the mat and the dog is in the 
garden.')); + const ops = result.features.connectives.map(c => c.operator); + expect(ops).toContain('&'); + }); + + test('detects "or" as | operator', () => { + const result = annotator.annotate(s('It is raining or it is snowing.')); + const ops = result.features.connectives.map(c => c.operator); + expect(ops).toContain('|'); + }); + + test('detects "if" as -> operator', () => { + const result = annotator.annotate(s('If it is raining then the streets are wet.')); + const ops = result.features.connectives.map(c => c.operator); + expect(ops).toContain('->'); + }); + + test('detects "if and only if" as <-> operator', () => { + const result = annotator.annotate(s('P holds if and only if Q holds.')); + const ops = result.features.connectives.map(c => c.operator); + expect(ops).toContain('<->'); + }); + + test('returns empty connectives array when none detected', () => { + const result = annotator.annotate(s('Socrates is a man.')); + expect(result.features.connectives).toHaveLength(0); + }); + + test('connective span references correct position in raw string', () => { + const raw = 'It is raining and the streets are wet.'; + const result = annotator.annotate(s(raw)); + const conn = result.features.connectives.find(c => c.operator === '&'); + expect(conn).toBeDefined(); + expect(raw.slice(conn!.span[0], conn!.span[1]).toLowerCase()).toContain('and'); + }); + }); + + describe('annotate() — quantifier detection', () => { + test('detects "all" as ∀', () => { + const result = annotator.annotate(s('All men are mortal.')); + const qs = result.features.quantifiers.map(q => q.quantifier); + expect(qs).toContain('∀'); + }); + + test('detects "every" as ∀', () => { + const result = annotator.annotate(s('Every student passed the exam.')); + const qs = result.features.quantifiers.map(q => q.quantifier); + expect(qs).toContain('∀'); + }); + + test('detects "some" as ∃', () => { + const result = annotator.annotate(s('Some philosophers are logicians.')); + const qs = 
result.features.quantifiers.map(q => q.quantifier); + expect(qs).toContain('∃'); + }); + + test('detects "there exists" as ∃', () => { + const result = annotator.annotate(s('There exists a proof of the theorem.')); + const qs = result.features.quantifiers.map(q => q.quantifier); + expect(qs).toContain('∃'); + }); + + test('detects "no" as ¬∃', () => { + const result = annotator.annotate(s('No mortal is immortal.')); + const qs = result.features.quantifiers.map(q => q.quantifier); + expect(qs).toContain('¬∃'); + }); + + test('returns empty quantifiers array when none detected', () => { + const result = annotator.annotate(s('Socrates is a man.')); + expect(result.features.quantifiers).toHaveLength(0); + }); + }); + + describe('annotate() — modal adverb detection', () => { + test('detects "necessarily" as □', () => { + const result = annotator.annotate(s('Necessarily all bachelors are unmarried.')); + const ops = result.features.modalAdverbs.map(m => m.operator); + expect(ops).toContain('□'); + }); + + test('detects "must" as □', () => { + const result = annotator.annotate(s('It must be the case that p holds.')); + const ops = result.features.modalAdverbs.map(m => m.operator); + expect(ops).toContain('□'); + }); + + test('detects "possibly" as ◇', () => { + const result = annotator.annotate(s('Possibly the theorem is unprovable.')); + const ops = result.features.modalAdverbs.map(m => m.operator); + expect(ops).toContain('◇'); + }); + + test('detects "might" as ◇', () => { + const result = annotator.annotate(s('It might be raining outside.')); + const ops = result.features.modalAdverbs.map(m => m.operator); + expect(ops).toContain('◇'); + }); + + test('returns empty modalAdverbs array when none detected', () => { + const result = annotator.annotate(s('Socrates is a man.')); + expect(result.features.modalAdverbs).toHaveLength(0); + }); + }); + + describe('annotate() — negation detection', () => { + test('detects "not" as negation', () => { + const result = 
annotator.annotate(s('The cat is not on the mat.')); + expect(result.features.negations.length).toBeGreaterThan(0); + }); + + test('detects "it is not the case that" as negation', () => { + const result = annotator.annotate(s('It is not the case that p is true.')); + expect(result.features.negations.some(n => n.text.includes('not the case'))).toBe(true); + }); + }); + + describe('annotate() — proposition extraction', () => { + test('assigns labels starting from p', () => { + const result = annotator.annotate(s('It is raining and the streets are wet.')); + const labels = result.features.propositions.map(p => p.label); + expect(labels[0]).toBe('p'); + }); + + test('assigns sequential labels for multiple propositions', () => { + const result = annotator.annotate(s('It is raining and the streets are wet.')); + const labels = result.features.propositions.map(p => p.label); + expect(labels).toContain('p'); + expect(labels.length).toBeGreaterThanOrEqual(1); + }); + + test('proposition text is non-empty', () => { + const result = annotator.annotate(s('All men are mortal.')); + result.features.propositions.forEach(p => { + expect(p.text.trim().length).toBeGreaterThan(0); + }); + }); + }); + + describe('annotate() — mood', () => { + test('reports declarative mood for standard sentence', () => { + const result = annotator.annotate(s('The cat is on the mat.')); + expect(result.features.mood).toBe('declarative'); + }); + }); + + describe('annotateAll()', () => { + test('returns one AnnotatedSentence per input', () => { + const inputs = [s('All men are mortal.'), s('Socrates is a man.')]; + const result = annotator.annotateAll(inputs); + expect(result).toHaveLength(2); + }); + + test('preserves source reference', () => { + const input = s('All men are mortal.'); + const result = annotator.annotateAll([input]); + expect(result[0].source).toBe(input); + }); + }); +}); diff --git a/test/engine/nlp/formalTranslator.spec.ts b/test/engine/nlp/formalTranslator.spec.ts new file mode 
100644 index 0000000..0dc2f87 --- /dev/null +++ b/test/engine/nlp/formalTranslator.spec.ts @@ -0,0 +1,156 @@ +import { FormalAnnotator } from '../../../src/engine/nlp/formalAnnotator'; +import { FormalTranslator } from '../../../src/engine/nlp/formalTranslator'; +import { AlethicAssertoric, SentenceSet } from '../../../src/language/shared/types'; + +function s(raw: string): AlethicAssertoric { + return { raw, confidence: 1.0 }; +} + +function makeSet(...raws: string[]): SentenceSet { + return { sentences: raws.map(s) }; +} + +describe('FormalTranslator', () => { + let annotator: FormalAnnotator; + let translator: FormalTranslator; + + beforeEach(() => { + annotator = new FormalAnnotator(); + translator = new FormalTranslator(); + }); + + function translate(sentences: AlethicAssertoric[]) { + const set: SentenceSet = { sentences }; + const annotated = annotator.annotateAll(sentences); + return translator.translate(set, annotated); + } + + describe('translate() — structure', () => { + test('returns source SentenceSet unchanged', () => { + const set = makeSet('Socrates is a man.'); + const annotated = annotator.annotateAll(set.sentences); + const result = translator.translate(set, annotated); + expect(result.source).toBe(set); + }); + + test('propositional.sentences has one entry per input sentence', () => { + const result = translate([s('All men are mortal.'), s('Socrates is a man.')]); + expect(result.propositional.sentences).toHaveLength(2); + }); + + test('quantificational.sentences has one entry per input sentence', () => { + const result = translate([s('All men are mortal.'), s('Socrates is a man.')]); + expect(result.quantificational.sentences).toHaveLength(2); + }); + + test('modal.sentences has one entry per input sentence', () => { + const result = translate([s('Necessarily all bachelors are unmarried.')]); + expect(result.modal.sentences).toHaveLength(1); + }); + }); + + describe('translate() — propositional formulaString', () => { + test('simple sentence 
with no connective → single atom label', () => { + const result = translate([s('Socrates is a man.')]); + const f = result.propositional.sentences[0].formulaString; + expect(f).toMatch(/^~?[p-z](\d+)?$/); + }); + + test('sentence with "and" connective → formula contains &', () => { + const result = translate([s('It is raining and the streets are wet.')]); + const f = result.propositional.sentences[0].formulaString; + expect(f).toContain('&'); + }); + + test('sentence with "if … then" connective → formula contains ->', () => { + const result = translate([s('If it is raining then the streets are wet.')]); + const f = result.propositional.sentences[0].formulaString; + expect(f).toContain('->'); + }); + + test('sentence with "if and only if" → formula contains <->', () => { + const result = translate([s('P holds if and only if Q holds.')]); + const f = result.propositional.sentences[0].formulaString; + expect(f).toContain('<->'); + }); + + test('propositionMap maps labels to non-empty text fragments', () => { + const result = translate([s('Socrates is a man.')]); + const map = result.propositional.sentences[0].propositionMap; + Object.values(map).forEach(text => { + expect(text.trim().length).toBeGreaterThan(0); + }); + }); + + test('source field references the original AlethicAssertoric', () => { + const input = s('Socrates is a man.'); + const result = translate([input]); + expect(result.propositional.sentences[0].source).toBe(input); + }); + }); + + describe('translate() — quantificational', () => { + test('sentence with "all" gets ∀x quantifier prefix', () => { + const result = translate([s('All men are mortal.')]); + const t = result.quantificational.sentences[0]; + expect(t.quantifierPrefix).toBe('∀x'); + }); + + test('sentence with "some" gets ∃x quantifier prefix', () => { + const result = translate([s('Some philosophers are logicians.')]); + const t = result.quantificational.sentences[0]; + expect(t.quantifierPrefix).toBe('∃x'); + }); + + test('sentence with 
"no" gets ¬∃x quantifier prefix', () => { + const result = translate([s('No mortal is immortal.')]); + const t = result.quantificational.sentences[0]; + expect(t.quantifierPrefix).toBe('¬∃x'); + }); + + test('sentence without quantifier has null quantifierPrefix', () => { + const result = translate([s('Socrates is a man.')]); + expect(result.quantificational.sentences[0].quantifierPrefix).toBeNull(); + }); + + test('formulaString includes the quantifier prefix', () => { + const result = translate([s('All men are mortal.')]); + const f = result.quantificational.sentences[0].formulaString; + expect(f).toContain('∀x'); + }); + + test('suggestedPredicate is a non-empty capitalised string when proposition exists', () => { + const result = translate([s('All men are mortal.')]); + const pred = result.quantificational.sentences[0].suggestedPredicate; + if (pred !== null) { + expect(pred.length).toBeGreaterThan(0); + expect(pred[0]).toBe(pred[0].toUpperCase()); + } + }); + }); + + describe('translate() — modal', () => { + test('sentence with "necessarily" gets □ modal prefix', () => { + const result = translate([s('Necessarily all bachelors are unmarried.')]); + const t = result.modal.sentences[0]; + expect(t.modalPrefix).toBe('□'); + }); + + test('sentence with "possibly" gets ◇ modal prefix', () => { + const result = translate([s('Possibly the theorem is unprovable.')]); + const t = result.modal.sentences[0]; + expect(t.modalPrefix).toBe('◇'); + }); + + test('sentence without modal adverb has null modalPrefix', () => { + const result = translate([s('Socrates is a man.')]); + expect(result.modal.sentences[0].modalPrefix).toBeNull(); + }); + + test('formulaString wraps atom in □(…) when modal prefix present', () => { + const result = translate([s('Necessarily p is true.')]); + const f = result.modal.sentences[0].formulaString; + expect(f).toMatch(/^□\(/); + }); + }); +}); diff --git a/test/engine/nlp/sentenceClassifier.spec.ts b/test/engine/nlp/sentenceClassifier.spec.ts 
new file mode 100644
index 0000000..59e844f
--- /dev/null
+++ b/test/engine/nlp/sentenceClassifier.spec.ts
@@ -0,0 +1,113 @@
+import { SentenceClassifier } from '../../../src/engine/nlp/sentenceClassifier';
+
+/**
+ * Specs for SentenceClassifier: mood filtering in `classify()`, relative
+ * confidence scoring, and batch filtering in `classifyAll()`.
+ */
+describe('SentenceClassifier', () => {
+  let clf: SentenceClassifier;
+
+  beforeEach(() => { clf = new SentenceClassifier(); });
+
+  /**
+   * Classifies `sentence` and returns its confidence. Throws a descriptive
+   * error if the sentence was rejected, so a misclassification reads as a
+   * clear failure instead of a TypeError on `null`.
+   */
+  function confidenceOf(sentence: string): number {
+    const result = clf.classify(sentence);
+    if (result == null) {
+      throw new Error(`expected an assertoric classification for: ${sentence}`);
+    }
+    return result.confidence;
+  }
+
+  describe('classify() — mood filtering', () => {
+    test('returns null for interrogative sentence (ends with ?)', () => {
+      expect(clf.classify('Is it raining?')).toBeNull();
+    });
+
+    test('returns null for exclamatory sentence (ends with !)', () => {
+      expect(clf.classify('It is raining!')).toBeNull();
+    });
+
+    test('returns null for imperative sentence (starts with Go)', () => {
+      expect(clf.classify('Go to the store.')).toBeNull();
+    });
+
+    test('returns null for imperative sentence (starts with Stop)', () => {
+      expect(clf.classify('Stop doing that.')).toBeNull();
+    });
+
+    test('returns AlethicAssertoric for a declarative sentence', () => {
+      const result = clf.classify('The cat is on the mat.');
+      expect(result).not.toBeNull();
+      expect(result?.raw).toBe('The cat is on the mat.');
+    });
+
+    test('raw field matches the original sentence string', () => {
+      const sentence = 'All men are mortal.';
+      const result = clf.classify(sentence);
+      expect(result?.raw).toBe(sentence);
+    });
+  });
+
+  describe('classify() — confidence scoring', () => {
+    test('confidence is between 0.05 and 1.0', () => {
+      const confidence = confidenceOf('The cat is on the mat.');
+      expect(confidence).toBeGreaterThanOrEqual(0.05);
+      expect(confidence).toBeLessThanOrEqual(1.0);
+    });
+
+    test('sentence with copula scores higher than base', () => {
+      const withCopula = confidenceOf('The cat is on the mat.');
+      const base = confidenceOf('Logic matters greatly here somehow.');
+      expect(withCopula).toBeGreaterThan(base);
+    });
+
+    test('sentence with epistemic marker scores higher', () => {
+      const epistemic = confidenceOf('It is necessarily true that all bachelors are unmarried.');
+      const plain = confidenceOf('The cat sat on the mat somewhere.');
+      expect(epistemic).toBeGreaterThan(plain);
+    });
+
+    test('sentence with hedging language scores lower', () => {
+      const hedged = confidenceOf('Maybe the cat is on the mat.');
+      const plain = confidenceOf('The cat is on the mat.');
+      expect(hedged).toBeLessThan(plain);
+    });
+
+    test('very short sentence (< 4 words) scores lower', () => {
+      // "Rain falls." — 2 words, no copula, no subject-verb match → base 0.5 − 0.10 = 0.40
+      // long sentence with copula → 0.5 + 0.15 = 0.65
+      const short = confidenceOf('Rain falls.');
+      const long = confidenceOf('The study of formal logic is essential for rigorous reasoning.');
+      expect(short).toBeLessThan(long);
+    });
+  });
+
+  describe('classifyAll()', () => {
+    test('drops non-assertoric sentences', () => {
+      const input = [
+        'All men are mortal.',
+        'Is Socrates mortal?',
+        'Socrates is a man.',
+        'Stop philosophising.',
+      ];
+      const result = clf.classifyAll(input);
+      expect(result).toHaveLength(2);
+      expect(result[0].raw).toBe('All men are mortal.');
+      expect(result[1].raw).toBe('Socrates is a man.');
+    });
+
+    test('returns empty array when all sentences are non-assertoric', () => {
+      expect(clf.classifyAll(['Is it raining?', 'Go home!'])).toEqual([]);
+    });
+
+    test('preserves order of assertoric sentences', () => {
+      const input = ['First sentence.', 'Second sentence.', 'Third sentence.'];
+      const result = clf.classifyAll(input);
+      expect(result.map(r => r.raw)).toEqual(input);
+    });
+  });
+});
diff --git a/test/engine/nlp/textSegmenter.spec.ts b/test/engine/nlp/textSegmenter.spec.ts
new file mode 100644
index 0000000..bbbb5f1
--- /dev/null
+++ b/test/engine/nlp/textSegmenter.spec.ts
@@ -0,0 +1,114 @@
+import { TextSegmenter } from '../../../src/engine/nlp/textSegmenter';
+
+/**
+ * Specs for TextSegmenter: sentence splitting on terminal punctuation and
+ * paragraph breaks, abbreviation/decimal handling, and stream collection.
+ */
+describe('TextSegmenter', () => {
+  let seg: TextSegmenter;
+
+  beforeEach(() => { seg = new TextSegmenter(); });
+
+  describe('segment()', () => {
+    test('splits a single sentence ending with a period', () => {
+      expect(seg.segment('The cat is on the mat.')).toEqual(['The cat is on the mat.']);
+    });
+
+    test('splits two sentences separated by a period and space', () => {
+      const result = seg.segment('The cat is on the mat. The dog is in the garden.');
+      expect(result).toEqual([
+        'The cat is on the mat.',
+        'The dog is in the garden.',
+      ]);
+    });
+
+    test('splits on question mark', () => {
+      const result = seg.segment('Is it raining? The streets are wet.');
+      expect(result).toEqual(['Is it raining?', 'The streets are wet.']);
+    });
+
+    test('splits on exclamation mark', () => {
+      const result = seg.segment('It is raining! The streets are wet.');
+      expect(result).toEqual(['It is raining!', 'The streets are wet.']);
+    });
+
+    test('splits on double newline (paragraph break)', () => {
+      const result = seg.segment('First sentence.\n\nSecond sentence.');
+      expect(result).toEqual(['First sentence.', 'Second sentence.']);
+    });
+
+    test('does not split on Mr. abbreviation', () => {
+      const result = seg.segment('Mr. Smith is a philosopher. He studies logic.');
+      expect(result).toEqual([
+        'Mr. Smith is a philosopher.',
+        'He studies logic.',
+      ]);
+    });
+
+    test('does not split on Dr. abbreviation', () => {
+      const result = seg.segment('Dr. Jones proved the theorem. It was elegant.');
+      expect(result).toEqual([
+        'Dr. Jones proved the theorem.',
+        'It was elegant.',
+      ]);
+    });
+
+    test('does not split on decimal numbers', () => {
+      const result = seg.segment('The value is 3.14. This is pi.');
+      expect(result).toEqual(['The value is 3.14.', 'This is pi.']);
+    });
+
+    test('handles multiple sentences correctly', () => {
+      const text = 'All men are mortal. Socrates is a man. Therefore Socrates is mortal.';
+      const result = seg.segment(text);
+      expect(result).toHaveLength(3);
+      expect(result[0]).toBe('All men are mortal.');
+      expect(result[1]).toBe('Socrates is a man.');
+      expect(result[2]).toBe('Therefore Socrates is mortal.');
+    });
+
+    test('trims whitespace from segments', () => {
+      const result = seg.segment(' First sentence. Second sentence. ');
+      expect(result[0]).toBe('First sentence.');
+      expect(result[1]).toBe('Second sentence.');
+    });
+
+    test('discards blank segments', () => {
+      const result = seg.segment('\n\n\n');
+      expect(result).toEqual([]);
+    });
+
+    test('handles single word input', () => {
+      const result = seg.segment('Logic');
+      expect(result).toEqual(['Logic']);
+    });
+  });
+
+  describe('segmentStream()', () => {
+    /** Yields each entry of `chunks` in order as an async stream of text. */
+    async function* makeStream(chunks: string[]): AsyncIterable<string> {
+      for (const chunk of chunks) yield chunk;
+    }
+
+    test('collects chunks and segments correctly', async () => {
+      const stream = makeStream(['All men are mortal. ', 'Socrates is a man.']);
+      const result = await seg.segmentStream(stream);
+      expect(result).toEqual([
+        'All men are mortal.',
+        'Socrates is a man.',
+      ]);
+    });
+
+    test('handles single-chunk stream', async () => {
+      const stream = makeStream(['It is raining.']);
+      const result = await seg.segmentStream(stream);
+      expect(result).toEqual(['It is raining.']);
+    });
+
+    test('handles empty stream', async () => {
+      const stream = makeStream([]);
+      const result = await seg.segmentStream(stream);
+      expect(result).toEqual([]);
+    });
+  });
+});
diff --git a/test/engine/syntax/naturalLanguageSyntaxParser.spec.ts b/test/engine/syntax/naturalLanguageSyntaxParser.spec.ts
new file mode 100644
index 0000000..24a5850
--- /dev/null
+++ b/test/engine/syntax/naturalLanguageSyntaxParser.spec.ts
@@ -0,0 +1,178 @@
+import { NaturalLanguageSyntaxParser } from '../../../src/engine/syntax/naturalLanguageSyntaxParser';
+import { PhraseNode, SyntaxNode, TerminalNode } from '../../../src/engine/syntax/syntaxTypes';
+
+/** The tree type produced by `NaturalLanguageSyntaxParser.parse()`. */
+type ParsedTree = ReturnType<NaturalLanguageSyntaxParser['parse']>;
+
+/**
+ * Depth-first, pre-order search for the first phrase node labelled `label`.
+ * Terminal nodes are never matched and are not descended into.
+ */
+function findPhrase(tree: ParsedTree, label: string): PhraseNode | undefined {
+  function search(node: SyntaxNode): PhraseNode | undefined {
+    if (node.kind !== 'phrase') return undefined;
+    if (node.label === label) return node;
+    for (const child of node.children) {
+      const found = search(child);
+      if (found) return found;
+    }
+    return undefined;
+  }
+  return search(tree.root);
+}
+
+/** Returns the first direct terminal child of `phrase` tagged `pos`, if any. */
+function terminalChild(phrase: PhraseNode | undefined, pos: string): SyntaxNode | undefined {
+  return phrase?.children.find(
+    c => c.kind === 'terminal' && (c as TerminalNode).pos === pos,
+  );
+}
+
+/**
+ * Specs for NaturalLanguageSyntaxParser: SyntaxTree shape, POS tagging,
+ * phrase identification (NP, VP, CP, AdvP), and span consistency.
+ */
+describe('NaturalLanguageSyntaxParser', () => {
+  let parser: NaturalLanguageSyntaxParser;
+
+  beforeEach(() => { parser = new NaturalLanguageSyntaxParser(); });
+
+  // ── SyntaxTree shape ──────────────────────────────────────────────────────
+
+  describe('SyntaxTree structure', () => {
+    test('returns a SyntaxTree with schemaVersion, source, tokens, root', () => {
+      const tree = parser.parse('All men are mortal.');
+      expect(tree.schemaVersion).toBe('1');
+      expect(tree.source).toBe('All men are mortal.');
+      expect(Array.isArray(tree.tokens)).toBe(true);
+      expect(tree.root).toBeDefined();
+      expect(tree.root.kind).toBe('phrase');
+    });
+
+    test('root node has label S', () => {
+      const tree = parser.parse('Socrates is a man.');
+      expect(tree.root.label).toBe('S');
+    });
+
+    test('tokens length matches number of word-tokens in the sentence', () => {
+      const tree = parser.parse('All men are mortal.');
+      // "All", "men", "are", "mortal" → 4 word tokens; the trailing "." may
+      // add a fifth, so only the lower bound is asserted.
+      expect(tree.tokens.length).toBeGreaterThanOrEqual(4);
+    });
+
+    test('all tokens carry pos and index fields', () => {
+      const tree = parser.parse('Socrates is mortal.');
+      tree.tokens.forEach(tok => {
+        expect(typeof tok.text).toBe('string');
+        expect(typeof tok.pos).toBe('string');
+        expect(typeof tok.index).toBe('number');
+      });
+    });
+
+    test('token indices are sequential starting from 0', () => {
+      const tree = parser.parse('All men are mortal.');
+      tree.tokens.forEach((tok, i) => {
+        expect(tok.index).toBe(i);
+      });
+    });
+  });
+
+  // ── POS tagging ───────────────────────────────────────────────────────────
+
+  describe('POS tagging', () => {
+    /** POS tag of the first token equal to `word` (case-insensitive), or 'NOT_FOUND'. */
+    function posOf(sentence: string, word: string): string {
+      const tree = parser.parse(sentence);
+      const tok = tree.tokens.find(t => t.text.toLowerCase() === word.toLowerCase());
+      return tok?.pos ?? 'NOT_FOUND';
+    }
+
+    test('"all" → QUANT', () => expect(posOf('All men are mortal.', 'all')).toBe('QUANT'));
+    test('"every" → QUANT', () => expect(posOf('Every student passed.', 'every')).toBe('QUANT'));
+    test('"some" → QUANT', () => expect(posOf('Some philosophers are wise.', 'some')).toBe('QUANT'));
+    test('"the" → DET', () => expect(posOf('The cat is on the mat.', 'the')).toBe('DET'));
+    test('"is" → COP', () => expect(posOf('Socrates is mortal.', 'is')).toBe('COP'));
+    test('"are" → COP', () => expect(posOf('All men are mortal.', 'are')).toBe('COP'));
+    test('"must" → MODAL', () => expect(posOf('All men must die.', 'must')).toBe('MODAL'));
+    test('"not" → NEG', () => expect(posOf('Socrates is not mortal.', 'not')).toBe('NEG'));
+    test('"on" → PREP', () => expect(posOf('The cat is on the mat.', 'on')).toBe('PREP'));
+    test('"and" → CONJ', () => expect(posOf('Rain and wind arrived.', 'and')).toBe('CONJ'));
+    test('"if" → COMP', () => expect(posOf('If it rains then streets flood.', 'if')).toBe('COMP'));
+    test('"then" → PART', () => expect(posOf('If it rains then streets flood.', 'then')).toBe('PART'));
+    test('"necessarily" → ADV', () => expect(posOf('Necessarily all bachelors are unmarried.', 'necessarily')).toBe('ADV'));
+  });
+
+  // ── NP detection ──────────────────────────────────────────────────────────
+
+  describe('NP identification', () => {
+    test('quantified NP contains QUANT terminal', () => {
+      const np = findPhrase(parser.parse('All men are mortal.'), 'NP');
+      expect(np).toBeDefined();
+      expect(terminalChild(np, 'QUANT')).toBeDefined();
+    });
+
+    test('DET NP contains DET terminal', () => {
+      const np = findPhrase(parser.parse('The cat is on the mat.'), 'NP');
+      expect(np).toBeDefined();
+      expect(terminalChild(np, 'DET')).toBeDefined();
+    });
+  });
+
+  // ── VP detection ──────────────────────────────────────────────────────────
+
+  describe('VP identification', () => {
+    test('"is mortal" — VP contains COP', () => {
+      const vp = findPhrase(parser.parse('Socrates is mortal.'), 'VP');
+      expect(vp).toBeDefined();
+      expect(terminalChild(vp, 'COP')).toBeDefined();
+    });
+
+    test('"is not mortal" — VP contains NEG', () => {
+      const vp = findPhrase(parser.parse('Socrates is not mortal.'), 'VP');
+      expect(vp).toBeDefined();
+      expect(terminalChild(vp, 'NEG')).toBeDefined();
+    });
+  });
+
+  // ── Conditional (CP) ──────────────────────────────────────────────────────
+
+  describe('CP — conditional sentences', () => {
+    test('"If it is raining then the streets are wet." contains a CP', () => {
+      const tree = parser.parse('If it is raining then the streets are wet.');
+      expect(findPhrase(tree, 'CP')).toBeDefined();
+    });
+  });
+
+  // ── AdvP (modal adverbs) ──────────────────────────────────────────────────
+
+  describe('AdvP — sentence-level modal adverbs', () => {
+    test('"Necessarily all bachelors are unmarried." contains AdvP', () => {
+      const tree = parser.parse('Necessarily all bachelors are unmarried.');
+      expect(findPhrase(tree, 'AdvP')).toBeDefined();
+    });
+  });
+
+  // ── span correctness ──────────────────────────────────────────────────────
+
+  describe('span indices', () => {
+    test('root span covers all non-punctuation tokens', () => {
+      const tree = parser.parse('All men are mortal.');
+      expect(tree.root.startIndex).toBe(0);
+      expect(tree.root.endIndex).toBeGreaterThan(0);
+    });
+
+    test('child spans are subsets of parent spans', () => {
+      const tree = parser.parse('All men are mortal.');
+      // Only phrase nodes are checked; terminal nodes are skipped.
+      function checkSpans(node: SyntaxNode, parentStart: number, parentEnd: number): void {
+        if (node.kind === 'phrase') {
+          expect(node.startIndex).toBeGreaterThanOrEqual(parentStart);
+          expect(node.endIndex).toBeLessThanOrEqual(parentEnd);
+          node.children.forEach(c => checkSpans(c, node.startIndex, node.endIndex));
+        }
+      }
+      tree.root.children.forEach(c => checkSpans(c, tree.root.startIndex, tree.root.endIndex));
+    });
+  });
+});
diff --git a/test/engine/syntax/syntaxTreePrinter.spec.ts b/test/engine/syntax/syntaxTreePrinter.spec.ts
new file mode 100644
index 0000000..eaa3b82
--- /dev/null
+++ b/test/engine/syntax/syntaxTreePrinter.spec.ts
@@ -0,0 +1,143 @@
+import { NaturalLanguageSyntaxParser } from '../../../src/engine/syntax/naturalLanguageSyntaxParser';
+import { SyntaxTreePrinter } from '../../../src/engine/syntax/syntaxTreePrinter';
+
+/**
+ * Specs for SyntaxTreePrinter: the tree, token-table, and bracketed renderers
+ * and their console-printing counterparts.
+ */
+describe('SyntaxTreePrinter', () => {
+  let parser: NaturalLanguageSyntaxParser;
+  let printer: SyntaxTreePrinter;
+
+  beforeEach(() => {
+    parser = new NaturalLanguageSyntaxParser();
+    printer = new SyntaxTreePrinter();
+  });
+
+  describe('render()', () => {
+    test('returns a non-empty string', () => {
+      const tree = parser.parse('All men are mortal.');
+      expect(printer.render(tree).length).toBeGreaterThan(0);
+    });
+
+    test('contains the root label S', () => {
+      const tree = parser.parse('All men are mortal.');
+      // Render without the header and require a standalone "S": a bare
+      // toContain('S') passes on any capital S, not just the root label.
+      expect(printer.render(tree, false)).toMatch(/\bS\b/);
+    });
+
+    test('contains the source sentence in the header', () => {
+      const tree = parser.parse('Socrates is mortal.');
+      expect(printer.render(tree)).toContain('Socrates is mortal.');
+    });
+
+    test('omits header when header=false', () => {
+      const tree = parser.parse('Socrates is mortal.');
+      const output = printer.render(tree, false);
+      expect(output).not.toContain('Syntax Tree');
+    });
+
+    test('contains box-drawing characters', () => {
+      const tree = parser.parse('All men are mortal.');
+      const output = printer.render(tree);
+      expect(output).toMatch(/[└├│─]/);
+    });
+
+    test('contains POS tags for terminal nodes', () => {
+      const tree = parser.parse('All men are mortal.');
+      const output = printer.render(tree);
+      // At least one known POS tag must appear as a whole word; without the
+      // boundaries the bare N and V alternatives match any capital N or V.
+      expect(output).toMatch(/\b(?:QUANT|DET|N|COP|ADJ|V)\b/);
+    });
+
+    test('each terminal shows quoted text', () => {
+      const tree = parser.parse('Socrates is mortal.');
+      const output = printer.render(tree);
+      expect(output).toContain('"Socrates"');
+    });
+
+    test('multiple sentences produce distinct renders', () => {
+      const t1 = parser.parse('All men are mortal.');
+      const t2 = parser.parse('Socrates is a man.');
+      expect(printer.render(t1)).not.toBe(printer.render(t2));
+    });
+  });
+
+  describe('renderTokens()', () => {
+    test('returns one line per token', () => {
+      const tree = parser.parse('All men are mortal.');
+      const lines = printer.renderTokens(tree).split('\n').filter(l => l.trim().length > 0);
+      expect(lines.length).toBe(tree.tokens.length);
+    });
+
+    test('each line contains index, POS, and quoted text', () => {
+      const tree = parser.parse('All men are mortal.');
+      const output = printer.renderTokens(tree);
+      expect(output).toContain('"All"');
+      expect(output).toContain('QUANT');
+      expect(output).toContain('[');
+    });
+  });
+
+  describe('renderBracketed()', () => {
+    test('returns a single-line string', () => {
+      const tree = parser.parse('All men are mortal.');
+      const output = printer.renderBracketed(tree);
+      expect(output).not.toContain('\n');
+    });
+
+    test('starts with [S', () => {
+      const tree = parser.parse('All men are mortal.');
+      expect(printer.renderBracketed(tree)).toMatch(/^\[S /);
+    });
+
+    test('is balanced in brackets', () => {
+      const tree = parser.parse('All men are mortal.');
+      const output = printer.renderBracketed(tree);
+      const opens = (output.match(/\[/g) ?? []).length;
+      const closes = (output.match(/\]/g) ?? []).length;
+      expect(opens).toBe(closes);
+    });
+
+    test('contains the token text', () => {
+      const tree = parser.parse('All men are mortal.');
+      const output = printer.renderBracketed(tree);
+      expect(output).toContain('All');
+      expect(output).toContain('mortal');
+    });
+  });
+
+  describe('print() and printTokens() and printBracketed()', () => {
+    let logSpy: jest.SpyInstance;
+
+    // Silence console.log for each test and always restore it afterwards, so
+    // a failing assertion cannot leave the mock installed for later tests.
+    beforeEach(() => {
+      logSpy = jest.spyOn(console, 'log').mockImplementation(() => {});
+    });
+
+    afterEach(() => {
+      logSpy.mockRestore();
+    });
+
+    test('print() calls console.log without throwing', () => {
+      const tree = parser.parse('All men are mortal.');
+      expect(() => printer.print(tree)).not.toThrow();
+      expect(logSpy).toHaveBeenCalled();
+    });
+
+    test('printTokens() calls console.log without throwing', () => {
+      const tree = parser.parse('All men are mortal.');
+      expect(() => printer.printTokens(tree)).not.toThrow();
+      expect(logSpy).toHaveBeenCalled();
+    });
+
+    test('printBracketed() calls console.log without throwing', () => {
+      const tree = parser.parse('All men are mortal.');
+      expect(() => printer.printBracketed(tree)).not.toThrow();
+      expect(logSpy).toHaveBeenCalled();
+    });
+  });
+});