From 235e12e7db7f5609bf8d6d573d1d1ce7face32d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikael=20L=C3=B6vqvist?= Date: Thu, 14 May 2026 20:19:02 +0200 Subject: [PATCH] WIP - reduction system --- experiments/generic-parser-2.mjs | 26 ++- experiments/reduction-scanner-2.mjs | 2 +- experiments/reduction-scanner-3.mjs | 157 +++++++++++++++++++ source/parsing/generic-parsing.mjs | 4 +- source/rule-processing/reduction-scanner.mjs | 28 ++-- 5 files changed, 197 insertions(+), 20 deletions(-) create mode 100644 experiments/reduction-scanner-3.mjs diff --git a/experiments/generic-parser-2.mjs b/experiments/generic-parser-2.mjs index 3f87f29..e68ea7f 100644 --- a/experiments/generic-parser-2.mjs +++ b/experiments/generic-parser-2.mjs @@ -98,19 +98,21 @@ const greek_chars = load_table(` + + const rt = new RegExp_Tokenizer(); for (const { name, left, right } of grouping.iter_objects()) { rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(left), - (tokenizer, match) => tokenizer.enter_sub_tokenizer(undefined, - (tokenizer, value) => tokenizer.push_token( - {kind: 'grouping', name, value} + (tokenizer, ingress_match) => tokenizer.enter_sub_tokenizer(undefined, + (tokenizer, value, egress_match) => tokenizer.push_token( + {kind: 'EXPR', name, value, ingress_match, egress_match} ) ), `LEFT_${name}` )); rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(right), - (tokenizer, match) => tokenizer.leave_sub_tokenizer(), `RIGHT_${name}`) + (tokenizer, match) => tokenizer.leave_sub_tokenizer(match), `RIGHT_${name}`) ); } @@ -118,19 +120,27 @@ for (const { name, left, right } of grouping.iter_objects()) { for (const table of [logic_ops, generic_ops, punctuation]) { for (const { name, symbol } of table.iter_objects()) { rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(symbol), - (tokenizer, match) => tokenizer.push_token(match.value), name) + (tokenizer, match) => tokenizer.push_token({ kind: 'TOKEN', match }), name) ); } } for (const { name, lower, upper } of greek_chars.iter_objects()) { rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(lower), - (tokenizer, match) => tokenizer.push_token(match.value), `LOWER_${name}`) + (tokenizer, match) => tokenizer.push_token({ kind: 'TOKEN', match }), `LOWER_${name}`) ); rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(upper), - (tokenizer, match) => tokenizer.push_token(match.value), `UPPER_${name}`) + (tokenizer, match) => tokenizer.push_token({ kind: 'TOKEN', match }), `UPPER_${name}`) ); } +rt.add_rules(new RegExp_Token_Parsing_Rule(/\w+/, (tokenizer, match) => tokenizer.push_token({ kind: 'TOKEN', match }), 'WORD')); +rt.add_rules(new RegExp_Token_Parsing_Rule(/\s+/, null, 'WHITESPACE')); -console.log(rt.rules.at(-1)); \ No newline at end of file + +const text = 'Hello World (how are you (doing)) I may ask'; +const p = new Parser(text, { tokenizer: rt }); + +//console.log(rt.rules.at(-3)); + +console.log(p.parse()) \ No newline at end of file diff --git a/experiments/reduction-scanner-2.mjs b/experiments/reduction-scanner-2.mjs index 3adbc78..ccbcfa5 100644 --- a/experiments/reduction-scanner-2.mjs +++ b/experiments/reduction-scanner-2.mjs @@ -44,8 +44,8 @@ const rss = Reduction_Settings.load({ // Switching this on or off affects whether add comes before mul or not //reduction_order: 'POSITION_MAJOR', }); -const rs = new Reduction_Scanner(rss); +const rs = new Reduction_Scanner(rss); rss.rules.push( diff --git a/experiments/reduction-scanner-3.mjs b/experiments/reduction-scanner-3.mjs new file mode 100644 index 0000000..1bb9778 --- /dev/null +++ b/experiments/reduction-scanner-3.mjs @@ -0,0 +1,157 @@ +import { Reduction_Scanner, Reduction_Settings } from '@efforting.tech/rule-processing/reduction-scanner'; +import * as R from '@efforting.tech/rule-processing/rules'; + +import { inspect } from 'node:util'; + + +class Rule_Match { + constructor(rule, match) { + Object.assign(this, { rule, match }); + } + + get action() { + return this.rule.handler; + } + + +} + + +class Rule { //NOTE: This is somewhat of a place holder because we may want to declare specific transformations later rather than always having an opaque handler function + constructor(condition, handler) { + Object.assign(this, { condition, handler }); + } + + match(...args) { + const match = this.condition.match(...args); + if (match) { + return new Rule_Match(this, match); + } + } + + +} + +class Sub_Scan_Rule_Match { + constructor(rule, sub_scan_candidate) { + Object.assign(this, { rule, sub_scan_candidate }); + } + + get action() { + return this.sub_scan_candidate.match.action; + } + + get match() { + return this.sub_scan_candidate.match.match; + } + +} + +class Sub_Scan_Rule { + constructor(sub_system) { + Object.assign(this, { sub_system }); + } + + match(...args) { + const candidate = this.sub_system.find_reduction_candidate(...args); + if (candidate) { + return new Sub_Scan_Rule_Match(this, candidate) + } + } + + +} + + + +function sequence_rule(sequence, transform_fn) { + return new Rule( + new R.Sequence_Condition(sequence), + ({sequence, match}) => { + const MS = match.match_start; + const ME = match.match_end; + sequence.splice(MS, ME - MS + 1, transform_fn(...sequence.slice(MS, ME + 1))); + } + ); +} + + + +const N = new R.Predicate((i) => typeof i === 'number' || i.type == 'BINOP' ); + +const CARET = new R.Strict_Equality('^'); +const CARON = new R.Strict_Equality('ˇ'); + +const ASTERISK = new R.Strict_Equality('*'); +const SLASH = new R.Strict_Equality('/'); + +const PLUS = new R.Strict_Equality('+'); +const MINUS = new R.Strict_Equality('-'); + +// These are the outer settings +const rss = Reduction_Settings.load(); + +// These are the inner settings +const rss_inner = Reduction_Settings.load({ + reduction_order: 'POSITION_MAJOR', +}); + +const rs = new Reduction_Scanner(rss); + +// Local factory for sub system +function sub_system(...rules) { + const sub_settings = { ...rss_inner, rules }; + const scanner = new Reduction_Scanner(sub_settings); + return new Sub_Scan_Rule(scanner); +} + + + + +rss.rules.push( + sub_system( + sequence_rule([N, CARET, N], (left, op, right) => ({ type: 'BINOP', op: 'CARET', operands: [left, right]})), + sequence_rule([N, CARON, N], (left, op, right) => ({ type: 'BINOP', op: 'CARON', operands: [left, right]})), + ), + + sub_system( + sequence_rule([N, ASTERISK, N], (left, op, right) => ({ type: 'BINOP', op: 'ASTERISK', operands: [left, right]})), + sequence_rule([N, SLASH, N], (left, op, right) => ({ type: 'BINOP', op: 'SLASH', operands: [left, right]})), + ), + + sub_system( + sequence_rule([N, PLUS, N], (left, op, right) => ({ type: 'BINOP', op: 'PLUS', operands: [left, right]})), + sequence_rule([N, MINUS, N], (left, op, right) => ({ type: 'BINOP', op: 'MINUS', operands: [left, right]})), + ), +); + + +const arr = [5, '-', 10, '^', 5, 'ˇ', 2, '+', 20, '*', 30]; +console.log(inspect(rs.transform(arr), { colors: true, depth: null })); + + +/* + +[ + { + type: 'BINOP', + op: 'MINUS', + operands: [ + 5, + { + type: 'BINOP', + op: 'PLUS', + operands: [ + { + type: 'BINOP', + op: 'CARON', + operands: [ { type: 'BINOP', op: 'CARET', operands: [ 10, 5 ] }, 2 ] + }, + { type: 'BINOP', op: 'ASTERISK', operands: [ 20, 30 ] } + ] + } + ] + } +] + +*/ \ No newline at end of file diff --git a/source/parsing/generic-parsing.mjs b/source/parsing/generic-parsing.mjs index b5b4299..4793c4e 100644 --- a/source/parsing/generic-parsing.mjs +++ b/source/parsing/generic-parsing.mjs @@ -83,14 +83,14 @@ export class Parser { this.switch_to(tokenizer); } - leave_sub_tokenizer() { + leave_sub_tokenizer(egress_match=null) { const frame = this.stack.pop(true); const { sub_tokenizer_handlers } = this.state; if (sub_tokenizer_handlers.length) { const handler = sub_tokenizer_handlers.pop(); this.state.match = null; //TODO: Decide if we should reset match here or not - handler(this, frame.value); + handler(this, frame.value, egress_match); } else { this.push_token(frame.value); } diff --git a/source/rule-processing/reduction-scanner.mjs b/source/rule-processing/reduction-scanner.mjs index 7167760..7a3f3e4 100644 --- a/source/rule-processing/reduction-scanner.mjs +++ b/source/rule-processing/reduction-scanner.mjs @@ -23,6 +23,8 @@ export const FP_Reduction_Settings = new CF.Schema({ +//TODO - we should probably have a pre-defined record shape as argument for actions and such rather than using an ever growing list of positionals or an anonymous Object() + export class Reduction_Scanner { static settings_schema = Reduction_Settings; @@ -36,7 +38,7 @@ export class Reduction_Scanner { } } - perform_reduction(sequence) { + find_reduction_candidate(sequence) { const { settings } = this; switch (settings.reduction_order) { @@ -44,12 +46,10 @@ export class Reduction_Scanner { for (const rule of settings.rules) { const match = rule.match(sequence); if (match) { - //console.log('RULE_MAJOR', match); - rule.action(this, sequence, match); //TODO: should rule.action be able to add additional checks? Though that probably dillutes responsibility and blurs interfaces - return true; + return { sequence, rule, match }; } } - return false; + return; case Reduction_Order.symbols.POSITION_MAJOR: @@ -68,18 +68,28 @@ export class Reduction_Scanner { } if (best_match) { - //console.log('POSITION_MAJOR', best_match) - best_rule.action(this, sequence, best_match); //TODO: should rule.action be able to add additional checks? Though that probably dillutes responsibility and blurs interfaces - return true; + return { sequence, rule: best_rule, match: best_match }; } - return false; + return; default: throw new Error(`Unknown reduction order: ${this.reduction_order}`); //TODO: Force invalid configuration error } + } + + perform_reduction(sequence) { + const candidate = this.find_reduction_candidate(sequence); + if (candidate) { + const { sequence, rule, match } = candidate; + //console.log('ACT', match.match) + match.action({ reduction_system: this, rule, sequence, match: match.match }); + return true; + } else { + return false; + } } clear_transform_state() {