WIP - reduction system

This commit is contained in:
2026-05-14 20:19:02 +02:00
parent 3047649372
commit 235e12e7db
5 changed files with 197 additions and 20 deletions

View File

@@ -98,19 +98,21 @@ const greek_chars = load_table(`
const rt = new RegExp_Tokenizer();
for (const { name, left, right } of grouping.iter_objects()) {
rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(left),
(tokenizer, match) => tokenizer.enter_sub_tokenizer(undefined,
(tokenizer, value) => tokenizer.push_token(
{kind: 'grouping', name, value}
(tokenizer, ingress_match) => tokenizer.enter_sub_tokenizer(undefined,
(tokenizer, value, egress_match) => tokenizer.push_token(
{kind: 'EXPR', name, value, ingress_match, egress_match}
)
), `LEFT_${name}`
));
rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(right),
(tokenizer, match) => tokenizer.leave_sub_tokenizer(), `RIGHT_${name}`)
(tokenizer, match) => tokenizer.leave_sub_tokenizer(match), `RIGHT_${name}`)
);
}
@@ -118,19 +120,27 @@ for (const { name, left, right } of grouping.iter_objects()) {
for (const table of [logic_ops, generic_ops, punctuation]) {
for (const { name, symbol } of table.iter_objects()) {
rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(symbol),
(tokenizer, match) => tokenizer.push_token(match.value), name)
(tokenizer, match) => tokenizer.push_token({ kind: 'TOKEN', match }), name)
);
}
}
for (const { name, lower, upper } of greek_chars.iter_objects()) {
rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(lower),
(tokenizer, match) => tokenizer.push_token(match.value), `LOWER_${name}`)
(tokenizer, match) => tokenizer.push_token({ kind: 'TOKEN', match }), `LOWER_${name}`)
);
rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(upper),
(tokenizer, match) => tokenizer.push_token(match.value), `UPPER_${name}`)
(tokenizer, match) => tokenizer.push_token({ kind: 'TOKEN', match }), `UPPER_${name}`)
);
}
rt.add_rules(new RegExp_Token_Parsing_Rule(/\w+/, (tokenizer, match) => tokenizer.push_token({ kind: 'TOKEN', match }), 'WORD'));
rt.add_rules(new RegExp_Token_Parsing_Rule(/\s+/, null, 'WHITESPACE'));
console.log(rt.rules.at(-1));
const text = 'Hello World (how are you (doing)) I may ask';
const p = new Parser(text, { tokenizer: rt });
//console.log(rt.rules.at(-3));
console.log(p.parse())

View File

@@ -44,8 +44,8 @@ const rss = Reduction_Settings.load({
// Switching this on or off affects whether add comes before mul or not
//reduction_order: 'POSITION_MAJOR',
});
const rs = new Reduction_Scanner(rss);
const rs = new Reduction_Scanner(rss);
rss.rules.push(

View File

@@ -0,0 +1,157 @@
import { Reduction_Scanner, Reduction_Settings } from '@efforting.tech/rule-processing/reduction-scanner';
import * as R from '@efforting.tech/rule-processing/rules';
import { inspect } from 'node:util';
/**
 * Pairs a rule with the match data produced for it.
 */
class Rule_Match {
	/**
	 * @param rule - Rule this match belongs to; its `handler` is exposed through `action`.
	 * @param match - Match data to carry alongside the rule.
	 */
	constructor(rule, match) {
		this.rule = rule;
		this.match = match;
	}

	/** The `handler` of the rule that owns this match. */
	get action() {
		const { handler } = this.rule;
		return handler;
	}
}
// NOTE: This is somewhat of a placeholder; later we may want to declare specific
// transformations rather than always taking an opaque handler function.
/**
 * Couples a condition with the handler to use when that condition matches.
 */
class Rule {
	/**
	 * @param condition - Object with a `match(...args)` method.
	 * @param handler - Stored as-is; surfaced later as the `action` of a Rule_Match.
	 */
	constructor(condition, handler) {
		this.condition = condition;
		this.handler = handler;
	}

	/**
	 * Forwards all arguments to the condition.
	 * @returns A Rule_Match wrapping the condition's result when it is truthy, otherwise undefined.
	 */
	match(...args) {
		const result = this.condition.match(...args);
		return result ? new Rule_Match(this, result) : undefined;
	}
}
/**
 * Match produced by a Sub_Scan_Rule. It holds the candidate found by the sub
 * system and exposes that candidate's own match through `action` and `match`.
 */
class Sub_Scan_Rule_Match {
	/**
	 * @param rule - The Sub_Scan_Rule that produced this match.
	 * @param sub_scan_candidate - Candidate from the sub system; its `match`
	 *   property must provide `action` and `match`.
	 */
	constructor(rule, sub_scan_candidate) {
		this.rule = rule;
		this.sub_scan_candidate = sub_scan_candidate;
	}

	/** Action of the match nested inside the candidate. */
	get action() {
		const { match: inner_match } = this.sub_scan_candidate;
		return inner_match.action;
	}

	/** Match data of the match nested inside the candidate. */
	get match() {
		const { match: inner_match } = this.sub_scan_candidate;
		return inner_match.match;
	}
}
/**
 * Rule whose matching is delegated to another scanner (the sub system).
 */
class Sub_Scan_Rule {
	/**
	 * @param sub_system - Object providing `find_reduction_candidate(...args)`.
	 */
	constructor(sub_system) {
		this.sub_system = sub_system;
	}

	/**
	 * Asks the sub system for a reduction candidate, forwarding all arguments.
	 * @returns A Sub_Scan_Rule_Match wrapping the candidate when one is found, otherwise undefined.
	 */
	match(...args) {
		const found = this.sub_system.find_reduction_candidate(...args);
		if (!found) {
			return undefined;
		}
		return new Sub_Scan_Rule_Match(this, found);
	}
}
/**
 * Builds a Rule that matches `sequence` via R.Sequence_Condition and, when
 * acted upon, replaces the matched span in place with a single item.
 *
 * @param sequence - Pattern handed to R.Sequence_Condition.
 * @param transform_fn - Called with the matched items spread as positional
 *   arguments; its return value replaces them.
 * @returns The constructed Rule.
 */
function sequence_rule(sequence, transform_fn) {
	const condition = new R.Sequence_Condition(sequence);
	return new Rule(condition, ({ sequence: items, match }) => {
		// match_start and match_end are both inclusive indices into `items`.
		const { match_start: first, match_end: last } = match;
		const matched = items.slice(first, last + 1);
		const replacement = transform_fn(...matched);
		items.splice(first, last - first + 1, replacement);
	});
}
// Operand condition: accepts a number, or any item whose `type` is 'BINOP'
// (the node shape produced by the rules registered further down).
// Optional chaining makes null/undefined items a non-match instead of a TypeError,
// and strict equality avoids coercion when comparing the tag.
const N = new R.Predicate((i) => typeof i === 'number' || i?.type === 'BINOP');

// Operator conditions, one per infix symbol.
// NOTE(review): presumably R.Strict_Equality matches an item with `===` — confirm against the rules module.
const CARET = new R.Strict_Equality('^');
const CARON = new R.Strict_Equality('ˇ');
const ASTERISK = new R.Strict_Equality('*');
const SLASH = new R.Strict_Equality('/');
const PLUS = new R.Strict_Equality('+');
const MINUS = new R.Strict_Equality('-');

// These are the outer settings (loaded with defaults)
const rss = Reduction_Settings.load();

// These are the inner settings, shared by every sub system
const rss_inner = Reduction_Settings.load({
	reduction_order: 'POSITION_MAJOR',
});

// The scanner keeps a reference to `rss`, so rules pushed onto `rss.rules` later are picked up.
// NOTE(review): assumes Reduction_Scanner does not copy its settings — confirm.
const rs = new Reduction_Scanner(rss);
/**
 * Local factory for a sub system: wraps the given rules in their own
 * Reduction_Scanner, configured from the inner settings, and exposes that
 * scanner as a single Sub_Scan_Rule.
 *
 * @param rules - Rules for the inner scanner; they replace any `rules` entry
 *   copied from `rss_inner`.
 * @returns A Sub_Scan_Rule backed by the new scanner.
 */
function sub_system(...rules) {
	return new Sub_Scan_Rule(new Reduction_Scanner({ ...rss_inner, rules }));
}
// Builds the rule for one infix operator: the span [N, <operator>, N] is
// replaced by a BINOP node tagged with `op`; the operator item itself is dropped.
const binop_rule = (operator, op) => sequence_rule(
	[N, operator, N],
	(left, _symbol, right) => ({ type: 'BINOP', op, operands: [left, right] }),
);

// Three sub systems are registered in this order, each holding two operators.
rss.rules.push(
	sub_system(
		binop_rule(CARET, 'CARET'),
		binop_rule(CARON, 'CARON'),
	),
	sub_system(
		binop_rule(ASTERISK, 'ASTERISK'),
		binop_rule(SLASH, 'SLASH'),
	),
	sub_system(
		binop_rule(PLUS, 'PLUS'),
		binop_rule(MINUS, 'MINUS'),
	),
);

// Demo input: run the scanner over a flat token list and print the reduced tree.
const arr = [5, '-', 10, '^', 5, 'ˇ', 2, '+', 20, '*', 30];
const reduced = rs.transform(arr);
console.log(inspect(reduced, { colors: true, depth: null }));
/*
[
{
type: 'BINOP',
op: 'MINUS',
operands: [
5,
{
type: 'BINOP',
op: 'PLUS',
operands: [
{
type: 'BINOP',
op: 'CARON',
operands: [ { type: 'BINOP', op: 'CARET', operands: [ 10, 5 ] }, 2 ]
},
{ type: 'BINOP', op: 'ASTERISK', operands: [ 20, 30 ] }
]
}
]
}
]
*/

View File

@@ -83,14 +83,14 @@ export class Parser {
this.switch_to(tokenizer);
}
leave_sub_tokenizer() {
leave_sub_tokenizer(egress_match=null) {
const frame = this.stack.pop(true);
const { sub_tokenizer_handlers } = this.state;
if (sub_tokenizer_handlers.length) {
const handler = sub_tokenizer_handlers.pop();
this.state.match = null; //TODO: Decide if we should reset match here or not
handler(this, frame.value);
handler(this, frame.value, egress_match);
} else {
this.push_token(frame.value);
}

View File

@@ -23,6 +23,8 @@ export const FP_Reduction_Settings = new CF.Schema({
//TODO: Actions and similar callbacks should probably receive a single argument with a pre-defined record shape, rather than an ever-growing list of positionals or an anonymous Object()
export class Reduction_Scanner {
static settings_schema = Reduction_Settings;
@@ -36,7 +38,7 @@ export class Reduction_Scanner {
}
}
perform_reduction(sequence) {
find_reduction_candidate(sequence) {
const { settings } = this;
switch (settings.reduction_order) {
@@ -44,12 +46,10 @@ export class Reduction_Scanner {
for (const rule of settings.rules) {
const match = rule.match(sequence);
if (match) {
//console.log('RULE_MAJOR', match);
rule.action(this, sequence, match); //TODO: should rule.action be able to add additional checks? Though that probably dillutes responsibility and blurs interfaces
return true;
return { sequence, rule, match };
}
}
return false;
return;
case Reduction_Order.symbols.POSITION_MAJOR:
@@ -68,18 +68,28 @@ export class Reduction_Scanner {
}
if (best_match) {
//console.log('POSITION_MAJOR', best_match)
best_rule.action(this, sequence, best_match); //TODO: should rule.action be able to add additional checks? Though that probably dillutes responsibility and blurs interfaces
return true;
return { sequence, rule: best_rule, match: best_match };
}
return false;
return;
default:
throw new Error(`Unknown reduction order: ${this.reduction_order}`); //TODO: Force invalid configuration error
}
}
perform_reduction(sequence) {
const candidate = this.find_reduction_candidate(sequence);
if (candidate) {
const { sequence, rule, match } = candidate;
//console.log('ACT', match.match)
match.action({ reduction_system: this, rule, sequence, match: match.match });
return true;
} else {
return false;
}
}
clear_transform_state() {