From 60e1ad57cb962d0568aa3a3450c786ffa828a883 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikael=20L=C3=B6vqvist?= Date: Sun, 3 May 2026 12:09:15 +0200 Subject: [PATCH] Updated stale imports. Started experiment with parsing system. Got reduction scanner feature ready for use. Added several data utilities. --- experiments/config1.mjs | 2 +- experiments/config2.mjs | 2 +- experiments/generic-parser-1.mjs | 156 ++++++++++++++++++++++++++++ experiments/reduction-scanner-2.mjs | 76 ++++++++++++++ source/data/iteration-utilities.mjs | 31 ++++++ source/data/object-utilities.mjs | 9 ++ source/data/stack.mjs | 54 ++++++++++ source/parsing/regexp-dispatch.mjs | 7 +- 8 files changed, 333 insertions(+), 4 deletions(-) create mode 100644 experiments/generic-parser-1.mjs create mode 100644 experiments/reduction-scanner-2.mjs create mode 100644 source/data/iteration-utilities.mjs create mode 100644 source/data/object-utilities.mjs create mode 100644 source/data/stack.mjs diff --git a/experiments/config1.mjs b/experiments/config1.mjs index 8a93cd5..e078fd4 100644 --- a/experiments/config1.mjs +++ b/experiments/config1.mjs @@ -1,4 +1,4 @@ -import { Schema, Field_Configuration } from '@efforting.tech/data/field-configuration'; +import { Schema, Field_Configuration } from '@efforting.tech/schema/field-configuration'; function mandatory_anything(value) { diff --git a/experiments/config2.mjs b/experiments/config2.mjs index 885f19e..bf2d151 100644 --- a/experiments/config2.mjs +++ b/experiments/config2.mjs @@ -1,4 +1,4 @@ -import * as F from '@efforting.tech/data/field-configuration-factories'; +import * as F from '@efforting.tech/schema/field-configuration-factories'; const s = new F.Schema({ foo: F.value(123, 'The value'), diff --git a/experiments/generic-parser-1.mjs b/experiments/generic-parser-1.mjs new file mode 100644 index 0000000..034addd --- /dev/null +++ b/experiments/generic-parser-1.mjs @@ -0,0 +1,156 @@ +import { RegExp_Tokenizer, RegExp_Token_Rule } from '@efforting.tech/parsing/regexp-dispatch'; +import { Switchable_Iterator } from '@efforting.tech/data/iteration-utilities'; +import { String_Keyed_Stack } from '@efforting.tech/data/stack'; +import { assign_defined } from '@efforting.tech/data/object-utilities'; +import * as F from '@efforting.tech/schema/field-configuration-factories'; +import { inspect } from 'node:util'; + + +class RegExp_Token_Parsing_Rule extends RegExp_Token_Rule { + constructor(pattern, action, identifier=undefined) { + super(pattern, identifier); + Object.assign(this, { action }); + } +} + + +const Parser_State = new F.Schema({ + + position: F.value(0, 'Pending position in source'), + value: F.factory(() => [], 'Pending value to return'), + sub_tokenizer_handlers: F.factory(() => [], 'Pending sub tokenizer handlers'), + tokenizer: F.value(null, 'Current tokenizer'), + context: F.value(null, 'User supplied context'), + +}, 'Parser state'); + + + +class Parser { + constructor(source, state=undefined) { + state = Parser_State.load(state); + const token_generator = new Switchable_Iterator(); + const stack = new String_Keyed_Stack(state); + Object.assign(this, { source, state, stack, token_generator }); + this.switch_to(); + } + + + switch_to(tokenizer=undefined, position=undefined) { + assign_defined(this.state, { tokenizer, position }); + this.token_generator.switch_to(this.state.tokenizer.iter_matches(this.source, this.state.position)); + } + + parse(handler=undefined) { + + for (const match of this.token_generator) { + const { action } = match.rule; + if (!action) { continue; } + if (typeof action !== 'function') { + console.log('NOT IMPLEMENTED', match.rule.action); + continue; + } + + this.state.position = match.pending_index; + this.state.match = match; + action(this, match); + + } + + if (handler) { + this.state.match = null; //TODO: Decide if we should reset match here or not + handler(this, this.state.value); + return this.state.value; + } else { + return this.state.value; + } + + + } + + push_token(...tokens) { + this.state.value.push(...tokens); + } + + replace_value(value) { + this.state.value = value; + } + + enter_sub_tokenizer(tokenizer=undefined, handler=undefined) { + this.stack.push_defined({ tokenizer, value: [] }); + if (handler) { + this.state.sub_tokenizer_handlers.push(handler); + } + this.switch_to(tokenizer); + } + + leave_sub_tokenizer() { + const frame = this.stack.pop(true); + const { sub_tokenizer_handlers } = this.state; + + if (sub_tokenizer_handlers.length) { + const handler = sub_tokenizer_handlers.pop(); + this.state.match = null; //TODO: Decide if we should reset match here or not + handler(this, frame.value); + } else { + this.push_token(frame.value); + } + this.switch_to(); + } + +} + + + +const text = 'Hello World (how are you (doing)) I may ask'; + +const rt = new RegExp_Tokenizer(); +rt.add_rules(new RegExp_Token_Parsing_Rule(/\w+/, (tokenizer, match) => tokenizer.push_token(match.value), 'word')); +rt.add_rules(new RegExp_Token_Parsing_Rule(/\s+/, null, 'space')); +rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape('('), (tokenizer, match) => tokenizer.enter_sub_tokenizer(undefined, (tokenizer, value) => tokenizer.push_token({kind: 'sub expression', value}) ), 'lpar')); +rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(')'), (tokenizer, match) => tokenizer.leave_sub_tokenizer(), 'rpar')); + +const p = new Parser(text, { tokenizer: rt }); + + +console.log(inspect(p.parse((tokenizer, value) => tokenizer.replace_value({kind: 'parsing result', value})), { colors: true, depth: null })); + +/* + +{ + kind: 'parsing result', + value: [ + 'Hello', + 'World', + { + kind: 'sub expression', + value: [ + 'how', + 'are', + 'you', + { kind: 'sub expression', value: [ 'doing' ] } + ] + }, + 'I', + 'may', + 'ask' + ] +} + + + +*/ + + + +process.exit() + + + + + + + +for (const m of rt.iter_matches(text)) { + console.log({identifier: m.identifier, value: m.value }); +}; \ No newline at end of file diff --git a/experiments/reduction-scanner-2.mjs b/experiments/reduction-scanner-2.mjs new file mode 100644 index 0000000..3adbc78 --- /dev/null +++ b/experiments/reduction-scanner-2.mjs @@ -0,0 +1,76 @@ +import { Reduction_Scanner, Reduction_Settings } from '@efforting.tech/rule-processing/reduction-scanner'; +import * as R from '@efforting.tech/rule-processing/rules'; + +import { inspect } from 'node:util'; + + + +class Rule { //NOTE: This is somewhat of a place holder because we may want to declare specific transformations later rather than always having an opaque handler function + constructor(condition, handler) { + Object.assign(this, { condition, handler }); + } + + get match() { + return this.condition.match.bind(this.condition); + } + + get action() { + return this.handler; + } + +} + + + +function sequence_rule(sequence, transform_fn) { + return new Rule( + new R.Sequence_Condition(sequence), + (rs, sequence, match) => { + const MS = match.match_start; + const ME = match.match_end; + sequence.splice(MS, ME - MS + 1, transform_fn(...sequence.slice(MS, ME + 1))); + } + ); +} + + +const N = new R.Predicate((i) => typeof i === 'number' || i.type == 'BINOP' ); +const ASTERISK = new R.Strict_Equality('*'); +const PLUS = new R.Strict_Equality('+'); +const HAT = new R.Strict_Equality('^'); + + +const rss = Reduction_Settings.load({ + // Switching this on or off affects whether add comes before mul or not + //reduction_order: 'POSITION_MAJOR', +}); +const rs = new Reduction_Scanner(rss); + + +rss.rules.push( + + sequence_rule([N, HAT, N], (left, op, right) => ({ type: 'BINOP', op: 'HAT', operands: [left, right]})), + sequence_rule([N, ASTERISK, N], (left, op, right) => ({ type: 'BINOP', op: 'ASTERISK', operands: [left, right]})), + sequence_rule([N, PLUS, N], (left, op, right) => ({ type: 'BINOP', op: 'PLUS', operands: [left, right]})), + +); + +const arr = [10, '^', 5, '+', 20, '*', 30]; +console.log(inspect(rs.transform(arr), { colors: true, depth: null })); + +/* OUTPUT + + +[ + { + type: 'BINOP', + op: 'PLUS', + operands: [ + { type: 'BINOP', op: 'HAT', operands: [ 10, 5 ] }, + { type: 'BINOP', op: 'ASTERISK', operands: [ 20, 30 ] } + ] + } +] + + +*/ diff --git a/source/data/iteration-utilities.mjs b/source/data/iteration-utilities.mjs new file mode 100644 index 0000000..4ada01d --- /dev/null +++ b/source/data/iteration-utilities.mjs @@ -0,0 +1,31 @@ +export class Switchable_Iterator { + constructor(iterator=null, stack=[]) { + Object.assign(this, { iterator, stack }); + } + + push(iterator) { + this.stack.push(this.iterator); + this.switch_to(iterator); + } + + pop(iterator) { + this.switch_to(this.stack.pop()); + } + + switch_to(iterator) { + this.iterator = iterator; + } + + next() { + return this.iterator.next(); + } + + peek() { + return this.iterator.peek(); + } + + [Symbol.iterator]() { + return this; + } + +} diff --git a/source/data/object-utilities.mjs b/source/data/object-utilities.mjs new file mode 100644 index 0000000..bcc6b9b --- /dev/null +++ b/source/data/object-utilities.mjs @@ -0,0 +1,9 @@ + +export function assign_defined(target, source) { + Object.assign(target, Object.fromEntries(Object.entries(source).filter(([k ,v]) => v !== undefined ))); +} + + +export function assign_using_predicate(target, source, kv_predicate) { + Object.assign(target, Object.fromEntries(Object.entries(source).filter(kv_predicate))); // Call predicate with ([k, v]) +} diff --git a/source/data/stack.mjs b/source/data/stack.mjs new file mode 100644 index 0000000..c88cb3d --- /dev/null +++ b/source/data/stack.mjs @@ -0,0 +1,54 @@ +export const DELETE_PROPERTY = Symbol('DELETE_PROPERTY'); + + +export class String_Keyed_Stack { + constructor(target={}, stack=[]) { + Object.assign(this, { target, stack }); + } + + push(updates={}) { + const frame = {} + this.stack.push(frame); + for (const [key, value] of Object.entries(updates)) { + + if (key in this.target) { + frame[key] = this.target[key]; + } else { + frame[key] = DELETE_PROPERTY; + } + + if (value === DELETE_PROPERTY) { + delete this.target[key]; + } else { + this.target[key] = value; + } + } + return frame; + } + + push_defined(updates={}) { + this.push(Object.fromEntries(Object.entries(updates).filter(([k ,v]) => v !== undefined ))); + } + + pop(copy_previous_state=false) { + const frame = this.stack.pop(); + const { target } = this; + + const return_value = copy_previous_state ? { ...target } : null; + + for (const [key, value] of Object.entries(frame)) { + if (value === DELETE_PROPERTY) { + delete target[key]; + } else { + target[key] = value; + } + } + + return return_value; + } + + get top_reverse_delta() { + return this.stack.at(-1); + } + +} \ No newline at end of file diff --git a/source/parsing/regexp-dispatch.mjs b/source/parsing/regexp-dispatch.mjs index 31ba098..ceac4bd 100644 --- a/source/parsing/regexp-dispatch.mjs +++ b/source/parsing/regexp-dispatch.mjs @@ -70,12 +70,15 @@ export class Abstract_RegExp_Token_Rule { } export class RegExp_Token_Rule extends Abstract_RegExp_Token_Rule { - constructor(pattern, identifier=this) { + constructor(pattern, identifier=undefined) { super(pattern); - Object.assign(this, { identifier }); + Object.assign(this, { identifier: identifier ?? this }); } } +// Note: There is no clean built in way to set an end position of a RegExp pattern, the only generic way is to slice the string we match before. +// We may at some point implement support for this (and it would only be done if end position was given) + export class RegExp_Tokenizer { constructor(rules=[], default_action=undefined) { Object.assign(this, { rules, default_action });