From 60e1ad57cb962d0568aa3a3450c786ffa828a883 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikael=20L=C3=B6vqvist?= <mikael.lovqvist.mail@gmail.com>
Date: Sun, 3 May 2026 12:09:15 +0200
Subject: [PATCH] Updated stale imports. Started experiment with parsing
 system. Got reduction scanner feature ready for use. Added several data
 utilities.

---
 experiments/config1.mjs             |   2 +-
 experiments/config2.mjs             |   2 +-
 experiments/generic-parser-1.mjs    | 156 ++++++++++++++++++++++++++++
 experiments/reduction-scanner-2.mjs |  76 ++++++++++++++
 source/data/iteration-utilities.mjs |  31 ++++++
 source/data/object-utilities.mjs    |   9 ++
 source/data/stack.mjs               |  54 ++++++++++
 source/parsing/regexp-dispatch.mjs  |   7 +-
 8 files changed, 333 insertions(+), 4 deletions(-)
 create mode 100644 experiments/generic-parser-1.mjs
 create mode 100644 experiments/reduction-scanner-2.mjs
 create mode 100644 source/data/iteration-utilities.mjs
 create mode 100644 source/data/object-utilities.mjs
 create mode 100644 source/data/stack.mjs

diff --git a/experiments/config1.mjs b/experiments/config1.mjs
index 8a93cd5..e078fd4 100644
--- a/experiments/config1.mjs
+++ b/experiments/config1.mjs
@@ -1,4 +1,4 @@
-import { Schema, Field_Configuration  } from '@efforting.tech/data/field-configuration';
+import { Schema, Field_Configuration  } from '@efforting.tech/schema/field-configuration';
 
 
 function mandatory_anything(value) {
diff --git a/experiments/config2.mjs b/experiments/config2.mjs
index 885f19e..bf2d151 100644
--- a/experiments/config2.mjs
+++ b/experiments/config2.mjs
@@ -1,4 +1,4 @@
-import * as F from '@efforting.tech/data/field-configuration-factories';
+import * as F from '@efforting.tech/schema/field-configuration-factories';
 
 const s = new F.Schema({
 	foo: F.value(123, 'The value'),
diff --git a/experiments/generic-parser-1.mjs b/experiments/generic-parser-1.mjs
new file mode 100644
index 0000000..034addd
--- /dev/null
+++ b/experiments/generic-parser-1.mjs
@@ -0,0 +1,156 @@
+import { RegExp_Tokenizer, RegExp_Token_Rule } from '@efforting.tech/parsing/regexp-dispatch';
+import { Switchable_Iterator } from '@efforting.tech/data/iteration-utilities';
+import { String_Keyed_Stack } from '@efforting.tech/data/stack';
+import { assign_defined } from '@efforting.tech/data/object-utilities';
+import * as F from '@efforting.tech/schema/field-configuration-factories';
+import { inspect } from 'node:util';
+
+// Token rule that also carries the `action` which Parser.parse() calls as action(parser, match) for each match.
+class RegExp_Token_Parsing_Rule extends RegExp_Token_Rule {
+	constructor(pattern, action, identifier=undefined) {
+		super(pattern, identifier);
+		this.action = action;	// A falsy action makes the parser skip matches of this rule
+	}
+}
+
+// Schema for the mutable parser state; Parser's constructor passes its `state` argument through Parser_State.load().
+const Parser_State = new F.Schema({
+
+	position: F.value(0, 'Pending position in source'),
+	value: F.factory(() => [], 'Pending value to return'),	// NOTE(review): presumably F.factory builds a fresh array per load - confirm
+	sub_tokenizer_handlers: F.factory(() => [], 'Pending sub tokenizer handlers'),
+	tokenizer: F.value(null, 'Current tokenizer'),	// Expected from the caller: Parser calls tokenizer.iter_matches() on construction
+	context: F.value(null, 'User supplied context'),
+
+}, 'Parser state');
+
+// Drives a tokenizer over `source` and dispatches every match to the `action` of the rule that produced it.
+// Rule actions are called as action(parser, match) and use the methods below to build up state.value.
+class Parser {
+	// `state` goes through Parser_State.load(); it should carry a tokenizer because switch_to() is called right away.
+	constructor(source, state=undefined) {
+		state = Parser_State.load(state);
+		const token_generator = new Switchable_Iterator();
+		const stack = new String_Keyed_Stack(state);	// Saves and restores state entries around sub tokenizers
+		Object.assign(this, { source, state, stack, token_generator });
+		this.switch_to();
+	}
+
+	// Restarts matching with state.tokenizer at state.position; defined arguments overwrite those state entries first.
+	switch_to(tokenizer=undefined, position=undefined) {
+		assign_defined(this.state, { tokenizer, position });
+		this.token_generator.switch_to(this.state.tokenizer.iter_matches(this.source, this.state.position));
+	}
+
+	// Consumes all matches, then lets the optional handler(parser, value) post process. Returns state.value.
+	parse(handler=undefined) {
+		for (const match of this.token_generator) {
+			const { action } = match.rule;
+			if (!action) { continue; }
+			if (typeof action !== 'function') {
+				console.log('NOT IMPLEMENTED', match.rule.action);
+				continue;
+			}
+
+			// Store progress before dispatching: an action may call switch_to(), which restarts from state.position
+			this.state.position = match.pending_index;
+			this.state.match = match;
+			action(this, match);
+		}
+
+		if (handler) {
+			this.state.match = null;	//TODO: Decide if we should reset match here or not
+			handler(this, this.state.value);
+		}
+		return this.state.value;	// Read after the handler because it may have called replace_value()
+	}
+
+	// Appends tokens to the pending value (which must still be an array).
+	push_token(...tokens) {
+		this.state.value.push(...tokens);
+	}
+
+	// Swaps the pending value for `value`.
+	replace_value(value) {
+		this.state.value = value;
+	}
+
+	// Starts a nested level with a fresh, empty pending value and optionally another tokenizer.
+	// handler(parser, value), when given, receives the nested value once the level is left.
+	enter_sub_tokenizer(tokenizer=undefined, handler=undefined) {
+		this.stack.push_defined({ tokenizer, value: [] });
+		// Always record an entry (null when absent) so every handler stays paired with its own nesting level
+		this.state.sub_tokenizer_handlers.push(handler ?? null);
+		this.switch_to(tokenizer);
+	}
+
+	// Ends the current nested level, restores the outer tokenizer/value and hands the nested value to the
+	// handler registered for this level, or pushes it as a single token when there is none.
+	leave_sub_tokenizer() {
+		const frame = this.stack.pop(true);	// Copy of the state as it was inside the nested level
+		const handler = this.state.sub_tokenizer_handlers.pop();
+
+		if (handler) {
+			this.state.match = null;	//TODO: Decide if we should reset match here or not
+			handler(this, frame.value);
+		} else {
+			this.push_token(frame.value);
+		}
+		this.switch_to();
+	}
+}
+
+
+// Demo: words become tokens, whitespace is skipped and parentheses open/close nested sub expressions.
+const text = 'Hello World (how are you (doing)) I may ask';
+
+const rt = new RegExp_Tokenizer();
+rt.add_rules(new RegExp_Token_Parsing_Rule(/\w+/, (parser, match) => parser.push_token(match.value), 'word'));
+rt.add_rules(new RegExp_Token_Parsing_Rule(/\s+/, null, 'space'));
+rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape('('), (parser) => parser.enter_sub_tokenizer(undefined, (inner, value) => inner.push_token({kind: 'sub expression', value}) ), 'lpar'));
+rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(')'), (parser) => parser.leave_sub_tokenizer(), 'rpar'));
+
+const p = new Parser(text, { tokenizer: rt });
+const wrap_result = (parser, value) => parser.replace_value({kind: 'parsing result', value});
+
+console.log(inspect(p.parse(wrap_result), { colors: true, depth: null }));
+
+/*
+
+{
+  kind: 'parsing result',
+  value: [
+    'Hello',
+    'World',
+    {
+      kind: 'sub expression',
+      value: [
+        'how',
+        'are',
+        'you',
+        { kind: 'sub expression', value: [ 'doing' ] }
+      ]
+    },
+    'I',
+    'may',
+    'ask'
+  ]
+}
+
+
+
+*/
+
+
+// The script ends here; everything below this call is never executed.
+process.exit()
+
+
+
+
+
+
+// Unreachable because of the unconditional process.exit() above: would log identifier and value of every raw match in `text`.
+for (const m of rt.iter_matches(text)) {
+	console.log({identifier: m.identifier, value: m.value });
+};
\ No newline at end of file
diff --git a/experiments/reduction-scanner-2.mjs b/experiments/reduction-scanner-2.mjs
new file mode 100644
index 0000000..3adbc78
--- /dev/null
+++ b/experiments/reduction-scanner-2.mjs
@@ -0,0 +1,76 @@
+import { Reduction_Scanner, Reduction_Settings } from  '@efforting.tech/rule-processing/reduction-scanner';
+import * as R from  '@efforting.tech/rule-processing/rules';
+
+import { inspect } from 'node:util';
+
+// Pairs a condition with a handler and exposes them as `match` / `action` (presumably what Reduction_Scanner reads - confirm).
+//NOTE: This is somewhat of a place holder because we may want to declare specific transformations later rather than always having an opaque handler function
+class Rule {
+	constructor(condition, handler) {
+		this.condition = condition;
+		this.handler = handler;
+	}
+
+	// condition.match bound to the condition, so it can be called detached from it.
+	get match() {
+		const { condition } = this;
+		return condition.match.bind(condition);
+	}
+
+	get action() { return this.handler; }
+}
+
+// Builds a Rule whose handler replaces the matched run of items (match_start..match_end, both inclusive)
+// with the single item returned by transform_fn(...matched_items).
+function sequence_rule(sequence, transform_fn) {
+	const condition = new R.Sequence_Condition(sequence);
+	// `items` is the array being reduced (edited in place); named apart from the `sequence` pattern above.
+	const reduce_in_place = (rs, items, match) => {
+		const { match_start, match_end } = match;
+		const matched = items.slice(match_start, match_end + 1);
+		items.splice(match_start, match_end - match_start + 1, transform_fn(...matched));
+	};
+	return new Rule(condition, reduce_in_place);
+}
+
+// Operand: a plain number or an already reduced BINOP node. `?.` keeps null/undefined items from throwing.
+const N = new R.Predicate((i) => typeof i === 'number' || i?.type === 'BINOP' );
+const ASTERISK = new R.Strict_Equality('*');
+const PLUS = new R.Strict_Equality('+');
+const HAT = new R.Strict_Equality('^');
+
+
+const rss = Reduction_Settings.load({
+	// Switching this on or off affects whether add comes before mul or not
+	//reduction_order: 'POSITION_MAJOR',
+});
+const rs = new Reduction_Scanner(rss);
+
+// Rules are listed as ^, *, + - NOTE(review): presumably earlier rules win under the default reduction order; confirm.
+rss.rules.push(
+
+	sequence_rule([N, HAT, N], 		(left, op, right) => ({ type: 'BINOP', op: 'HAT', operands: [left, right]})),
+	sequence_rule([N, ASTERISK, N], (left, op, right) => ({ type: 'BINOP', op: 'ASTERISK', operands: [left, right]})),
+	sequence_rule([N, PLUS, N], 	(left, op, right) => ({ type: 'BINOP', op: 'PLUS', operands: [left, right]})),
+
+);
+
+const arr = [10, '^', 5, '+', 20, '*', 30];
+console.log(inspect(rs.transform(arr), { colors: true, depth: null }));
+
+/* OUTPUT
+
+
+[
+  {
+    type: 'BINOP',
+    op: 'PLUS',
+    operands: [
+      { type: 'BINOP', op: 'HAT', operands: [ 10, 5 ] },
+      { type: 'BINOP', op: 'ASTERISK', operands: [ 20, 30 ] }
+    ]
+  }
+]
+
+
+*/
diff --git a/source/data/iteration-utilities.mjs b/source/data/iteration-utilities.mjs
new file mode 100644
index 0000000..4ada01d
--- /dev/null
+++ b/source/data/iteration-utilities.mjs
@@ -0,0 +1,31 @@
+// Iterator facade whose underlying iterator can be exchanged while a for...of loop is consuming it.
+export class Switchable_Iterator {
+	constructor(iterator=null, stack=[]) {
+		Object.assign(this, { iterator, stack });
+	}
+	// Suspends the current iterator on the stack and continues with `iterator`.
+	push(iterator) {
+		this.stack.push(this.iterator);
+		this.switch_to(iterator);
+	}
+	// Resumes the most recently suspended iterator (undefined when nothing was suspended). Takes no arguments.
+	pop() {
+		this.switch_to(this.stack.pop());
+	}
+	// Replaces the current iterator without touching the stack.
+	switch_to(iterator) {
+		this.iterator = iterator;
+	}
+	// Iterator protocol: forwarded to whichever iterator is current at the time of the call.
+	next() {
+		return this.iterator.next();
+	}
+	// Forwarded as is; only usable when the current iterator implements peek() (built in iterators do not).
+	peek() {
+		return this.iterator.peek();
+	}
+	// Iterable protocol: returns itself so a running loop observes later push/pop/switch_to calls.
+	[Symbol.iterator]() {
+		return this;
+	}
+}
diff --git a/source/data/object-utilities.mjs b/source/data/object-utilities.mjs
new file mode 100644
index 0000000..bcc6b9b
--- /dev/null
+++ b/source/data/object-utilities.mjs
@@ -0,0 +1,9 @@
+// Like Object.assign(), but own enumerable string keyed entries of `source` that are undefined are skipped. Returns `target`.
+export function assign_defined(target, source) {
+	return Object.assign(target, Object.fromEntries(Object.entries(source).filter(([, value]) => value !== undefined)));
+}
+
+// Copies the entries of `source` for which kv_predicate([key, value]) is truthy onto `target`. Returns `target`.
+export function assign_using_predicate(target, source, kv_predicate) {
+	return Object.assign(target, Object.fromEntries(Object.entries(source).filter((entry) => kv_predicate(entry))));	// Only the [k, v] pair is passed on, not filter's index/array
+}
diff --git a/source/data/stack.mjs b/source/data/stack.mjs
new file mode 100644
index 0000000..c88cb3d
--- /dev/null
+++ b/source/data/stack.mjs
@@ -0,0 +1,54 @@
+// Sentinel: as an update value it requests deletion of the key; inside a frame it marks a key that did not exist before.
+export const DELETE_PROPERTY = Symbol('DELETE_PROPERTY');
+
+// Applies reversible updates to the properties of `target`; every push() stores what pop() needs to undo it.
+export class String_Keyed_Stack {
+	constructor(target={}, stack=[]) {
+		Object.assign(this, { target, stack });
+	}
+
+	// Applies `updates` to target and returns the frame (reverse delta) that pop() will use to restore it.
+	push(updates={}) {
+		const frame = {};
+		this.stack.push(frame);
+		for (const [key, value] of Object.entries(updates)) {
+			// Remember the previous value, or that the key was absent (note: `in` also sees inherited keys)
+			frame[key] = (key in this.target) ? this.target[key] : DELETE_PROPERTY;
+
+			if (value === DELETE_PROPERTY) {
+				delete this.target[key];
+			} else {
+				this.target[key] = value;
+			}
+		}
+		return frame;
+	}
+
+	// Same as push() but ignores entries whose value is undefined. Returns the frame, just like push().
+	push_defined(updates={}) {
+		return this.push(Object.fromEntries(Object.entries(updates).filter(([, value]) => value !== undefined)));
+	}
+
+	// Undoes the latest push(). With copy_previous_state, returns a shallow copy of target taken before restoring (else null).
+	// Throws TypeError when there is no frame to pop.
+	pop(copy_previous_state=false) {
+		if (this.stack.length === 0) {
+			throw new TypeError('String_Keyed_Stack.pop(): no frame to pop');
+		}
+		const frame = this.stack.pop();
+		const { target } = this;
+		const return_value = copy_previous_state ? { ...target } : null;
+
+		for (const [key, value] of Object.entries(frame)) {
+			if (value === DELETE_PROPERTY) {
+				delete target[key];
+			} else {
+				target[key] = value;
+			}
+		}
+		return return_value;
+	}
+
+	// The frame of the latest push(), or undefined when the stack is empty.
+	get top_reverse_delta() { return this.stack.at(-1); }
+}
\ No newline at end of file
diff --git a/source/parsing/regexp-dispatch.mjs b/source/parsing/regexp-dispatch.mjs
index 31ba098..ceac4bd 100644
--- a/source/parsing/regexp-dispatch.mjs
+++ b/source/parsing/regexp-dispatch.mjs
@@ -70,12 +70,15 @@ export class Abstract_RegExp_Token_Rule {
 }
 
 export class RegExp_Token_Rule extends Abstract_RegExp_Token_Rule {
-	constructor(pattern, identifier=this) {
+	constructor(pattern, identifier=undefined) {
 		super(pattern);
-		Object.assign(this, { identifier });
+		Object.assign(this, { identifier: identifier ?? this });	// Nullish identifier: the rule identifies itself. Resolved here because `this` is not usable in a default parameter before super() has run.
 	}
 }
 
+// Note: There is no clean built-in way to give a RegExp match an end position; the only generic way is to slice the string before matching.
+//		 We may implement support for this at some point (slicing would then only happen when an end position was given).
+
 export class RegExp_Tokenizer {
 	constructor(rules=[], default_action=undefined) {
 		Object.assign(this, { rules, default_action });