Implemented rudimentary reduction scanner

2026-04-29 03:25:26 +02:00
parent 0376aab672
commit 4ac01cbc9a
13 changed files with 259 additions and 26 deletions
--- a/experiments/package.json
+++ b/experiments/package.json
@@ -1,4 +1,7 @@
 {
+  "name": "@efforting.tech/experiments",
+  "version": "0.0.1",
+  "private": true,
  "devDependencies": {
    "tree-sitter": "^0.25.0",
    "tree-sitter-javascript": "^0.25.0"
--- a/experiments/reduction-scanner-1.mjs
+++ b/experiments/reduction-scanner-1.mjs
@@ -1,14 +1,109 @@
 import { Reduction_Scanner, Reduction_Settings } from  '@efforting.tech/rule-processing/reduction-scanner';
-//import { Sub_Sequence_Rule } from  '@efforting.tech/rule-processing/rules';
+import * as R from  '@efforting.tech/rule-processing/rules';
+
+import { inspect } from 'node:util';
+
+/*
+Here's what needs addressing:
+
+**`perform_reduction` refactor**
+- Remove the `start_index` loop — scanning is the condition's responsibility
+- RULE_MAJOR: iterate rules, call `rule.match(sequence, context)`, apply first that returns a match
+- POSITION_MAJOR: collect matches from all rules, apply the one with lowest `start_index` in result
+
+**`Sequence_Condition.match` implementation**
+- Iterate positions internally
+- Return match result with `start_index`, `end_index`, captures
+- Return null if no match found anywhere
+
+**Rule interface**
+- `rule.match(sequence, context)` → match result or null
+- `rule.action(scanner, sequence, match)` → performs the transformation
+- Decide: forwarding getters, bind in constructor, or scanner calls `rule.condition.match` directly
+
+**Match result shape**
+- `{ rule, sequence, start_index, end_index, captures, ...extra_info }`
+- `captures` lazily evaluated via getter
+
+**Normalization**
+- Decide when rules get normalized/compiled (construction, first transform, explicit `prepare()`)
+- Normalize bare functions to condition objects at that point
+
+**`context` shape**
+- What does the scanner inject into context beyond `start_index`/`end_index`?
+- How does `extra_info` from resolver flow through to condition match?
+*/


-const rss = Reduction_Settings.load();

+class Rule {	//NOTE: This is somewhat of a placeholder because we may want to declare specific transformations later rather than always having an opaque handler function
+	constructor(condition, handler) {	// condition: object exposing match(); handler: function returned by the `action` getter
+		Object.assign(this, { condition, handler });
+	}

+	get match() {	// Forwards to the condition: returns condition.match bound to the condition (a fresh bound function on every access)
+		return this.condition.match.bind(this.condition);
+	}

+	get action() {	// Exposes the handler under the name `action`; the scanner invokes it as rule.action(scanner, sequence, match)
+		return this.handler;
+	}

+}
+
+
+
+
+const N = new R.Predicate((i) => typeof i === 'number' || i.type == 'BINOP' );
+const ASTERISK = new R.Strict_Equality('*');
+const PLUS = new R.Strict_Equality('+');
+
+
+const rss = Reduction_Settings.load({
+	// Switching this on or off affects whether add comes before mul or not
+	//reduction_order: 'POSITION_MAJOR',
+});
 const rs = new Reduction_Scanner(rss);

-rss.rules.push();

+rss.rules.push(	// Registers the two rewrite rules; the MUL rule is listed before the ADD rule
+	new Rule(
+		new R.Sequence_Condition([N, ASTERISK, N]),	// operand '*' operand
+		(rs, sequence, match) => {
+			const MS = match.match_start;	// index of the first matched item
+			const ME = match.match_end;	// index of the last matched item (inclusive)
+			sequence.splice(MS, ME - MS + 1, { type: 'BINOP', op: 'MUL', operands: [sequence[MS], sequence[ME]]});	// replaces the matched span in place with one node; the operands are read before splice mutates the array
+		}
+	),

+	new Rule(
+		new R.Sequence_Condition([N, PLUS, N]),	// operand '+' operand
+		(rs, sequence, match) => {
+			const MS = match.match_start;	// index of the first matched item
+			const ME = match.match_end;	// index of the last matched item (inclusive)
+			sequence.splice(MS, ME - MS + 1, { type: 'BINOP', op: 'ADD', operands: [sequence[MS], sequence[ME]]});	// same in-place replacement as the MUL rule, tagged ADD
+		},
+	),

+);

 const arr = [10, '+', 20, '*', 30];
-console.log(rs.perform_reduction(arr));
-console.log(arr);
+console.log(inspect(rs.transform(arr), { colors: true, depth: null }));
+
+/* OUTPUT
+
+[
+  {
+    type: 'BINOP',
+    op: 'ADD',
+    operands: [ 10, { type: 'BINOP', op: 'MUL', operands: [ 20, 30 ] } ]
+  }
+]
+
+
+*/
+
+
+// These are for testing conditions without reduction
+// const sc = new R.Sequence_Condition([N, PLUS, N]);
+// console.log(sc.match([10, '+', 20, '*', 30]));
--- a/package-manifest.yaml
+++ b/package-manifest.yaml
@@ -25,6 +25,13 @@ packages:
    path: source/data
    documentation: documentation/data
    description: Data management
+    internal-dependencies:
+      - schema
+
+  schema:
+    path: source/schema
+    #documentation: documentation/schema
+    description: Schema system
    internal-dependencies:
      - errors

--- a/package.json
+++ b/package.json
@@ -0,0 +1,5 @@
+{
+  "dependencies": {
+    "@efforting.tech/experiments": "file:experiments"
+  }
+}
--- a/source/data/string-utilities.mjs
+++ b/source/data/string-utilities.mjs
@@ -1,4 +1,4 @@
-import * as CF from '@efforting.tech/data/field-configuration-factories';
+import * as CF from '@efforting.tech/schema/field-configuration-factories';


 export const Indention_Mode = new CF.symbol_set({
--- a/source/rule-processing/reduction-scanner.mjs
+++ b/source/rule-processing/reduction-scanner.mjs
@@ -1,7 +1,5 @@
 import { Reduction_Contract, FPR_Contract } from './contracts.mjs';
-
-
-import * as CF from '@efforting.tech/data/field-configuration-factories';
+import * as CF from '@efforting.tech/schema/field-configuration-factories';


 export const Reduction_Order = new CF.symbol_set({
@@ -44,26 +42,38 @@ export class Reduction_Scanner {

 			case Reduction_Order.symbols.RULE_MAJOR:
 				for (const rule of settings.rules) {
-					for (let start_index=0; start_index < sequence.length; start_index++) {
-						const match = rule.match(sequence, start_index);
+					const match = rule.match(sequence);
 					if (match) {
-							rule.action(this, sequence, match);
+						//console.log('RULE_MAJOR', match);
+						rule.action(this, sequence, match);	//TODO: should rule.action be able to add additional checks? Though that probably dillutes responsibility and blurs interfaces
 						return true;
 					}
 				}
-				}
 				return false;

 			case Reduction_Order.symbols.POSITION_MAJOR:
-				for (let start_index=0; start_index < sequence.length; start_index++) {
+
+				let best_match, best_rule;
+
 				for (const rule of settings.rules) {
-						const match = rule.match(sequence, start_index);
+					const match = rule.match(sequence);
+
 					if (match)  {
-							rule.action(this, sequence, match);
+						if (!best_match || best_match.match_start > match.match_start) {
+							//TODO - early return if start of sequence
+							best_match = match;
+							best_rule = rule;
+						}
+					}
+				}
+
+				if (best_match) {
+					//console.log('POSITION_MAJOR', best_match)
+					best_rule.action(this, sequence, best_match);	//TODO: should rule.action be able to add additional checks? Though that probably dillutes responsibility and blurs interfaces
 					return true;
 				}
-					}
-				}
+
+
 				return false;
 			default:
 				throw new Error(`Unknown reduction order: ${this.reduction_order}`);	//TODO: Force invalid configuration  error
--- a/source/rule-processing/resolvers.mjs
+++ b/source/rule-processing/resolvers.mjs
@@ -1,5 +1,7 @@
 import { Item_Unresolvable } from '@efforting.tech/errors';

+//TODO: Should this be integrated with rules.mjs for predicates? Possibly a normalization step during rule insertion.
+
 export class Abstract_Resolver {
 	resolve(item, extra_info={}) {
 		const result = this.resolve_handler(item, extra_info);
--- a/source/rule-processing/rules.mjs
+++ b/source/rule-processing/rules.mjs
@@ -0,0 +1,108 @@
+
+const ABORT_SEQUENCE = Symbol('ABORT_SEQUENCE');
+
+export class Abstract_Item_Condition {	// Base for conditions that test a single item; subclasses provide match_value(item, context)
+	match(sequence, context={}) {	// Tests the item at context.start_index (default 0); the index is not bounds-checked, so match_value may receive undefined
+		return this.match_value(sequence[context.start_index ?? 0], context);
+	}

+}
+
+export class Abstract_Sequence_Condition {	// Empty base class; Sequence_Condition extends it
+}
+
+export class Match {	// Result record produced by a successful condition match
+	constructor(rule, value, context) {	// rule: the object that produced the match (conditions pass `this`); value: the matched item, or an array of sub-matches for sequence conditions
+		Object.assign(this, { rule, value, ...context });	// context keys are copied onto the instance and win over `rule`/`value` on a name collision
+	}
+}
+
+export class Predicate extends Abstract_Item_Condition {	// Item condition backed by a caller-supplied function
+	constructor(predicate) {	// predicate: called as predicate(item, context); a truthy result counts as a match
+		super();
+		Object.assign(this, { predicate });
+	}

+	match_value(item, context={}) {	// Returns a single-item Match, or undefined when the predicate result is falsy
+		if (this.predicate(item, context)) {
+			const si = context.start_index ?? 0;
+			return new Match(this, item, { ...context, match_start: si, match_end: si });	// one item wide: start and end coincide
+		};
+	}

+}
+
+export class Type_Is extends Abstract_Item_Condition {	// Item condition comparing `typeof item` with a fixed type name
+	constructor(type) {	// type: value compared with the `typeof` result, e.g. 'number'
+		super();
+		Object.assign(this, { type });
+	}

+	match_value(item, context={}) {	// Returns a single-item Match, or undefined when typeof item differs from this.type
+		if (typeof item === this.type) {
+			const si = context.start_index ?? 0;
+			return new Match(this, item, { ...context, match_start: si, match_end: si });	// one item wide: start and end coincide
+		};
+	}

+}
+
+export class Strict_Equality extends Abstract_Item_Condition {	// Item condition matching items that are === a fixed value
+	constructor(value) {	// value: the reference value each item is compared against
+		super();
+		Object.assign(this, { value });
+	}

+	match_value(item, context={}) {	// Returns a single-item Match, or undefined when item !== this.value
+		if (item === this.value) {
+			const si = context.start_index ?? 0;
+			return new Match(this, item, { ...context, match_start: si, match_end: si });	// one item wide: start and end coincide
+		}
+	}

+}
+
+
+export class Sequence_Condition extends Abstract_Sequence_Condition {
+	constructor(sequence) {
+		super();
+		Object.assign(this, { sequence });
+	}
+
+	match(sequence, context={}) {
+		//TODO: For anchors we need anchor_start_index and anchor_end_index (compare with regexp ^ and $)
+		const start_index = context.start_index ?? 0;
+		const end_index = context.end_index ?? sequence.length - 1;
+
+		const match_from = (sequence_index, pattern_index=0) => {
+
+			if (pattern_index === this.sequence.length) {
+				return [];
+			}
+
+			//TODO - There are plenty of optimizations to be implemented here but we must be suer they are correct - we will start naively
+			const sub_condition = this.sequence[pattern_index];
+			const sub_match = sub_condition.match(sequence, { ...context, start_index: sequence_index });
+
+			//console.log('match_from', {sequence_index, pattern_index, sub_condition, sub_match})
+
+			if (sub_match) {
+				const remaining = match_from(sub_match.match_end + 1, pattern_index + 1);
+				if (remaining) {
+					return [sub_match, ...remaining];
+				}
+			}
+
+		}
+
+		for (let i=start_index; i<=end_index; i++) {
+			const m = match_from(i);
+			if (m) {
+				// NOTE: If result is empty array which can be a positive match, match_start and match_end will be undefined which is by design
+				return new Match(this, m, { match_start: m.at(0)?.match_start, match_end: m.at(-1)?.match_end, ...context});
+			}
+		}
+
+	}
+
+}
--- a/source/rule-processing/state-serializer.mjs
+++ b/source/rule-processing/state-serializer.mjs
@@ -1,4 +1,4 @@
-import * as CF from '@efforting.tech/data/field-configuration-factories';
+import * as CF from '@efforting.tech/schema/field-configuration-factories';


 export const Object_Serialization_Strategy = new CF.Schema({
--- a/source/schema/field-configuration-factories.mjs
+++ b/source/schema/field-configuration-factories.mjs
--- a/source/schema/field-configuration.mjs
+++ b/source/schema/field-configuration.mjs
--- a/source/text/basic-tree.mjs
+++ b/source/text/basic-tree.mjs
@@ -1,6 +1,5 @@
-import { string_has_contents, indented_line_iterator } from  '@efforting.tech/data/string-utilities';
-import * as CF from '@efforting.tech/data/field-configuration-factories';
-import { Text_Settings } from  '@efforting.tech/data/string-utilities';
+import { Text_Settings, string_has_contents, indented_line_iterator } from  '@efforting.tech/data/string-utilities';
+import * as CF from '@efforting.tech/schema/field-configuration-factories';

 export const Text_Tree_Settings = new CF.Schema({
 	//BUG - there is currently no way (I think) to put defaults into a sub schema - this should be fixed
--- a/tools/stage-for-npm.mjs
+++ b/tools/stage-for-npm.mjs
@@ -145,11 +145,15 @@ const publish_script = (
 	`${publish_script_lines.join('\n')}\n`
 );

+const repo_root = path.resolve(output_directory, '../..');
+const experiments_link_line = `mkdir -p "${repo_root}/experiments/node_modules"\nln -sf "../../build/packages" "${repo_root}/experiments/node_modules/@efforting.tech"`;
+
 const dev_stage_script = (
 	'#!/usr/bin/env bash\n' +
 	'set -e\n' +
 	`${dev_stage_mkdir_lines.join('\n')}\n` +
-	`${dev_stage_script_lines.join('\n')}\n`
+	`${dev_stage_script_lines.join('\n')}\n` +
+	experiments_link_line
 );