From 4ac01cbc9a3cab453707aacfef2b5e00350f224f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikael=20L=C3=B6vqvist?= Date: Wed, 29 Apr 2026 03:25:26 +0200 Subject: [PATCH] Implemented rudimentary reduction scanner --- experiments/package.json | 3 + experiments/reduction-scanner-1.mjs | 105 ++++++++++++++++- package-manifest.yaml | 7 ++ package.json | 5 + source/data/string-utilities.mjs | 2 +- source/rule-processing/reduction-scanner.mjs | 40 ++++--- source/rule-processing/resolvers.mjs | 2 + source/rule-processing/rules.mjs | 108 ++++++++++++++++++ source/rule-processing/state-serializer.mjs | 2 +- .../field-configuration-factories.mjs | 0 .../{data => schema}/field-configuration.mjs | 0 source/text/basic-tree.mjs | 5 +- tools/stage-for-npm.mjs | 6 +- 13 files changed, 259 insertions(+), 26 deletions(-) create mode 100644 package.json create mode 100644 source/rule-processing/rules.mjs rename source/{data => schema}/field-configuration-factories.mjs (100%) rename source/{data => schema}/field-configuration.mjs (100%) diff --git a/experiments/package.json b/experiments/package.json index 346de98..f866f04 100644 --- a/experiments/package.json +++ b/experiments/package.json @@ -1,4 +1,7 @@ { + "name": "@efforting.tech/experiments", + "version": "0.0.1", + "private": true, "devDependencies": { "tree-sitter": "^0.25.0", "tree-sitter-javascript": "^0.25.0" diff --git a/experiments/reduction-scanner-1.mjs b/experiments/reduction-scanner-1.mjs index 3bb1ac9..8235650 100644 --- a/experiments/reduction-scanner-1.mjs +++ b/experiments/reduction-scanner-1.mjs @@ -1,14 +1,109 @@ import { Reduction_Scanner, Reduction_Settings } from '@efforting.tech/rule-processing/reduction-scanner'; -//import { Sub_Sequence_Rule } from '@efforting.tech/rule-processing/rules'; +import * as R from '@efforting.tech/rule-processing/rules'; + +import { inspect } from 'node:util'; + +/* +Here's what needs addressing: + +**`perform_reduction` refactor** +- Remove the `start_index` loop — scanning is the 
condition's responsibility +- RULE_MAJOR: iterate rules, call `rule.match(sequence, context)`, apply first that returns a match +- POSITION_MAJOR: collect matches from all rules, apply the one with lowest `start_index` in result + +**`Sequence_Condition.match` implementation** +- Iterate positions internally +- Return match result with `start_index`, `end_index`, captures +- Return null if no match found anywhere + +**Rule interface** +- `rule.match(sequence, context)` → match result or null +- `rule.action(scanner, sequence, match)` → performs the transformation +- Decide: forwarding getters, bind in constructor, or scanner calls `rule.condition.match` directly + +**Match result shape** +- `{ rule, sequence, start_index, end_index, captures, ...extra_info }` +- `captures` lazily evaluated via getter + +**Normalization** +- Decide when rules get normalized/compiled (construction, first transform, explicit `prepare()`) +- Normalize bare functions to condition objects at that point + +**`context` shape** +- What does the scanner inject into context beyond `start_index`/`end_index`? +- How does `extra_info` from resolver flow through to condition match? 
+*/ -const rss = Reduction_Settings.load(); +class Rule { //NOTE: This is somewhat of a placeholder because we may want to declare specific transformations later rather than always having an opaque handler function + constructor(condition, handler) { + Object.assign(this, { condition, handler }); + } + + get match() { + return this.condition.match.bind(this.condition); + } + + get action() { + return this.handler; + } + +} + + + + +const N = new R.Predicate((i) => typeof i === 'number' || i?.type === 'BINOP' ); //NOTE: i?. keeps the predicate from throwing when it is probed with a null/undefined item +const ASTERISK = new R.Strict_Equality('*'); +const PLUS = new R.Strict_Equality('+'); + + +const rss = Reduction_Settings.load({ + // Switching this on or off affects whether add comes before mul or not + //reduction_order: 'POSITION_MAJOR', +}); const rs = new Reduction_Scanner(rss); -rss.rules.push(); +rss.rules.push( + new Rule( + new R.Sequence_Condition([N, ASTERISK, N]), + (rs, sequence, match) => { + const MS = match.match_start; + const ME = match.match_end; + sequence.splice(MS, ME - MS + 1, { type: 'BINOP', op: 'MUL', operands: [sequence[MS], sequence[ME]]}); + } + ), + + new Rule( + new R.Sequence_Condition([N, PLUS, N]), + (rs, sequence, match) => { + const MS = match.match_start; + const ME = match.match_end; + sequence.splice(MS, ME - MS + 1, { type: 'BINOP', op: 'ADD', operands: [sequence[MS], sequence[ME]]}); + }, + ), + +); const arr = [10, '+', 20, '*', 30]; -console.log(rs.perform_reduction(arr)); -console.log(arr); \ No newline at end of file +console.log(inspect(rs.transform(arr), { colors: true, depth: null })); + +/* OUTPUT + +[ + { + type: 'BINOP', + op: 'ADD', + operands: [ 10, { type: 'BINOP', op: 'MUL', operands: [ 20, 30 ] } ] + } +] + + +*/ + + +// These are for testing conditions without reduction +// const sc = new R.Sequence_Condition([N, PLUS, N]); +// console.log(sc.match([10, '+', 20, '*', 30])); diff --git a/package-manifest.yaml b/package-manifest.yaml index 18c7b85..bae9e73 100644 --- a/package-manifest.yaml +++
b/package-manifest.yaml @@ -25,6 +25,13 @@ packages: path: source/data documentation: documentation/data description: Data management + internal-dependencies: + - schema + + schema: + path: source/schema + #documentation: documentation/schema + description: Schema system internal-dependencies: - errors diff --git a/package.json b/package.json new file mode 100644 index 0000000..3d4a025 --- /dev/null +++ b/package.json @@ -0,0 +1,5 @@ +{ + "dependencies": { + "@efforting.tech/experiments": "file:experiments" + } +} diff --git a/source/data/string-utilities.mjs b/source/data/string-utilities.mjs index 9c59717..396ebe4 100644 --- a/source/data/string-utilities.mjs +++ b/source/data/string-utilities.mjs @@ -1,4 +1,4 @@ -import * as CF from '@efforting.tech/data/field-configuration-factories'; +import * as CF from '@efforting.tech/schema/field-configuration-factories'; export const Indention_Mode = new CF.symbol_set({ diff --git a/source/rule-processing/reduction-scanner.mjs b/source/rule-processing/reduction-scanner.mjs index b5d4a08..7167760 100644 --- a/source/rule-processing/reduction-scanner.mjs +++ b/source/rule-processing/reduction-scanner.mjs @@ -1,7 +1,5 @@ import { Reduction_Contract, FPR_Contract } from './contracts.mjs'; - - -import * as CF from '@efforting.tech/data/field-configuration-factories'; +import * as CF from '@efforting.tech/schema/field-configuration-factories'; export const Reduction_Order = new CF.symbol_set({ @@ -44,26 +42,38 @@ export class Reduction_Scanner { case Reduction_Order.symbols.RULE_MAJOR: for (const rule of settings.rules) { - for (let start_index=0; start_index < sequence.length; start_index++) { - const match = rule.match(sequence, start_index); - if (match) { - rule.action(this, sequence, match); - return true; - } + const match = rule.match(sequence); + if (match) { + //console.log('RULE_MAJOR', match); + rule.action(this, sequence, match); //TODO: should rule.action be able to add additional checks? 
Though that probably dilutes responsibility and blurs interfaces + return true; } } return false; case Reduction_Order.symbols.POSITION_MAJOR: - for (let start_index=0; start_index < sequence.length; start_index++) { - for (const rule of settings.rules) { - const match = rule.match(sequence, start_index); - if (match) { - rule.action(this, sequence, match); - return true; + + let best_match, best_rule; + + for (const rule of settings.rules) { + const match = rule.match(sequence); + + if (match) { + if (!best_match || best_match.match_start > match.match_start) { + //TODO - early return if start of sequence + best_match = match; + best_rule = rule; } } } + + if (best_match) { + //console.log('POSITION_MAJOR', best_match) + best_rule.action(this, sequence, best_match); //TODO: should rule.action be able to add additional checks? Though that probably dilutes responsibility and blurs interfaces + return true; + } + + return false; default: throw new Error(`Unknown reduction order: ${this.reduction_order}`); //TODO: Force invalid configuration error diff --git a/source/rule-processing/resolvers.mjs b/source/rule-processing/resolvers.mjs index cba1d76..03d988f 100644 --- a/source/rule-processing/resolvers.mjs +++ b/source/rule-processing/resolvers.mjs @@ -1,5 +1,7 @@ import { Item_Unresolvable } from '@efforting.tech/errors'; +//TODO: Should this be integrated with rules.mjs for predicates? Possibly a normalization step during rule insertion. + export class Abstract_Resolver { resolve(item, extra_info={}) { const result = this.resolve_handler(item, extra_info); diff --git a/source/rule-processing/rules.mjs b/source/rule-processing/rules.mjs new file mode 100644 index 0000000..a811bdd --- /dev/null +++ b/source/rule-processing/rules.mjs @@ -0,0 +1,108 @@ + +const ABORT_SEQUENCE = Symbol('ABORT_SEQUENCE'); + +export class Abstract_Item_Condition { + match(sequence, context={}) { + return (context.start_index ?? 0) < sequence.length ? this.match_value(sequence[context.start_index ??
0], context) : undefined; //NOTE: There is no item past the end of the sequence so an item condition must never match there (and match_value must not be handed a phantom undefined) + } + +} + +export class Abstract_Sequence_Condition { +} + +export class Match { + constructor(rule, value, context) { + Object.assign(this, { rule, value, ...context }); + } +} + +export class Predicate extends Abstract_Item_Condition { + constructor(predicate) { + super(); + Object.assign(this, { predicate }); + } + + match_value(item, context={}) { + if (this.predicate(item, context)) { + const si = context.start_index ?? 0; + return new Match(this, item, { ...context, match_start: si, match_end: si }); + }; + } + +} + +export class Type_Is extends Abstract_Item_Condition { + constructor(type) { + super(); + Object.assign(this, { type }); + } + + match_value(item, context={}) { + if (typeof item === this.type) { + const si = context.start_index ?? 0; + return new Match(this, item, { ...context, match_start: si, match_end: si }); + }; + } + +} + +export class Strict_Equality extends Abstract_Item_Condition { + constructor(value) { + super(); + Object.assign(this, { value }); + } + + match_value(item, context={}) { + if (item === this.value) { + const si = context.start_index ?? 0; + return new Match(this, item, { ...context, match_start: si, match_end: si }); + } + } + +} + + +export class Sequence_Condition extends Abstract_Sequence_Condition { + constructor(sequence) { + super(); + Object.assign(this, { sequence }); + } + + match(sequence, context={}) { + //TODO: For anchors we need anchor_start_index and anchor_end_index (compare with regexp ^ and $) + const start_index = context.start_index ?? 0; + const end_index = context.end_index ??
sequence.length - 1; + + const match_from = (sequence_index, pattern_index=0) => { + + if (pattern_index === this.sequence.length) { + return []; + } + + //TODO - There are plenty of optimizations to be implemented here but we must be sure they are correct - we will start naively + const sub_condition = this.sequence[pattern_index]; + const sub_match = sub_condition.match(sequence, { ...context, start_index: sequence_index }); + + //console.log('match_from', {sequence_index, pattern_index, sub_condition, sub_match}) + + if (sub_match) { + const remaining = match_from(sub_match.match_end + 1, pattern_index + 1); + if (remaining) { + return [sub_match, ...remaining]; + } + } + + } + + for (let i=start_index; i<=end_index; i++) { + const m = match_from(i); + if (m) { + // NOTE: If result is empty array which can be a positive match, match_start and match_end will be undefined which is by design + return new Match(this, m, { ...context, match_start: m.at(0)?.match_start, match_end: m.at(-1)?.match_end }); //NOTE: context is spread first (as in the item conditions) so stale bounds in context can never override the computed ones + } + } + + } + +} \ No newline at end of file diff --git a/source/rule-processing/state-serializer.mjs b/source/rule-processing/state-serializer.mjs index 42fcba1..3065194 100644 --- a/source/rule-processing/state-serializer.mjs +++ b/source/rule-processing/state-serializer.mjs @@ -1,4 +1,4 @@ -import * as CF from '@efforting.tech/data/field-configuration-factories'; +import * as CF from '@efforting.tech/schema/field-configuration-factories'; export const Object_Serialization_Strategy = new CF.Schema({ diff --git a/source/data/field-configuration-factories.mjs b/source/schema/field-configuration-factories.mjs similarity index 100% rename from source/data/field-configuration-factories.mjs rename to source/schema/field-configuration-factories.mjs diff --git a/source/data/field-configuration.mjs b/source/schema/field-configuration.mjs similarity index 100% rename from source/data/field-configuration.mjs rename to source/schema/field-configuration.mjs diff --git
a/source/text/basic-tree.mjs b/source/text/basic-tree.mjs index aee95d9..160d438 100644 --- a/source/text/basic-tree.mjs +++ b/source/text/basic-tree.mjs @@ -1,6 +1,5 @@ -import { string_has_contents, indented_line_iterator } from '@efforting.tech/data/string-utilities'; -import * as CF from '@efforting.tech/data/field-configuration-factories'; -import { Text_Settings } from '@efforting.tech/data/string-utilities'; +import { Text_Settings, string_has_contents, indented_line_iterator } from '@efforting.tech/data/string-utilities'; +import * as CF from '@efforting.tech/schema/field-configuration-factories'; export const Text_Tree_Settings = new CF.Schema({ //BUG - there is currently no way (I think) to put defaults into a sub schema - this should be fixed diff --git a/tools/stage-for-npm.mjs b/tools/stage-for-npm.mjs index 1b4aa2e..7a04c37 100644 --- a/tools/stage-for-npm.mjs +++ b/tools/stage-for-npm.mjs @@ -145,11 +145,15 @@ const publish_script = ( `${publish_script_lines.join('\n')}\n` ); +const repo_root = path.resolve(output_directory, '../..'); +const experiments_link_line = `mkdir -p "${repo_root}/experiments/node_modules"\nln -sfn "../../build/packages" "${repo_root}/experiments/node_modules/@efforting.tech"`; //NOTE: -n makes ln replace an existing link on re-runs instead of following it and creating a nested link inside build/packages + const dev_stage_script = ( '#!/usr/bin/env bash\n' + 'set -e\n' + `${dev_stage_mkdir_lines.join('\n')}\n` + - `${dev_stage_script_lines.join('\n')}\n` + `${dev_stage_script_lines.join('\n')}\n` + + experiments_link_line );