Updated documentation, added tree-sitter experiment

Added error stub for regexp-dispatch
Dropped pnpm support in favor of npm
2026-04-17 00:50:00 +02:00 · 2026-04-17 00:49:05 +02:00 · 2026-04-15 22:32:10 +02:00 · 2026-04-15 22:25:34 +02:00 · 2026-04-12 19:45:47 +02:00
14 changed files with 390 additions and 14 deletions
--- a/7
+++ b/7
@@ -2,12 +2,15 @@
 build/packages:
 	mkdir -p $@
-	node tools/stage-for-pnpm.mjs package-manifest.yaml source $@
+	node tools/stage-for-npm.mjs package-manifest.yaml source $@
 publish:
 	cd build/packages && ./publish-all.sh
 dev:
 	build/packages/local-install.sh
 clean:
 	rm -rf build
-.PHONY: clean build/packages publish
+.PHONY: clean build/packages publish dev
--- a/experiments/package.json
+++ b/experiments/package.json
@@ -0,0 +1,6 @@
 {
  "devDependencies": {
    "tree-sitter": "^0.25.0",
    "tree-sitter-javascript": "^0.25.0"
  }
 }
--- a/experiments/regexp-tokenizer.mjs
+++ b/experiments/regexp-tokenizer.mjs
@@ -0,0 +1,15 @@
 import { RegExp_Tokenizer, RegExp_Token_Rule } from '@efforting.tech/parsing/regexp-dispatch';
 // Experiment: tokenize a short string with a single word rule plus a default identifier.
 const rt = new RegExp_Tokenizer();
 // Runs of word characters are reported with the identifier 'word'
 rt.add_rules(new RegExp_Token_Rule(/\w+/, 'word'));
 // Text not covered by any rule is reported with this identifier instead of raising Tokenization_Error
 rt.set_default_identifier('random stuff');
 //console.log(rt.rules);
 //console.log(rt.closest_scanning_match('#Hello World!'));
 // Print every match in order. Default_Match defines no `captured` getter, so that key is undefined for default matches.
 for (const m of rt.iter_matches('#Hello World!')) {
 	console.log({class: m.constructor.name, identifier: m.identifier, value: m.value, captured: m.captured });
 };
--- a/experiments/text-nodes-dispatch.mjs
+++ b/experiments/text-nodes-dispatch.mjs
@@ -3,6 +3,9 @@ import * as CF from '@efforting.tech/data/field-configuration-factories';
 import { inspect } from 'node:util';
 import { Text_Tree_Node, Text_Tree_Settings } from  '@efforting.tech/text/basic-tree';
 import { RegExp_Resolver } from  '@efforting.tech/rule-processing/resolvers';
 import { parse_csv } from  '@efforting.tech/data/string-utilities';
@@ -26,11 +29,11 @@ const root = Text_Tree_Node.from_string(ts, example_string);
 const d = new RegExp_Resolver([
 	[/^animals:\s*(.*)$/, (c) => console.log("ANIMAL", c)],
-	[/^trees:\s*(.*)$/, () => console.log("TREE")],
+	[/^trees:\s*(.*)$/, ({node, predicate_result}) => console.log(`TREE of node at line ${node.line_no}:`, parse_csv(predicate_result[1]))],
 ]);
 for (const child of root.children) {
 	if (child.has_line) {
-		console.log(child.line, d.resolve(child.line));
+		console.log(child.line, d.resolve(child.line, { node: child }));
 	}
 }
--- a/experiments/tree-sitter-1.mjs
+++ b/experiments/tree-sitter-1.mjs
@@ -0,0 +1,52 @@
 import Parser from 'tree-sitter';
 import JavaScript from 'tree-sitter-javascript';
 import { readFileSync } from 'node:fs';
 import { inspect } from 'node:util';
 // NOTE: 	once upon a time
 //			there was some sort of
 //
 //			example comment that we wanted to investigate
 const parser = new Parser();
 parser.setLanguage(JavaScript);
 class example {
 	#private = 123
 	/*
 		This other comment
 		is of the style of a
 		block comment of course
 	*/
 	stuff() {
 	}
 }
 const source = readFileSync('./tree-sitter-1.mjs', 'utf-8');
 const tree = parser.parse(source);
 function* iter_nodes(node) {
    // Depth-first, pre-order walk: the node itself first, then each child subtree in index order.
    yield node;
    let child_index = 0;
    while (child_index < node.childCount) {
        yield* iter_nodes(node.child(child_index));
        child_index += 1;
    }
 }
 function *iter_child_nodes(node) {
    // Yields only the direct children of node, in index order (no recursion).
    let child_index = 0;
    while (child_index < node.childCount) {
        yield node.child(child_index);
        child_index += 1;
    }
 }
 // Dump each direct child of the parse tree's root node: first the inspected node object, then its source text.
 for (const node of iter_child_nodes(tree.rootNode)) {
    //console.log({type: node.type, text: node.text?.slice(0, 40)});
    console.log(inspect(node));
    console.log({text: node.text});
 }
--- a/package-manifest.yaml
+++ b/package-manifest.yaml
@@ -35,6 +35,14 @@ packages:
    internal-dependencies:
      - data
  parsing:
    path: source/parsing
    #documentation: documentation/text
    description: Generic string parsing
    internal-dependencies:
      - errors
      - text
 wip-packages:
  object-graph-storage:
    path: source/object-graph-storage
--- a/planning/dsl.md
+++ b/planning/dsl.md
@@ -0,0 +1,25 @@
 ## Example of indented block
 ```
 § block
 	this content
 	is in the block body
 ```
 ## Inline
 `«inline expression»`
 ## Escaping
 ```
 «§» block
 	Here is an «la»inline expression«ra»
 	Or possibly «outer escape: inline expression»
 ```
--- a/planning/to-document.md
+++ b/planning/to-document.md
@@ -0,0 +1,19 @@
 # Readme.md
 In `readme.md` we should explain that `npm install` can be used both with and without development dependencies.
 We are likely to want some development dependencies for the following purposes:
 - Template processing
 - ECMAScript parsing for inline rich semantic documentation
 ## Installing tree-sitter-javascript
 - requires `node-gyp`
 ```sh
 CXXFLAGS="-std=c++20" npm i -D tree-sitter tree-sitter-javascript
 ```
 This section should be somewhat expanded - especially regarding node-gyp
--- a/source/data/string-utilities.mjs
+++ b/source/data/string-utilities.mjs
@@ -20,6 +20,11 @@ export function string_has_contents(str) {
 	return /\S/.test(str);
 }
 export function parse_csv(str) {
 	// NOTE: Only handles simple comma separated values (no quoting or escaping). An RFC-4180 compatible parser would have to live in a different module of this library (or be a third party thing).
 	const trimmed_elements = [];
 	for (const element of str.split(',')) {
 		// Surrounding whitespace is not part of the value
 		trimmed_elements.push(element.trim());
 	}
 	return trimmed_elements;
 }
 export function *indented_line_iterator(settings, text) {
 	let line_no = settings.first_line;
--- a/source/errors.mjs
+++ b/source/errors.mjs
@@ -1,5 +1,17 @@
 import { inspect } from 'node:util';
 // § GROUP: Regexp tokenization
 export class Tokenization_Error extends Error {
 	// Raised by the regexp tokenizer when a stretch of text is not matched by any rule
 	// and no default action has been configured.
 	//
 	// data:	{ parser, value, index, end_index }
 	//			parser		- the tokenizer that raised the error
 	//			value		- the unmatched text
 	//			index		- where the unmatched text starts
 	//			end_index	- where it ends, or null/undefined when it runs to the end of the input
 	//
 	// The full data object is kept on `this.data` for programmatic inspection.
 	constructor(data) {
 		const { value, index, end_index } = data;
 		const location = (end_index == null)
 			? `from index ${index} to end of input`
 			: `at index ${index}..${end_index}`;
 		// Keep the class name as message prefix so existing checks on the old stub message still match
 		super(`Tokenization_Error: unmatched text ${inspect(value)} ${location}`);
 		this.data = data;
 	}
 }
 // § GROUP: Configuration field errors
 export class Data_Validation_Failed extends Error {
--- a/source/parsing/regexp-dispatch.mjs
+++ b/source/parsing/regexp-dispatch.mjs
@@ -0,0 +1,182 @@
 import * as RE from '@efforting.tech/text/regexp';
 import { Tokenization_Error } from '@efforting.tech/errors';
 // NOTE: 	There are some open questions about this implementation and API which may change as the library matures.
 //			Check out the example at experiments/regexp-tokenizer.mjs for more information on how to use this in its current state.
 //
 //			Specifically it is not currently decided where the boundary between rule/action/capture should be
 export class Pattern_Match {
 	// Pairs a RegExp exec() result with the rule whose pattern produced it.
 	//
 	// match:	the match array (matched text at [0], capture groups after it, plus `index`)
 	// rule:	the rule object; only its `identifier` is read here
 	constructor(match, rule) {
 		this.match = match;
 		this.rule = rule;
 	}
 	// Identifier of the rule that matched
 	get identifier() {
 		const { rule } = this;
 		return rule.identifier;
 	}
 	// The full matched text
 	get value() {
 		const { match } = this;
 		return match[0];
 	}
 	// Capture groups only (everything after the full match)
 	get captured() {
 		const { match } = this;
 		return match.slice(1);
 	}
 	// Index directly after the matched text, i.e. where matching should resume
 	get pending_index() {
 		const { match } = this;
 		return match.index + match[0].length;
 	}
 }
 export class Default_Match {
 	// Represents a stretch of text that no rule matched.
 	//
 	// text:		the unmatched text
 	// index:		where the text starts
 	// end_index:	where the text ends, or null when it runs to the end of the input
 	// action:		function called with this instance; its return value becomes `identifier`
 	constructor(text, index, end_index, action) {
 		// Populate the fields BEFORE calling the action, otherwise the action is handed
 		// an empty object and cannot inspect the text or position it is classifying.
 		Object.assign(this, { text, index, end_index, action });
 		this.identifier = action(this);
 	}
 	// The unmatched text
 	get value() {
 		return this.text;
 	}
 	// Where matching should resume; null signals that the input is exhausted
 	get pending_index() {
 		return this.end_index;
 	}
 }
 export class Abstract_RegExp_Token_Rule {
 	// Base class for tokenizer rules. From one pattern it derives two RegExp objects:
 	//   immediate_pattern	- sticky ('y'): only matches exactly at lastIndex
 	//   scanning_pattern	- global ('g'): searches forward from lastIndex
 	// The original pattern's other flags are carried over to both.
 	//
 	// pattern:	a RegExp, or whatever RE.get_source/RE.get_flags accept in its place
 	constructor(pattern) {
 		const pattern_source = RE.get_source(pattern);
 		const pattern_flags = RE.get_flags(pattern);

 		const immediate_flag_set = new Set([...pattern_flags, 'y']);
 		const scanning_flag_set = new Set([...pattern_flags, 'g']);
 		// A sticky flag inherited from the input pattern would override 'g' and turn the
 		// scanning pattern into an anchored one, so it must be dropped here.
 		scanning_flag_set.delete('y');

 		const immediate_pattern = new RegExp(pattern_source, [...immediate_flag_set].join(''));
 		const scanning_pattern = new RegExp(pattern_source, [...scanning_flag_set].join(''));
 		Object.assign(this, { pattern, immediate_pattern, scanning_pattern });
 	}
 }
 export class RegExp_Token_Rule extends Abstract_RegExp_Token_Rule {
 	// A pattern rule tagged with an identifier that is reported on every match.
 	//
 	// pattern:		forwarded to Abstract_RegExp_Token_Rule
 	// identifier:	any value; when omitted the rule instance itself is the identifier
 	constructor(pattern, identifier=undefined) {
 		super(pattern);
 		// The self-reference has to be resolved here rather than as a parameter default:
 		// defaults are evaluated before super() has run, where touching `this` throws a ReferenceError.
 		Object.assign(this, { identifier: (identifier === undefined) ? this : identifier });
 	}
 }
 // Splits text into a sequence of Pattern_Match objects (text matched by a rule) and
 // Default_Match objects (text between or after rule matches).
 export class RegExp_Tokenizer {
 	// rules:			array of rule objects exposing `immediate_pattern` and `scanning_pattern`
 	// default_action:	optional function used to identify unmatched text; without it unmatched text raises Tokenization_Error
 	constructor(rules=[], default_action=undefined) {
 		Object.assign(this, { rules, default_action });
 	}
 	// Installs a default action that always returns the given identifier
 	set_default_identifier(identifier) {
 		this.default_action = (
 			() => identifier
 		);
 	}
 	// Appends rules; earlier rules are tried first
 	add_rules(...rules_to_add) {
 		this.rules.push(...rules_to_add);
 	}
 	// Tries each rule's immediate pattern at `position` and returns a Pattern_Match for
 	// the first rule that matches, or undefined when none does.
 	immediate_match(text, position=0) {
 		for (const rule of this.rules) {
 			const pattern = rule.immediate_pattern;
 			// The pattern object is shared and stateful, so lastIndex is set before every exec
 			pattern.lastIndex = position;
 			const match = pattern.exec(text);
 			if (match) {
 				return new Pattern_Match(match, rule);
 			}
 		}
 	}
 	// Wraps unmatched text in a Default_Match, or throws Tokenization_Error when no
 	// default action is configured. end_index=null marks text that runs to the end of the input.
 	_handle_default_match(value, index, end_index=null) {
 		const { default_action } = this;
 		if (!default_action) {
 			throw new Tokenization_Error({ parser: this, value, index, end_index });
 		}
 		return new Default_Match(value, index, end_index, default_action);
 	}
 	// Returns the next token starting at `position`:
 	//   - a Pattern_Match when a rule matches right at `position`
 	//   - otherwise a Default_Match covering the text up to the earliest rule match further on,
 	//     or the whole remaining tail when no rule matches at all
 	//   - undefined when nothing is left
 	closest_scanning_match(text, position=0) {
 		const immediate_match = this.immediate_match(text, position);
 		if (immediate_match) {
 			return immediate_match;
 		}
 		// Pick the candidate with the lowest start index; on a tie the earlier rule wins
 		let best_candidate;
 		for (const candidate of this.iter_scanning_rule_candidates(text, position)) {
 			if ((best_candidate === undefined) || (best_candidate.match.index > candidate.match.index)) {
 				best_candidate = candidate;
 			}
 		}
 		// There was no match, just get the tail
 		if (!best_candidate) {
 			const tail = text.slice(position);
 			if (tail.length) {
 				return this._handle_default_match(tail, position);
 			}
 		}
 		// There was a match, check the head
 		if (best_candidate) {
 			const head = text.slice(position, best_candidate.match.index);
 			if (head.length) {
 				return this._handle_default_match(head, position, best_candidate.match.index);
 			}
 		}
 		// Either undefined (input exhausted) or a candidate that starts exactly at `position`
 		return best_candidate;
 	}
 	*iter_scanning_rule_candidates(text, position=0) {
 		// Iterates over all rules and yields any matches found anywhere (but only once per rule)
 		for (const rule of this.rules) {
 			const pattern = rule.scanning_pattern;
 			// Start searching from `position`
 			pattern.lastIndex = position;
 			const match = pattern.exec(text);
 			if (match) {
 				yield new Pattern_Match(match, rule);
 			}
 		}
 	}
 	// Yields tokens from `position` onwards until no token is returned or a token
 	// reports pending_index === null (a Default_Match that runs to the end of the input).
 	// NOTE(review): a rule that can match the empty string would leave `position` unchanged - looks like this could loop forever; confirm.
 	*iter_matches(text, position=0) {
 		while (true) {
 			const pending = this.closest_scanning_match(text, position);
 			if (pending) {
 				yield pending;
 			}
 			if (!pending || pending.pending_index === null) {
 				break;
 			}
 			position = pending.pending_index;
 		}
 	}
 }
--- a/source/rule-processing/resolvers.mjs
+++ b/source/rule-processing/resolvers.mjs
@@ -1,13 +1,13 @@
 import { Item_Unresolvable } from '@efforting.tech/errors';
 export class Abstract_Resolver {
-	resolve(item) {
+	resolve(item, extra_info={}) {
 		const result = this.resolve_handler(item);
 		if (!result?.handler) {
 			throw new Item_Unresolvable({ resolver: this, item });
 		}
 		// TO DOC: Spreading result into the resulting context means there are some reserved keys we need to be mindful of to avoid clobbering them
-		return result.handler({ resolver: this, item, ...result });
+		return result.handler({ resolver: this, item, ...extra_info, ...result });
 	}
 }
@@ -39,7 +39,8 @@ export class Predicate_Resolver extends Abstract_Resolver {
 		const { rules } = this;
 		for (const [predicate, handler] of rules) {
 			const predicate_result = predicate(item);
-			if (predicate_result !== undefined) {
+			// NOTE: to return a falsy predicate_result as a positive hit you must wrap it in something
 			if (predicate_result) {
 				return { handler, predicate_result };
 			}
 		}
@@ -50,13 +51,9 @@ export class RegExp_Resolver extends Predicate_Resolver {
 	constructor(rules=[]) {
 		// NOTE: Rules should be iterable as [predicate, handler] pairs
 		super();
-		Object.assign(this, { rules: rules.map(([pattern, handler]) => {
+		Object.assign(this, {
-
+			rules: rules.map(([pattern, handler]) => [(str) => str.match(pattern), handler])
-			const wrapped_handler = handler;	//TODO
+		});
 			const predicate = ((str) => str.match(pattern));
 			return [predicate, wrapped_handler];
 		})});
 	}
--- a/source/text/regexp.mjs
+++ b/source/text/regexp.mjs
@@ -0,0 +1,49 @@
 export function get_flags(pattern) {
 	// Returns the flags of a RegExp as a Set of single characters.
 	// Anything that is not a RegExp has no flags and yields an empty Set.
 	const flag_string = (pattern instanceof RegExp) ? pattern.flags : '';
 	return new Set(flag_string);
 }
 export function get_source(pattern) {
 	// Returns the source text of a RegExp; any other value is handed back untouched.
 	return (pattern instanceof RegExp) ? pattern.source : pattern;
 }
 export function concat(...pattern_list) {
 	// Builds one RegExp from the given parts placed back to back.
 	// RegExp parts contribute their source and flags (the result carries the union of all flags),
 	// every other part is appended to the source as-is.
 	let combined_source = '';
 	const combined_flags = new Set();
 	for (const part of pattern_list) {
 		const is_regexp = part instanceof RegExp;
 		combined_source += is_regexp ? part.source : part;
 		if (is_regexp) {
 			[...part.flags].forEach((flag) => combined_flags.add(flag));
 		}
 	}
 	return new RegExp(combined_source, [...combined_flags].join(''));
 }
 export function join(pattern_list, separator, flags=undefined) {
 	// Builds one RegExp from the sources of pattern_list with the separator's source between them.
 	// Flags of the individual parts are ignored; only the explicit `flags` argument applies.
 	const part_sources = pattern_list.map((pattern) => get_source(pattern));
 	const separator_source = get_source(separator);
 	const joined_source = part_sources.join(separator_source);
 	return new RegExp(joined_source, flags);
 }
 export function update_flag(pattern, flag, state) {
 	// Returns a new RegExp equal to `pattern` but with `flag` switched on (state truthy) or off (state falsy).
 	// The input pattern is never modified.
 	//
 	// pattern:	a RegExp, or a pattern source string (treated as having no flags)
 	// flag:	a single flag character such as 'g' or 'y'
 	const pattern_flags = get_flags(pattern);
 	if (state) {
 		pattern_flags.add(flag);
 	} else {
 		pattern_flags.delete(flag);
 	}
 	// Go through get_source so that string patterns work like they do in get_flags;
 	// reading pattern.source on a string gives undefined and would yield an empty pattern.
 	return new RegExp(get_source(pattern), [...pattern_flags].join(''));
 }
--- a/tools/stage-for-pnpm.mjs
+++ b/tools/stage-for-pnpm.mjs
Author	SHA1	Message	Date
Mikael Lövqvist	8b7d99393d	Updated documentation, added tree-sitter experiment	2026-04-17 00:50:00 +02:00
Mikael Lövqvist	376ca6d2f2	Added error stub for regexp-dispatch	2026-04-17 00:49:05 +02:00
Mikael Lövqvist	bee32ec5fa	Dropped pnpm support in favor of npm	2026-04-15 22:32:10 +02:00
Mikael Lövqvist	5afd363aa7	Added dev target in makefile, added regexp-dispatch and example	2026-04-15 22:25:34 +02:00
Mikael Lövqvist	1fdaee0b57	Added extra_info as resolve-call site addendum	2026-04-12 19:45:47 +02:00