7 Commits

15 changed files with 550 additions and 126 deletions

View File

@@ -2,12 +2,15 @@
build/packages:
mkdir -p $@
node tools/stage-for-pnpm.mjs package-manifest.yaml source $@
node tools/stage-for-npm.mjs package-manifest.yaml source $@
publish:
cd build/packages && ./publish-all.sh
dev:
build/packages/local-install.sh
clean:
rm -rf build
.PHONY: clean build/packages publish
.PHONY: clean build/packages publish dev

View File

@@ -0,0 +1,15 @@
import { RegExp_Tokenizer, RegExp_Token_Rule } from '@efforting.tech/parsing/regexp-dispatch';
// Experiment: split a sample string into 'word' tokens; any text between the words is reported using the default identifier.
const tokenizer = new RegExp_Tokenizer();
const word_rule = new RegExp_Token_Rule(/\w+/, 'word');
tokenizer.add_rules(word_rule);
tokenizer.set_default_identifier('random stuff');

//console.log(tokenizer.rules);
//console.log(tokenizer.closest_scanning_match('#Hello World!'));

const sample_text = '#Hello World!';
for (const token of tokenizer.iter_matches(sample_text)) {
	// NOTE: Default matches have no `captured` getter, so that entry is undefined for them
	const { identifier, value, captured } = token;
	console.log({class: token.constructor.name, identifier, value, captured });
}

View File

@@ -1,6 +1,6 @@
import { Mapping_Resolver, Chained_Resolver } from '@efforting.tech/rule-processing/resolvers';
import { inspect } from 'node:util';
const vr = new Mapping_Resolver();
@@ -9,9 +9,55 @@ const tr = new Mapping_Resolver(new Map(), item => typeof item);
const cr = new Chained_Resolver([vr, tr]);
vr.rules.set('HELLO', () => 'WORLD');
tr.rules.set('string', () => 'World');
vr.rules.set('HELLO', {handler: () => 'WORLD'});
tr.rules.set('string', {extra_stuff: 'Yo!', handler: (c) => `World with context: ${inspect(c)}`});
console.log(cr.resolve('HELLO'));
console.log(cr.resolve('hello'));
console.log(cr.resolve(123));
/* OUTPUT
WORLD
World with context: {
resolver: Chained_Resolver {
chain_links: [ [Mapping_Resolver], [Mapping_Resolver] ]
},
item: 'hello',
extra_stuff: 'Yo!',
handler: [Function: handler]
}
file:///srv/Projekt/efforting.tech/nodejs.esm-library/build/packages/rule-processing/resolv
throw new Item_Unresolvable({ resolver: this, item });
^
Item_Unresolvable [Error]: Cannot resolve item 123 of type "number" using resolver Chained_
at Chained_Resolver.resolve (file:///srv/Projekt/efforting.tech/nodejs.esm-library/buil
at file:///srv/Projekt/efforting.tech/nodejs.esm-library/experiments/res1.mjs:17:16
at ModuleJob.run (node:internal/modules/esm/module_job:430:25)
at async node:internal/modules/esm/loader:639:26
at async asyncRunEntryPointWithESMLoader (node:internal/modules/run_main:101:5) {
data: {
resolver: Chained_Resolver {
chain_links: [
Mapping_Resolver {
rules: Map(1) { 'HELLO' => [Object] },
key_function: null
},
Mapping_Resolver {
rules: Map(1) { 'string' => [Object] },
key_function: [Function (anonymous)]
}
]
},
item: 123
}
}
Node.js v25.8.2
*/

View File

@@ -0,0 +1,39 @@
import * as CF from '@efforting.tech/data/field-configuration-factories';
import { inspect } from 'node:util';
import { Text_Tree_Node, Text_Tree_Settings } from '@efforting.tech/text/basic-tree';
import { RegExp_Resolver } from '@efforting.tech/rule-processing/resolvers';
import { parse_csv } from '@efforting.tech/data/string-utilities';
// Experiment: parse a small text into a tree and dispatch every top level line that has contents through a RegExp_Resolver.
const tree_settings = Text_Tree_Settings.load({
	text: {
		indention_mode: 'TABULATORS',
	},
	trim_lines: true,
});

const example_string =
`
animals: dog, cat
trees: birch, pine
`;

const tree_root = Text_Tree_Node.from_string(tree_settings, example_string);

// NOTE: The handlers receive the resolver context - `node` comes from the extra info handed to resolve() below
//       and `predicate_result` is the match result produced for the pattern that hit.
const line_resolver = new RegExp_Resolver([
	[/^animals:\s*(.*)$/, (context) => console.log("ANIMAL", context)],
	[/^trees:\s*(.*)$/, ({node, predicate_result}) => console.log(`TREE of node at line ${node.line_no}:`, parse_csv(predicate_result[1]))],
]);

const lines_with_contents = tree_root.children.filter((child) => child.has_line);
for (const child of lines_with_contents) {
	console.log(child.line, line_resolver.resolve(child.line, { node: child }));
}

View File

@@ -1,51 +1,13 @@
import * as CF from '@efforting.tech/data/field-configuration-factories';
import { readFileSync } from 'node:fs';
import { inspect } from 'node:util';
import { Indention_Mode, Text_Settings } from '@efforting.tech/text/basic-tree';
// TODO: Move into string helper module
function string_has_contents(str) {
return /\S/.test(str);
}
function *indented_line_iterator(settings, text) {
let line_no = settings.first_line;
let index = 0;
const { indention_tabulator_width } = settings;
switch (settings.indention_mode) {
case Indention_Mode.symbols.TABULATORS: {
for (const line of text.matchAll(/^(\t*)(.*)$/gm)) {
const [raw, tabs, remaining] = line;
yield { raw, indent: tabs.length, line: remaining, line_no: line_no++, index: index++};
}
break;
}
case Indention_Mode.symbols.SPACES: {
for (const line of text.matchAll(/^([ ]*)(.*)$/gm)) {
const [raw, spaces, remaining] = line;
if ((spaces.length % indention_tabulator_width) !== 0) {
throw new Error('Unaligned indention'); //TODO - proper error
}
yield { raw, indent: Math.floor(spaces.length / indention_tabulator_width), line: remaining, line_no: line_no++, index: index++};
}
break;
}
default:
throw new Error('Unsupported indention mode'); //TODO - proper error
}
import { Text_Tree_Node, Text_Tree_Settings } from '@efforting.tech/text/basic-tree';
}
const ts = Text_Settings.load({
indention_mode: 'TABULATORS',
const ts = Text_Tree_Settings.load({
text: {
indention_mode: 'TABULATORS',
},
});
const example_string =
@@ -68,61 +30,9 @@ branch3
`;
class Text_Node {
constructor(text_settings, line=undefined, indent=0, line_no=undefined, index=undefined, raw=undefined, parent=undefined) {
Object.assign(this, { text_settings, line, indent, line_no, index, raw, parent, children: [] });
}
}
const root = Text_Tree_Node.from_string(ts, example_string);
const root = new Text_Node(ts);
// NOTE: This first Text_Node is not added to the tree, it serves as an initial cursor only.
let current = new Text_Node(root.text_settings, undefined, 0, undefined, undefined, undefined, root);
for (const line_info of indented_line_iterator(ts, example_string)) {
// TODO: Implement other variants than inherit-from-previous
const indent = string_has_contents(line_info.line) ? line_info.indent : current.indent;
const delta_indent = indent - current.indent;
if (delta_indent == 0) {
const pending = new Text_Node(current.text_settings, undefined, current.indent, undefined, undefined, undefined, current.parent); // Partial insertion - same level
if (current.parent) {
current.parent.children.push(pending);
}
current = pending;
} else if (delta_indent > 0) {
for (let i=0; i<delta_indent; i++) {
const pending = new Text_Node(current.text_settings, undefined, current.indent + 1, undefined, undefined, undefined, current); // Partial insertion
current.children.push(pending);
current = pending;
}
} else {
for (let i=0; i>delta_indent; i--) {
current = current.parent;
}
const pending = new Text_Node(current.text_settings, undefined, current.indent, undefined, undefined, undefined, current.parent); // Partial insertion - same level
if (current.parent) {
current.parent.children.push(pending);
}
current = pending;
}
// Fill in partial insertion
Object.assign(current, {
line: line_info.line,
line_no: line_info.line_no,
index: line_info.index,
raw: line_info.raw,
});
}
function debug_dump(node, level=0) {
console.log(`${" ".repeat(level)}[${node.line_no ?? '-'}] ${inspect(node.line)}`);
@@ -132,6 +42,8 @@ function debug_dump(node, level=0) {
}
debug_dump(root);
/*
[-] undefined

View File

@@ -1,6 +1,6 @@
scope: '@efforting.tech'
registry: 'https://npm.efforting.tech/'
version: 0.2.8
version: 0.2.9
author:
name: 'Mikael Lövqvist'
@@ -35,6 +35,14 @@ packages:
internal-dependencies:
- data
parsing:
path: source/parsing
#documentation: documentation/text
description: Generic string parsing
internal-dependencies:
- errors
- text
wip-packages:
object-graph-storage:
path: source/object-graph-storage

View File

@@ -15,6 +15,12 @@ export function typed_value(coercion_function, default_value, description) {
return new Field_Configuration(null, coercion_function, () => default_value, description);
}
// Field configuration for a boolean value: values are coerced with the built-in Boolean() and
// `default_value` is what the default factory hands out.
export function boolean(default_value, description) {
	//BUG: Text representations such as "false" are still truthy here - we should have a more capable coercing function
	const configuration = new Field_Configuration(null, Boolean, () => default_value, description);
	return configuration;
}
// Field configuration with a caller supplied coercion function and a caller supplied factory function
// (both are passed straight through to Field_Configuration).
export function typed_factory(coercion_function, factory_function, description) {
	const configuration = new Field_Configuration(null, coercion_function, factory_function, description);
	return configuration;
}

View File

@@ -0,0 +1,56 @@
import * as CF from '@efforting.tech/data/field-configuration-factories';
// The supported indention modes. Members are read as `Indention_Mode.symbols.<NAME>` by indented_line_iterator.
// NOTE(review): the string attached to each member looks like a human readable description - confirm against CF.symbol_set
export const Indention_Mode = new CF.symbol_set({
AUTO: 'Automatic detection of indention mode',
SPACES: 'Indention is based on spaces',
TABULATORS: 'Indention is based on tabulators',
}, 'Indention mode');
// BUG: Current implementation of CF.symbol_set doesn't support default value
// Schema for the line level text settings consumed by indented_line_iterator:
//   indention_mode            - selects how leading whitespace is turned into an indention level
//   indention_tabulator_width - number of spaces making up one indention level in SPACES mode
//   first_line                - line number reported for the first line
// NOTE(review): the first argument of cardinal_value/natural_value is presumably the default value - confirm against CF
export const Text_Settings = new CF.Schema({
indention_mode: Indention_Mode,
indention_tabulator_width: CF.cardinal_value(4, 'Width of a tabulator in spaces'),
first_line: CF.natural_value(1, 'First line number'),
}, 'Text settings');
// Returns true when `str` contains at least one non whitespace character.
// NOTE: null and undefined count as "no contents". RegExp.prototype.test() coerces its argument to a string,
//       so without the guard a missing value would be tested as the text "null"/"undefined" and report true.
export function string_has_contents(str) {
	if (str === null || str === undefined) {
		return false;
	}
	return /\S/.test(str);
}
// Splits `str` on commas and strips the surrounding whitespace from every element.
export function parse_csv(str) {
	// NOTE: This is for simple comma separated values, a future RFC-4180 compatible version would have to be in a different module in this library (or be a third party thing)
	const elements = [];
	for (const raw_element of str.split(',')) {
		elements.push(raw_element.trim());
	}
	return elements;
}
// Iterates over the lines of `text` and yields one record per line:
//   raw     - the full line including its indention
//   indent  - the indention level (number of tabs, or number of spaces divided by indention_tabulator_width)
//   line    - the line with the indention stripped
//   line_no - running line number, starting at settings.first_line
//   index   - running zero based index
// Throws an Error for unaligned space indention and for indention modes other than TABULATORS and SPACES.
// NOTE: This is a generator, nothing (including the errors) happens until it is iterated.
export function *indented_line_iterator(settings, text) {
	let line_no = settings.first_line;
	let index = 0;
	const { indention_tabulator_width, indention_mode } = settings;
	const { TABULATORS, SPACES } = Indention_Mode.symbols;

	switch (indention_mode) {
		case TABULATORS: {
			for (const [raw, tabs, line] of text.matchAll(/^(\t*)(.*)$/gm)) {
				yield { raw, indent: tabs.length, line, line_no: line_no++, index: index++};
			}
			break;
		}

		case SPACES: {
			for (const [raw, spaces, line] of text.matchAll(/^([ ]*)(.*)$/gm)) {
				if ((spaces.length % indention_tabulator_width) !== 0) {
					throw new Error('Unaligned indention'); //TODO - proper error
				}
				yield { raw, indent: Math.floor(spaces.length / indention_tabulator_width), line, line_no: line_no++, index: index++};
			}
			break;
		}

		default:
			// NOTE: String() is used because interpolating a Symbol directly into a template literal throws a TypeError,
			//       which would hide the error we actually want to report.
			throw new Error(`Unsupported indention mode: ${String(indention_mode)}`); //TODO - proper error
	}
}

View File

@@ -50,7 +50,7 @@ export class Item_Unresolvable extends Error {
constructor(data) {
const { resolver, item } = data;
const type = item === null ? 'null' : typeof item;
super(`Cannot resolve item ${inspect(item)} of type "${type}" using resolver ${resolver}`);
super(`Cannot resolve item ${inspect(item)} of type "${type}" using resolver ${resolver.constructor.name}`);
this.data = data;
}
}

View File

@@ -0,0 +1,180 @@
import * as RE from '@efforting.tech/text/regexp';
// NOTE: There are some open questions about this implementation and API which may change as the library matures.
// Check out the example at experiments/regexp-tokenizer.mjs for more information on how to use this in its current state.
//
// Specifically it is not currently decided where the boundary between rule/action/capture should be
// A successful regular expression match together with the rule whose pattern produced it.
export class Pattern_Match {
	// match: the match array (as returned by RegExp.prototype.exec), rule: the rule that matched
	constructor(match, rule) {
		this.match = match;
		this.rule = rule;
	}

	// Identifier of the rule that produced this match
	get identifier() {
		const { rule } = this;
		return rule.identifier;
	}

	// The entire matched text
	get value() {
		const { match } = this;
		return match[0];
	}

	// The capture groups of the match, without the entire matched text
	get captured() {
		const { match } = this;
		return match.slice(1);
	}

	// Position in the text directly after this match
	get pending_index() {
		const { match } = this;
		const matched_length = match[0].length;
		return match.index + matched_length;
	}
}
// A stretch of text that no rule matched. Its identifier is whatever `action` returns for it.
export class Default_Match {
	// text: the unmatched text, index: where it starts, end_index: where it ends (null when it runs to the end of the input),
	// action: function called with this match, its return value becomes the identifier
	constructor(text, index, end_index, action) {
		Object.assign(this, { text, index, end_index, action });
		// NOTE: The action is called after the fields are assigned so that it can inspect the match it is asked to identify
		//       (it used to be called first and would always see an empty object).
		this.identifier = action(this);
	}

	// The unmatched text
	get value() {
		return this.text;
	}

	// Position in the text directly after this match, or null when there is nothing more to process
	get pending_index() {
		return this.end_index;
	}
}
// Base for token rules: prepares two variants of `pattern` (a RegExp or a pattern source string).
//   immediate_pattern - sticky ('y'), only matches exactly at the position it is asked to match at
//   scanning_pattern  - global ('g') and never sticky, searches forward from the position it is given
// All other flags of the original pattern are kept for both variants.
export class Abstract_RegExp_Token_Rule {
	constructor(pattern) {
		const pattern_source = RE.get_source(pattern);
		const pattern_flags = RE.get_flags(pattern);

		const immediate_flags = new Set([...pattern_flags, 'y']);
		const scanning_flags = new Set([...pattern_flags, 'g']);
		// NOTE: A sticky flag inherited from `pattern` must not leak into the scanning variant, a sticky pattern can't scan forward
		scanning_flags.delete('y');

		const immediate_pattern = new RegExp(pattern_source, [...immediate_flags].join(''));
		const scanning_pattern = new RegExp(pattern_source, [...scanning_flags].join(''));
		Object.assign(this, { pattern, immediate_pattern, scanning_pattern });
	}
}
// A token rule with an identifier. When no identifier is given the rule itself is used as its identifier.
export class RegExp_Token_Rule extends Abstract_RegExp_Token_Rule {
	constructor(pattern, identifier=undefined) {
		super(pattern);
		// NOTE: The fallback to `this` has to happen here and not as a parameter default - parameter defaults are evaluated
		//       before super() has been called, and touching `this` at that point throws a ReferenceError in a derived class.
		Object.assign(this, { identifier: identifier === undefined ? this : identifier });
	}
}
// Splits text into matches using a list of token rules. Text that no rule matches is reported as a
// Default_Match when a default action is set, otherwise it is an error.
export class RegExp_Tokenizer {
	// rules: list of rules (each exposing immediate_pattern and scanning_pattern),
	// default_action: function deciding the identifier of text that no rule matches
	constructor(rules=[], default_action=undefined) {
		Object.assign(this, { rules, default_action });
	}

	// Use a fixed identifier for all text that no rule matches
	set_default_identifier(identifier) {
		this.default_action = () => identifier;
	}

	add_rules(...rules_to_add) {
		this.rules.push(...rules_to_add);
	}

	// Returns a Pattern_Match for the first rule (in rule order) matching exactly at `position`, or undefined
	immediate_match(text, position=0) {
		for (const rule of this.rules) {
			const pattern = rule.immediate_pattern;
			// NOTE: The pattern objects are shared and stateful, lastIndex must be set before every exec
			pattern.lastIndex = position;
			const match = pattern.exec(text);
			if (match) {
				return new Pattern_Match(match, rule);
			}
		}
	}

	// Creates the Default_Match for unmatched text, throws when there is no default action
	_handle_default_match(value, index, end_index=null) {
		const { default_action } = this;
		if (!default_action) {
			// NOTE(review): Parsing_Error is neither imported nor defined in the visible part of this module - confirm it is in scope,
			//               otherwise this line raises a ReferenceError instead of the intended error
			throw new Parsing_Error({ parser: this, value, index, end_index });
		}
		return new Default_Match(value, index, end_index, default_action);
	}

	// Returns the next match at or after `position`:
	//   - a Pattern_Match when a rule matches exactly at `position`
	//   - a Default_Match for the text leading up to the closest later rule match, or for the remaining text when no rule matches
	//   - undefined when there is no text left
	closest_scanning_match(text, position=0) {
		const immediate_match = this.immediate_match(text, position);
		if (immediate_match) {
			return immediate_match;
		}

		let best_candidate;
		for (const candidate of this.iter_scanning_rule_candidates(text, position)) {
			// NOTE: Strict comparison, on equal start positions the earlier rule wins
			if ((best_candidate === undefined) || (candidate.match.index < best_candidate.match.index)) {
				best_candidate = candidate;
			}
		}

		// There was no match, just get the tail
		if (!best_candidate) {
			const tail = text.slice(position);
			return tail.length ? this._handle_default_match(tail, position) : undefined;
		}

		// There was a match, check the head
		const head = text.slice(position, best_candidate.match.index);
		if (head.length) {
			return this._handle_default_match(head, position, best_candidate.match.index);
		}
		return best_candidate;
	}

	*iter_scanning_rule_candidates(text, position=0) {
		// Iterates over all rules and yields any matches found anywhere (but only once per rule)
		for (const rule of this.rules) {
			const pattern = rule.scanning_pattern;
			pattern.lastIndex = position;
			const match = pattern.exec(text);
			if (match) {
				yield new Pattern_Match(match, rule);
			}
		}
	}

	// Yields the matches of `text` in order, starting at `position`.
	// Throws an Error if a match does not move the position forward (a rule matching the empty string),
	// since continuing would yield that same match forever.
	*iter_matches(text, position=0) {
		while (true) {
			const pending = this.closest_scanning_match(text, position);
			if (!pending) {
				break;
			}
			yield pending;

			const next_position = pending.pending_index;
			if (next_position === null) {
				break;
			}
			if (!(next_position > position)) {
				throw new Error(`Tokenizer made no progress at position ${position} - a rule matched without consuming any text`); //TODO - proper error
			}
			position = next_position;
		}
	}
}

View File

@@ -1,12 +1,13 @@
import { Item_Unresolvable } from '@efforting.tech/errors';
export class Abstract_Resolver {
resolve(item) {
const handler = this.resolve_handler(item);
if (!handler) {
resolve(item, extra_info={}) {
const result = this.resolve_handler(item);
if (!result?.handler) {
throw new Item_Unresolvable({ resolver: this, item });
}
return handler({ resolver: this, item });
// TO DOC: Spreading result into the resulting context means there are some reserved keys we need to be mindful of to avoid clobbering them
return result.handler({ resolver: this, item, ...extra_info, ...result });
}
}
@@ -19,14 +20,44 @@ export class Chained_Resolver extends Abstract_Resolver {
resolve_handler(item) {
const { chain_links } = this;
for (const link of chain_links) {
const handler = link.resolve_handler(item);
if (handler) {
return handler;
const result = link.resolve_handler(item);
if (result?.handler) {
return result;
}
}
}
}
// Resolver that picks the handler of the first rule whose predicate accepts the item.
export class Predicate_Resolver extends Abstract_Resolver {
	constructor(rules=[]) {
		// NOTE: Rules should be iterable as [predicate, handler] pairs
		super();
		this.rules = rules;
	}

	// Returns { handler, predicate_result } for the first rule that hits, or undefined when no rule does
	resolve_handler(item) {
		for (const rule of this.rules) {
			const [predicate, handler] = rule;
			const predicate_result = predicate(item);
			// NOTE: to return a falsy predicate_result as a positive hit you must wrap it in something
			if (!predicate_result) {
				continue;
			}
			return { handler, predicate_result };
		}
		return undefined;
	}
}
// Predicate resolver for strings: every rule is a [pattern, handler] pair and a rule hits when the item matches the pattern.
// The predicate result handed to the handler is the result of String.prototype.match for that pattern.
export class RegExp_Resolver extends Predicate_Resolver {
	constructor(rules=[]) {
		// NOTE: Rules should be iterable as [pattern, handler] pairs
		super(Array.from(rules, ([pattern, handler]) => [
			// NOTE: Items that are not strings can never hit - report them as a miss rather than failing on a missing match method,
			//       that way other links of a chained resolver still get their chance.
			(item) => (typeof item === 'string' ? item.match(pattern) : null),
			handler,
		]));
	}
}
export class Mapping_Resolver extends Abstract_Resolver {
constructor(rules=new Map(), key_function=null) {

View File

@@ -1,16 +1,87 @@
import { string_has_contents, indented_line_iterator } from '@efforting.tech/data/string-utilities';
import * as CF from '@efforting.tech/data/field-configuration-factories';
import { Text_Settings } from '@efforting.tech/data/string-utilities';
// Schema for the settings used when building a text tree:
//   text       - the line level settings (Text_Settings), handed to indented_line_iterator by Text_Tree_Node.from_string
//   trim_head  - not implemented yet, from_string throws when it is set
//   trim_tail  - not implemented yet, from_string throws when it is set
//   trim_lines - when set, from_string stores each line with surrounding whitespace trimmed
export const Text_Tree_Settings = new CF.Schema({
//BUG - there is currently no way (I think) to put defaults into a sub schema - this should be fixed
text: Text_Settings,
trim_head: CF.boolean(false, 'Trim the empty lines from the head of a node'),
trim_tail: CF.boolean(false, 'Trim the empty lines from the tail of a node'),
trim_lines: CF.boolean(false, 'Trim lines'),
}, 'Text tree settings');
// A node in a tree built from indented text. Every line of the text becomes a node and the indention decides the nesting.
// Nodes without a line of their own (the root, and placeholders created when the indention jumps) have `line` undefined.
export class Text_Tree_Node {
	constructor(text_tree_settings, line=undefined, indent=0, line_no=undefined, index=undefined, raw=undefined, parent=undefined) {
		Object.assign(this, { text_tree_settings, line, indent, line_no, index, raw, parent, children: [] });
	}

	// True when this node has a line with at least one non whitespace character
	get has_line() {
		// NOTE: The type check matters - an undefined line must not be stringified into the text "undefined" and count as contents
		return (typeof this.line === 'string') && string_has_contents(this.line);
	}

	// Builds a tree from `str` and returns its root node (the root itself carries no line).
	// Lines without contents inherit the indention of the previous line.
	// Throws an Error if trim_head or trim_tail is set since trimming is not implemented.
	static from_string(text_tree_settings, str) {
		const { trim_head, trim_tail, trim_lines } = text_tree_settings;

		// Fail before doing any work instead of after the entire tree has been built
		if (trim_head || trim_tail) { //TODO: Implement
			throw new Error('Trimming is not implemented'); //TODO: Proper non implemented error
		}

		const root = new this(text_tree_settings);

		// NOTE: This first node is normally not added to the tree, it serves as an initial cursor only.
		const cursor = new this(root.text_tree_settings, undefined, 0, undefined, undefined, undefined, root);
		let current = cursor;

		// Partial insertion of a new node on the same level as `node` (the line information is filled in later)
		const insert_sibling = (node) => {
			const pending = new this(node.text_tree_settings, undefined, node.indent, undefined, undefined, undefined, node.parent);
			if (node.parent) {
				node.parent.children.push(pending);
			}
			return pending;
		};

		for (const line_info of indented_line_iterator(text_tree_settings.text, str)) {
			// TODO: Implement other variants than inherit-from-previous
			const indent = string_has_contents(line_info.line) ? line_info.indent : current.indent;
			const delta_indent = indent - current.indent;

			if (delta_indent > 0) {
				// If the very first line is indented the cursor becomes its ancestor, it then has to be part of
				// the tree or everything below it would be unreachable from the root.
				if (current === cursor) {
					root.children.push(cursor);
				}
				// One level per step, jumping several levels leaves placeholder nodes in between
				for (let i=0; i<delta_indent; i++) {
					const pending = new this(current.text_tree_settings, undefined, current.indent + 1, undefined, undefined, undefined, current); // Partial insertion
					current.children.push(pending);
					current = pending;
				}
			} else {
				// Walk back up to the target level (no steps when the level is unchanged)
				for (let i=0; i>delta_indent; i--) {
					current = current.parent;
				}
				current = insert_sibling(current);
			}

			// Fill in partial insertion
			Object.assign(current, {
				line: trim_lines ? line_info.line.trim() : line_info.line,
				line_no: line_info.line_no,
				index: line_info.index,
				raw: line_info.raw,
			});
		}

		return root;
	}
}
export const Indention_Mode = new CF.symbol_set({
AUTO: 'Automatic detection of indention mode',
SPACES: 'Indention is based on spaces',
TABULATORS: 'Indention is based on tabulators',
}, 'Indention mode');
// BUG: Current implementation of CF.symbol_set doesn't support default value
export const Text_Settings = new CF.Schema({
indention_mode: Indention_Mode,
indention_tabulator_width: CF.cardinal_value(4, 'Width of a tabulator in spaces'),
first_line: CF.natural_value(1, 'First line number'),
}, 'Text settings');

49
source/text/regexp.mjs Normal file
View File

@@ -0,0 +1,49 @@
// Returns the flags of `pattern` as a Set of single character strings.
// Anything that is not a RegExp (such as a pattern source string) has no flags and gives an empty Set.
export function get_flags(pattern) {
	const flag_characters = (pattern instanceof RegExp) ? pattern.flags : '';
	return new Set(flag_characters);
}
// Returns the pattern source of a RegExp, anything else (such as a pattern source string) is returned as it is.
export function get_source(pattern) {
	return (pattern instanceof RegExp) ? pattern.source : pattern;
}
// Concatenates patterns (RegExp objects or pattern source strings) into one RegExp.
// The result carries the union of the flags of all RegExp objects in the list.
export function concat(...pattern_list) {
	const combined_flags = new Set();
	let combined_source = '';

	for (const pattern of pattern_list) {
		const is_regexp = pattern instanceof RegExp;
		combined_source += is_regexp ? pattern.source : pattern;
		if (is_regexp) {
			[...pattern.flags].forEach((flag) => combined_flags.add(flag));
		}
	}

	return new RegExp(combined_source, [...combined_flags].join(''));
}
// Joins the sources of the patterns in `pattern_list` with the source of `separator` into one RegExp.
// NOTE: Flags of the individual patterns are not carried over, the result only gets the given `flags`.
export function join(pattern_list, separator, flags=undefined) {
	const sources = pattern_list.map((pattern) => get_source(pattern));
	const separator_source = get_source(separator);
	const joined_source = sources.join(separator_source);
	return new RegExp(joined_source, flags);
}
// Returns a new RegExp based on `pattern` (a RegExp or a pattern source string) where `flag` is
// present when `state` is truthy and absent otherwise. All other flags are kept.
export function update_flag(pattern, flag, state) {
	const pattern_flags = get_flags(pattern);
	if (state) {
		pattern_flags.add(flag);
	} else {
		pattern_flags.delete(flag);
	}
	// NOTE: get_source is used (rather than pattern.source) so that pattern source strings work here, the same way they do for get_flags
	return new RegExp(get_source(pattern), [...pattern_flags].join(''));
}

11
tools/show-annotations.sh Executable file
View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
#Lists annotation comments in the .mjs files below the current directory, that is lines containing
#two slashes followed by upper case letters and/or spaces and then a colon (such as //NOTE: or // BUG:).
#node_modules directories are skipped and each hit is shown with its file name and line number.
#NOTE: This tool requires you to install https://gitea.efforting.tech/mikael-lovqvist/shell-utils but you could remove the last line if you don't want that.
#NOTE: The last line pipes the hits through grep-mtime-sorter and ansi-trunc - presumably to sort them by file modification time and to truncate long lines, see the link above to confirm.
#Example use: From the source-directory run ../tools/show-annotations.sh
grep --color=always \
-rne '//[A-Z ]*:' \
--include "*.mjs" \
--exclude-dir node_modules \
| grep-mtime-sorter | ansi-trunc -l 250

View File

@@ -60,9 +60,6 @@ const common_package_data = {
author,
version,
type: 'module',
publishConfig: {
registry
},
};
const root_package = {
@@ -127,9 +124,9 @@ for (const [package_name, package_data] of Object.entries(manifest.packages)) {
//const publish_tool = 'pnpm publish --no-git-checks';
const publish_tool = 'npm publish';
const publish_tool = `npm publish --registry "${registry}"`;
const publish_script_lines = published_packages.map(
pkg => `${publish_tool} ${pkg}`
pkg => `${publish_tool} ./${pkg}`
);
const dev_stage_script_lines = published_packages.map(