8 Commits

34 changed files with 1403 additions and 75 deletions

View File

@@ -1,4 +1,4 @@
import { Schema, Field_Configuration } from '@efforting.tech/data/field-configuration';
import { Schema, Field_Configuration } from '@efforting.tech/schema/field-configuration';
function mandatory_anything(value) {

View File

@@ -1,4 +1,4 @@
import * as F from '@efforting.tech/data/field-configuration-factories';
import * as F from '@efforting.tech/schema/field-configuration-factories';
const s = new F.Schema({
foo: F.value(123, 'The value'),

View File

@@ -0,0 +1,30 @@
import { Stub } from '@efforting.tech/feature/stub';
/*
export function Stub(meta, name, description, module_name, function_name) {
return function stub() {
throw new Error(`The feature "${name}" of "${meta.url}" is not enabled. Enable it by calling "${function_name}(${this.name})" imported from "${module_name}"`); //TODO - specific error
}
}
*/
// Demo class: `from_stuff` is a placeholder created by Stub() for a feature that has not been enabled.
class Thing {
	static from_stuff = Stub(
		import.meta,
		'stuff-loader',
		'Creates Thing from stuff',
		'@efforting.tech/stuff/loader',
		'enable_stuff_loader',
	);
}

// Calling the placeholder throws; the resulting error is reproduced in the comment that follows.
Thing.from_stuff();
/*
Error: The feature "stuff-loader" of "file:///srv/Projekt/efforting.tech/nodejs.esm-library/experiments/generic-parser-2.mjs" is not enabled. Enable it by calling "enable_stuff_loader(Thing)" imported from "@efforting.tech/stuff/loader"
at Thing.stub [as from_stuff] (file:///srv/Projekt/efforting.tech/nodejs.esm-library/build/packages/feature/stub.mjs:4:9)
at file:///srv/Projekt/efforting.tech/nodejs.esm-library/experiments/generic-parser-2.mjs:10:7
at ModuleJob.run (node:internal/modules/esm/module_job:430:25)
at async node:internal/modules/esm/loader:639:26
at async asyncRunEntryPointWithESMLoader (node:internal/modules/run_main:101:5)
*/

View File

@@ -0,0 +1,40 @@
import { RegExp_Tokenizer } from '@efforting.tech/parsing/regexp-dispatch';
import { RegExp_Token_Parsing_Rule, Parser } from '@efforting.tech/parsing/generic-parsing';
import { inspect } from 'node:util';
// Sample input containing nested parentheses.
const text = 'Hello World (how are you (doing)) I may ask';

// Rule actions, named so the rule list below reads as a table.
const push_word = (tokenizer, match) => tokenizer.push_token(match.value);
const wrap_sub_expression = (tokenizer, value) => tokenizer.push_token({kind: 'sub expression', value});
const open_group = (tokenizer) => tokenizer.enter_sub_tokenizer(undefined, wrap_sub_expression);
const close_group = (tokenizer) => tokenizer.leave_sub_tokenizer();
const wrap_result = (tokenizer, value) => tokenizer.replace_value({kind: 'parsing result', value});

const rt = new RegExp_Tokenizer();
rt.add_rules(new RegExp_Token_Parsing_Rule(/\w+/, push_word, 'word'));
rt.add_rules(new RegExp_Token_Parsing_Rule(/\s+/, null, 'space'));	// no action: whitespace produces no token
rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape('('), open_group, 'lpar'));
rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(')'), close_group, 'rpar'));

const p = new Parser(text, { tokenizer: rt });
const result = p.parse(wrap_result);
console.log(inspect(result, { colors: true, depth: null }));
/*
{
kind: 'parsing result',
value: [
'Hello',
'World',
{
kind: 'sub expression',
value: [
'how',
'are',
'you',
{ kind: 'sub expression', value: [ 'doing' ] }
]
},
'I',
'may',
'ask'
]
}
*/

View File

@@ -0,0 +1,146 @@
import { Row_Based_Table } from '@efforting.tech/table';
import { load_raster_table } from '@efforting.tech/table/raster-table';
import { RegExp_Tokenizer } from '@efforting.tech/parsing/regexp-dispatch';
import { RegExp_Token_Parsing_Rule, Parser } from '@efforting.tech/parsing/generic-parsing';
// Loads raster text as a Row_Based_Table and passes every cell through String.prototype.trim via replace_all_cells.
function load_table(raster) {
	const loaded = load_raster_table(raster, Row_Based_Table);
	const trim_cell = ({cell}) => cell.trim();
	loaded.replace_all_cells(trim_cell);
	return loaded;
}
// Logical connectives: rule name and the symbol it is written with.
// Every row must fill both columns; the OR row previously had no symbol.
const logic_ops = load_table(`
name symbol
---- ------
AND ∧
OR ∨
XOR ⊕
NAND ↑
NOR ↓
XNOR ⊙
IMPLIES →
IFF ↔
NOT ¬
`);
// General-purpose operator symbols: rule name and the symbol it is written with.
const generic_ops = load_table(`
name symbol
---- ------
PLUS +
HYPHEN -
DOT ·
ASTERISK *
CROSS ×
SLASH /
CARET ^
UNDERSCORE _
PERCENT %
`);
// Punctuation marks: rule name and the symbol it is written with.
const punctuation = load_table(`
name symbol
---- ------
COMMA ,
SEMI_COLON ;
COLON :
PERIOD .
`);
// Bracket pairs: name plus the opening (left) and closing (right) symbol.
// The name is used to build rule identifiers, so the spelling of PARENTHESIS is corrected here.
const grouping = load_table(`
name left right
---- ---- -----
PARENTHESIS ( )
SQUARE_BRACKET [ ]
CURLY_BRACE { }
ANGLE_BRACKET ⟨ ⟩
DOUBLE_ARROW_BRACKET « »
`);
// Greek alphabet: letter name plus its lower-case and upper-case glyph.
const greek_chars = load_table(`
name lower upper
---- ----- -----
ALPHA α Α
BETA β Β
GAMMA γ Γ
DELTA δ Δ
EPSILON ε Ε
ZETA ζ Ζ
ETA η Η
THETA θ Θ
IOTA ι Ι
KAPPA κ Κ
LAMBDA λ Λ
MU μ Μ
NU ν Ν
XI ξ Ξ
OMICRON ο Ο
PI π Π
RHO ρ Ρ
SIGMA σ Σ
TAU τ Τ
UPSILON υ Υ
PHI φ Φ
CHI χ Χ
PSI ψ Ψ
OMEGA ω Ω
`);
// Action shared by every rule that simply emits its match as a flat token.
const push_plain_token = (tokenizer, match) => tokenizer.push_token({ kind: 'TOKEN', match });

const rt = new RegExp_Tokenizer();

// Bracket pairs: the left symbol opens a sub tokenizer, the right symbol closes it and
// the collected value is pushed as a single EXPR token carrying both delimiter matches.
for (const { name, left, right } of grouping.iter_objects()) {
	const open_group = (tokenizer, ingress_match) => tokenizer.enter_sub_tokenizer(
		undefined,
		(tokenizer, value, egress_match) => tokenizer.push_token(
			{kind: 'EXPR', name, value, ingress_match, egress_match}
		),
	);
	const close_group = (tokenizer, match) => tokenizer.leave_sub_tokenizer(match);

	rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(left), open_group, `LEFT_${name}`));
	rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(right), close_group, `RIGHT_${name}`));
}

// One flat-token rule per symbol, identified by the symbol's name.
for (const table of [logic_ops, generic_ops, punctuation]) {
	for (const { name, symbol } of table.iter_objects()) {
		rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(symbol), push_plain_token, name));
	}
}

// Two flat-token rules per Greek letter: lower case, then upper case.
for (const { name, lower, upper } of greek_chars.iter_objects()) {
	rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(lower), push_plain_token, `LOWER_${name}`));
	rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(upper), push_plain_token, `UPPER_${name}`));
}

rt.add_rules(new RegExp_Token_Parsing_Rule(/\w+/, push_plain_token, 'WORD'));
rt.add_rules(new RegExp_Token_Parsing_Rule(/\s+/, null, 'WHITESPACE'));	// no action: produces no token

const text = 'Hello World (how are you (doing)) I may ask';
const p = new Parser(text, { tokenizer: rt });
//console.log(rt.rules.at(-3));
console.log(p.parse());

View File

@@ -1,4 +1,7 @@
{
"name": "@efforting.tech/experiments",
"version": "0.0.1",
"private": true,
"devDependencies": {
"tree-sitter": "^0.25.0",
"tree-sitter-javascript": "^0.25.0"

View File

@@ -1,14 +1,109 @@
import { Reduction_Scanner, Reduction_Settings } from '@efforting.tech/rule-processing/reduction-scanner';
//import { Sub_Sequence_Rule } from '@efforting.tech/rule-processing/rules';
import * as R from '@efforting.tech/rule-processing/rules';
import { inspect } from 'node:util';
/*
Here's what needs addressing:
**`perform_reduction` refactor**
- Remove the `start_index` loop — scanning is the condition's responsibility
- RULE_MAJOR: iterate rules, call `rule.match(sequence, context)`, apply first that returns a match
- POSITION_MAJOR: collect matches from all rules, apply the one with lowest `start_index` in result
**`Sequence_Condition.match` implementation**
- Iterate positions internally
- Return match result with `start_index`, `end_index`, captures
- Return null if no match found anywhere
**Rule interface**
- `rule.match(sequence, context)` → match result or null
- `rule.action(scanner, sequence, match)` → performs the transformation
- Decide: forwarding getters, bind in constructor, or scanner calls `rule.condition.match` directly
**Match result shape**
- `{ rule, sequence, start_index, end_index, captures, ...extra_info }`
- `captures` lazily evaluated via getter
**Normalization**
- Decide when rules get normalized/compiled (construction, first transform, explicit `prepare()`)
- Normalize bare functions to condition objects at that point
**`context` shape**
- What does the scanner inject into context beyond `start_index`/`end_index`?
- How does `extra_info` from resolver flow through to condition match?
*/
const rss = Reduction_Settings.load();
// NOTE: Somewhat of a placeholder - later we may want to declare specific transformations
// instead of always carrying an opaque handler function.
class Rule {
	constructor(condition, handler) {
		this.condition = condition;
		this.handler = handler;
	}

	// The condition's match method, bound so it can be called detached from the condition.
	get match() {
		const { condition } = this;
		return condition.match.bind(condition);
	}

	// The action of a rule is its handler.
	get action() {
		return this.handler;
	}
}
const N = new R.Predicate((i) => typeof i === 'number' || i.type == 'BINOP' );
const ASTERISK = new R.Strict_Equality('*');
const PLUS = new R.Strict_Equality('+');
const rss = Reduction_Settings.load({
// Switching this on or off affects whether add comes before mul or not
//reduction_order: 'POSITION_MAJOR',
});
const rs = new Reduction_Scanner(rss);
rss.rules.push();
rss.rules.push(
new Rule(
new R.Sequence_Condition([N, ASTERISK, N]),
(rs, sequence, match) => {
const MS = match.match_start;
const ME = match.match_end;
sequence.splice(MS, ME - MS + 1, { type: 'BINOP', op: 'MUL', operands: [sequence[MS], sequence[ME]]});
}
),
new Rule(
new R.Sequence_Condition([N, PLUS, N]),
(rs, sequence, match) => {
const MS = match.match_start;
const ME = match.match_end;
sequence.splice(MS, ME - MS + 1, { type: 'BINOP', op: 'ADD', operands: [sequence[MS], sequence[ME]]});
},
),
);
const arr = [10, '+', 20, '*', 30];
console.log(rs.perform_reduction(arr));
console.log(arr);
console.log(inspect(rs.transform(arr), { colors: true, depth: null }));
/* OUTPUT
[
{
type: 'BINOP',
op: 'ADD',
operands: [ 10, { type: 'BINOP', op: 'MUL', operands: [ 20, 30 ] } ]
}
]
*/
// These are for testing conditions without reduction
// const sc = new R.Sequence_Condition([N, PLUS, N]);
// console.log(sc.match([10, '+', 20, '*', 30]));

View File

@@ -0,0 +1,76 @@
import { Reduction_Scanner, Reduction_Settings } from '@efforting.tech/rule-processing/reduction-scanner';
import * as R from '@efforting.tech/rule-processing/rules';
import { inspect } from 'node:util';
// NOTE: Somewhat of a placeholder - later we may want to declare specific transformations
// instead of always carrying an opaque handler function.
class Rule {
	constructor(condition, handler) {
		this.condition = condition;
		this.handler = handler;
	}

	// The condition's match method, bound so it can be called detached from the condition.
	get match() {
		const { condition } = this;
		return condition.match.bind(condition);
	}

	// The action of a rule is its handler.
	get action() {
		return this.handler;
	}
}
// Builds a Rule that matches `sequence` as a Sequence_Condition and replaces the matched
// span (match_start..match_end, inclusive) with transform_fn(...matched items).
function sequence_rule(sequence, transform_fn) {
	const condition = new R.Sequence_Condition(sequence);

	const replace_matched_span = (rs, items, match) => {
		const { match_start: first, match_end: last } = match;
		const matched_items = items.slice(first, last + 1);
		items.splice(first, last - first + 1, transform_fn(...matched_items));
	};

	return new Rule(condition, replace_matched_span);
}
// Operand: a plain number or an already reduced BINOP node.
// `i?.type` keeps the predicate from throwing on null/undefined items, `===` avoids coercion.
const N = new R.Predicate((i) => typeof i === 'number' || i?.type === 'BINOP');
const ASTERISK = new R.Strict_Equality('*');
const PLUS = new R.Strict_Equality('+');
const HAT = new R.Strict_Equality('^');

const rss = Reduction_Settings.load({
	// Switching this on or off affects whether add comes before mul or not
	//reduction_order: 'POSITION_MAJOR',
});
const rs = new Reduction_Scanner(rss);

// Each rule collapses "operand operator operand" into one BINOP node named after the operator symbol.
rss.rules.push(
	sequence_rule([N, HAT, N], (left, op, right) => ({ type: 'BINOP', op: 'HAT', operands: [left, right]})),
	sequence_rule([N, ASTERISK, N], (left, op, right) => ({ type: 'BINOP', op: 'ASTERISK', operands: [left, right]})),
	sequence_rule([N, PLUS, N], (left, op, right) => ({ type: 'BINOP', op: 'PLUS', operands: [left, right]})),
);

const arr = [10, '^', 5, '+', 20, '*', 30];
console.log(inspect(rs.transform(arr), { colors: true, depth: null }));
/* OUTPUT
[
{
type: 'BINOP',
op: 'PLUS',
operands: [
{ type: 'BINOP', op: 'HAT', operands: [ 10, 5 ] },
{ type: 'BINOP', op: 'ASTERISK', operands: [ 20, 30 ] }
]
}
]
*/

View File

@@ -0,0 +1,157 @@
import { Reduction_Scanner, Reduction_Settings } from '@efforting.tech/rule-processing/reduction-scanner';
import * as R from '@efforting.tech/rule-processing/rules';
import { inspect } from 'node:util';
// Pairs a rule with the match its condition produced.
class Rule_Match {
	constructor(rule, match) {
		this.rule = rule;
		this.match = match;
	}

	// The action for this match is the handler of the rule that produced it.
	get action() {
		const { handler } = this.rule;
		return handler;
	}
}
// NOTE: Somewhat of a placeholder - later we may want to declare specific transformations
// instead of always carrying an opaque handler function.
class Rule {
	constructor(condition, handler) {
		this.condition = condition;
		this.handler = handler;
	}

	// Forwards all arguments to the condition; wraps a truthy result in a Rule_Match,
	// otherwise returns undefined.
	match(...args) {
		const condition_match = this.condition.match(...args);
		if (!condition_match) {
			return;
		}
		return new Rule_Match(this, condition_match);
	}
}
// Match produced by a Sub_Scan_Rule; exposes the action and match of the candidate found by the sub system.
class Sub_Scan_Rule_Match {
	constructor(rule, sub_scan_candidate) {
		this.rule = rule;
		this.sub_scan_candidate = sub_scan_candidate;
	}

	// Action of the candidate's own match.
	get action() {
		const { match: candidate_match } = this.sub_scan_candidate;
		return candidate_match.action;
	}

	// The candidate's innermost match.
	get match() {
		const { match: candidate_match } = this.sub_scan_candidate;
		return candidate_match.match;
	}
}
// Rule that delegates matching to a nested sub system via its find_reduction_candidate method.
class Sub_Scan_Rule {
	constructor(sub_system) {
		this.sub_system = sub_system;
	}

	// Forwards all arguments to the sub system; wraps a truthy candidate in a
	// Sub_Scan_Rule_Match, otherwise returns undefined.
	match(...args) {
		const found = this.sub_system.find_reduction_candidate(...args);
		if (!found) {
			return;
		}
		return new Sub_Scan_Rule_Match(this, found);
	}
}
// Builds a Rule that matches `sequence` as a Sequence_Condition and replaces the matched
// span (match_start..match_end, inclusive) with transform_fn(...matched items).
// The handler receives a single record holding `sequence` and `match`.
function sequence_rule(sequence, transform_fn) {
	const condition = new R.Sequence_Condition(sequence);

	const replace_matched_span = ({sequence: items, match}) => {
		const { match_start: first, match_end: last } = match;
		const matched_items = items.slice(first, last + 1);
		items.splice(first, last - first + 1, transform_fn(...matched_items));
	};

	return new Rule(condition, replace_matched_span);
}
// Operand: a plain number or an already reduced BINOP node.
// `i?.type` keeps the predicate from throwing on null/undefined items, `===` avoids coercion.
const N = new R.Predicate((i) => typeof i === 'number' || i?.type === 'BINOP');
const CARET = new R.Strict_Equality('^');
const CARON = new R.Strict_Equality('ˇ');
const ASTERISK = new R.Strict_Equality('*');
const SLASH = new R.Strict_Equality('/');
const PLUS = new R.Strict_Equality('+');
const MINUS = new R.Strict_Equality('-');

// These are the outer settings
const rss = Reduction_Settings.load();

// These are the inner settings, shared by every sub system
const rss_inner = Reduction_Settings.load({
	reduction_order: 'POSITION_MAJOR',
});

const rs = new Reduction_Scanner(rss);
// Local factory: wraps the given rules in their own scanner (inner settings with `rules` replaced)
// and exposes that scanner as a single Sub_Scan_Rule.
function sub_system(...rules) {
	const scanner = new Reduction_Scanner({ ...rss_inner, rules });
	return new Sub_Scan_Rule(scanner);
}
// Transform for a binary operator: keeps both operands, drops the operator token itself.
const binop = (op) => (left, _operator, right) => ({ type: 'BINOP', op, operands: [left, right]});

// One sub system per operator group.
rss.rules.push(
	sub_system(
		sequence_rule([N, CARET, N], binop('CARET')),
		sequence_rule([N, CARON, N], binop('CARON')),
	),
	sub_system(
		sequence_rule([N, ASTERISK, N], binop('ASTERISK')),
		sequence_rule([N, SLASH, N], binop('SLASH')),
	),
	sub_system(
		sequence_rule([N, PLUS, N], binop('PLUS')),
		sequence_rule([N, MINUS, N], binop('MINUS')),
	),
);

const arr = [5, '-', 10, '^', 5, 'ˇ', 2, '+', 20, '*', 30];
const reduced = rs.transform(arr);
console.log(inspect(reduced, { colors: true, depth: null }));
/*
[
{
type: 'BINOP',
op: 'MINUS',
operands: [
5,
{
type: 'BINOP',
op: 'PLUS',
operands: [
{
type: 'BINOP',
op: 'CARON',
operands: [ { type: 'BINOP', op: 'CARET', operands: [ 10, 5 ] }, 2 ]
},
{ type: 'BINOP', op: 'ASTERISK', operands: [ 20, 30 ] }
]
}
]
}
]
*/

View File

@@ -12,4 +12,15 @@ rt.set_default_identifier('random stuff');
for (const m of rt.iter_matches('#Hello World!')) {
console.log({class: m.constructor.name, identifier: m.identifier, value: m.value, captured: m.captured });
};
};
console.log('--=| Slicing |=--')
for (const m of rt.iter_matches('#Hello World!', 3, -3)) {
//console.log(m, m.pending_index)
console.log({class: m.constructor.name, identifier: m.identifier, value: m.value, captured: m.captured });
};

23
experiments/table-1.mjs Normal file
View File

@@ -0,0 +1,23 @@
import { Row_Based_Table } from '@efforting.tech/table'
// Table with three columns, named through a comma-separated string.
const t = new Row_Based_Table({ column_names: 'SKU, Quantity, Price' });
//console.log(t.column_names_lut); /* { SKU: 0, Quantity: 1, Price: 2 } */
// Append two rows given as positional cell values.
t.push_rows(
['VOLVO-1', 3, 120_000],
['VOLVO-2', 4, 140_000],
)
// Row 0 is fetched with read_rows, row 1 with snapshot_rows.
// NOTE(review): presumably a live row versus a detached copy - confirm against the table package.
const [A] = t.read_rows(0);
const [B] = t.snapshot_rows(1);
console.log(A.value);
console.log(B.value);
// Update one named column of row A and print the row again to show the change.
A.update({Quantity: 5});
console.log(A.value);
// Iterate the table and print each row in its object form.
for (const r of t) {
console.log(r.object);
}

34
experiments/table-2.mjs Normal file
View File

@@ -0,0 +1,34 @@
import { Row_Based_Table } from '@efforting.tech/table';
import { load_raster_table } from '@efforting.tech/table/raster-table';
const t = load_raster_table(`
SKU Quantity Price
--- -------- -----
V-1 2 120_000
V-2 3 140_000
`, Row_Based_Table);
// Per-column converters from raw cell text to typed values.
// Built once instead of on every cell; parseInt gets an explicit radix.
const cell_translations = {
	SKU: (s) => s.trim(),
	Quantity: (q) => Number.parseInt(q, 10),
	Price: (p) => Number.parseFloat(p.replace(/_/g, '')),
};

// The callback receives { row_name, column_name, row_index, column_index, row, cell }
t.replace_all_cells(
	({column_name, cell}) => {
		// Own-property check so a column named e.g. "constructor" is not treated as a converter.
		if (!Object.hasOwn(cell_translations, column_name)) {
			return cell;
		}
		return cell_translations[column_name](cell);
	}
);
for (const r of t) {
console.log(r.object);
}
/*
{ SKU: 'V-1', Quantity: 2, Price: 120000 }
{ SKU: 'V-2', Quantity: 3, Price: 140000 }
*/

View File

@@ -1,3 +1,4 @@
# TODO: Add a way to help make sure we keep internal-dependencies up to date
scope: '@efforting.tech'
registry: 'https://npm.efforting.tech/'
version: 0.2.9
@@ -25,6 +26,24 @@ packages:
path: source/data
documentation: documentation/data
description: Data management
internal-dependencies:
- schema
table:
path: source/table
description: Table management
internal-dependencies:
- schema
- text
feature:
path: source/feature
description: Feature management
schema:
path: source/schema
#documentation: documentation/schema
description: Schema system
internal-dependencies:
- errors

5
package.json Normal file
View File

@@ -0,0 +1,5 @@
{
"dependencies": {
"@efforting.tech/experiments": "file:experiments"
}
}

View File

@@ -0,0 +1,79 @@
# Mathematical Expression Subsystem
> [!NOTE]
> This document is written by Claude by Anthropic using Sonnet 4.6 and has yet to be vetted by Mikael Lövqvist
## Overview
A math-like expression language built on top of the reduction scanner, supporting
operator notation, matrix literals, subscripts, superscripts, and symbolic operators.
## Operator Notation
Operators are identified by their symbol name rather than semantic meaning, since the
same symbol can mean different things depending on operand types:
- `*` (ASTERISK) — could be scalar multiplication, Hadamard product, or scale depending on types
- `·` (DOT) — dot product
- `×` (CROSS) — cross product
- `⊕` (OPLUS) — direct sum or XOR
Semantic resolution (e.g. `ASTERISK(matrix, matrix)` → Hadamard) is a separate
type-inference pass, not part of the structural reduction.
## ASCII Input for Special Symbols
LaTeX-inspired escape sequences for entering special symbols in plain ASCII:
- `\oplus` → ⊕
- `\times` → ×
- `\cdot` → ·
- `\otimes` → ⊗
`^` is reserved for superscript (not XOR), `_` for subscript. `S_12` reads as S₁₂.
## Matrix Literals
Single-line input using nested brackets:
```
[[1, 0, 0], [0, 1, 0], [0, 0, 1]]
```
Pretty-printed output using Unicode bracket characters:
```
⎡1 0 0⎤
⎢0 1 0⎥
⎣0 0 1⎦
```
## 2D Raster Reduction Scanner
For parsing pretty-printed multi-line matrix literals within larger expressions like
`M + 2 * N` where M and N are written in 2D notation, a raster-based reduction pass
is needed before the standard 1D reduction pass.
### Approach
1. **Raster pass first** — operate on a 2D grid of characters
2. Locate matrix corner anchors `⎡⎤⎣⎦` — these are highly selective so candidate
detection is cheap
3. Scan right for `⎤`, down for `⎣`, verify `⎦` at intersection
4. Use `⎢`/`⎥` to identify row boundaries within the region
5. Collapse the identified rectangle into a single matrix token
6. **1D pass second** — the surrounding expression now contains ordinary tokens and
the collapsed matrix nodes, reducible by standard rules
### Scope Boundaries
Fraction bars define containment — a matrix appearing in a numerator or denominator
is only part of that sub-expression. The horizontal extent of the fraction bar bounds
the operand scan. Containment must be resolved outside-in: find outermost structure
first, recurse into sub-regions.
### Generalization
A 2D reduction scanner is a natural generalization of the 1D scanner — the "sequence"
becomes a 2D array and conditions match spatial patterns rather than linear ones.
The same anchor-point and backtracking concepts apply.

View File

@@ -0,0 +1,31 @@
/**
 * Iterator facade whose underlying iterator can be swapped while it is being consumed.
 *
 * `next()` and `peek()` are forwarded to whichever iterator is current, and the object
 * is its own iterable so it can be used directly in `for...of`.
 */
export class Switchable_Iterator {
	/**
	 * @param iterator - initial underlying iterator (may be null until switch_to/push is called)
	 * @param stack - previously active iterators, most recent last
	 */
	constructor(iterator=null, stack=[]) {
		Object.assign(this, { iterator, stack });
	}

	/** Remembers the current iterator on the stack and makes `iterator` current. */
	push(iterator) {
		this.stack.push(this.iterator);
		this.switch_to(iterator);
	}

	/** Makes the most recently pushed iterator current again. Takes no arguments. */
	pop() {
		this.switch_to(this.stack.pop());
	}

	/** Replaces the current iterator without touching the stack. */
	switch_to(iterator) {
		this.iterator = iterator;
	}

	/** Forwards to the current iterator's next(). */
	next() {
		return this.iterator.next();
	}

	/** Forwards to the current iterator's peek(); the current iterator must provide one. */
	peek() {
		return this.iterator.peek();
	}

	[Symbol.iterator]() {
		return this;
	}
}

View File

@@ -0,0 +1,9 @@
// Copies onto `target` every own enumerable string-keyed property of `source` whose value is not undefined.
export function assign_defined(target, source) {
	const defined_entries = Object.entries(source).filter((entry) => entry[1] !== undefined);
	const defined_properties = Object.fromEntries(defined_entries);
	Object.assign(target, defined_properties);
}
// Copies onto `target` the entries of `source` accepted by `kv_predicate`.
// The predicate is used as an Array.prototype.filter callback, so its first argument is the [key, value] pair.
export function assign_using_predicate(target, source, kv_predicate) {
	const accepted_entries = Object.entries(source).filter(kv_predicate);
	const accepted_properties = Object.fromEntries(accepted_entries);
	Object.assign(target, accepted_properties);
}

54
source/data/stack.mjs Normal file
View File

@@ -0,0 +1,54 @@
// Sentinel meaning "this key should not exist": as an update value it deletes the key,
// inside a frame it records that the key was absent before the push.
export const DELETE_PROPERTY = Symbol('DELETE_PROPERTY');

/**
 * Applies reversible batches of property updates to a target object.
 *
 * Each push() writes the updates onto `target` and records, per touched key, what has to
 * be restored (the previous value, or DELETE_PROPERTY when the key did not exist).
 * pop() replays the most recent record to undo that batch.
 */
export class String_Keyed_Stack {
	/**
	 * @param target - object that is modified in place
	 * @param stack - recorded frames, most recent last
	 */
	constructor(target={}, stack=[]) {
		Object.assign(this, { target, stack });
	}

	/**
	 * Applies `updates` to the target and records how to undo them.
	 * @param updates - key/value pairs; a DELETE_PROPERTY value removes the key
	 * @returns the recorded frame (key → value to restore on pop)
	 */
	push(updates={}) {
		const frame = {};
		this.stack.push(frame);

		for (const [key, value] of Object.entries(updates)) {
			// Record the state before touching the key
			if (key in this.target) {
				frame[key] = this.target[key];
			} else {
				frame[key] = DELETE_PROPERTY;
			}

			if (value === DELETE_PROPERTY) {
				delete this.target[key];
			} else {
				this.target[key] = value;
			}
		}
		return frame;
	}

	/**
	 * Like push() but ignores entries whose value is undefined.
	 * @returns the recorded frame, same as push()
	 */
	push_defined(updates={}) {
		return this.push(Object.fromEntries(Object.entries(updates).filter(([k ,v]) => v !== undefined )));
	}

	/**
	 * Undoes the most recent push.
	 * @param copy_previous_state - when true, a shallow copy of the target taken before restoring is returned
	 * @returns that shallow copy, or null when no copy was requested
	 */
	pop(copy_previous_state=false) {
		const frame = this.stack.pop();
		const { target } = this;
		const return_value = copy_previous_state ? { ...target } : null;

		for (const [key, value] of Object.entries(frame)) {
			if (value === DELETE_PROPERTY) {
				delete target[key];
			} else {
				target[key] = value;
			}
		}
		return return_value;
	}

	/** The most recently recorded frame, i.e. what the next pop() will restore. */
	get top_reverse_delta() {
		return this.stack.at(-1);
	}
}

View File

@@ -1,4 +1,4 @@
import * as CF from '@efforting.tech/data/field-configuration-factories';
import * as CF from '@efforting.tech/schema/field-configuration-factories';
export const Indention_Mode = new CF.symbol_set({

View File

@@ -6,7 +6,7 @@ import { inspect } from 'node:util';
export class Tokenization_Error extends Error {
constructor(data) {
const { parser, value, index, end_index } = data;
const {parser, text, start_position, end_position, match_start, match_end, value} = data;
super(`Tokenization_Error`); //TODO: Format message
this.data = data;
}

7
source/feature/stub.mjs Normal file
View File

@@ -0,0 +1,7 @@
/**
 * Creates a placeholder for a feature that must be enabled explicitly.
 * The returned function always throws an Error telling the caller how to enable the feature.
 *
 * @param meta - object with a `url` property, e.g. `import.meta` of the declaring module
 * @param name - feature name shown in the error
 * @param description - human readable description (currently not used in the message)
 * @param module_name - module the enabling function is imported from
 * @param function_name - name of the enabling function
 * @returns a function that throws when called
 */
export function Stub(meta, name, description, module_name, function_name) {
	return function stub() {
		// `this` is undefined when the stub is called detached from its owner; fall back to a
		// generic label so the explanatory error is raised instead of a TypeError.
		const owner_name = this?.name ?? '<owner>';
		throw new Error(`The feature "${name}" of "${meta.url}" is not enabled. Enable it by calling "${function_name}(${owner_name})" imported from "${module_name}"`); //TODO - specific error
	}
}

View File

@@ -0,0 +1,100 @@
import { RegExp_Token_Rule } from '@efforting.tech/parsing/regexp-dispatch';
import { Switchable_Iterator } from '@efforting.tech/data/iteration-utilities';
import { String_Keyed_Stack } from '@efforting.tech/data/stack';
import { assign_defined } from '@efforting.tech/data/object-utilities';
import * as F from '@efforting.tech/schema/field-configuration-factories';
// RegExp token rule that additionally carries an `action` property.
export class RegExp_Token_Parsing_Rule extends RegExp_Token_Rule {
	constructor(pattern, action, identifier=undefined) {
		super(pattern, identifier);
		this.action = action;
	}
}
// Schema describing a parser's mutable state; each field declares its default (or default factory) and a description.
export const Parser_State = new F.Schema({
position: F.value(0, 'Pending position in source'),
value: F.factory(() => [], 'Pending value to return'),
sub_tokenizer_handlers: F.factory(() => [], 'Pending sub tokenizer handlers'),
tokenizer: F.value(null, 'Current tokenizer'),
context: F.value(null, 'User supplied context'),
}, 'Parser state');
/**
 * Drives a tokenizer over `source` and runs the `action` of each matched rule.
 *
 * Actions receive the parser itself and build up `state.value` through push_token /
 * replace_value. They can open and close nested levels with enter_sub_tokenizer /
 * leave_sub_tokenizer.
 */
export class Parser {
	/**
	 * @param source - text handed to the tokenizer
	 * @param state - initial values loaded through Parser_State; a tokenizer is needed
	 *   because the constructor immediately starts iterating matches
	 */
	constructor(source, state=undefined) {
		state = Parser_State.load(state);
		const token_generator = new Switchable_Iterator();
		const stack = new String_Keyed_Stack(state);

		Object.assign(this, { source, state, stack, token_generator });
		this.switch_to();
	}

	/**
	 * Restarts match iteration, optionally with another tokenizer and/or position.
	 * Arguments left undefined keep the current state values.
	 */
	switch_to(tokenizer=undefined, position=undefined) {
		assign_defined(this.state, { tokenizer, position });
		this.token_generator.switch_to(this.state.tokenizer.iter_matches(this.source, this.state.position));
	}

	/**
	 * Consumes all matches, running each rule's action.
	 * @param handler - optional `(parser, value)` callback invoked once iteration has finished
	 * @returns `state.value` as it is after the optional handler has run
	 */
	parse(handler=undefined) {
		for (const match of this.token_generator) {
			const { action } = match.rule;
			if (!action) { continue; }	// rules without action are skipped

			if (typeof action !== 'function') { //TODO - proper error (possibly a warning, the warning is nice when you are developing, have to think about this one)
				console.log('NOT IMPLEMENTED', match.rule.action);
				continue;
			}

			// Advance before running the action so a switch_to() inside it resumes after this match
			this.state.position = match.pending_index;
			this.state.match = match;
			action(this, match);
		}

		if (handler) {
			this.state.match = null; //TODO: Decide if we should reset match here or not
			handler(this, this.state.value);
		}
		return this.state.value;
	}

	/** Appends tokens to the value being built. */
	push_token(...tokens) {
		this.state.value.push(...tokens);
	}

	/** Replaces the value being built. */
	replace_value(value) {
		this.state.value = value;
	}

	/**
	 * Opens a nested level with a fresh value array and, when given, another tokenizer.
	 * @param tokenizer - tokenizer for the nested level (current one is kept when undefined)
	 * @param handler - optional `(parser, value, egress_match)` callback run when this level is left
	 */
	enter_sub_tokenizer(tokenizer=undefined, handler=undefined) {
		this.stack.push_defined({ tokenizer, value: [] });
		// Always record an entry, even without a handler, so that each level pops exactly its
		// own entry on leave and never the handler of an enclosing level.
		this.state.sub_tokenizer_handlers.push(handler ?? null);
		this.switch_to(tokenizer);
	}

	/**
	 * Closes the current nested level and restores the enclosing one.
	 * The nested value goes to this level's handler, or is pushed as a single token when
	 * the level was entered without a handler.
	 * @param egress_match - match that closed the level, forwarded to the handler
	 */
	leave_sub_tokenizer(egress_match=null) {
		const frame = this.stack.pop(true);
		const handler = this.state.sub_tokenizer_handlers.pop();

		if (handler) {
			this.state.match = null; //TODO: Decide if we should reset match here or not
			handler(this, frame.value, egress_match);
		} else {
			this.push_token(frame.value);
		}
		this.switch_to();
	}
}

View File

@@ -7,13 +7,24 @@ import { Tokenization_Error } from '@efforting.tech/errors';
//
// Specifically it is not currently decided where the boundary between rule/action/capture should be
// Resolves slice-style bounds against `text`: negative positions count back from the end
// and are clamped at 0. A null/undefined end stays undefined.
function normalize_bounds(text, start_position, end_position) {
	const { length } = text;
	const resolve = (position) => (position < 0 ? Math.max(0, length + position) : position);

	const norm_start = resolve(start_position);
	// Loose comparison on purpose: matches both null and undefined
	const norm_end = end_position == undefined ? undefined : resolve(end_position);
	return [norm_start, norm_end];
}
export class Pattern_Match {
constructor(match, rule) {
Object.assign(this, { match, rule });
constructor(text, start_position, end_position, match, rule) {
// Normalize positions
[start_position, end_position] = normalize_bounds(text, start_position, end_position);
Object.assign(this, { text, start_position, end_position, match, rule });
}
get identifier() {
return this.rule. identifier;
return this.rule.identifier;
}
get value() {
@@ -24,28 +35,34 @@ export class Pattern_Match {
return this.match.slice(1);
}
get pending_index() {
return this.match.index + this.match[0].length;
get absolute_start() {
return this.match.index + this.start_position;
}
get absolute_end() {
return this.match.index + this.start_position + this.match[0].length - 1;
}
get pending_index() {
return this.match.index + this.start_position + this.match[0].length;
}
}
export class Default_Match {
constructor(text, index, end_index, action) {
const identifier = action(this);
Object.assign(this, { text, index, end_index, action, identifier });
}
//TBD: Here we invoke action while creating this object, and assign the identifier but we don't do that on Pattern_Match - this feels a bit sketchy
constructor(text, start_position, end_position, match_start, match_end, value, action) {
// Normalize positions
[start_position, end_position] = normalize_bounds(text, start_position, end_position);
[match_start, match_end] = normalize_bounds(text, match_start, match_end);
get value() {
return this.text;
const identifier = action(this); //TODO: action protocol in accordance with issue #5
Object.assign(this, { text, start_position, end_position, match_start, match_end, value, action, identifier });
}
get pending_index() {
if (this.end_index === null) {
return null;
} else {
return this.end_index;
// CLARIFICATION: loose inequality ( != ) matches null and undefined but not false/0/'' but we use strict for start_position since it is always a number
if (this.match_end != undefined) {
return this.match_end + 1;
}
}
}
@@ -70,12 +87,13 @@ export class Abstract_RegExp_Token_Rule {
}
export class RegExp_Token_Rule extends Abstract_RegExp_Token_Rule {
constructor(pattern, identifier=this) {
constructor(pattern, identifier=undefined) {
super(pattern);
Object.assign(this, { identifier });
Object.assign(this, { identifier: identifier ?? this });
}
}
export class RegExp_Tokenizer {
constructor(rules=[], default_action=undefined) {
Object.assign(this, { rules, default_action });
@@ -91,53 +109,58 @@ export class RegExp_Tokenizer {
this.rules.push(...rules_to_add);
}
immediate_match(text, position=0) {
immediate_match(text, start_position=0, end_position=undefined) {
// CLARIFICATION: loose inequality ( != ) matches null and undefined but not false/0/'' but we use strict for start_position since it is always a number
const bounded = start_position !== 0 || end_position != undefined;
const text_to_search = bounded ? text.slice(start_position, end_position) : text;
for (const rule of this.rules) {
const pattern = rule.immediate_pattern;
pattern.lastIndex = position;
const match = pattern.exec(text);
pattern.lastIndex = 0;
const match = pattern.exec(text_to_search);
if (match) {
return new Pattern_Match(match, rule);
return new Pattern_Match(text, start_position, end_position, match, rule);
}
}
}
_handle_default_match(value, index, end_index=null) {
_handle_default_match(text, start_position, end_position, match_start, match_end, value) {
const { default_action } = this;
if (!default_action) {
throw new Tokenization_Error({ parser: this, value, index, end_index });
throw new Tokenization_Error({ parser: this, text, start_position, end_position, match_start, match_end, value });
}
return new Default_Match(value, index, end_index, default_action);
return new Default_Match(text, start_position, end_position, match_start, match_end, value, default_action);
}
closest_scanning_match(text, position=0) {
const immediate_match = this.immediate_match(text, position);
closest_scanning_match(text, start_position=0, end_position=undefined) {
const immediate_match = this.immediate_match(text, start_position, end_position);
if (immediate_match) {
return immediate_match;
}
let best_candidate;
for (const candidate of this.iter_scanning_rule_candidates(text, position)) {
if ((best_candidate === undefined) || (best_candidate.match.index > candidate.match.index)) {
for (const candidate of this.iter_scanning_rule_candidates(text, start_position, end_position)) {
if ((best_candidate === undefined) || (best_candidate.absolute_start > candidate.absolute_start)) {
best_candidate = candidate;
}
}
// There was no match, just get the tail
if (!best_candidate) {
const tail = text.slice(position);
const tail = text.slice(start_position);
if (tail.length) {
return this._handle_default_match(tail, position);
return this._handle_default_match(text, start_position, end_position, start_position, end_position, tail);
}
}
// There was a match, check the head
if (best_candidate) {
const head = text.slice(position, best_candidate.match.index);
const head = text.slice(start_position, best_candidate.absolute_start);
if (head.length) {
return this._handle_default_match(head, position, best_candidate.match.index);
return this._handle_default_match(text, start_position, end_position, start_position, best_candidate.absolute_start - 1, head);
}
}
@@ -146,32 +169,41 @@ export class RegExp_Tokenizer {
}
*iter_scanning_rule_candidates(text, position=0) {
// Iterates over all rules and yields any matches found anywhere (but only once per rule)
*iter_scanning_rule_candidates(text, start_position=0, end_position=undefined) {
// CLARIFICATION: loose inequality ( != ) matches null and undefined but not false/0/'' but we use strict for start_position since it is always a number
const bounded = start_position !== 0 || end_position != undefined;
const text_to_search = bounded ? text.slice(start_position, end_position) : text;
// Iterates over all rules and yields any matches found anywhere (but only once per rule)
for (const rule of this.rules) {
const pattern = rule.scanning_pattern;
pattern.lastIndex = position;
const match = pattern.exec(text);
pattern.lastIndex = 0;
const match = pattern.exec(text_to_search);
if (match) {
yield new Pattern_Match(match, rule);
yield new Pattern_Match(text, start_position, end_position, match, rule);
}
}
}
*iter_matches(text, position=0) {
*iter_matches(text, start_position=0, end_position=undefined) {
// Normalize positions
[start_position, end_position] = normalize_bounds(text, start_position, end_position);
while (true) {
const pending = this.closest_scanning_match(text, position);
const pending = this.closest_scanning_match(text, start_position, end_position);
if (pending) {
yield pending;
}
if (!pending || pending.pending_index === null) {
// CLARIFICATION: loose equality ( == ) matches null and undefined but not false/0/'' but we use strict for start_position since it is always a number
if (!pending || pending.pending_index == null || pending.pending_index === end_position ) {
break;
}
position = pending.pending_index;
start_position = pending.pending_index;
}
}

View File

@@ -1,7 +1,5 @@
import { Reduction_Contract, FPR_Contract } from './contracts.mjs';
import * as CF from '@efforting.tech/data/field-configuration-factories';
import * as CF from '@efforting.tech/schema/field-configuration-factories';
export const Reduction_Order = new CF.symbol_set({
@@ -25,6 +23,8 @@ export const FP_Reduction_Settings = new CF.Schema({
//TODO - we should probably have a pre-defined record shape as argument for actions and such rather than using an ever growing list of positionals or an anonymous Object()
export class Reduction_Scanner {
static settings_schema = Reduction_Settings;
@@ -38,38 +38,58 @@ export class Reduction_Scanner {
}
}
perform_reduction(sequence) {
find_reduction_candidate(sequence) {
const { settings } = this;
switch (settings.reduction_order) {
case Reduction_Order.symbols.RULE_MAJOR:
for (const rule of settings.rules) {
for (let start_index=0; start_index < sequence.length; start_index++) {
const match = rule.match(sequence, start_index);
if (match) {
rule.action(this, sequence, match);
return true;
}
const match = rule.match(sequence);
if (match) {
return { sequence, rule, match };
}
}
return false;
return;
case Reduction_Order.symbols.POSITION_MAJOR:
for (let start_index=0; start_index < sequence.length; start_index++) {
for (const rule of settings.rules) {
const match = rule.match(sequence, start_index);
if (match) {
rule.action(this, sequence, match);
return true;
let best_match, best_rule;
for (const rule of settings.rules) {
const match = rule.match(sequence);
if (match) {
if (!best_match || best_match.match_start > match.match_start) {
//TODO - early return if start of sequence
best_match = match;
best_rule = rule;
}
}
}
return false;
if (best_match) {
return { sequence, rule: best_rule, match: best_match };
}
return;
default:
throw new Error(`Unknown reduction order: ${this.reduction_order}`); //TODO: Force invalid configuration error
}
}
perform_reduction(sequence) {
const candidate = this.find_reduction_candidate(sequence);
if (candidate) {
const { sequence, rule, match } = candidate;
//console.log('ACT', match.match)
match.action({ reduction_system: this, rule, sequence, match: match.match });
return true;
} else {
return false;
}
}
clear_transform_state() {

View File

@@ -1,5 +1,7 @@
import { Item_Unresolvable } from '@efforting.tech/errors';
//TODO: Should this be integrated with rules.mjs for predicates? Possibly a normalization step during rule insertion.
export class Abstract_Resolver {
resolve(item, extra_info={}) {
const result = this.resolve_handler(item, extra_info);

View File

@@ -0,0 +1,108 @@
// Module private sentinel symbol.
// NOTE(review): nothing in this module references it yet - presumably reserved for letting a condition abort sequence matching, confirm before relying on it.
const ABORT_SEQUENCE = Symbol('ABORT_SEQUENCE');
// Base class for conditions that test exactly one item of a sequence.
// Subclasses implement match_value(item, context) which returns a match object or undefined.
export class Abstract_Item_Condition {

	// Tests the item at context.start_index (defaults to 0) of sequence.
	// Returns whatever match_value() returns, or undefined when that index does not address an item of the sequence.
	match(sequence, context={}) {
		const item_index = context.start_index ?? 0;

		// An item condition needs an actual item. Without this guard an index past the end hands `undefined` to
		// match_value(), which negated predicates and checks for `undefined` happily accept - producing a match
		// that points outside the sequence. The negated form also rejects NaN.
		if (!(item_index >= 0 && item_index < sequence.length)) {
			return undefined;
		}

		return this.match_value(sequence[item_index], context);
	}

}
// Base class for conditions that match a run of items instead of a single item (Sequence_Condition extends it).
// It declares no members of its own.
export class Abstract_Sequence_Condition {
}
// Plain record describing a successful match.
// Holds the rule (condition) that matched, the matched value and every own enumerable property of context.
// Since context is copied last, a context key named `rule` or `value` overrides the positional argument.
export class Match {
	constructor(rule, value, context) {
		Object.assign(this, { rule, value }, context);
	}
}
// Item condition backed by a user supplied function: the item matches when predicate(item, context) is truthy.
export class Predicate extends Abstract_Item_Condition {

	constructor(predicate) {
		super();
		this.predicate = predicate;
	}

	// Returns a Match covering the single position context.start_index (0 when absent), or undefined if the predicate rejects the item.
	match_value(item, context={}) {
		const accepted = this.predicate(item, context);
		if (!accepted) {
			return undefined;
		}
		const position = context.start_index ?? 0;
		return new Match(this, item, { ...context, match_start: position, match_end: position });
	}

}
// Item condition that accepts items whose `typeof` result equals the configured type string.
export class Type_Is extends Abstract_Item_Condition {

	constructor(type) {
		super();
		this.type = type;
	}

	// Returns a Match covering the single position context.start_index (0 when absent), or undefined on a type mismatch.
	match_value(item, context={}) {
		if (typeof item !== this.type) {
			return undefined;
		}
		const position = context.start_index ?? 0;
		return new Match(this, item, { ...context, match_start: position, match_end: position });
	}

}
// Item condition that accepts items strictly equal ( === ) to the configured value.
export class Strict_Equality extends Abstract_Item_Condition {

	constructor(value) {
		super();
		this.value = value;
	}

	// Returns a Match covering the single position context.start_index (0 when absent), or undefined when the item differs.
	match_value(item, context={}) {
		if (item !== this.value) {
			return undefined;
		}
		const position = context.start_index ?? 0;
		return new Match(this, item, { ...context, match_start: position, match_end: position });
	}

}
// Condition that matches when all of its sub conditions match back to back.
// Each sub condition is asked to match at the position right after the previous sub match (match_end + 1).
export class Sequence_Condition extends Abstract_Sequence_Condition {

	constructor(sequence) {
		super();
		this.sequence = sequence;
	}

	// Scans sequence from context.start_index (default 0) up to and including context.end_index (default last index)
	// and returns a Match for the first position where the whole pattern matches, or undefined when there is none.
	// The value of the Match is the array of sub matches in pattern order.
	match(sequence, context={}) {
		//TODO: For anchors we need anchor_start_index and anchor_end_index (compare with regexp ^ and $)
		const first_candidate = context.start_index ?? 0;
		const last_candidate = context.end_index ?? sequence.length - 1;

		// Tries the whole pattern with its first sub condition at `origin`.
		// Returns the list of sub matches or undefined as soon as one sub condition fails.
		//TODO - There are plenty of optimizations to be implemented here but we must be sure they are correct - we will start naively
		const match_at = (origin) => {
			const collected = [];
			let cursor = origin;
			for (const sub_condition of this.sequence) {
				const sub_match = sub_condition.match(sequence, { ...context, start_index: cursor });
				if (!sub_match) {
					return undefined;
				}
				collected.push(sub_match);
				cursor = sub_match.match_end + 1;
			}
			return collected;
		};

		for (let origin = first_candidate; origin <= last_candidate; origin++) {
			const sub_matches = match_at(origin);
			if (sub_matches) {
				// NOTE: If result is empty array which can be a positive match, match_start and match_end will be undefined which is by design
				const match_start = sub_matches.at(0)?.match_start;
				const match_end = sub_matches.at(-1)?.match_end;
				return new Match(this, sub_matches, { match_start, match_end, ...context });
			}
		}
	}

}

View File

@@ -1,4 +1,4 @@
import * as CF from '@efforting.tech/data/field-configuration-factories';
import * as CF from '@efforting.tech/schema/field-configuration-factories';
export const Object_Serialization_Strategy = new CF.Schema({

View File

@@ -0,0 +1,46 @@
import { tabs_to_spaces } from '@efforting.tech/text/text-utilities';
// Whole table: (1) a header line made of blanks, word characters and hyphens, (2) a separator line made of blanks and hyphens,
// (3) everything that follows. The `m` flag lets the header start at the beginning of any line of the input.
const raster_table_pattern = /^((?:[^\S\n]|[\w-])+)\n((?:[^\S\n]|-)+)\n(.+)/ms;
// One column name of the header line - names are separated by at least two whitespace characters. `d` records the start index of each name.
const column_pattern = /\s*(.+?)(?=\s{2,}|$)/gd;
// One non empty line of the table body
const row_pattern = /^(.+)$/mg;

// Parses a text table whose columns are aligned to the positions of the names in its header line.
//
//   raster        Text containing the header line, the separator line and the rows.
//   loader        Constructor called as `new loader({ rows, column_names })`. If loader is null we return a raw
//                 representation `{ column_names, column_positions, rows }` instead.
//   tab_width     Tab stop distance used when the text contains tabs (they are expanded by tabs_to_spaces).
//   null_padding  When true each '〃' gets a NUL prepended before slicing and all NULs are removed from the cells again.
//                 NOTE(review): presumably this makes string indices line up with display columns for the double width ditto mark - confirm.
//
// Each cell is the slice of the row from its column start to the next column start and is NOT trimmed.
// Throws an Error when no table can be found in the text.
export function load_raster_table(raster, loader=null, tab_width=4, null_padding=true) {

	let text = raster;

	if (text.includes('\t')) {
		text = tabs_to_spaces(text, tab_width);
	}

	if (null_padding) {	//TODO - this is just experimental proof of concept
		text = text.replace(/〃/g, '\0〃');
	}

	const table_match = text.match(raster_table_pattern);
	if (!table_match) {
		throw new Error('Could not find a raster table (header line, separator line and rows) in the given text');	//TODO - proper error
	}

	const [, header_line, , body] = table_match;

	const column_matches = [...header_line.matchAll(column_pattern)];
	const column_positions = column_matches.map(cm => cm.indices[1][0]);
	const column_names = column_matches.map(cm => cm[1]);

	const rows = [];
	for (const [, line] of body.matchAll(row_pattern)) {
		const pending_row = [];
		for (let ci=0; ci<column_positions.length; ci++) {
			// The last column has no successor, slice(start, undefined) takes the rest of the line
			const cell = line.slice(column_positions[ci], column_positions[ci+1]);
			// A cell may hold several padded characters so every NUL must go, not only the first one
			pending_row.push(null_padding ? cell.replace(/\0/g, '') : cell);
		}
		rows.push(pending_row);
	}

	if (loader) {
		return new loader({ rows, column_names });
	} else {
		return {column_names, column_positions, rows};
	}

}
//TODO: load_ditto_raster_table

165
source/table/table.mjs Normal file
View File

@@ -0,0 +1,165 @@
import * as F from '@efforting.tech/schema/field-configuration-factories';
import { parse_csv } from '@efforting.tech/data/string-utilities';
// Name lists may be given either as a ready made list or as one CSV string
const names_from_csv_or_list = (v) => typeof v === 'string' ? parse_csv(v) : v;
// Default factory: every table gets its own fresh empty list
const new_empty_list = () => [];

// Settings accepted by the Row_Based_Table constructor
export const Row_Based_Table_Settings = new F.Schema({
	rows: F.factory(new_empty_list, 'Rows to initialize table with'),
	row_names: F.typed_factory(names_from_csv_or_list, new_empty_list, 'Names of rows'),
	column_names: F.typed_factory(names_from_csv_or_list, new_empty_list, 'Names of columns'),
}, 'Row based table settings');
// Handle to one row of a table, identified by its index.
// A reference is either live (snapshot is null, reads and writes go to the table) or a snapshot (reads return the stored copy, writes are refused).
export class Table_Row_Reference {

	constructor(table, index, snapshot=null) {
		this.table = table;
		this.index = index;
		this.snapshot = snapshot;
	}

	// The row as a list of cells: the snapshot if there is one, otherwise whatever the table currently holds at index
	get value() {
		return this.snapshot ?? this.table.read_row(this.index);
	}

	// The row as an object keyed by the column names of the table
	get object() {
		const names = this.table.column_names;
		const cells = this.snapshot ?? this.table.read_row(this.index);
		//TODO - check shape of column_names vs the value
		const entries = cells.map((cell, column_index) => [names[column_index], cell]);
		return Object.fromEntries(entries);
	}

	// Forwards updates to table.write_row() for this index. Throws for snapshot references.
	update(updates) {
		if (this.snapshot) {
			throw new Error('Can not update snapshot references, clear the snapshot to reuse this index');	//TODO - proper error
		}
		this.table.write_row(this.index, updates);
	}

}
// Mutable table stored as a list of rows (each row a list of cells) with optional row and column names.
// Names are kept both as lists (row_names, column_names) and as name -> index lookup objects (row_names_lut, column_names_lut).
export class Row_Based_Table {

	// settings is loaded through Row_Based_Table_Settings and provides rows, column_names and row_names
	constructor(settings) {
		const { rows, column_names, row_names } = Row_Based_Table_Settings.load(settings);
		Object.assign(this, { rows });
		this.set_column_names(...column_names);
		this.set_row_names(...row_names);
	}

	// General operations

	// Length of the first row, falling back to the number of column names. undefined when both are missing or zero.
	get width() {
		const { rows, column_names } = this;
		return rows[0]?.length || column_names.length || undefined;
	}

	// Number of rows
	get length() {
		const { rows } = this;
		return rows.length;
	}

	// [width, length]
	get size() {
		const { width, length } = this;
		return [width, length];
	}

	// Replaces every row with a new row built from the return values of
	// replacement_fn({ row_name, column_name, row_index, column_index, row, cell }).
	// `row` is the row as it was before replacement.
	replace_all_cells(replacement_fn) {
		for (const [row_index, row] of this.rows.entries()) {
			const row_name = this.row_names[row_index];
			const pending_row = [];
			for (const [column_index, cell] of row.entries()) {
				const column_name = this.column_names[column_index];
				pending_row.push(replacement_fn({ row_name, column_name, row_index, column_index, row, cell }));
			}
			this.rows[row_index] = pending_row;
		}
	}

	// TODO: Make sure column and row operations covers the same uses
	// TODO: Sub table operations

	// Column operations

	// Replaces all column names and rebuilds the name -> index lookup
	set_column_names(...names) {
		this.column_names = names;
		this.column_names_lut = Object.fromEntries(names.map((name, index) => [name, index]));
	}

	// Row operations

	// Replaces all row names and rebuilds the name -> index lookup
	set_row_names(...names) {
		this.row_names = names;
		this.row_names_lut = Object.fromEntries(names.map((name, index) => [name, index]));
	}

	// Returns one live Table_Row_Reference per given index
	read_rows(...indices) {
		return indices.map((index) => new Table_Row_Reference(this, index));
	}

	// Returns one snapshot Table_Row_Reference per given index
	snapshot_rows(...indices) {
		//NOTE: Snapshot doesn't include current layout or other settings, just the contents of the row at the time, and it currently is a shallow copy
		return indices.map((index) => new Table_Row_Reference(this, index, [...this.rows[index]]));
	}

	// Appends the given rows
	push_rows(...rows) {
		//TODO - verify shape
		this.rows.push(...rows);
	}

	// Returns the stored row itself (not a copy), undefined if there is no such row
	read_row(index) {
		return this.rows[index];
	}

	// Writes to the row at index.
	//   - A list replaces the whole row (and may create a row at a new index).
	//   - Any other object updates individual cells in place, its keys being column names.
	// Throws for unknown column names, and for the object form when the row does not exist.
	// Cells written before an unknown column name is encountered stay written.
	write_row(index, row_data) {
		const { rows, column_names_lut } = this;

		if (Array.isArray(row_data)) {
			//TODO - shape check
			rows[index] = row_data;
			return;
		}

		//TODO - possibly allow array of array for using numerical indices
		const work_row = rows[index];
		if (work_row == null) {
			throw new Error(`Unknown row: ${index}`);	//TODO - proper error
		}

		for (const [key, value] of Object.entries(row_data)) {
			// The lookup is a plain object, so only own keys count. Otherwise names such as "constructor" or "toString"
			// resolve to inherited members and the value ends up in a junk property of the row.
			if (!Object.hasOwn(column_names_lut, key)) {
				throw new Error(`Unknown column: ${key}`);	//TODO - proper error
			}
			work_row[column_names_lut[key]] = value;
		}
	}

	//TODO: Implement map and other functions expected by collections

	// Iterating the table yields a live Table_Row_Reference for every row
	*[Symbol.iterator]() {
		for (const index of this.rows.keys()) {
			yield new Table_Row_Reference(this, index);
		}
	}

	// Yields every row as an object keyed by column name
	*iter_objects() {
		for (const index of this.rows.keys()) {
			yield new Table_Row_Reference(this, index).object;
		}
	}

}

View File

@@ -1,6 +1,5 @@
import { string_has_contents, indented_line_iterator } from '@efforting.tech/data/string-utilities';
import * as CF from '@efforting.tech/data/field-configuration-factories';
import { Text_Settings } from '@efforting.tech/data/string-utilities';
import { Text_Settings, string_has_contents, indented_line_iterator } from '@efforting.tech/data/string-utilities';
import * as CF from '@efforting.tech/schema/field-configuration-factories';
export const Text_Tree_Settings = new CF.Schema({
//BUG - there is currently no way (I think) to put defaults into a sub schema - this should be fixed

View File

@@ -0,0 +1,30 @@
// Number of columns a character advances the column counter in tabs_to_spaces.
// Characters that are not listed here count as one column.
export const char_width_lut = {
'〃': 2,
}
// Expands every tab in text to the number of spaces needed to reach the next multiple of tab_width.
// The column counter restarts after each '\n' and advances by char_width_lut[char] (or 1) for every other character.
//NOTE - this one only supports posix newlines, this should be fixed later
export function tabs_to_spaces(text, tab_width=4) {
	let expanded = '';
	let column = 0;

	for (const char of text) {
		if (char === '\t') {
			const next_tab_stop = (Math.floor(column / tab_width) + 1) * tab_width;
			expanded += ' '.repeat(next_tab_stop - column);
			column = next_tab_stop;
			continue;
		}

		if (char === '\n') {
			expanded += '\n';
			column = 0;
			continue;
		}

		column += char_width_lut[char] ?? 1;
		expanded += char;
	}

	return expanded;
}

View File

@@ -88,6 +88,9 @@ for (const [package_name, package_data] of Object.entries(manifest.packages)) {
const linked_docs = pkg.documentation ? link_tree(pkg.documentation, pkg_dir).map(p => path.relative(pkg_dir, p)) : [];
//console.log('DOCS', { linked_docs });
// A file named after its package (e.g. table.mjs in the table package) becomes the root export '.'
// All other files become named subpath exports e.g. './raster-table'
const exports_map = {};
for (const file of linked_sources) {
const name = path.basename(file, '.mjs');
@@ -145,11 +148,15 @@ const publish_script = (
`${publish_script_lines.join('\n')}\n`
);
const repo_root = path.resolve(output_directory, '../..');
const experiments_link_line = `mkdir -p "${repo_root}/experiments/node_modules"\nln -sf "../../build/packages" "${repo_root}/experiments/node_modules/@efforting.tech"`;
const dev_stage_script = (
'#!/usr/bin/env bash\n' +
'set -e\n' +
`${dev_stage_mkdir_lines.join('\n')}\n` +
`${dev_stage_script_lines.join('\n')}\n`
`${dev_stage_script_lines.join('\n')}\n` +
experiments_link_line
);