Compare commits
2 Commits
235e12e7db
...
d584a49579
| Author | SHA1 | Date | |
|---|---|---|---|
| d584a49579 | |||
| 1842d3de9c |
@@ -13,3 +13,14 @@ rt.set_default_identifier('random stuff');
|
||||
for (const m of rt.iter_matches('#Hello World!')) {
|
||||
console.log({class: m.constructor.name, identifier: m.identifier, value: m.value, captured: m.captured });
|
||||
};
|
||||
|
||||
|
||||
console.log('--=| Slicing |=--')
|
||||
|
||||
for (const m of rt.iter_matches('#Hello World!', 3, -3)) {
|
||||
//console.log(m, m.pending_index)
|
||||
console.log({class: m.constructor.name, identifier: m.identifier, value: m.value, captured: m.captured });
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
79
planning/math-subsystem.md
Normal file
79
planning/math-subsystem.md
Normal file
@@ -0,0 +1,79 @@
|
||||
# Mathematical Expression Subsystem
|
||||
|
||||
> [!NOTE]
|
||||
> This document is written by Claude by Anthropic using Sonnet 4.6 and has yet to be vetted by Mikael Lövqvist
|
||||
|
||||
## Overview
|
||||
|
||||
A math-like expression language built on top of the reduction scanner, supporting
|
||||
operator notation, matrix literals, subscripts, superscripts, and symbolic operators.
|
||||
|
||||
## Operator Notation
|
||||
|
||||
Operators are identified by their symbol name rather than semantic meaning, since the
|
||||
same symbol can mean different things depending on operand types:
|
||||
|
||||
- `*` (ASTERISK) — could be scalar multiplication, Hadamard product, or scale depending on types
|
||||
- `·` (DOT) — dot product
|
||||
- `×` (CROSS) — cross product
|
||||
- `⊕` (OPLUS) — direct sum or XOR
|
||||
|
||||
Semantic resolution (e.g. `ASTERISK(matrix, matrix)` → Hadamard) is a separate
|
||||
type-inference pass, not part of the structural reduction.
|
||||
|
||||
## ASCII Input for Special Symbols
|
||||
|
||||
LaTeX-inspired escape sequences for entering special symbols in plain ASCII:
|
||||
|
||||
- `\oplus` → ⊕
|
||||
- `\times` → ×
|
||||
- `\cdot` → ·
|
||||
- `\otimes` → ⊗
|
||||
|
||||
`^` is reserved for superscript (not XOR), `_` for subscript. `S_12` reads as S₁₂.
|
||||
|
||||
## Matrix Literals
|
||||
|
||||
Single-line input using nested brackets:
|
||||
|
||||
```
|
||||
[[1, 0, 0], [0, 1, 0], [0, 0, 1]]
|
||||
```
|
||||
|
||||
Pretty-printed output using Unicode bracket characters:
|
||||
|
||||
```
|
||||
⎡1 0 0⎤
|
||||
⎢0 1 0⎥
|
||||
⎣0 0 1⎦
|
||||
```
|
||||
|
||||
## 2D Raster Reduction Scanner
|
||||
|
||||
For parsing pretty-printed multi-line matrix literals within larger expressions like
|
||||
`M + 2 * N` where M and N are written in 2D notation, a raster-based reduction pass
|
||||
is needed before the standard 1D reduction pass.
|
||||
|
||||
### Approach
|
||||
|
||||
1. **Raster pass first** — operate on a 2D grid of characters
|
||||
2. Locate matrix corner anchors `⎡⎤⎣⎦` — these are highly selective so candidate
|
||||
detection is cheap
|
||||
3. From each `⎡`, scan right for `⎤` and down for `⎣`, then verify `⎦` at the intersection of that column and row
|
||||
4. Use `⎢`/`⎥` to identify row boundaries within the region
|
||||
5. Collapse the identified rectangle into a single matrix token
|
||||
6. **1D pass second** — the surrounding expression now contains ordinary tokens and
|
||||
the collapsed matrix nodes, reducible by standard rules
|
||||
|
||||
### Scope Boundaries
|
||||
|
||||
Fraction bars define containment — a matrix appearing in a numerator or denominator
|
||||
is only part of that sub-expression. The horizontal extent of the fraction bar bounds
|
||||
the operand scan. Containment must be resolved outside-in: find outermost structure
|
||||
first, recurse into sub-regions.
|
||||
|
||||
### Generalization
|
||||
|
||||
A 2D reduction scanner is a natural generalization of the 1D scanner — the "sequence"
|
||||
becomes a 2D array and conditions match spatial patterns rather than linear ones.
|
||||
The same anchor-point and backtracking concepts apply.
|
||||
@@ -6,7 +6,7 @@ import { inspect } from 'node:util';
|
||||
|
||||
export class Tokenization_Error extends Error {
|
||||
constructor(data) {
|
||||
const { parser, value, index, end_index } = data;
|
||||
const {parser, text, start_position, end_position, match_start, match_end, value} = data;
|
||||
super(`Tokenization_Error`); //TODO: Format message
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
@@ -7,9 +7,20 @@ import { Tokenization_Error } from '@efforting.tech/errors';
|
||||
//
|
||||
// Specifically it is not currently decided where the boundary between rule/action/capture should be
|
||||
|
||||
|
||||
/**
 * Resolve slice-style bounds against `text`.
 *
 * Follows the conventions of String.prototype.slice: a negative position
 * counts back from the end of the text, and every resolved position is
 * clamped into the range [0, text.length].
 *
 * @param {string} text - The text the positions refer to.
 * @param {number} [start_position] - Start offset; nullish is treated as 0.
 * @param {number} [end_position] - End offset; nullish means "no end bound".
 * @returns {[number, (number|undefined)]} The resolved `[start, end]` pair,
 *   where `end` stays `undefined` when no end bound was given.
 */
function normalize_bounds(text, start_position, end_position) {
	const len = text.length;

	// Negative positions are relative to the end; positive ones may not exceed the text length.
	const resolve = (position) => (position < 0 ? Math.max(0, len + position) : Math.min(len, position));

	// Loose comparison on purpose: it treats both null and undefined as "not given" while still accepting 0.
	const norm_start = start_position == undefined ? 0 : resolve(start_position);
	const norm_end = end_position == undefined ? undefined : resolve(end_position);

	return [norm_start, norm_end];
}
|
||||
|
||||
|
||||
export class Pattern_Match {
|
||||
constructor(match, rule) {
|
||||
Object.assign(this, { match, rule });
|
||||
constructor(text, start_position, end_position, match, rule) {
|
||||
// Normalize positions
|
||||
[start_position, end_position] = normalize_bounds(text, start_position, end_position);
|
||||
Object.assign(this, { text, start_position, end_position, match, rule });
|
||||
}
|
||||
|
||||
get identifier() {
|
||||
@@ -24,28 +35,34 @@ export class Pattern_Match {
|
||||
return this.match.slice(1);
|
||||
}
|
||||
|
||||
get pending_index() {
|
||||
return this.match.index + this.match[0].length;
|
||||
get absolute_start() {
|
||||
return this.match.index + this.start_position;
|
||||
}
|
||||
|
||||
get absolute_end() {
|
||||
return this.match.index + this.start_position + this.match[0].length - 1;
|
||||
}
|
||||
|
||||
get pending_index() {
|
||||
return this.match.index + this.start_position + this.match[0].length;
|
||||
}
|
||||
}
|
||||
|
||||
export class Default_Match {
|
||||
constructor(text, index, end_index, action) {
|
||||
const identifier = action(this);
|
||||
Object.assign(this, { text, index, end_index, action, identifier });
|
||||
}
|
||||
//TBD: Here we invoke action while creating this object, and assign the identifier but we don't do that on Pattern_Match - this feels a bit sketchy
|
||||
constructor(text, start_position, end_position, match_start, match_end, value, action) {
|
||||
// Normalize positions
|
||||
[start_position, end_position] = normalize_bounds(text, start_position, end_position);
|
||||
[match_start, match_end] = normalize_bounds(text, match_start, match_end);
|
||||
|
||||
get value() {
|
||||
return this.text;
|
||||
const identifier = action(this); //TODO: action protocol in accordance with issue #5
|
||||
Object.assign(this, { text, start_position, end_position, match_start, match_end, value, action, identifier });
|
||||
}
|
||||
|
||||
|
||||
get pending_index() {
|
||||
if (this.end_index === null) {
|
||||
return null;
|
||||
} else {
|
||||
return this.end_index;
|
||||
// CLARIFICATION: the loose comparison ( != undefined ) is false for both null and undefined but true for false/0/'', so a match_end of 0 still counts as set
|
||||
if (this.match_end != undefined) {
|
||||
return this.match_end + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -76,8 +93,6 @@ export class RegExp_Token_Rule extends Abstract_RegExp_Token_Rule {
|
||||
}
|
||||
}
|
||||
|
||||
// Note: There is no clean built in way to set an end position of a RegExp pattern, the only generic way is to slice the string we match before.
|
||||
// We may at some point implement support for this (and it would only be done if end position was given)
|
||||
|
||||
export class RegExp_Tokenizer {
|
||||
constructor(rules=[], default_action=undefined) {
|
||||
@@ -94,53 +109,58 @@ export class RegExp_Tokenizer {
|
||||
this.rules.push(...rules_to_add);
|
||||
}
|
||||
|
||||
immediate_match(text, position=0) {
|
||||
immediate_match(text, start_position=0, end_position=undefined) {
|
||||
// CLARIFICATION: loose inequality ( != ) matches null and undefined but not false/0/'' but we use strict for start_position since it is always a number
|
||||
const bounded = start_position !== 0 || end_position != undefined;
|
||||
const text_to_search = bounded ? text.slice(start_position, end_position) : text;
|
||||
|
||||
for (const rule of this.rules) {
|
||||
const pattern = rule.immediate_pattern;
|
||||
pattern.lastIndex = position;
|
||||
const match = pattern.exec(text);
|
||||
pattern.lastIndex = 0;
|
||||
const match = pattern.exec(text_to_search);
|
||||
if (match) {
|
||||
return new Pattern_Match(match, rule);
|
||||
return new Pattern_Match(text, start_position, end_position, match, rule);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
_handle_default_match(value, index, end_index=null) {
|
||||
_handle_default_match(text, start_position, end_position, match_start, match_end, value) {
|
||||
const { default_action } = this;
|
||||
if (!default_action) {
|
||||
throw new Tokenization_Error({ parser: this, value, index, end_index });
|
||||
throw new Tokenization_Error({ parser: this, text, start_position, end_position, match_start, match_end, value });
|
||||
}
|
||||
return new Default_Match(value, index, end_index, default_action);
|
||||
return new Default_Match(text, start_position, end_position, match_start, match_end, value, default_action);
|
||||
}
|
||||
|
||||
|
||||
closest_scanning_match(text, position=0) {
|
||||
const immediate_match = this.immediate_match(text, position);
|
||||
closest_scanning_match(text, start_position=0, end_position=undefined) {
|
||||
|
||||
const immediate_match = this.immediate_match(text, start_position, end_position);
|
||||
if (immediate_match) {
|
||||
return immediate_match;
|
||||
}
|
||||
|
||||
let best_candidate;
|
||||
for (const candidate of this.iter_scanning_rule_candidates(text, position)) {
|
||||
if ((best_candidate === undefined) || (best_candidate.match.index > candidate.match.index)) {
|
||||
for (const candidate of this.iter_scanning_rule_candidates(text, start_position, end_position)) {
|
||||
if ((best_candidate === undefined) || (best_candidate.absolute_start > candidate.absolute_start)) {
|
||||
best_candidate = candidate;
|
||||
}
|
||||
}
|
||||
|
||||
// There was no match, just get the tail
|
||||
if (!best_candidate) {
|
||||
const tail = text.slice(position);
|
||||
const tail = text.slice(start_position);
|
||||
if (tail.length) {
|
||||
return this._handle_default_match(tail, position);
|
||||
return this._handle_default_match(text, start_position, end_position, start_position, end_position, tail);
|
||||
}
|
||||
}
|
||||
|
||||
// There was a match, check the head
|
||||
if (best_candidate) {
|
||||
const head = text.slice(position, best_candidate.match.index);
|
||||
const head = text.slice(start_position, best_candidate.absolute_start);
|
||||
if (head.length) {
|
||||
return this._handle_default_match(head, position, best_candidate.match.index);
|
||||
return this._handle_default_match(text, start_position, end_position, start_position, best_candidate.absolute_start - 1, head);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -149,32 +169,41 @@ export class RegExp_Tokenizer {
|
||||
}
|
||||
|
||||
|
||||
*iter_scanning_rule_candidates(text, position=0) {
|
||||
// Iterates over all rules and yields any matches found anywhere (but only once per rule)
|
||||
*iter_scanning_rule_candidates(text, start_position=0, end_position=undefined) {
|
||||
// CLARIFICATION: loose inequality ( != ) matches null and undefined but not false/0/'' but we use strict for start_position since it is always a number
|
||||
const bounded = start_position !== 0 || end_position != undefined;
|
||||
const text_to_search = bounded ? text.slice(start_position, end_position) : text;
|
||||
|
||||
|
||||
// Iterates over all rules and yields any matches found anywhere (but only once per rule)
|
||||
for (const rule of this.rules) {
|
||||
const pattern = rule.scanning_pattern;
|
||||
pattern.lastIndex = position;
|
||||
const match = pattern.exec(text);
|
||||
pattern.lastIndex = 0;
|
||||
const match = pattern.exec(text_to_search);
|
||||
|
||||
if (match) {
|
||||
yield new Pattern_Match(match, rule);
|
||||
yield new Pattern_Match(text, start_position, end_position, match, rule);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
*iter_matches(text, position=0) {
|
||||
*iter_matches(text, start_position=0, end_position=undefined) {
|
||||
|
||||
// Normalize positions
|
||||
[start_position, end_position] = normalize_bounds(text, start_position, end_position);
|
||||
|
||||
while (true) {
|
||||
const pending = this.closest_scanning_match(text, position);
|
||||
const pending = this.closest_scanning_match(text, start_position, end_position);
|
||||
if (pending) {
|
||||
yield pending;
|
||||
}
|
||||
if (!pending || pending.pending_index === null) {
|
||||
// CLARIFICATION: loose equality ( == null ) is true for both null and undefined but not for false/0/''; the end_position comparison is strict because pending_index is known to be non-nullish at that point
|
||||
if (!pending || pending.pending_index == null || pending.pending_index === end_position ) {
|
||||
break;
|
||||
}
|
||||
position = pending.pending_index;
|
||||
start_position = pending.pending_index;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user