Added dev target in makefile, added regexp-dispatch and example

This commit is contained in:
2026-04-15 22:25:34 +02:00
parent 1fdaee0b57
commit 5afd363aa7
5 changed files with 256 additions and 1 deletions

View File

@@ -7,7 +7,10 @@ build/packages:
publish: publish:
cd build/packages && ./publish-all.sh cd build/packages && ./publish-all.sh
dev:
build/packages/local-install.sh
clean: clean:
rm -rf build rm -rf build
.PHONY: clean build/packages publish .PHONY: clean build/packages publish dev

View File

@@ -0,0 +1,15 @@
import { RegExp_Tokenizer, RegExp_Token_Rule } from '@efforting.tech/parsing/regexp-dispatch';
// Example: split a string into 'word' tokens, labelling everything between them 'random stuff'.
const tokenizer = new RegExp_Tokenizer();
tokenizer.add_rules(new RegExp_Token_Rule(/\w+/, 'word'));
tokenizer.set_default_identifier('random stuff');

// Debugging aids, left disabled:
//console.log(tokenizer.rules);
//console.log(tokenizer.closest_scanning_match('#Hello World!'));

// Print one summary object per token yielded by the tokenizer.
for (const token of tokenizer.iter_matches('#Hello World!')) {
	const { identifier, value, captured } = token;
	console.log({ class: token.constructor.name, identifier, value, captured });
}

View File

@@ -35,6 +35,14 @@ packages:
internal-dependencies: internal-dependencies:
- data - data
parsing:
path: source/parsing
#documentation: documentation/text
description: Generic string parsing
internal-dependencies:
- errors
- text
wip-packages: wip-packages:
object-graph-storage: object-graph-storage:
path: source/object-graph-storage path: source/object-graph-storage

View File

@@ -0,0 +1,180 @@
import * as RE from '@efforting.tech/text/regexp';
// NOTE: There are some open questions about this implementation and API which may change as the library matures.
// Check out the example at experiments/regexp-tokenizer.mjs for more information on how to use this in its current state.
//
// Specifically it is not currently decided where the boundary between rule/action/capture should be
// Pairs the result array of a successful RegExp exec() with the rule that produced it.
export class Pattern_Match {
	// match: result array from RegExp.prototype.exec (needs `index`, element 0 and `slice`).
	// rule: the rule object whose pattern matched; its `identifier` is exposed on this match.
	constructor(match, rule) {
		this.match = match;
		this.rule = rule;
	}

	// Identifier of the rule that produced this match.
	get identifier() {
		const { rule } = this;
		return rule.identifier;
	}

	// The full matched text (element 0 of the match array).
	get value() {
		const { match } = this;
		return match[0];
	}

	// Every element of the match array after the full match, i.e. the capture groups.
	get captured() {
		const { match } = this;
		return match.slice(1);
	}

	// Index just past the matched text: match start plus the length of the full match.
	get pending_index() {
		const { match } = this;
		const matched_text = match[0];
		return match.index + matched_text.length;
	}
}
// Represents a stretch of text that no rule matched. Its identifier is computed by `action`.
export class Default_Match {
	// text: the unmatched text.
	// index: position in the source string where `text` starts.
	// end_index: position where `text` ends, or null when it runs to the end of the input.
	// action: function called with this instance; its return value becomes `identifier`.
	constructor(text, index, end_index, action) {
		// Populate the instance BEFORE invoking the action, so the action can inspect
		// text/index/end_index. Previously the action received a still-empty object.
		Object.assign(this, { text, index, end_index, action });
		this.identifier = action(this);
	}

	// The unmatched text itself.
	get value() {
		return this.text;
	}

	// Where the next match should start; null when this match consumed the rest of the input.
	get pending_index() {
		return this.end_index;
	}
}
// Base class for tokenizer rules. From one pattern it derives two RegExp objects:
//   immediate_pattern - sticky ('y'): only matches exactly at lastIndex
//   scanning_pattern  - global ('g'): searches forward from lastIndex
export class Abstract_RegExp_Token_Rule {
	// pattern: passed to RE.get_source / RE.get_flags to obtain the source text and flag set.
	constructor(pattern) {
		const pattern_source = RE.get_source(pattern);
		const pattern_flags = [...RE.get_flags(pattern)];

		// Sets de-duplicate flags that the supplied pattern already carries.
		const immediate_flags = [...new Set([...pattern_flags, 'y'])].join('');

		// The sticky flag must not leak into the scanning pattern: when both 'g' and 'y'
		// are set, 'y' wins and the pattern can no longer search ahead of lastIndex.
		const scanning_base_flags = pattern_flags.filter((flag) => flag !== 'y');
		const scanning_flags = [...new Set([...scanning_base_flags, 'g'])].join('');

		const immediate_pattern = new RegExp(pattern_source, immediate_flags);
		const scanning_pattern = new RegExp(pattern_source, scanning_flags);
		Object.assign(this, { pattern, immediate_pattern, scanning_pattern });
	}
}
// A token rule that carries an identifier, which matches expose through the rule.
export class RegExp_Token_Rule extends Abstract_RegExp_Token_Rule {
	// pattern: forwarded to the base class constructor.
	// identifier: label for tokens produced by this rule; when omitted (or undefined)
	//             the rule instance itself serves as the identifier.
	constructor(pattern, identifier=undefined) {
		super(pattern);
		// The fallback to `this` has to be resolved here rather than as a parameter default:
		// in a derived class, `this` may not be touched before super() has returned, so a
		// default of `identifier=this` throws a ReferenceError whenever it is evaluated.
		const resolved_identifier = (identifier === undefined) ? this : identifier;
		Object.assign(this, { identifier: resolved_identifier });
	}
}
// Splits a string into a sequence of matches using an ordered list of rules.
// Text that no rule matches is reported through the default action (if one is set).
export class RegExp_Tokenizer {
	// rules: ordered list of rule objects; each must expose `immediate_pattern` and
	//        `scanning_pattern` RegExp objects (their lastIndex is overwritten on every lookup).
	// default_action: function used to compute the identifier of unmatched text, or
	//                 undefined to treat unmatched text as an error.
	constructor(rules=[], default_action=undefined) {
		Object.assign(this, { rules, default_action });
	}

	// Installs a default action that labels all unmatched text with a fixed identifier.
	set_default_identifier(identifier) {
		this.default_action = (
			() => identifier
		);
	}

	// Appends the given rules; earlier rules take precedence over later ones.
	add_rules(...rules_to_add) {
		this.rules.push(...rules_to_add);
	}

	// Returns a Pattern_Match for the first rule whose immediate pattern matches at
	// `position`, or undefined when no rule does.
	immediate_match(text, position=0) {
		for (const rule of this.rules) {
			const pattern = rule.immediate_pattern;
			pattern.lastIndex = position;
			const match = pattern.exec(text);
			if (match) {
				return new Pattern_Match(match, rule);
			}
		}
		return undefined;
	}

	// Wraps unmatched text in a Default_Match; throws when no default action is configured.
	// end_index is null when the unmatched text runs to the end of the input.
	_handle_default_match(value, index, end_index=null) {
		const { default_action } = this;
		if (!default_action) {
			// NOTE(review): Parsing_Error is not imported in the visible part of this file -
			// confirm it is in scope, otherwise this line raises a ReferenceError instead.
			throw new Parsing_Error({ parser: this, value, index, end_index });
		}
		return new Default_Match(value, index, end_index, default_action);
	}

	// Returns the next match at or after `position`:
	//   - a Pattern_Match when a rule matches right at `position`,
	//   - otherwise a Default_Match covering the text up to the nearest rule match
	//     (or up to the end of the input when no rule matches at all),
	//   - undefined when nothing is left to match.
	closest_scanning_match(text, position=0) {
		const immediate_match = this.immediate_match(text, position);
		if (immediate_match) {
			return immediate_match;
		}

		// Pick the candidate that starts earliest; on a tie the earlier rule wins.
		let best_candidate;
		for (const candidate of this.iter_scanning_rule_candidates(text, position)) {
			if ((best_candidate === undefined) || (candidate.match.index < best_candidate.match.index)) {
				best_candidate = candidate;
			}
		}

		if (!best_candidate) {
			// No rule matches anywhere ahead: the remaining tail (if any) is unmatched text.
			const tail = text.slice(position);
			if (tail.length) {
				return this._handle_default_match(tail, position);
			}
			return undefined;
		}

		// A rule matches further ahead: the text in front of it is unmatched.
		const head = text.slice(position, best_candidate.match.index);
		if (head.length) {
			return this._handle_default_match(head, position, best_candidate.match.index);
		}
		return best_candidate;
	}

	// Iterates over all rules and yields any matches found anywhere (but only once per rule)
	*iter_scanning_rule_candidates(text, position=0) {
		for (const rule of this.rules) {
			const pattern = rule.scanning_pattern;
			pattern.lastIndex = position;
			const match = pattern.exec(text);
			if (match) {
				yield new Pattern_Match(match, rule);
			}
		}
	}

	// Yields consecutive matches starting at `position` until the input is exhausted.
	// Throws an Error when a match would not advance the position (e.g. a rule that
	// matches the empty string), because iteration could otherwise never terminate.
	*iter_matches(text, position=0) {
		while (true) {
			const pending = this.closest_scanning_match(text, position);
			if (!pending) {
				break;
			}

			const next_position = pending.pending_index;
			if ((next_position !== null) && (next_position <= position)) {
				throw new Error(`Tokenizer made no progress at index ${position}: a rule matched without consuming any text`);
			}

			yield pending;

			// A null pending index means the match consumed the rest of the input.
			if (next_position === null) {
				break;
			}
			position = next_position;
		}
	}
}

49
source/text/regexp.mjs Normal file
View File

@@ -0,0 +1,49 @@
// Returns the flags of `pattern` as a Set of single-character strings.
// Anything that is not a RegExp (e.g. a plain source string) yields an empty Set.
export function get_flags(pattern) {
	const flag_characters = (pattern instanceof RegExp) ? pattern.flags : '';
	return new Set(flag_characters);
}
// Returns the source text of `pattern` when it is a RegExp; any other value is
// returned unchanged (so a plain source string passes straight through).
export function get_source(pattern) {
	return (pattern instanceof RegExp) ? pattern.source : pattern;
}
// Builds one RegExp from the given pieces placed back to back.
// RegExp pieces contribute their source and their flags (the result carries the union
// of all flags); any other piece is appended to the source as-is.
export function concat(...pattern_list) {
	const combined_flags = new Set();
	const combined_source = pattern_list.reduce((source_so_far, piece) => {
		if (!(piece instanceof RegExp)) {
			return source_so_far + piece;
		}
		for (const flag of piece.flags) {
			combined_flags.add(flag);
		}
		return source_so_far + piece.source;
	}, '');
	return new RegExp(combined_source, [...combined_flags].join(''));
}
// Builds one RegExp from the sources of `pattern_list`, joined by the source of `separator`.
// Flags of the individual pieces are ignored; the result uses `flags` only.
export function join(pattern_list, separator, flags=undefined) {
	const piece_sources = pattern_list.map((piece) => get_source(piece));
	const separator_source = get_source(separator);
	const joined_source = piece_sources.join(separator_source);
	return new RegExp(joined_source, flags);
}
// Returns a new RegExp equal to `pattern` with a single flag switched on or off.
// pattern: a RegExp or a plain source string (handled the same way as get_flags/get_source do).
// flag: the single-character flag to change, e.g. 'g'.
// state: truthy to add the flag, falsy to remove it.
export function update_flag(pattern, flag, state) {
	const pattern_flags = get_flags(pattern);
	if (state) {
		pattern_flags.add(flag);
	} else {
		pattern_flags.delete(flag);
	}
	// Use get_source rather than pattern.source so that a string pattern keeps its text;
	// reading .source from a string gives undefined, which RegExp turns into an empty pattern.
	return new RegExp(get_source(pattern), [...pattern_flags].join(''));
}