Added dev target in makefile, added regexp-dispatch and example
This commit is contained in:
180
source/parsing/regexp-dispatch.mjs
Normal file
180
source/parsing/regexp-dispatch.mjs
Normal file
@@ -0,0 +1,180 @@
|
||||
import * as RE from '@efforting.tech/text/regexp';
|
||||
|
||||
// NOTE: There are some open questions about this implementation and API which may change as the library matures.
|
||||
// Check out the example at experiments/regexp-tokenizer.mjs for more information on how to use this in its current state.
|
||||
//
|
||||
// Specifically it is not currently decided where the boundary between rule/action/capture should be
|
||||
|
||||
// Wraps a successful RegExp exec() result together with the rule that produced it.
export class Pattern_Match {
	constructor(match, rule) {
		this.match = match;
		this.rule = rule;
	}

	// The identifier is delegated to the originating rule.
	get identifier() {
		return this.rule.identifier;
	}

	// The full matched text (group 0 of the exec result).
	get value() {
		return this.match[0];
	}

	// All explicit capture groups, excluding the full match.
	get captured() {
		return this.match.slice(1);
	}

	// Position in the scanned text immediately after this match.
	get pending_index() {
		return this.match.index + this.match[0].length;
	}
}
|
||||
|
||||
// Represents a stretch of text that no rule matched. The tokenizer's
// default_action classifies it: the action is called with this instance and
// its return value becomes the match's identifier.
export class Default_Match {
	/**
	 * @param {string} text - The unmatched text span.
	 * @param {number} index - Start index of the span in the scanned text.
	 * @param {?number} end_index - End index of the span, or null when it runs to the end of the input.
	 * @param {Function} action - Classifier; receives this instance, returns the identifier.
	 */
	constructor(text, index, end_index, action) {
		// BUGFIX: assign the positional fields BEFORE invoking the action, so
		// the action can read properties such as `value`, `index` and
		// `end_index`. Previously the action was invoked on an instance with
		// no fields set, so any state-reading action saw `undefined`.
		Object.assign(this, { text, index, end_index, action });
		this.identifier = action(this);
	}

	// The unmatched text itself.
	get value() {
		return this.text;
	}

	// Index to continue scanning from; null signals the input is exhausted.
	get pending_index() {
		if (this.end_index === null) {
			return null;
		} else {
			return this.end_index;
		}
	}
}
|
||||
|
||||
|
||||
// Base class that compiles a pattern into two variants: a sticky ('y') one
// for matching at an exact position and a global ('g') one for scanning
// ahead. The originals' flags are preserved; the anchoring flag is added
// exactly once via a Set.
export class Abstract_RegExp_Token_Rule {
	constructor(pattern) {
		const source = RE.get_source(pattern);
		const flags = RE.get_flags(pattern);

		const sticky_flags = [...new Set([...flags, 'y'])].join('');
		const global_flags = [...new Set([...flags, 'g'])].join('');

		this.pattern = pattern;
		this.immediate_pattern = new RegExp(source, sticky_flags);
		this.scanning_pattern = new RegExp(source, global_flags);
	}
}
|
||||
|
||||
// A concrete token rule: a pattern plus an identifier reported on its matches.
export class RegExp_Token_Rule extends Abstract_RegExp_Token_Rule {
	/**
	 * @param {RegExp|string} pattern - Pattern handed to the abstract base.
	 * @param {*} [identifier] - Token identifier; defaults to the rule instance itself.
	 */
	constructor(pattern, identifier=undefined) {
		super(pattern);
		// BUGFIX: the previous signature used `identifier=this` as a default
		// parameter. Defaults are evaluated before the constructor body runs,
		// i.e. before super(), where `this` is still uninitialized in a
		// derived class — so omitting `identifier` threw a ReferenceError.
		// Resolve the default here, after super() has initialized `this`.
		Object.assign(this, { identifier: identifier === undefined ? this : identifier });
	}
}
|
||||
|
||||
// Dispatches over a list of token rules, matching text either exactly at a
// position ("immediate", sticky patterns) or anywhere ahead of it
// ("scanning", global patterns). Spans no rule matches are delegated to
// `default_action` (see _handle_default_match).
export class RegExp_Tokenizer {
	// rules: array of rule objects exposing `immediate_pattern` (sticky) and
	// `scanning_pattern` (global); default_action: optional classifier called
	// for text that no rule matches.
	constructor(rules=[], default_action=undefined) {
		Object.assign(this, { rules, default_action });
	}

	// Convenience: make every unmatched span carry a fixed identifier.
	set_default_identifier(identifier) {
		this.default_action = (
			() => identifier
		);
	}

	// Appends rules; earlier rules win ties in immediate_match.
	add_rules(...rules_to_add) {
		this.rules.push(...rules_to_add);
	}

	// Tries each rule anchored exactly at `position` (sticky pattern).
	// Returns a Pattern_Match for the first rule that matches, else undefined.
	immediate_match(text, position=0) {
		for (const rule of this.rules) {
			const pattern = rule.immediate_pattern;
			// Sticky/global patterns are stateful; set lastIndex before exec.
			pattern.lastIndex = position;
			const match = pattern.exec(text);
			if (match) {
				return new Pattern_Match(match, rule);
			}
		}
	}

	// Wraps an unmatched span in a Default_Match, or throws when no
	// default_action is configured.
	// NOTE(review): Parsing_Error is neither defined nor imported in this
	// file, so reaching this throw would raise a ReferenceError instead of
	// the intended error — confirm the missing import.
	_handle_default_match(value, index, end_index=null) {
		const { default_action } = this;
		if (!default_action) {
			throw new Parsing_Error({ parser: this, value, index, end_index });
		}
		return new Default_Match(value, index, end_index, default_action);
	}

	// Finds the next match at or after `position`. Priority order:
	//   1. a rule matching exactly at `position` (immediate);
	//   2. the unmatched head before the earliest scanning match (as default);
	//   3. the earliest scanning match itself;
	//   4. the unmatched tail (as default) when no rule matched at all.
	// Returns undefined when `position` is already at the end of the text.
	closest_scanning_match(text, position=0) {
		const immediate_match = this.immediate_match(text, position);
		if (immediate_match) {
			return immediate_match;
		}

		// Pick the candidate whose match starts closest to `position`.
		let best_candidate;
		for (const candidate of this.iter_scanning_rule_candidates(text, position)) {
			if ((best_candidate === undefined) || (best_candidate.match.index > candidate.match.index)) {
				best_candidate = candidate;
			}
		}

		// There was no match, just get the tail
		if (!best_candidate) {
			const tail = text.slice(position);
			if (tail.length) {
				return this._handle_default_match(tail, position);
			}
		}

		// There was a match, check the head
		if (best_candidate) {
			const head = text.slice(position, best_candidate.match.index);
			if (head.length) {
				return this._handle_default_match(head, position, best_candidate.match.index);
			}
		}

		return best_candidate;
	}

	// Iterates over all rules and yields any matches found anywhere (but only once per rule)
	*iter_scanning_rule_candidates(text, position=0) {
		for (const rule of this.rules) {
			const pattern = rule.scanning_pattern;
			// Global patterns are stateful; set lastIndex before exec.
			pattern.lastIndex = position;
			const match = pattern.exec(text);

			if (match) {
				yield new Pattern_Match(match, rule);
			}
		}
	}

	// Yields successive matches until the input is exhausted
	// (pending_index === null) or nothing more matches.
	// NOTE(review): a rule that matches the empty string would never advance
	// `position`, making this loop yield the same zero-width match forever —
	// confirm patterns are guaranteed non-empty.
	*iter_matches(text, position=0) {
		while (true) {
			const pending = this.closest_scanning_match(text, position);
			if (pending) {
				yield pending;
			}
			if (!pending || pending.pending_index === null) {
				break;
			}
			position = pending.pending_index;
		}
	}
}
|
||||
|
||||
Reference in New Issue
Block a user