Updated stale imports. Started experiment with parsing system. Got reduction scanner feature ready for use. Added several data utilities.
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import { Schema, Field_Configuration } from '@efforting.tech/data/field-configuration';
|
||||
import { Schema, Field_Configuration } from '@efforting.tech/schema/field-configuration';
|
||||
|
||||
|
||||
function mandatory_anything(value) {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import * as F from '@efforting.tech/data/field-configuration-factories';
|
||||
import * as F from '@efforting.tech/schema/field-configuration-factories';
|
||||
|
||||
const s = new F.Schema({
|
||||
foo: F.value(123, 'The value'),
|
||||
|
||||
156
experiments/generic-parser-1.mjs
Normal file
156
experiments/generic-parser-1.mjs
Normal file
@@ -0,0 +1,156 @@
|
||||
import { RegExp_Tokenizer, RegExp_Token_Rule } from '@efforting.tech/parsing/regexp-dispatch';
|
||||
import { Switchable_Iterator } from '@efforting.tech/data/iteration-utilities';
|
||||
import { String_Keyed_Stack } from '@efforting.tech/data/stack';
|
||||
import { assign_defined } from '@efforting.tech/data/object-utilities';
|
||||
import * as F from '@efforting.tech/schema/field-configuration-factories';
|
||||
import { inspect } from 'node:util';
|
||||
|
||||
|
||||
/**
 * Token rule that additionally carries an action.
 *
 * `pattern` and `identifier` are handed unchanged to RegExp_Token_Rule,
 * while `action` is stored on the instance exactly as given.
 */
class RegExp_Token_Parsing_Rule extends RegExp_Token_Rule {
	constructor(pattern, action, identifier=undefined) {
		super(pattern, identifier);
		this.action = action;
	}
}
|
||||
|
||||
|
||||
// Field declarations for the parser state: each entry pairs a default
// (a plain value, or a factory for the array-valued fields) with a description.
const parser_state_fields = {
	position: F.value(0, 'Pending position in source'),
	value: F.factory(() => [], 'Pending value to return'),
	sub_tokenizer_handlers: F.factory(() => [], 'Pending sub tokenizer handlers'),
	tokenizer: F.value(null, 'Current tokenizer'),
	context: F.value(null, 'User supplied context'),
};

// Schema used by Parser to load its initial state.
const Parser_State = new F.Schema(parser_state_fields, 'Parser state');
|
||||
|
||||
|
||||
|
||||
class Parser {
|
||||
constructor(source, state=undefined) {
|
||||
state = Parser_State.load(state);
|
||||
const token_generator = new Switchable_Iterator();
|
||||
const stack = new String_Keyed_Stack(state);
|
||||
Object.assign(this, { source, state, stack, token_generator });
|
||||
this.switch_to();
|
||||
}
|
||||
|
||||
|
||||
switch_to(tokenizer=undefined, position=undefined) {
|
||||
assign_defined(this.state, { tokenizer, position });
|
||||
this.token_generator.switch_to(this.state.tokenizer.iter_matches(this.source, this.state.position));
|
||||
}
|
||||
|
||||
parse(handler=undefined) {
|
||||
|
||||
for (const match of this.token_generator) {
|
||||
const { action } = match.rule;
|
||||
if (!action) { continue; }
|
||||
if (typeof action !== 'function') {
|
||||
console.log('NOT IMPLEMENTED', match.rule.action);
|
||||
continue;
|
||||
}
|
||||
|
||||
this.state.position = match.pending_index;
|
||||
this.state.match = match;
|
||||
action(this, match);
|
||||
|
||||
}
|
||||
|
||||
if (handler) {
|
||||
this.state.match = null; //TODO: Decide if we should reset match here or not
|
||||
handler(this, this.state.value);
|
||||
return this.state.value;
|
||||
} else {
|
||||
return this.state.value;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
push_token(...tokens) {
|
||||
this.state.value.push(...tokens);
|
||||
}
|
||||
|
||||
replace_value(value) {
|
||||
this.state.value = value;
|
||||
}
|
||||
|
||||
enter_sub_tokenizer(tokenizer=undefined, handler=undefined) {
|
||||
this.stack.push_defined({ tokenizer, value: [] });
|
||||
if (handler) {
|
||||
this.state.sub_tokenizer_handlers.push(handler);
|
||||
}
|
||||
this.switch_to(tokenizer);
|
||||
}
|
||||
|
||||
leave_sub_tokenizer() {
|
||||
const frame = this.stack.pop(true);
|
||||
const { sub_tokenizer_handlers } = this.state;
|
||||
|
||||
if (sub_tokenizer_handlers.length) {
|
||||
const handler = sub_tokenizer_handlers.pop();
|
||||
this.state.match = null; //TODO: Decide if we should reset match here or not
|
||||
handler(this, frame.value);
|
||||
} else {
|
||||
this.push_token(frame.value);
|
||||
}
|
||||
this.switch_to();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Demo input: words with nested parenthesised groups.
const text = 'Hello World (how are you (doing)) I may ask';

// Rule actions are invoked as action(parser, match).
const collect_word = (parser, match) => parser.push_token(match.value);
const wrap_sub_expression = (parser, value) => parser.push_token({kind: 'sub expression', value});
const open_group = (parser, match) => parser.enter_sub_tokenizer(undefined, wrap_sub_expression);
const close_group = (parser, match) => parser.leave_sub_tokenizer();

const rt = new RegExp_Tokenizer();
rt.add_rules(new RegExp_Token_Parsing_Rule(/\w+/, collect_word, 'word'));
rt.add_rules(new RegExp_Token_Parsing_Rule(/\s+/, null, 'space'));	// no action: whitespace is skipped
rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape('('), open_group, 'lpar'));
rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(')'), close_group, 'rpar'));

const p = new Parser(text, { tokenizer: rt });

// Wrap the top level result and print it; the expected output is shown in the comment below.
const parsing_result = p.parse((parser, value) => parser.replace_value({kind: 'parsing result', value}));
console.log(inspect(parsing_result, { colors: true, depth: null }));
|
||||
|
||||
/*
|
||||
|
||||
{
|
||||
kind: 'parsing result',
|
||||
value: [
|
||||
'Hello',
|
||||
'World',
|
||||
{
|
||||
kind: 'sub expression',
|
||||
value: [
|
||||
'how',
|
||||
'are',
|
||||
'you',
|
||||
{ kind: 'sub expression', value: [ 'doing' ] }
|
||||
]
|
||||
},
|
||||
'I',
|
||||
'may',
|
||||
'ask'
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
// The experiment ends here.
process.exit();

// Not reached while the process.exit() call above is in place:
// prints identifier and value for every raw match of the tokenizer.
for (const { identifier, value } of rt.iter_matches(text)) {
	console.log({ identifier, value });
}
|
||||
76
experiments/reduction-scanner-2.mjs
Normal file
76
experiments/reduction-scanner-2.mjs
Normal file
@@ -0,0 +1,76 @@
|
||||
import { Reduction_Scanner, Reduction_Settings } from '@efforting.tech/rule-processing/reduction-scanner';
|
||||
import * as R from '@efforting.tech/rule-processing/rules';
|
||||
|
||||
import { inspect } from 'node:util';
|
||||
|
||||
|
||||
|
||||
// NOTE: This is somewhat of a placeholder because we may want to declare specific
// transformations later rather than always having an opaque handler function.
class Rule {

	/**
	 * @param condition - Object providing a match() method.
	 * @param handler - Value exposed through the `action` getter.
	 */
	constructor(condition, handler) {
		this.condition = condition;
		this.handler = handler;
	}

	// The condition's match method, bound to the condition so it can be called detached.
	get match() {
		const { condition } = this;
		return condition.match.bind(condition);
	}

	// The handler, exposed under the name `action`.
	get action() {
		return this.handler;
	}

}
|
||||
|
||||
|
||||
|
||||
/**
 * Build a Rule that replaces a matched run of items with a single item.
 *
 * @param sequence - Conditions handed to R.Sequence_Condition.
 * @param transform_fn - Called with the matched items as separate arguments; its return
 *   value replaces them in place.
 */
function sequence_rule(sequence, transform_fn) {
	const condition = new R.Sequence_Condition(sequence);

	// match_start and match_end are used as inclusive indices into the scanned items
	const handler = (rs, items, match) => {
		const { match_start, match_end } = match;
		const matched = items.slice(match_start, match_end + 1);
		items.splice(match_start, match_end - match_start + 1, transform_fn(...matched));
	};

	return new Rule(condition, handler);
}
|
||||
|
||||
|
||||
// Operand condition: a plain number or an already reduced BINOP node.
// Uses strict equality, and optional chaining so that null/undefined items simply do not match.
const N = new R.Predicate((i) => typeof i === 'number' || i?.type === 'BINOP');

// Operator conditions, each matching one operator token by strict equality.
const ASTERISK = new R.Strict_Equality('*');
const PLUS = new R.Strict_Equality('+');
const HAT = new R.Strict_Equality('^');
|
||||
|
||||
|
||||
const rss = Reduction_Settings.load({
	// Switching this on or off affects whether add comes before mul or not
	//reduction_order: 'POSITION_MAJOR',
});
const rs = new Reduction_Scanner(rss);

// Creates the transform for one operator: (left, operator token, right) -> BINOP node.
const binop = (op) => (left, _operator, right) => ({ type: 'BINOP', op, operands: [left, right]});

// Rules are listed in the order HAT, ASTERISK, PLUS.
rss.rules.push(
	sequence_rule([N, HAT, N], binop('HAT')),
	sequence_rule([N, ASTERISK, N], binop('ASTERISK')),
	sequence_rule([N, PLUS, N], binop('PLUS')),
);

// Sample input; the printed result is shown in the OUTPUT comment below.
const arr = [10, '^', 5, '+', 20, '*', 30];
const reduced = rs.transform(arr);
console.log(inspect(reduced, { colors: true, depth: null }));
|
||||
|
||||
/* OUTPUT
|
||||
|
||||
|
||||
[
|
||||
{
|
||||
type: 'BINOP',
|
||||
op: 'PLUS',
|
||||
operands: [
|
||||
{ type: 'BINOP', op: 'HAT', operands: [ 10, 5 ] },
|
||||
{ type: 'BINOP', op: 'ASTERISK', operands: [ 20, 30 ] }
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
*/
|
||||
31
source/data/iteration-utilities.mjs
Normal file
31
source/data/iteration-utilities.mjs
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
 * Iterator facade whose underlying iterator can be exchanged while a consumer
 * (for instance a for...of loop) keeps iterating over the same object.
 */
export class Switchable_Iterator {

	/**
	 * @param iterator - Initially active iterator, or null for none.
	 * @param stack - Storage for iterators suspended by push().
	 */
	constructor(iterator=null, stack=[]) {
		Object.assign(this, { iterator, stack });
	}

	/** Suspend the active iterator on the stack and make `iterator` the active one. */
	push(iterator) {
		this.stack.push(this.iterator);
		this.switch_to(iterator);
	}

	/**
	 * Re-activate the most recently suspended iterator.
	 * With an empty stack there is nothing to resume and no iterator remains active.
	 */
	pop() {
		this.switch_to(this.stack.pop());
	}

	/** Make `iterator` the active one without touching the stack. */
	switch_to(iterator) {
		this.iterator = iterator;
	}

	/**
	 * Advance the active iterator.
	 * @returns The active iterator's result, or an exhausted result when none is active.
	 */
	next() {
		// Without an active iterator there is nothing to yield; report exhaustion
		// instead of failing on a property access of null/undefined
		if (this.iterator == null) {
			return { value: undefined, done: true };
		}
		return this.iterator.next();
	}

	/** Delegates to the active iterator's peek(); the active iterator must provide that method. */
	peek() {
		return this.iterator.peek();
	}

	[Symbol.iterator]() {
		return this;
	}

}
|
||||
9
source/data/object-utilities.mjs
Normal file
9
source/data/object-utilities.mjs
Normal file
@@ -0,0 +1,9 @@
|
||||
|
||||
/**
 * Copy the own enumerable string-keyed properties of `source` onto `target`,
 * leaving out those whose value is undefined (null is copied).
 */
export function assign_defined(target, source) {
	const defined_entries = Object.entries(source).filter((entry) => entry[1] !== undefined);
	const defined_properties = Object.fromEntries(defined_entries);
	Object.assign(target, defined_properties);
}
|
||||
|
||||
|
||||
/**
 * Copy those own enumerable string-keyed properties of `source` onto `target`
 * that `kv_predicate` accepts. The predicate is used as an Array filter callback,
 * so its first argument is the [key, value] pair.
 */
export function assign_using_predicate(target, source, kv_predicate) {
	const accepted_entries = Object.entries(source).filter(kv_predicate);
	const accepted_properties = Object.fromEntries(accepted_entries);
	Object.assign(target, accepted_properties);
}
|
||||
54
source/data/stack.mjs
Normal file
54
source/data/stack.mjs
Normal file
@@ -0,0 +1,54 @@
|
||||
// Marker value: as an update it requests removal of a property, and inside a
// frame it records that the property did not exist before the push.
export const DELETE_PROPERTY = Symbol('DELETE_PROPERTY');

/**
 * Stack of reversible updates applied to a target object.
 *
 * push() applies updates to the target and records, per touched key, what is needed
 * to undo them; pop() undoes the most recent push.
 */
export class String_Keyed_Stack {

	/**
	 * @param target - Object that is modified in place.
	 * @param stack - Storage for the recorded frames.
	 */
	constructor(target={}, stack=[]) {
		Object.assign(this, { target, stack });
	}

	/**
	 * Apply `updates` to the target and record how to undo them.
	 *
	 * @param updates - Own enumerable string-keyed properties to set; a value of
	 *   DELETE_PROPERTY removes the key instead.
	 * @returns The recorded frame, mapping each touched key to its previous value
	 *   (or DELETE_PROPERTY when the key was absent).
	 */
	push(updates={}) {
		const frame = {};
		this.stack.push(frame);

		for (const [key, value] of Object.entries(updates)) {
			// Remember the previous situation first, then apply the update
			frame[key] = (key in this.target) ? this.target[key] : DELETE_PROPERTY;

			if (value === DELETE_PROPERTY) {
				delete this.target[key];
			} else {
				this.target[key] = value;
			}
		}

		return frame;
	}

	/**
	 * Like push(), but entries whose value is undefined are ignored.
	 * @returns The recorded frame, as returned by push().
	 */
	push_defined(updates={}) {
		return this.push(Object.fromEntries(Object.entries(updates).filter(([k, v]) => v !== undefined)));
	}

	/**
	 * Undo the most recent push.
	 *
	 * @param copy_previous_state - When true, return a shallow copy of the target as it
	 *   was just before the undo.
	 * @returns That copy, or null when no copy was requested.
	 * @throws {Error} When there is no frame left to pop.
	 */
	pop(copy_previous_state=false) {
		if (this.stack.length === 0) {
			throw new Error('Cannot pop from an empty String_Keyed_Stack');
		}

		const frame = this.stack.pop();
		const { target } = this;

		// The snapshot must be taken before the frame is applied
		const return_value = copy_previous_state ? { ...target } : null;

		for (const [key, value] of Object.entries(frame)) {
			if (value === DELETE_PROPERTY) {
				delete target[key];
			} else {
				target[key] = value;
			}
		}

		return return_value;
	}

	/** The most recently recorded frame, or undefined when the stack is empty. */
	get top_reverse_delta() {
		return this.stack.at(-1);
	}

}
|
||||
@@ -70,12 +70,15 @@ export class Abstract_RegExp_Token_Rule {
|
||||
}
|
||||
|
||||
/**
 * Token rule with an identifier.
 *
 * @param pattern - Passed on to Abstract_RegExp_Token_Rule.
 * @param identifier - Identifier stored on the rule; when undefined or null the rule
 *   itself serves as its identifier.
 */
export class RegExp_Token_Rule extends Abstract_RegExp_Token_Rule {
	constructor(pattern, identifier=undefined) {
		super(pattern);
		// The fallback to `this` has to happen here and not as a parameter default:
		// in a derived class `this` may not be read before super() has been called
		Object.assign(this, { identifier: identifier ?? this });
	}
}
|
||||
|
||||
// Note: There is no clean built-in way to set an end position for a RegExp match; the only generic way is to slice the string before matching.
|
||||
// We may at some point implement support for this (and it would only be done if end position was given)
|
||||
|
||||
export class RegExp_Tokenizer {
|
||||
constructor(rules=[], default_action=undefined) {
|
||||
Object.assign(this, { rules, default_action });
|
||||
|
||||
Reference in New Issue
Block a user