// Files
// nodejs.esm-library/source/parsing/generic-parsing.mjs
//
// 101 lines
// 2.8 KiB
// JavaScript

import { RegExp_Token_Rule } from '@efforting.tech/parsing/regexp-dispatch';
import { Switchable_Iterator } from '@efforting.tech/data/iteration-utilities';
import { String_Keyed_Stack } from '@efforting.tech/data/stack';
import { assign_defined } from '@efforting.tech/data/object-utilities';
import * as F from '@efforting.tech/schema/field-configuration-factories';
/**
 * A `RegExp_Token_Rule` that additionally carries an `action`.
 *
 * `pattern` and `identifier` are forwarded unchanged to the base constructor;
 * `action` is stored on the instance, where `Parser.parse` reads it from
 * `match.rule` and calls it as `action(parser, match)` when it is a function.
 */
export class RegExp_Token_Parsing_Rule extends RegExp_Token_Rule {
	/**
	 * @param {*} pattern - Forwarded to `RegExp_Token_Rule`.
	 * @param {*} action - Stored as `this.action`.
	 * @param {*} [identifier] - Forwarded to `RegExp_Token_Rule`.
	 */
	constructor(pattern, action, identifier=undefined) {
		super(pattern, identifier);
		this.action = action;
	}
}
/**
 * Schema describing the state object a `Parser` works on.
 *
 * `Parser`'s constructor passes its `state` argument through
 * `Parser_State.load` and then reads and writes the fields declared here.
 * Each field's default (or default factory) and description are given inline.
 *
 * NOTE(review): `Parser` also writes a `match` property onto the loaded state,
 * which is not declared in this schema — confirm that `F.Schema` tolerates
 * undeclared properties.
 */
export const Parser_State = new F.Schema({
position: F.value(0, 'Pending position in source'),
// Factories (rather than shared literals) are used for the two array fields.
value: F.factory(() => [], 'Pending value to return'),
sub_tokenizer_handlers: F.factory(() => [], 'Pending sub tokenizer handlers'),
tokenizer: F.value(null, 'Current tokenizer'),
context: F.value(null, 'User supplied context'),
}, 'Parser state');
/**
 * Pulls matches from the current tokenizer and runs each matched rule's action,
 * accumulating a result in `state.value`.
 *
 * The instance holds:
 *  - `source`: the input handed to `tokenizer.iter_matches`,
 *  - `state`: the object returned by `Parser_State.load`,
 *  - `stack`: a `String_Keyed_Stack` constructed over `state`, used to push and
 *    pop `tokenizer` / `value` when sub tokenizers are entered and left,
 *  - `token_generator`: a `Switchable_Iterator` that `parse` iterates and that
 *    `switch_to` re-targets.
 */
export class Parser {
	/**
	 * @param {*} source - Input handed to `tokenizer.iter_matches`.
	 * @param {object} [state] - Initial state, passed through `Parser_State.load`.
	 *   `switch_to()` is called immediately, which dereferences
	 *   `state.tokenizer`, so a tokenizer must be present in the loaded state.
	 */
	constructor(source, state=undefined) {
		state = Parser_State.load(state);
		const token_generator = new Switchable_Iterator();
		const stack = new String_Keyed_Stack(state);
		Object.assign(this, { source, state, stack, token_generator });
		this.switch_to();
	}

	/**
	 * Point the token generator at a tokenizer, starting from a position.
	 *
	 * Arguments left `undefined` keep whatever is currently in `state`.
	 *
	 * @param {object} [tokenizer] - Object exposing `iter_matches(source, position)`.
	 * @param {*} [position] - Position passed on to `iter_matches`.
	 */
	switch_to(tokenizer=undefined, position=undefined) {
		assign_defined(this.state, { tokenizer, position });
		this.token_generator.switch_to(this.state.tokenizer.iter_matches(this.source, this.state.position));
	}

	/**
	 * Consume every match from the token generator and run the rule actions.
	 *
	 * Matches whose rule has no action are skipped. For function actions,
	 * `state.position` is set to `match.pending_index` and `state.match` to the
	 * match before the action is called as `action(parser, match)`.
	 *
	 * @param {Function} [handler] - Called as `handler(parser, state.value)` once
	 *   iteration is done; it may replace the value (see `replace_value`).
	 * @returns {*} `state.value` as it is after the optional handler has run.
	 */
	parse(handler=undefined) {
		for (const match of this.token_generator) {
			const { action } = match.rule;
			if (!action) {
				continue;
			}
			if (typeof action !== 'function') {
				//TODO - proper error (possibly a warning, the warning is nice when you are developing, have to think about this one)
				console.log('NOT IMPLEMENTED', match.rule.action);
				continue;
			}
			// Record the position before running the action so that an action
			// which switches tokenizer resumes from after this match.
			this.state.position = match.pending_index;
			this.state.match = match;
			action(this, match);
		}
		if (handler) {
			this.state.match = null; //TODO: Decide if we should reset match here or not
			handler(this, this.state.value);
		}
		return this.state.value;
	}

	/**
	 * Append one or more tokens to the current `state.value`.
	 *
	 * @param {...*} tokens
	 */
	push_token(...tokens) {
		this.state.value.push(...tokens);
	}

	/**
	 * Replace the current `state.value` wholesale.
	 *
	 * @param {*} value
	 */
	replace_value(value) {
		this.state.value = value;
	}

	/**
	 * Start a nested frame with a fresh, empty `value` and (optionally) another tokenizer.
	 *
	 * @param {object} [tokenizer] - Tokenizer for the nested frame; when omitted
	 *   the current one stays in effect.
	 * @param {Function} [handler] - Called by `leave_sub_tokenizer` as
	 *   `handler(parser, frame_value, egress_match)` when this frame is left.
	 */
	enter_sub_tokenizer(tokenizer=undefined, handler=undefined) {
		this.stack.push_defined({ tokenizer, value: [] });
		// Record exactly one entry per frame (null when there is no handler) so
		// that leaving a handler-less frame nested inside a frame that has a
		// handler cannot consume the outer frame's handler.
		this.state.sub_tokenizer_handlers.push(handler || null);
		this.switch_to(tokenizer);
	}

	/**
	 * Leave the current nested frame and hand its value to the enclosing one.
	 *
	 * If the frame was entered with a handler, that handler receives the frame's
	 * value; otherwise the value is pushed as a single token onto the enclosing
	 * frame's `value`. Finally the token generator is switched back to the
	 * tokenizer and position now in `state`.
	 *
	 * @param {*} [egress_match=null] - Passed through to the handler as its third argument.
	 */
	leave_sub_tokenizer(egress_match=null) {
		const frame = this.stack.pop(true);
		// One entry was recorded per frame by `enter_sub_tokenizer`; an empty
		// list yields `undefined`, which falls through to the default branch.
		const handler = this.state.sub_tokenizer_handlers.pop();
		if (handler) {
			this.state.match = null; //TODO: Decide if we should reset match here or not
			handler(this, frame.value, egress_match);
		} else {
			this.push_token(frame.value);
		}
		this.switch_to();
	}
}