Moved generic parsing to library, cleaned up experiment
This commit is contained in:
@@ -1,107 +1,8 @@
|
||||
import { RegExp_Tokenizer, RegExp_Token_Rule } from '@efforting.tech/parsing/regexp-dispatch';
|
||||
import { Switchable_Iterator } from '@efforting.tech/data/iteration-utilities';
|
||||
import { String_Keyed_Stack } from '@efforting.tech/data/stack';
|
||||
import { assign_defined } from '@efforting.tech/data/object-utilities';
|
||||
import * as F from '@efforting.tech/schema/field-configuration-factories';
|
||||
import { RegExp_Tokenizer } from '@efforting.tech/parsing/regexp-dispatch';
|
||||
import { RegExp_Token_Parsing_Rule, Parser } from '@efforting.tech/parsing/generic-parsing';
|
||||
|
||||
import { inspect } from 'node:util';
|
||||
|
||||
|
||||
/**
 * A RegExp token rule that also carries an `action`.
 *
 * `Parser.parse` reads `match.rule.action` and, when it is a function,
 * invokes it as `action(parser, match)`.
 */
class RegExp_Token_Parsing_Rule extends RegExp_Token_Rule {

	/**
	 * @param pattern - Forwarded unchanged to the `RegExp_Token_Rule` constructor.
	 * @param action - Value stored on the rule as `this.action`.
	 * @param [identifier] - Forwarded unchanged to the `RegExp_Token_Rule` constructor.
	 */
	constructor(pattern, action, identifier = undefined) {
		super(pattern, identifier);
		this.action = action;
	}

}
|
||||
|
||||
|
||||
/**
 * Schema describing the mutable state a `Parser` works on.
 * `Parser` passes its constructor's `state` argument through `Parser_State.load`.
 */
const Parser_State = new F.Schema(
	{
		// Index handed to `tokenizer.iter_matches` whenever the parser (re)starts tokenizing.
		position: F.value(0, 'Pending position in source'),

		// Built by a factory rather than given as a literal default.
		value: F.factory(() => { return []; }, 'Pending value to return'),

		// Handlers registered by `enter_sub_tokenizer`, consumed by `leave_sub_tokenizer`.
		sub_tokenizer_handlers: F.factory(() => { return []; }, 'Pending sub tokenizer handlers'),

		tokenizer: F.value(null, 'Current tokenizer'),

		context: F.value(null, 'User supplied context'),
	},
	'Parser state',
);
|
||||
|
||||
|
||||
|
||||
/**
 * Runs a tokenizer over `source` and dispatches each match to the `action`
 * of the rule that produced it. Actions receive the parser itself and may
 * collect tokens, replace the pending value, or enter/leave sub tokenizers.
 */
class Parser {

	/**
	 * @param source - Input handed to `tokenizer.iter_matches`.
	 * @param [state] - Initial state; passed through `Parser_State.load`.
	 */
	constructor(source, state = undefined) {
		state = Parser_State.load(state);
		const token_generator = new Switchable_Iterator();
		const stack = new String_Keyed_Stack(state);
		Object.assign(this, { source, state, stack, token_generator });
		this.switch_to();
	}

	/**
	 * Point the token generator at a tokenizer/position pair.
	 * Arguments left `undefined` are handed to `assign_defined` as-is, so the
	 * values already in `this.state` are what `iter_matches` receives.
	 *
	 * @param [tokenizer] - Tokenizer to store in `state.tokenizer`.
	 * @param [position] - Position to store in `state.position`.
	 */
	switch_to(tokenizer = undefined, position = undefined) {
		assign_defined(this.state, { tokenizer, position });
		const matches = this.state.tokenizer.iter_matches(this.source, this.state.position);
		this.token_generator.switch_to(matches);
	}

	/**
	 * Consume every match from the token generator, calling each rule's action.
	 *
	 * @param [handler] - Called once as `handler(parser, value)` after the last
	 *   match; it may call `replace_value` to change what is returned.
	 * @returns `state.value` as it stands after the optional handler has run.
	 */
	parse(handler = undefined) {

		for (const match of this.token_generator) {
			const { action } = match.rule;

			// Rules without an action are simply skipped.
			if (!action) { continue; }

			if (typeof action !== 'function') { //TODO - proper error (possibly a warning)
				console.log('NOT IMPLEMENTED', match.rule.action);
				continue;
			}

			// Record the position before the action runs so that an action which
			// switches tokenizer resumes from `match.pending_index`.
			this.state.position = match.pending_index;
			this.state.match = match;
			action(this, match);
		}

		if (handler) {
			this.state.match = null; //TODO: Decide if we should reset match here or not
			handler(this, this.state.value);
		}

		// Read after the handler ran: the handler may have replaced the value.
		return this.state.value;
	}

	/**
	 * Append tokens to the pending value.
	 * @param {...*} tokens - Items pushed onto `state.value`.
	 */
	push_token(...tokens) {
		this.state.value.push(...tokens);
	}

	/**
	 * Replace the pending value wholesale.
	 * @param value - New `state.value`.
	 */
	replace_value(value) {
		this.state.value = value;
	}

	/**
	 * Open a nested frame with a fresh, empty pending value and switch to
	 * `tokenizer` (or keep the current one when it is `undefined`).
	 *
	 * @param [tokenizer] - Tokenizer to use inside the frame.
	 * @param [handler] - Called as `handler(parser, frame_value)` when this frame
	 *   is left; without it the frame's value is pushed as a single token.
	 */
	enter_sub_tokenizer(tokenizer = undefined, handler = undefined) {
		this.stack.push_defined({ tokenizer, value: [] });

		// One slot per frame, even without a handler. Pushing only real handlers
		// let a handler-less inner frame consume the enclosing frame's handler.
		this.state.sub_tokenizer_handlers.push(handler || null);

		this.switch_to(tokenizer);
	}

	/**
	 * Close the innermost frame opened by `enter_sub_tokenizer` and deliver its
	 * value either to that frame's handler or, if none, as a single token.
	 */
	leave_sub_tokenizer() {
		// NOTE(review): the meaning of the `true` argument is defined by String_Keyed_Stack - confirm.
		const frame = this.stack.pop(true);

		// `pop()` yields `undefined` on an empty list, which falls through to the
		// no-handler branch just like an explicit empty slot.
		const handler = this.state.sub_tokenizer_handlers.pop();

		if (handler) {
			this.state.match = null; //TODO: Decide if we should reset match here or not
			handler(this, frame.value);
		} else {
			this.push_token(frame.value);
		}

		this.switch_to();
	}

}
|
||||
|
||||
|
||||
|
||||
const text = 'Hello World (how are you (doing)) I may ask';
|
||||
|
||||
const rt = new RegExp_Tokenizer();
|
||||
@@ -112,7 +13,6 @@ rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(')'), (tokenizer, match
|
||||
|
||||
const p = new Parser(text, { tokenizer: rt });
|
||||
|
||||
|
||||
console.log(inspect(p.parse((tokenizer, value) => tokenizer.replace_value({kind: 'parsing result', value})), { colors: true, depth: null }));
|
||||
|
||||
/*
|
||||
@@ -137,20 +37,4 @@ console.log(inspect(p.parse((tokenizer, value) => tokenizer.replace_value({kind:
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
process.exit()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
for (const m of rt.iter_matches(text)) {
|
||||
console.log({identifier: m.identifier, value: m.value });
|
||||
};
|
||||
100
source/parsing/generic-parsing.mjs
Normal file
100
source/parsing/generic-parsing.mjs
Normal file
@@ -0,0 +1,100 @@
|
||||
import { RegExp_Token_Rule } from '@efforting.tech/parsing/regexp-dispatch';
|
||||
import { Switchable_Iterator } from '@efforting.tech/data/iteration-utilities';
|
||||
import { String_Keyed_Stack } from '@efforting.tech/data/stack';
|
||||
import { assign_defined } from '@efforting.tech/data/object-utilities';
|
||||
import * as F from '@efforting.tech/schema/field-configuration-factories';
|
||||
|
||||
/**
 * A RegExp token rule that also carries an `action`.
 *
 * `Parser.parse` reads `match.rule.action` and, when it is a function,
 * invokes it as `action(parser, match)`.
 */
export class RegExp_Token_Parsing_Rule extends RegExp_Token_Rule {

	/**
	 * @param pattern - Forwarded unchanged to the `RegExp_Token_Rule` constructor.
	 * @param action - Value stored on the rule as `this.action`.
	 * @param [identifier] - Forwarded unchanged to the `RegExp_Token_Rule` constructor.
	 */
	constructor(pattern, action, identifier = undefined) {
		super(pattern, identifier);
		this.action = action;
	}

}
|
||||
|
||||
|
||||
/**
 * Schema describing the mutable state a `Parser` works on.
 * `Parser` passes its constructor's `state` argument through `Parser_State.load`.
 */
export const Parser_State = new F.Schema(
	{
		// Index handed to `tokenizer.iter_matches` whenever the parser (re)starts tokenizing.
		position: F.value(0, 'Pending position in source'),

		// Built by a factory rather than given as a literal default.
		value: F.factory(() => { return []; }, 'Pending value to return'),

		// Handlers registered by `enter_sub_tokenizer`, consumed by `leave_sub_tokenizer`.
		sub_tokenizer_handlers: F.factory(() => { return []; }, 'Pending sub tokenizer handlers'),

		tokenizer: F.value(null, 'Current tokenizer'),

		context: F.value(null, 'User supplied context'),
	},
	'Parser state',
);
|
||||
|
||||
|
||||
|
||||
/**
 * Runs a tokenizer over `source` and dispatches each match to the `action`
 * of the rule that produced it. Actions receive the parser itself and may
 * collect tokens, replace the pending value, or enter/leave sub tokenizers.
 */
export class Parser {

	/**
	 * @param source - Input handed to `tokenizer.iter_matches`.
	 * @param [state] - Initial state; passed through `Parser_State.load`.
	 */
	constructor(source, state = undefined) {
		state = Parser_State.load(state);
		const token_generator = new Switchable_Iterator();
		const stack = new String_Keyed_Stack(state);
		Object.assign(this, { source, state, stack, token_generator });
		this.switch_to();
	}

	/**
	 * Point the token generator at a tokenizer/position pair.
	 * Arguments left `undefined` are handed to `assign_defined` as-is, so the
	 * values already in `this.state` are what `iter_matches` receives.
	 *
	 * @param [tokenizer] - Tokenizer to store in `state.tokenizer`.
	 * @param [position] - Position to store in `state.position`.
	 */
	switch_to(tokenizer = undefined, position = undefined) {
		assign_defined(this.state, { tokenizer, position });
		const matches = this.state.tokenizer.iter_matches(this.source, this.state.position);
		this.token_generator.switch_to(matches);
	}

	/**
	 * Consume every match from the token generator, calling each rule's action.
	 *
	 * @param [handler] - Called once as `handler(parser, value)` after the last
	 *   match; it may call `replace_value` to change what is returned.
	 * @returns `state.value` as it stands after the optional handler has run.
	 */
	parse(handler = undefined) {

		for (const match of this.token_generator) {
			const { action } = match.rule;

			// Rules without an action are simply skipped.
			if (!action) { continue; }

			if (typeof action !== 'function') { //TODO - proper error (possibly a warning, the warning is nice when you are developing, have to think about this one)
				console.log('NOT IMPLEMENTED', match.rule.action);
				continue;
			}

			// Record the position before the action runs so that an action which
			// switches tokenizer resumes from `match.pending_index`.
			this.state.position = match.pending_index;
			this.state.match = match;
			action(this, match);
		}

		if (handler) {
			this.state.match = null; //TODO: Decide if we should reset match here or not
			handler(this, this.state.value);
		}

		// Read after the handler ran: the handler may have replaced the value.
		return this.state.value;
	}

	/**
	 * Append tokens to the pending value.
	 * @param {...*} tokens - Items pushed onto `state.value`.
	 */
	push_token(...tokens) {
		this.state.value.push(...tokens);
	}

	/**
	 * Replace the pending value wholesale.
	 * @param value - New `state.value`.
	 */
	replace_value(value) {
		this.state.value = value;
	}

	/**
	 * Open a nested frame with a fresh, empty pending value and switch to
	 * `tokenizer` (or keep the current one when it is `undefined`).
	 *
	 * @param [tokenizer] - Tokenizer to use inside the frame.
	 * @param [handler] - Called as `handler(parser, frame_value)` when this frame
	 *   is left; without it the frame's value is pushed as a single token.
	 */
	enter_sub_tokenizer(tokenizer = undefined, handler = undefined) {
		this.stack.push_defined({ tokenizer, value: [] });

		// One slot per frame, even without a handler. Pushing only real handlers
		// let a handler-less inner frame consume the enclosing frame's handler.
		this.state.sub_tokenizer_handlers.push(handler || null);

		this.switch_to(tokenizer);
	}

	/**
	 * Close the innermost frame opened by `enter_sub_tokenizer` and deliver its
	 * value either to that frame's handler or, if none, as a single token.
	 */
	leave_sub_tokenizer() {
		// NOTE(review): the meaning of the `true` argument is defined by String_Keyed_Stack - confirm.
		const frame = this.stack.pop(true);

		// `pop()` yields `undefined` on an empty list, which falls through to the
		// no-handler branch just like an explicit empty slot.
		const handler = this.state.sub_tokenizer_handlers.pop();

		if (handler) {
			this.state.match = null; //TODO: Decide if we should reset match here or not
			handler(this, frame.value);
		} else {
			this.push_token(frame.value);
		}

		this.switch_to();
	}

}
|
||||
Reference in New Issue
Block a user