diff --git a/experiments/generic-parser-1.mjs b/experiments/generic-parser-1.mjs index 034addd..9c1b289 100644 --- a/experiments/generic-parser-1.mjs +++ b/experiments/generic-parser-1.mjs @@ -1,107 +1,8 @@ -import { RegExp_Tokenizer, RegExp_Token_Rule } from '@efforting.tech/parsing/regexp-dispatch'; -import { Switchable_Iterator } from '@efforting.tech/data/iteration-utilities'; -import { String_Keyed_Stack } from '@efforting.tech/data/stack'; -import { assign_defined } from '@efforting.tech/data/object-utilities'; -import * as F from '@efforting.tech/schema/field-configuration-factories'; +import { RegExp_Tokenizer } from '@efforting.tech/parsing/regexp-dispatch'; +import { RegExp_Token_Parsing_Rule, Parser } from '@efforting.tech/parsing/generic-parsing'; + import { inspect } from 'node:util'; - -class RegExp_Token_Parsing_Rule extends RegExp_Token_Rule { - constructor(pattern, action, identifier=undefined) { - super(pattern, identifier); - Object.assign(this, { action }); - } -} - - -const Parser_State = new F.Schema({ - - position: F.value(0, 'Pending position in source'), - value: F.factory(() => [], 'Pending value to return'), - sub_tokenizer_handlers: F.factory(() => [], 'Pending sub tokenizer handlers'), - tokenizer: F.value(null, 'Current tokenizer'), - context: F.value(null, 'User supplied context'), - -}, 'Parser state'); - - - -class Parser { - constructor(source, state=undefined) { - state = Parser_State.load(state); - const token_generator = new Switchable_Iterator(); - const stack = new String_Keyed_Stack(state); - Object.assign(this, { source, state, stack, token_generator }); - this.switch_to(); - } - - - switch_to(tokenizer=undefined, position=undefined) { - assign_defined(this.state, { tokenizer, position }); - this.token_generator.switch_to(this.state.tokenizer.iter_matches(this.source, this.state.position)); - } - - parse(handler=undefined) { - - for (const match of this.token_generator) { - const { action } = match.rule; - if (!action) { 
continue; } - if (typeof action !== 'function') { - console.log('NOT IMPLEMENTED', match.rule.action); - continue; - } - - this.state.position = match.pending_index; - this.state.match = match; - action(this, match); - - } - - if (handler) { - this.state.match = null; //TODO: Decide if we should reset match here or not - handler(this, this.state.value); - return this.state.value; - } else { - return this.state.value; - } - - - } - - push_token(...tokens) { - this.state.value.push(...tokens); - } - - replace_value(value) { - this.state.value = value; - } - - enter_sub_tokenizer(tokenizer=undefined, handler=undefined) { - this.stack.push_defined({ tokenizer, value: [] }); - if (handler) { - this.state.sub_tokenizer_handlers.push(handler); - } - this.switch_to(tokenizer); - } - - leave_sub_tokenizer() { - const frame = this.stack.pop(true); - const { sub_tokenizer_handlers } = this.state; - - if (sub_tokenizer_handlers.length) { - const handler = sub_tokenizer_handlers.pop(); - this.state.match = null; //TODO: Decide if we should reset match here or not - handler(this, frame.value); - } else { - this.push_token(frame.value); - } - this.switch_to(); - } - -} - - - const text = 'Hello World (how are you (doing)) I may ask'; const rt = new RegExp_Tokenizer(); @@ -112,7 +13,6 @@ rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(')'), (tokenizer, match const p = new Parser(text, { tokenizer: rt }); - console.log(inspect(p.parse((tokenizer, value) => tokenizer.replace_value({kind: 'parsing result', value})), { colors: true, depth: null })); /* @@ -137,20 +37,4 @@ console.log(inspect(p.parse((tokenizer, value) => tokenizer.replace_value({kind: ] } - - */ - - - -process.exit() - - - - - - - -for (const m of rt.iter_matches(text)) { - console.log({identifier: m.identifier, value: m.value }); -}; \ No newline at end of file diff --git a/source/parsing/generic-parsing.mjs b/source/parsing/generic-parsing.mjs new file mode 100644 index 0000000..b5b4299 --- /dev/null +++ 
b/source/parsing/generic-parsing.mjs
@@ -0,0 +1,142 @@
+import { RegExp_Token_Rule } from '@efforting.tech/parsing/regexp-dispatch';
+import { Switchable_Iterator } from '@efforting.tech/data/iteration-utilities';
+import { String_Keyed_Stack } from '@efforting.tech/data/stack';
+import { assign_defined } from '@efforting.tech/data/object-utilities';
+import * as F from '@efforting.tech/schema/field-configuration-factories';
+
+/**
+ * Token rule that pairs a regexp pattern with a parser action.
+ *
+ * `Parser.parse` calls `action(parser, match)` for every match whose rule
+ * carries a function here and skips matches whose rule has a falsy `action`.
+ */
+export class RegExp_Token_Parsing_Rule extends RegExp_Token_Rule {
+	/**
+	 * @param pattern - Passed through to `RegExp_Token_Rule`.
+	 * @param action - Stored on the rule as `this.action`.
+	 * @param [identifier] - Passed through to `RegExp_Token_Rule`.
+	 */
+	constructor(pattern, action, identifier=undefined) {
+		super(pattern, identifier);
+		Object.assign(this, { action });
+	}
+}
+
+
+// Schema through which the `Parser` constructor loads its `state` argument.
+export const Parser_State = new F.Schema({
+
+	position: F.value(0, 'Pending position in source'),
+	value: F.factory(() => [], 'Pending value to return'),
+	sub_tokenizer_handlers: F.factory(() => [], 'Pending sub tokenizer handlers'),
+	tokenizer: F.value(null, 'Current tokenizer'),
+	context: F.value(null, 'User supplied context'),
+
+}, 'Parser state');
+
+
+/**
+ * Runs a tokenizer over `source`, hands every match to the action of its rule
+ * and collects the result in `state.value`.
+ */
+export class Parser {
+	/**
+	 * @param source - Input handed to the tokenizer's `iter_matches`.
+	 * @param [state] - Initial state, loaded through `Parser_State`. It has to
+	 *   provide a `tokenizer`: the constructor starts iterating with it right away.
+	 */
+	constructor(source, state=undefined) {
+		state = Parser_State.load(state);
+		const token_generator = new Switchable_Iterator();
+		const stack = new String_Keyed_Stack(state);
+		Object.assign(this, { source, state, stack, token_generator });
+		this.switch_to();
+	}
+
+	/**
+	 * Stores whichever of `tokenizer` / `position` is defined in the state and
+	 * restarts matching with the state's tokenizer at the state's position.
+	 */
+	switch_to(tokenizer=undefined, position=undefined) {
+		assign_defined(this.state, { tokenizer, position });
+		this.token_generator.switch_to(this.state.tokenizer.iter_matches(this.source, this.state.position));
+	}
+
+	/**
+	 * Consumes all remaining matches and returns the resulting value.
+	 *
+	 * @param [handler] - Called as `handler(parser, value)` after the last
+	 *   match; it may call `replace_value` to change what is returned.
+	 * @returns `state.value` as it is after the optional handler has run.
+	 */
+	parse(handler=undefined) {
+
+		for (const match of this.token_generator) {
+			const { action } = match.rule;
+			if (!action) { continue; }
+
+			if (typeof action !== 'function') { //TODO - proper error (possibly a warning, the warning is nice when you are developing, have to think about this one)
+				console.log('NOT IMPLEMENTED', match.rule.action);
+				continue;
+			}
+
+			// Update the position before the action runs: an action that switches
+			// tokenizer resumes from `state.position`.
+			this.state.position = match.pending_index;
+			this.state.match = match;
+			action(this, match);
+		}
+
+		if (handler) {
+			this.state.match = null; //TODO: Decide if we should reset match here or not
+			handler(this, this.state.value);
+		}
+
+		// Read the value only now, the handler may have replaced it.
+		return this.state.value;
+	}
+
+	/** Appends the given tokens to the value being built. */
+	push_token(...tokens) {
+		this.state.value.push(...tokens);
+	}
+
+	/** Replaces the value being built. */
+	replace_value(value) {
+		this.state.value = value;
+	}
+
+	/**
+	 * Opens a nested frame with a fresh, empty value and switches tokenizer.
+	 *
+	 * @param [tokenizer] - Tokenizer for the frame; presumably the current one stays in use when omitted.
+	 * @param [handler] - Called as `handler(parser, value)` when the frame is left.
+	 *   Without a handler the frame's value is pushed onto the enclosing value.
+	 */
+	enter_sub_tokenizer(tokenizer=undefined, handler=undefined) {
+		this.stack.push_defined({ tokenizer, value: [] });
+		// Record one entry per frame, also when there is no handler. Otherwise
+		// leaving a handler-less frame would pop and run the handler that
+		// belongs to an enclosing frame.
+		this.state.sub_tokenizer_handlers.push(handler || null);
+		this.switch_to(tokenizer);
+	}
+
+	/**
+	 * Closes the innermost frame and delivers its value to the frame's handler or,
+	 * if it has none, to the value that is current after `stack.pop` (presumably the enclosing one).
+	 */
+	leave_sub_tokenizer() {
+		const frame = this.stack.pop(true);
+		const handler = this.state.sub_tokenizer_handlers.pop();
+
+		if (handler) {
+			this.state.match = null; //TODO: Decide if we should reset match here or not
+			handler(this, frame.value);
+		} else {
+			this.push_token(frame.value);
+		}
+		this.switch_to();
+	}
+
+}