// Demo script: builds a RegExp_Tokenizer from small symbol tables (logic
// operators, generic operators, punctuation, grouping pairs, Greek letters)
// and runs a Parser over a sample sentence, logging the result.

import { Row_Based_Table } from '@efforting.tech/table';
import { load_raster_table } from '@efforting.tech/table/raster-table';
import { RegExp_Tokenizer } from '@efforting.tech/parsing/regexp-dispatch';
import { RegExp_Token_Parsing_Rule, Parser } from '@efforting.tech/parsing/generic-parsing';

/**
 * Load a raster table as a Row_Based_Table and trim every cell.
 *
 * @param raster - Raster table text handed to `load_raster_table`.
 * @returns The table produced by `load_raster_table`, after asking it to
 *          replace each cell with its trimmed value.
 */
function load_table(raster) {
  const loaded = load_raster_table(raster, Row_Based_Table);
  loaded.replace_all_cells((entry) => entry.cell.trim());
  return loaded;
}

// NOTE(review): the table literals below are reproduced exactly as they appear
// in this file. If the raster loader expects one row per line, confirm the
// whitespace inside these literals against the intended layout.

const logic_ops = load_table(` name symbol ---- ------ AND ∧ OR ∨ XOR ⊕ NAND ↑ NOR ↓ XNOR ⊙ IMPLIES → IFF ↔ NOT ¬ `);

const generic_ops = load_table(` name symbol ---- ------ PLUS + HYPHEN - DOT · ASTERISK * CROSS × SLASH / CARET ^ UNDERSCORE _ PERCENT % `);

const punctuation = load_table(` name symbol ---- ------ COMMA , SEMI_COLON ; COLON : PERIOD . `);

const grouping = load_table(` name left right ---- ---- ----- PARENTESIS ( ) SQUARE_BRACKET [ ] CURLY_BRACE { } ANGLE_BRACKET ⟨ ⟩ DOUBLE_ARROW_BRACKET « » `);

const greek_chars = load_table(` name lower upper ---- ----- ----- ALPHA α Α BETA β Β GAMMA γ Γ DELTA δ Δ EPSILON ε Ε ZETA ζ Ζ ETA η Η THETA θ Θ IOTA ι Ι KAPPA κ Κ LAMBDA λ Λ MU μ Μ NU ν Ν XI ξ Ξ OMICRON ο Ο PI π Π RHO ρ Ρ SIGMA σ Σ TAU τ Τ UPSILON υ Υ PHI φ Φ CHI χ Χ PSI ψ Ψ OMEGA ω Ω `);

const rt = new RegExp_Tokenizer();

/**
 * Register a rule on `rt` whose handler pushes `{ kind: 'TOKEN', match }`.
 * A fresh handler closure is created for every rule.
 *
 * @param pattern - Pattern passed as the rule's first argument (a RegExp or
 *                  an escaped source string).
 * @param rule_name - Name passed as the rule's third argument.
 */
function add_plain_token_rule(pattern, rule_name) {
  rt.add_rules(new RegExp_Token_Parsing_Rule(
    pattern,
    (tokenizer, match) => tokenizer.push_token({ kind: 'TOKEN', match }),
    rule_name,
  ));
}

// Rules are registered in the same sequence as before: grouping pairs,
// operator/punctuation symbols, Greek letters, WORD, WHITESPACE.
// (Whether the tokenizer is sensitive to this order is not visible here.)

// Grouping pairs: the left delimiter enters a sub-tokenizer and supplies a
// callback that pushes an EXPR token carrying the group name, the value it is
// given, and both delimiter matches; the right delimiter leaves the
// sub-tokenizer.
for (const { name, left, right } of grouping.iter_objects()) {
  const open_group = (tokenizer, ingress_match) => {
    const emit_expr = (inner_tokenizer, value, egress_match) =>
      inner_tokenizer.push_token({ kind: 'EXPR', name, value, ingress_match, egress_match });
    return tokenizer.enter_sub_tokenizer(undefined, emit_expr);
  };
  const close_group = (tokenizer, match) => tokenizer.leave_sub_tokenizer(match);

  rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(left), open_group, `LEFT_${name}`));
  rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(right), close_group, `RIGHT_${name}`));
}

// Single-symbol tables: one TOKEN rule per row, named after the row.
const symbol_tables = [logic_ops, generic_ops, punctuation];
for (const symbol_table of symbol_tables) {
  for (const row of symbol_table.iter_objects()) {
    add_plain_token_rule(RegExp.escape(row.symbol), row.name);
  }
}

// Greek letters: separate LOWER_/UPPER_ rules per row, lower case first.
for (const letter of greek_chars.iter_objects()) {
  add_plain_token_rule(RegExp.escape(letter.lower), `LOWER_${letter.name}`);
  add_plain_token_rule(RegExp.escape(letter.upper), `UPPER_${letter.name}`);
}

add_plain_token_rule(/\w+/, 'WORD');

// WHITESPACE is registered with a null handler.
// NOTE(review): presumably this makes the tokenizer skip whitespace — confirm.
rt.add_rules(new RegExp_Token_Parsing_Rule(/\s+/, null, 'WHITESPACE'));

const text = 'Hello World (how are you (doing)) I may ask';
const p = new Parser(text, { tokenizer: rt });

console.log(p.parse());