// Listing metadata: 146 lines, 3.0 KiB, JavaScript
import { Row_Based_Table } from '@efforting.tech/table';
|
||
import { load_raster_table } from '@efforting.tech/table/raster-table';
|
||
import { RegExp_Tokenizer } from '@efforting.tech/parsing/regexp-dispatch';
|
||
import { RegExp_Token_Parsing_Rule, Parser } from '@efforting.tech/parsing/generic-parsing';
|
||
|
||
|
||
/**
 * Load a raster (plain-text) table and trim every cell.
 *
 * The text is handed to `load_raster_table` together with `Row_Based_Table`;
 * each cell of the resulting table is then replaced by its `trim()`-ed value.
 *
 * @param {string} raster - Table text, in whatever layout `load_raster_table` accepts.
 * @returns {object} The table returned by `load_raster_table`, after trimming.
 */
function load_table(raster) {
	const table = load_raster_table(raster, Row_Based_Table);
	// The return value of replace_all_cells is ignored, so it is presumably an
	// in-place update of `table` — TODO confirm against the table package.
	table.replace_all_cells(({ cell }) => cell.trim());
	return table;
}
|
||
|
||
// Logical connectives: one row per operator, mapping a token name to its symbol.
// NOTE(review): column spacing was rebuilt so every cell starts under its rule
// dashes — confirm this matches the layout `load_raster_table` expects.
const logic_ops = load_table(`

name     symbol
----     ------
AND      ∧
OR       ∨
XOR      ⊕
NAND     ↑
NOR      ↓
XNOR     ⊙
IMPLIES  →
IFF      ↔
NOT      ¬

`);
|
||
|
||
// General-purpose operator characters: token name and the single symbol it stands for.
// NOTE(review): column spacing was rebuilt so every cell starts under its rule
// dashes — confirm this matches the layout `load_raster_table` expects.
const generic_ops = load_table(`

name        symbol
----        ------
PLUS        +
HYPHEN      -
DOT         ·
ASTERISK    *
CROSS       ×
SLASH       /
CARET       ^
UNDERSCORE  _
PERCENT     %

`);
|
||
|
||
// Punctuation marks: token name and the character it stands for.
// NOTE(review): column spacing was rebuilt so every cell starts under its rule
// dashes — confirm this matches the layout `load_raster_table` expects.
const punctuation = load_table(`

name        symbol
----        ------
COMMA       ,
SEMI_COLON  ;
COLON       :
PERIOD      .

`);
|
||
|
||
// Paired grouping delimiters: a name plus the opening (left) and closing (right) character.
// The first entry is spelled PARENTHESIS (the earlier spelling PARENTESIS was a typo);
// rule and token names derived from this column change accordingly.
// NOTE(review): column spacing was rebuilt so every cell starts under its rule
// dashes — confirm this matches the layout `load_raster_table` expects.
const grouping = load_table(`

name                  left  right
----                  ----  -----
PARENTHESIS           (     )
SQUARE_BRACKET        [     ]
CURLY_BRACE           {     }
ANGLE_BRACKET         ⟨     ⟩
DOUBLE_ARROW_BRACKET  «     »

`);
|
||
|
||
// Greek alphabet: letter name with its lower-case and upper-case character.
// NOTE(review): column spacing was rebuilt so every cell starts under its rule
// dashes — confirm this matches the layout `load_raster_table` expects.
const greek_chars = load_table(`

name     lower  upper
----     -----  -----
ALPHA    α      Α
BETA     β      Β
GAMMA    γ      Γ
DELTA    δ      Δ
EPSILON  ε      Ε
ZETA     ζ      Ζ
ETA      η      Η
THETA    θ      Θ
IOTA     ι      Ι
KAPPA    κ      Κ
LAMBDA   λ      Λ
MU       μ      Μ
NU       ν      Ν
XI       ξ      Ξ
OMICRON  ο      Ο
PI       π      Π
RHO      ρ      Ρ
SIGMA    σ      Σ
TAU      τ      Τ
UPSILON  υ      Υ
PHI      φ      Φ
CHI      χ      Χ
PSI      ψ      Ψ
OMEGA    ω      Ω

`);
|
||
|
||
|
||
|
||
|
||
|
||
// Tokenizer used by the demo at the end of the file. Rules are registered in
// this order: grouping delimiters, operator and punctuation symbols, Greek
// letters, then the generic WORD and WHITESPACE rules.
// NOTE(review): whether registration order decides precedence (e.g. UNDERSCORE
// before the /\w+/ WORD rule) depends on RegExp_Tokenizer — confirm.
// NOTE(review): RegExp.escape is a recent addition to the language; confirm the
// target runtime provides it.
const rt = new RegExp_Tokenizer();

// Handler shared by every single-symbol rule: pushes a TOKEN carrying the match.
const push_plain_token = (tokenizer, match) => tokenizer.push_token({ kind: 'TOKEN', match });

// Grouping delimiters: the left character enters a sub-tokenizer and the right
// character leaves it. On leaving, an EXPR token is pushed that records the
// group name, the collected value and both delimiter matches.
// NOTE(review): the RIGHT_* rules are not tied to a specific LEFT_* rule here,
// so whether "(" can be closed by "]" depends on leave_sub_tokenizer — confirm.
for (const { name, left, right } of grouping.iter_objects()) {
	const enter_group = (tokenizer, ingress_match) => tokenizer.enter_sub_tokenizer(
		undefined,
		(owner, value, egress_match) => owner.push_token(
			{ kind: 'EXPR', name, value, ingress_match, egress_match },
		),
	);
	const leave_group = (tokenizer, match) => tokenizer.leave_sub_tokenizer(match);

	rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(left), enter_group, `LEFT_${name}`));
	rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(right), leave_group, `RIGHT_${name}`));
}

// Operators and punctuation: one literal rule per symbol, named after its table row.
for (const table of [logic_ops, generic_ops, punctuation]) {
	for (const { name, symbol } of table.iter_objects()) {
		rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(symbol), push_plain_token, name));
	}
}

// Greek letters: separate rules for the lower-case and upper-case form.
for (const { name, lower, upper } of greek_chars.iter_objects()) {
	rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(lower), push_plain_token, `LOWER_${name}`));
	rt.add_rules(new RegExp_Token_Parsing_Rule(RegExp.escape(upper), push_plain_token, `UPPER_${name}`));
}

// Generic fallbacks: runs of word characters become WORD tokens; the
// WHITESPACE rule is registered with a null handler.
rt.add_rules(new RegExp_Token_Parsing_Rule(/\w+/, push_plain_token, 'WORD'));
rt.add_rules(new RegExp_Token_Parsing_Rule(/\s+/, null, 'WHITESPACE'));
|
||
|
||
|
||
// Demo: run the parser over a sentence containing nested parentheses and
// print whatever parse() returns.
const text = 'Hello World (how are you (doing)) I may ask';
const p = new Parser(text, { tokenizer: rt });

//console.log(rt.rules.at(-3));

console.log(p.parse());