Compare commits
5 Commits
cf1abadfc9
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 8b7d99393d | |||
| 376ca6d2f2 | |||
| bee32ec5fa | |||
| 5afd363aa7 | |||
| 1fdaee0b57 |
7
Makefile
7
Makefile
@@ -2,12 +2,15 @@
|
|||||||
|
|
||||||
build/packages:
|
build/packages:
|
||||||
mkdir -p $@
|
mkdir -p $@
|
||||||
node tools/stage-for-pnpm.mjs package-manifest.yaml source $@
|
node tools/stage-for-npm.mjs package-manifest.yaml source $@
|
||||||
|
|
||||||
publish:
|
publish:
|
||||||
cd build/packages && ./publish-all.sh
|
cd build/packages && ./publish-all.sh
|
||||||
|
|
||||||
|
dev:
|
||||||
|
build/packages/local-install.sh
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf build
|
rm -rf build
|
||||||
|
|
||||||
.PHONY: clean build/packages publish
|
.PHONY: clean build/packages publish dev
|
||||||
6
experiments/package.json
Normal file
6
experiments/package.json
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"devDependencies": {
|
||||||
|
"tree-sitter": "^0.25.0",
|
||||||
|
"tree-sitter-javascript": "^0.25.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
15
experiments/regexp-tokenizer.mjs
Normal file
15
experiments/regexp-tokenizer.mjs
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
import { RegExp_Tokenizer, RegExp_Token_Rule } from '@efforting.tech/parsing/regexp-dispatch';
|
||||||
|
|
||||||
|
|
||||||
|
// Experiment: split a sample string into 'word' tokens plus default-classified filler text.
const rt = new RegExp_Tokenizer();

rt.add_rules(new RegExp_Token_Rule(/\w+/, 'word'));
// Text that no rule claims is reported under this identifier.
rt.set_default_identifier('random stuff');

//console.log(rt.rules);

//console.log(rt.closest_scanning_match('#Hello World!'));

for (const m of rt.iter_matches('#Hello World!')) {
	console.log({class: m.constructor.name, identifier: m.identifier, value: m.value, captured: m.captured });
}
|
||||||
@@ -3,6 +3,9 @@ import * as CF from '@efforting.tech/data/field-configuration-factories';
|
|||||||
import { inspect } from 'node:util';
|
import { inspect } from 'node:util';
|
||||||
import { Text_Tree_Node, Text_Tree_Settings } from '@efforting.tech/text/basic-tree';
|
import { Text_Tree_Node, Text_Tree_Settings } from '@efforting.tech/text/basic-tree';
|
||||||
import { RegExp_Resolver } from '@efforting.tech/rule-processing/resolvers';
|
import { RegExp_Resolver } from '@efforting.tech/rule-processing/resolvers';
|
||||||
|
import { parse_csv } from '@efforting.tech/data/string-utilities';
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -26,11 +29,11 @@ const root = Text_Tree_Node.from_string(ts, example_string);
|
|||||||
|
|
||||||
const d = new RegExp_Resolver([
|
const d = new RegExp_Resolver([
|
||||||
[/^animals:\s*(.*)$/, (c) => console.log("ANIMAL", c)],
|
[/^animals:\s*(.*)$/, (c) => console.log("ANIMAL", c)],
|
||||||
[/^trees:\s*(.*)$/, () => console.log("TREE")],
|
[/^trees:\s*(.*)$/, ({node, predicate_result}) => console.log(`TREE of node at line ${node.line_no}:`, parse_csv(predicate_result[1]))],
|
||||||
]);
|
]);
|
||||||
|
|
||||||
for (const child of root.children) {
|
for (const child of root.children) {
|
||||||
if (child.has_line) {
|
if (child.has_line) {
|
||||||
console.log(child.line, d.resolve(child.line));
|
console.log(child.line, d.resolve(child.line, { node: child }));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
52
experiments/tree-sitter-1.mjs
Normal file
52
experiments/tree-sitter-1.mjs
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import Parser from 'tree-sitter';
|
||||||
|
import JavaScript from 'tree-sitter-javascript';
|
||||||
|
import { readFileSync } from 'node:fs';
|
||||||
|
import { inspect } from 'node:util';
|
||||||
|
|
||||||
|
// NOTE: once upon a time
|
||||||
|
// there was some sort of
|
||||||
|
//
|
||||||
|
// example comment that we wanted to investigate
|
||||||
|
|
||||||
|
const parser = new Parser();
|
||||||
|
parser.setLanguage(JavaScript);
|
||||||
|
|
||||||
|
class example {
|
||||||
|
|
||||||
|
#private = 123
|
||||||
|
|
||||||
|
/*
|
||||||
|
This other comment
|
||||||
|
is of the style of a
|
||||||
|
block comment of course
|
||||||
|
*/
|
||||||
|
|
||||||
|
stuff() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse this very script. Resolving the path through import.meta.url (instead of a
// path relative to the working directory) lets the experiment run from any directory.
const source = readFileSync(new URL(import.meta.url), 'utf-8');
const tree = parser.parse(source);
|
||||||
|
|
||||||
|
/**
 * Depth-first, pre-order traversal of a node tree.
 *
 * Yields `node` itself and then, recursively, every descendant reachable through
 * `node.child(i)` for `i` in `[0, node.childCount)`.
 *
 * @param {object} node - Any object exposing `childCount` and `child(index)`.
 * @yields {object} The node followed by all of its descendants.
 */
function* iter_nodes(node) {
	yield node;
	for (let i = 0; i < node.childCount; i++) {
		yield* iter_nodes(node.child(i));
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Iterates over the direct children of a node, without descending further.
 *
 * @param {object} node - Any object exposing `childCount` and `child(index)`.
 * @yields {object} `node.child(i)` for each `i` in `[0, node.childCount)`.
 */
function *iter_child_nodes(node) {
	for (let i = 0; i < node.childCount; i++) {
		yield node.child(i);
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
// Dump every direct child of the root node: first its inspected form, then its source text.
for (const node of iter_child_nodes(tree.rootNode)) {
	//console.log({type: node.type, text: node.text?.slice(0, 40)});
	console.log(inspect(node));
	console.log({text: node.text});
}
|
||||||
@@ -35,6 +35,14 @@ packages:
|
|||||||
internal-dependencies:
|
internal-dependencies:
|
||||||
- data
|
- data
|
||||||
|
|
||||||
|
parsing:
|
||||||
|
path: source/parsing
|
||||||
|
#documentation: documentation/text
|
||||||
|
description: Generic string parsing
|
||||||
|
internal-dependencies:
|
||||||
|
- errors
|
||||||
|
- text
|
||||||
|
|
||||||
wip-packages:
|
wip-packages:
|
||||||
object-graph-storage:
|
object-graph-storage:
|
||||||
path: source/object-graph-storage
|
path: source/object-graph-storage
|
||||||
|
|||||||
25
planning/dsl.md
Normal file
25
planning/dsl.md
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
|
||||||
|
## Example of indented block
|
||||||
|
|
||||||
|
```
|
||||||
|
§ block
|
||||||
|
this content
|
||||||
|
is in the block body
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Inline
|
||||||
|
|
||||||
|
`«inline expression»`
|
||||||
|
|
||||||
|
## Escaping
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
«§» block
|
||||||
|
|
||||||
|
Here is an «la»inline expression«ra»
|
||||||
|
Or possibly «outer escape: inline expression»
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
19
planning/to-document.md
Normal file
19
planning/to-document.md
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# Readme.md
|
||||||
|
|
||||||
|
In `readme.md` we should explain that `npm install` can be used both with and without development dependencies.
|
||||||
|
|
||||||
|
We are likely to want some development dependencies for the following purposes:
|
||||||
|
|
||||||
|
- Template processing
|
||||||
|
- ECMAScript parsing for inline rich semantic documentation
|
||||||
|
|
||||||
|
|
||||||
|
## Installing tree-sitter-javascript
|
||||||
|
|
||||||
|
- requires `node-gyp`
|
||||||
|
|
||||||
|
```sh
|
||||||
|
CXXFLAGS="-std=c++20" npm i -D tree-sitter tree-sitter-javascript
|
||||||
|
```
|
||||||
|
|
||||||
|
This section should be somewhat expanded - especially regarding node-gyp
|
||||||
@@ -20,6 +20,11 @@ export function string_has_contents(str) {
|
|||||||
return /\S/.test(str);
|
return /\S/.test(str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Splits a string on commas and trims surrounding whitespace from every element.
 *
 * NOTE: This is for simple comma separated values, a future RFC-4180 compatible version
 * would have to be in a different module in this library (or be a third party thing).
 * There is no quoting or escaping, and an empty string yields a single empty element.
 *
 * @param {string} str - Comma separated text.
 * @returns {string[]} The trimmed elements in their original order.
 */
export function parse_csv(str) {
	return str.split(',').map(element => element.trim());
}
|
||||||
|
|
||||||
|
|
||||||
export function *indented_line_iterator(settings, text) {
|
export function *indented_line_iterator(settings, text) {
|
||||||
let line_no = settings.first_line;
|
let line_no = settings.first_line;
|
||||||
|
|||||||
@@ -1,5 +1,17 @@
|
|||||||
import { inspect } from 'node:util';
|
import { inspect } from 'node:util';
|
||||||
|
|
||||||
|
// § GROUP: Regexp tokenization
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Raised when a stretch of text is not claimed by any tokenizer rule and no
 * default action is available to classify it.
 *
 * The full `data` object is kept on the instance for programmatic inspection.
 */
export class Tokenization_Error extends Error {
	/**
	 * @param {object} data - Failure details.
	 * @param {*} [data.parser] - The tokenizer that failed.
	 * @param {string} data.value - The unclaimed text.
	 * @param {number} data.index - Index where the unclaimed text starts.
	 * @param {?number} data.end_index - Index where it ends, or null when no end was given.
	 */
	constructor(data) {
		const { value, index, end_index } = data;
		const span_end = end_index ?? 'end of input';
		super(`No token rule matched ${JSON.stringify(value)} at index ${index} (until ${span_end})`);
		this.name = 'Tokenization_Error';
		this.data = data;
	}
}
|
||||||
|
|
||||||
// § GROUP: Configuration field errors
|
// § GROUP: Configuration field errors
|
||||||
|
|
||||||
export class Data_Validation_Failed extends Error {
|
export class Data_Validation_Failed extends Error {
|
||||||
|
|||||||
182
source/parsing/regexp-dispatch.mjs
Normal file
182
source/parsing/regexp-dispatch.mjs
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
import * as RE from '@efforting.tech/text/regexp';
|
||||||
|
import { Tokenization_Error } from '@efforting.tech/errors';
|
||||||
|
|
||||||
|
|
||||||
|
// NOTE: There are some open questions about this implementation and API which may change as the library matures.
|
||||||
|
// Check out the example at experiments/regexp-tokenizer.mjs for more information on how to use this in its current state.
|
||||||
|
//
|
||||||
|
// Specifically it is not currently decided where the boundary between rule/action/capture should be
|
||||||
|
|
||||||
|
/**
 * A token produced by a rule whose pattern matched the text.
 *
 * Wraps the raw `RegExp.prototype.exec` result together with the rule that produced it.
 */
export class Pattern_Match {
	/**
	 * @param {RegExpExecArray} match - Result of executing the rule's pattern.
	 * @param {object} rule - The rule that matched; its `identifier` labels this token.
	 */
	constructor(match, rule) {
		Object.assign(this, { match, rule });
	}

	/** Identifier of the rule that produced this match. */
	get identifier() {
		return this.rule.identifier;
	}

	/** The full matched text. */
	get value() {
		return this.match[0];
	}

	/** The capture groups of the match, without the full match itself. */
	get captured() {
		return this.match.slice(1);
	}

	/** Index in the text right after this match, i.e. where tokenization continues. */
	get pending_index() {
		return this.match.index + this.match[0].length;
	}
}
|
||||||
|
|
||||||
|
/**
 * A token covering text that no rule matched.
 *
 * Its identifier is computed by calling `action` with the match itself.
 */
export class Default_Match {
	/**
	 * @param {string} text - The unmatched text.
	 * @param {number} index - Index where the text starts.
	 * @param {?number} end_index - Index where the text ends, or null when no end was given.
	 * @param {function(Default_Match): *} action - Called with this match; its return value becomes `identifier`.
	 */
	constructor(text, index, end_index, action) {
		// Populate the fields before invoking the action so it can inspect
		// text/index/end_index instead of receiving an empty object.
		Object.assign(this, { text, index, end_index, action });
		this.identifier = action(this);
	}

	/** The unmatched text. */
	get value() {
		return this.text;
	}

	/** Index where tokenization continues; null when the match has no end index. */
	get pending_index() {
		return this.end_index;
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Base class for tokenizer rules backed by a regular expression.
 *
 * Two derived patterns are compiled from the given one:
 *  - `immediate_pattern` carries the sticky (`y`) flag and matches only at `lastIndex`.
 *  - `scanning_pattern` carries the global (`g`) flag and searches forward from `lastIndex`.
 */
export class Abstract_RegExp_Token_Rule {
	/**
	 * @param {RegExp|string} pattern - Source pattern; handed to `RE.get_source` and `RE.get_flags`.
	 */
	constructor(pattern) {
		const pattern_source = RE.get_source(pattern);
		const pattern_flags = RE.get_flags(pattern);

		const immediate_flags = new Set([...pattern_flags, 'y']);
		const scanning_flags = new Set([...pattern_flags, 'g']);
		// A sticky flag inherited from the source pattern would override `g`
		// and pin the scan to lastIndex, so it must not reach the scanning pattern.
		scanning_flags.delete('y');

		const immediate_pattern = new RegExp(pattern_source, [...immediate_flags].join(''));
		const scanning_pattern = new RegExp(pattern_source, [...scanning_flags].join(''));

		Object.assign(this, { pattern, immediate_pattern, scanning_pattern });
	}
}
|
||||||
|
|
||||||
|
/**
 * A regular expression rule that labels its matches with an identifier.
 */
export class RegExp_Token_Rule extends Abstract_RegExp_Token_Rule {
	/**
	 * @param {RegExp|string} pattern - Pattern forwarded to the base class.
	 * @param {*} [identifier] - Label for matches of this rule; when omitted the rule itself is the identifier.
	 */
	constructor(pattern, identifier=undefined) {
		super(pattern);
		// `this` cannot be used as a default parameter value here: defaults are evaluated
		// before super() has run, which throws a ReferenceError in a derived constructor.
		Object.assign(this, { identifier: identifier === undefined ? this : identifier });
	}
}
|
||||||
|
|
||||||
|
/**
 * Splits text into tokens using an ordered list of regular expression rules.
 *
 * Each rule must expose `immediate_pattern` (matched exactly at the current position)
 * and `scanning_pattern` (searched forward from the current position). Text located
 * between rule matches is handed to `default_action`; without one it is an error.
 */
export class RegExp_Tokenizer {
	/**
	 * @param {Array} [rules] - Initial rules, tried in order.
	 * @param {function} [default_action] - Receives a Default_Match and returns its identifier.
	 */
	constructor(rules=[], default_action=undefined) {
		Object.assign(this, { rules, default_action });
	}

	/** Installs a default action that labels all unmatched text with a fixed identifier. */
	set_default_identifier(identifier) {
		this.default_action = (
			() => identifier
		);
	}

	/** Appends rules; rules added earlier take precedence over later ones. */
	add_rules(...rules_to_add) {
		this.rules.push(...rules_to_add);
	}

	/**
	 * Returns a Pattern_Match for the first rule that matches exactly at `position`,
	 * or undefined when no rule does.
	 */
	immediate_match(text, position=0) {
		for (const rule of this.rules) {
			const pattern = rule.immediate_pattern;
			pattern.lastIndex = position;
			const match = pattern.exec(text);
			// An empty match consumes nothing and would stall the tokenizer, so it does not count.
			if (match && match[0].length > 0) {
				return new Pattern_Match(match, rule);
			}
		}
	}

	/**
	 * Wraps unmatched text in a Default_Match.
	 * @throws {Tokenization_Error} When no default action is configured.
	 */
	_handle_default_match(value, index, end_index=null) {
		const { default_action } = this;
		if (!default_action) {
			throw new Tokenization_Error({ parser: this, value, index, end_index });
		}
		return new Default_Match(value, index, end_index, default_action);
	}

	/**
	 * Returns the next token starting at `position`:
	 *  - a rule match located exactly at `position`, or
	 *  - a default match covering the text up to the nearest rule match, or
	 *  - a default match covering the remaining text when no rule matches anymore, or
	 *  - undefined when nothing is left.
	 */
	closest_scanning_match(text, position=0) {
		const immediate_match = this.immediate_match(text, position);
		if (immediate_match) {
			return immediate_match;
		}

		// The strict comparison keeps the earliest rule when several matches start at the same index.
		let best_candidate;
		for (const candidate of this.iter_scanning_rule_candidates(text, position)) {
			if ((best_candidate === undefined) || (candidate.match.index < best_candidate.match.index)) {
				best_candidate = candidate;
			}
		}

		// There was no match, just get the tail
		if (!best_candidate) {
			const tail = text.slice(position);
			return tail.length ? this._handle_default_match(tail, position) : undefined;
		}

		// There was a match, check the head
		const head = text.slice(position, best_candidate.match.index);
		if (head.length) {
			return this._handle_default_match(head, position, best_candidate.match.index);
		}
		return best_candidate;
	}

	/**
	 * Iterates over all rules and yields the first non-empty match each rule finds
	 * at or after `position` (at most one match per rule).
	 */
	*iter_scanning_rule_candidates(text, position=0) {
		for (const rule of this.rules) {
			const pattern = rule.scanning_pattern;
			pattern.lastIndex = position;
			let match = pattern.exec(text);

			// Step over empty matches; exec() does not advance lastIndex past them by itself.
			while (match && match[0].length === 0) {
				pattern.lastIndex = match.index + 1;
				match = pattern.exec(text);
			}

			if (match) {
				yield new Pattern_Match(match, rule);
			}
		}
	}

	/**
	 * Yields consecutive tokens of `text`, starting at `position`, until the text is exhausted.
	 */
	*iter_matches(text, position=0) {
		while (true) {
			const pending = this.closest_scanning_match(text, position);
			if (!pending) {
				return;
			}
			yield pending;

			// A match without a continuation index covers the rest of the text.
			if (pending.pending_index == null) {
				return;
			}
			position = pending.pending_index;
		}
	}
}
|
||||||
|
|
||||||
@@ -1,13 +1,13 @@
|
|||||||
import { Item_Unresolvable } from '@efforting.tech/errors';
|
import { Item_Unresolvable } from '@efforting.tech/errors';
|
||||||
|
|
||||||
export class Abstract_Resolver {
|
export class Abstract_Resolver {
|
||||||
resolve(item) {
|
resolve(item, extra_info={}) {
|
||||||
const result = this.resolve_handler(item);
|
const result = this.resolve_handler(item);
|
||||||
if (!result?.handler) {
|
if (!result?.handler) {
|
||||||
throw new Item_Unresolvable({ resolver: this, item });
|
throw new Item_Unresolvable({ resolver: this, item });
|
||||||
}
|
}
|
||||||
// TO DOC: Spreading result into the resulting context means there are some reserved keys we need to be mindful of to avoid clobbering them
|
// TO DOC: Spreading result into the resulting context means there are some reserved keys we need to be mindful of to avoid clobbering them
|
||||||
return result.handler({ resolver: this, item, ...result });
|
return result.handler({ resolver: this, item, ...extra_info, ...result });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -39,7 +39,8 @@ export class Predicate_Resolver extends Abstract_Resolver {
|
|||||||
const { rules } = this;
|
const { rules } = this;
|
||||||
for (const [predicate, handler] of rules) {
|
for (const [predicate, handler] of rules) {
|
||||||
const predicate_result = predicate(item);
|
const predicate_result = predicate(item);
|
||||||
if (predicate_result !== undefined) {
|
// NOTE: to return a falsy predicate_result as a positive hit you must wrap it in something
|
||||||
|
if (predicate_result) {
|
||||||
return { handler, predicate_result };
|
return { handler, predicate_result };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -50,13 +51,9 @@ export class RegExp_Resolver extends Predicate_Resolver {
|
|||||||
constructor(rules=[]) {
|
constructor(rules=[]) {
|
||||||
// NOTE: Rules should be iterable as [predicate, handler] pairs
|
// NOTE: Rules should be iterable as [predicate, handler] pairs
|
||||||
super();
|
super();
|
||||||
Object.assign(this, { rules: rules.map(([pattern, handler]) => {
|
Object.assign(this, {
|
||||||
|
rules: rules.map(([pattern, handler]) => [(str) => str.match(pattern), handler])
|
||||||
const wrapped_handler = handler; //TODO
|
});
|
||||||
const predicate = ((str) => str.match(pattern));
|
|
||||||
|
|
||||||
return [predicate, wrapped_handler];
|
|
||||||
})});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
49
source/text/regexp.mjs
Normal file
49
source/text/regexp.mjs
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
/**
 * Returns the flags of a pattern as a set of single-character strings.
 *
 * @param {RegExp|*} pattern - A regular expression, or any other value (treated as flagless).
 * @returns {Set<string>} A new set of flags; empty when `pattern` is not a RegExp.
 */
export function get_flags(pattern) {
	if (pattern instanceof RegExp) {
		return new Set(pattern.flags);
	}
	return new Set();
}
|
||||||
|
/**
 * Returns the source text of a pattern.
 *
 * @param {RegExp|*} pattern - A regular expression, or a value that already is pattern source.
 * @returns {*} `pattern.source` for a RegExp; any other value is returned unchanged.
 */
export function get_source(pattern) {
	if (pattern instanceof RegExp) {
		return pattern.source;
	}
	return pattern;
}
|
||||||
|
|
||||||
|
/**
 * Concatenates patterns into a single regular expression.
 *
 * RegExp arguments contribute their source and their flags; every other argument is
 * appended to the source as-is (it is not escaped). The result carries the union of
 * all flags seen.
 *
 * @param {...(RegExp|string)} pattern_list - Pieces to concatenate, in order.
 * @returns {RegExp} The combined regular expression.
 */
export function concat(...pattern_list) {
	let pending_source = '';
	const pending_flags = new Set();

	for (const pattern of pattern_list) {
		if (pattern instanceof RegExp) {
			pending_source += pattern.source;
			for (const flag of pattern.flags) {
				pending_flags.add(flag);
			}
		} else {
			pending_source += pattern;
		}
	}

	return new RegExp(pending_source, [...pending_flags].join(''));
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Joins the sources of several patterns with the source of a separator pattern.
 *
 * Flags of the individual patterns are not carried over; only `flags` is applied.
 *
 * @param {Iterable<RegExp|string>} pattern_list - Patterns to join, in order.
 * @param {RegExp|string} separator - Pattern placed between consecutive entries.
 * @param {string} [flags] - Flags for the resulting regular expression.
 * @returns {RegExp} The joined regular expression.
 */
export function join(pattern_list, separator, flags=undefined) {
	const sources = Array.from(pattern_list, (pattern) => get_source(pattern));
	return new RegExp(sources.join(get_source(separator)), flags);
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Returns a new regular expression equal to `pattern` with one flag switched on or off.
 *
 * @param {RegExp|string} pattern - Pattern to derive from; a string is used as source without flags.
 * @param {string} flag - The single flag character to change.
 * @param {boolean} state - Truthy to add the flag, falsy to remove it.
 * @returns {RegExp} A new regular expression; `pattern` itself is not modified.
 */
export function update_flag(pattern, flag, state) {
	const pattern_flags = get_flags(pattern);
	if (state) {
		pattern_flags.add(flag);
	} else {
		pattern_flags.delete(flag);
	}
	// get_source keeps string patterns working; reading `pattern.source` directly
	// would turn a string into the literal pattern "undefined".
	return new RegExp(get_source(pattern), [...pattern_flags].join(''));
}
|
||||||
Reference in New Issue
Block a user