From ba67366cd659608d6492829fc60c2283b75f8e19 Mon Sep 17 00:00:00 2001 From: mikael-lovqvists-claude-agent Date: Sat, 7 Mar 2026 01:47:31 +0000 Subject: [PATCH] Use numeric indices for delta filenames, document limitations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Delta files now named 0.zst, 1.zst etc — avoids path length issues and ambiguous separator substitution; manifest maps index to path - PLAN.md: document delta naming rationale - PLAN.md: document cross-file deduplication limitation and possible future approaches (zstd dictionary training, content-addressing, tar stream) --- PLAN.md | 19 +++++++++++++++++++ lib/commands/run.js | 8 +++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/PLAN.md b/PLAN.md index 092ee40..6868468 100644 --- a/PLAN.md +++ b/PLAN.md @@ -161,6 +161,25 @@ rsync meaningful exit codes: Currently basic: any non-zero exit code throws. Finer-grained handling planned as part of the operation abstraction refactor. +## Known Limitations + +### Delta file naming +Delta files are named by numeric index (e.g. `0.zst`, `1.zst`) rather than by path. The manifest +maps each index to its source path. Path-based naming was considered but rejected because: +- Deep directory trees can exceed filesystem filename length limits +- Path separator substitution (e.g. `/` → `__`) is ambiguous for filenames containing that sequence + +### Cross-file deduplication +Per-file deltas cannot exploit similarity between different files — each file is compressed/diffed +in isolation. Identical or near-identical files in different locations get no benefit from each +other. Approaches that could address this: +- `zstd --train` to build a shared dictionary from the corpus, then compress all deltas against it +- Content-addressed storage (deduplicate at the block or file level before delta generation) +- Tar the entire PEND tree and delta against the previous tar (single-stream, cross-file repetition + is visible to the compressor — but random access for restore becomes harder) + +These are significant complexity increases and out of scope for now. + ## Occasional Snapshots Delta chains are efficient but fragile over long chains. Periodic full snapshots (every N deltas, diff --git a/lib/commands/run.js b/lib/commands/run.js index 0bb354a..d63add5 100644 --- a/lib/commands/run.js +++ b/lib/commands/run.js @@ -65,16 +65,16 @@ export async function runCommand(config) { } const manifestChanges = []; + let fileIndex = 0; for (const change of changes) { - const deltaFilename = change.path.replaceAll('/', '__') + backend.ext; - const outFile = join(filesDir, deltaFilename); - if (change.status === 'deleted') { manifestChanges.push({ path: change.path, status: 'deleted' }); continue; } + const deltaFilename = `${fileIndex}${backend.ext}`; + const outFile = join(filesDir, deltaFilename); const prevFile = join(prev, change.path); const newFile = join(pend, change.path); @@ -97,6 +97,8 @@ export async function runCommand(config) { status: change.status, delta: join('files', deltaFilename), }); + + fileIndex++; } // ── Phase 5: Write manifest + atomic commit ──────────────────