Compare commits
10 Commits
84801a7971
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 8d1d1241b6 | |||
| f8829af7a8 | |||
| ba67366cd6 | |||
| f1faa992c9 | |||
| 45924cbcd7 | |||
| ab7479e62d | |||
| d1c65a06d5 | |||
| e999fca352 | |||
| 96e3024991 | |||
| 30b90193d7 |
75
PLAN.md
75
PLAN.md
@@ -127,6 +127,81 @@ CWD or implicit defaults for directories — explicit is safer.
|
||||
All external tools (rsync, zstd, xdelta3) are spawned with explicit argument arrays.
|
||||
No shell string interpolation ever. Use Node's `child_process.spawn` or similar.
|
||||
|
||||
### Planned: Operation Abstractions
|
||||
|
||||
Currently dry-run logic is scattered inline throughout the run command. The intent is to refactor
|
||||
toward self-describing operation objects — each operation knows both how to describe itself (for
|
||||
dry-run) and how to execute itself. This makes the run command a clean sequence of operations,
|
||||
makes per-tool behavior easy to adjust (e.g. rsync exit code handling), and makes dry-run output
|
||||
a natural consequence of the abstraction rather than duplicated conditional logic.
|
||||
|
||||
Sketch:
|
||||
```js
|
||||
// Each tool gets its own operation type
|
||||
const op = rsyncOp({ args: [...], allowedExitCodes: [0, 24] });
|
||||
op.describe(); // prints what it would do
|
||||
await op.run(); // executes
|
||||
|
||||
// Run command becomes:
|
||||
const ops = buildOps(config);
|
||||
if (dry) ops.forEach(op => op.describe());
|
||||
else for (const op of ops) await op.run();
|
||||
```
|
||||
|
||||
Per-tool exit code handling (e.g. rsync's partial transfer codes) lives inside the operation,
|
||||
not scattered across callers.
|
||||
|
||||
### Current: rsync Exit Code Handling
|
||||
|
||||
rsync meaningful exit codes:
|
||||
- `0` — success
|
||||
- `23` — partial transfer due to error (fatal)
|
||||
- `24` — partial transfer due to vanished source files (acceptable in some cases)
|
||||
|
||||
Currently basic: any non-zero exit code throws. Finer-grained handling planned as part of the
|
||||
operation abstraction refactor.
|
||||
|
||||
## Known Limitations
|
||||
|
||||
### Delta file naming
|
||||
Delta files are named by numeric index (e.g. `0.zst`, `1.zst`) rather than by path. The manifest
|
||||
maps each index to its source path. Path-based naming was considered but rejected because:
|
||||
- Deep directory trees can exceed filesystem filename length limits
|
||||
- Path separator substitution (e.g. `/` → `__`) is ambiguous for filenames containing that sequence
|
||||
|
||||
### Cross-file deduplication
|
||||
Per-file deltas cannot exploit similarity between different files — each file is compressed/diffed
|
||||
in isolation. Identical or near-identical files in different locations get no benefit from each
|
||||
other. Approaches that could address this:
|
||||
- `zstd --train` to build a shared dictionary from the corpus, then compress all deltas against it
|
||||
- Content-addressed storage (deduplicate at the block or file level before delta generation)
|
||||
- Tar the entire PEND tree and delta against the previous tar (single-stream, cross-file repetition
|
||||
is visible to the compressor — but random access for restore becomes harder)
|
||||
|
||||
These are significant complexity increases and out of scope for now.
|
||||
|
||||
### File attribute tracking (TODO)
|
||||
|
||||
Currently the manifest records only file content changes. File metadata (permissions, mtime,
|
||||
ownership, xattrs) is not tracked, meaning restore cannot faithfully reconstruct the original
|
||||
state.
|
||||
|
||||
**Planned approach:**
|
||||
- On each run, compare attributes between PREV and PEND for every file in the change list
|
||||
- Encode attribute changes explicitly in the manifest alongside content changes
|
||||
- Restore walks the delta chain applying both content deltas and attribute deltas in sequence
|
||||
|
||||
**Design considerations:**
|
||||
- `fs.stat()` gives mode, mtime, uid, gid — but not xattrs, ACLs, or fs-specific attributes
|
||||
- Attribute richness is highly filesystem-dependent (ext4, btrfs, APFS, NTFS all differ)
|
||||
- Need a pluggable attribute backend, similar to the delta backend, so the attribute set captured
|
||||
and restored can be tuned per deployment without changing core logic
|
||||
- Restore must handle the case where an attribute from an older delta is no longer representable
|
||||
on the target filesystem (e.g. restoring to a different fs type) — fail loudly rather than
|
||||
silently skip
|
||||
- rsync `-a` already preserves attributes into PEND, so PEND is always the authoritative source
|
||||
of truth for what attributes should be at that point in time
|
||||
|
||||
## Occasional Snapshots
|
||||
|
||||
Delta chains are space-efficient but grow fragile as they lengthen. Periodic full snapshots (every N deltas,
|
||||
|
||||
@@ -12,9 +12,10 @@ Commands:
|
||||
|
||||
Options:
|
||||
--source <path> SOURCE directory (required)
|
||||
--prev <path> PREV directory (required)
|
||||
--pend <path> PEND directory (required)
|
||||
--deltas <path> DELTAS directory (required)
|
||||
--base <path> Sets --prev, --pend, --deltas as subdirs of base path
|
||||
--prev <path> PREV directory (default: <base>/previous)
|
||||
--pend <path> PEND directory (default: <base>/pending)
|
||||
--deltas <path> DELTAS directory (default: <base>/deltas)
|
||||
--backend <name> Delta backend: zstd (default), xdelta3
|
||||
--config <file> Load options from JSON config file (flags override)
|
||||
--dry-run Print what would happen, execute nothing
|
||||
@@ -26,6 +27,7 @@ export function parseArgs(argv) {
|
||||
args: argv,
|
||||
options: {
|
||||
source: { type: 'string' },
|
||||
base: { type: 'string' },
|
||||
prev: { type: 'string' },
|
||||
pend: { type: 'string' },
|
||||
deltas: { type: 'string' },
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
/**
|
||||
* run command — full backup run.
|
||||
*/
|
||||
import { rm, mkdir } from 'fs/promises';
|
||||
import { mkdir, rename, rm, writeFile } from 'fs/promises';
|
||||
import { join } from 'path';
|
||||
import { run as spawn } from '../spawn.js';
|
||||
import { run as spawn, rsync } from '../spawn.js';
|
||||
import { parseItemize } from '../itemize.js';
|
||||
import { getBackend } from '../backends/index.js';
|
||||
import { readState, writeState, PHASES } from '../state.js';
|
||||
|
||||
export async function runCommand(config) {
|
||||
const { source, prev, pend, deltas, backend: backendName, dryRun } = config;
|
||||
const { source, prev, pend, deltas, backend: backendName, dryRun: dry } = config;
|
||||
const backend = getBackend(backendName);
|
||||
const dry = dryRun;
|
||||
|
||||
if (dry) console.log('[dry-run] No changes will be made.\n');
|
||||
|
||||
@@ -20,42 +20,130 @@ export async function runCommand(config) {
|
||||
|
||||
console.log(`Starting run — seq ${seq} (last complete: ${state.last_complete})`);
|
||||
|
||||
// TODO: detect and handle partially-committed previous run
|
||||
// TODO: detect and recover from partially-committed previous run
|
||||
|
||||
// ── Phase 1: Clear PEND ─────────────────────────────────────
|
||||
// ── Phase 1: Ensure PEND exists ─────────────────────────────
|
||||
await setPhase(deltas, state, PHASES.CLEARING_PEND, dry);
|
||||
console.log('\n── Clear PEND ──');
|
||||
if (!dry) {
|
||||
await rm(pend, { recursive: true, force: true });
|
||||
await mkdir(pend, { recursive: true });
|
||||
} else {
|
||||
console.log(`[dry-run] rm -rf ${pend} && mkdir -p ${pend}`);
|
||||
console.log(`[dry-run] mkdir -p ${pend}`);
|
||||
}
|
||||
|
||||
// ── Phase 2: rsync PREV → PEND (local seed) ─────────────────
|
||||
// ── Phase 2: rsync PREV → PEND (local seed, with delete) ────
|
||||
await setPhase(deltas, state, PHASES.RSYNC_LOCAL, dry);
|
||||
console.log('\n── rsync PREV → PEND (local seed) ──');
|
||||
await spawn('rsync', ['-aP', trailingSlash(prev), pend], { dryRun: dry });
|
||||
await rsync(['-aP', '--delete', trailingSlash(prev), trailingSlash(pend)], { dryRun: dry });
|
||||
|
||||
// ── Phase 3: rsync SOURCE → PEND (remote changes) ───────────
|
||||
// ── Phase 3: rsync SOURCE → PEND, capture change list ───────
|
||||
await setPhase(deltas, state, PHASES.RSYNC_REMOTE, dry);
|
||||
console.log('\n── rsync SOURCE → PEND ──');
|
||||
await spawn('rsync', ['-aP', trailingSlash(source), pend], { dryRun: dry });
|
||||
|
||||
// ── Phase 4: Generate delta ──────────────────────────────────
|
||||
const output = await rsync(
|
||||
['-aP', '--itemize-changes', '--delete', trailingSlash(source), trailingSlash(pend)],
|
||||
{ dryRun: dry, capture: true },
|
||||
);
|
||||
const changes = dry ? [] : parseItemize(output);
|
||||
if (!dry) {
|
||||
console.log(` ${changes.length} file(s) changed`);
|
||||
for (const c of changes) console.log(` [${c.status}] ${c.path}`);
|
||||
} else {
|
||||
console.log(' [dry-run] change list determined at runtime');
|
||||
}
|
||||
|
||||
// ── Phase 4: Generate per-file deltas into DELTAS/tmp/N/files/
|
||||
await setPhase(deltas, state, PHASES.GENERATING, dry);
|
||||
console.log('\n── Generate delta ──');
|
||||
// TODO: walk PREV and PEND, diff per file, build manifest
|
||||
|
||||
// ── Phase 5: Commit delta ────────────────────────────────────
|
||||
const tmpDir = join(deltas, 'tmp', String(seq));
|
||||
const filesDir = join(tmpDir, 'files');
|
||||
const tarFile = join(tmpDir, 'delta.tar');
|
||||
const bundleFile = join(tmpDir, 'delta.tar.zst');
|
||||
|
||||
if (!dry) {
|
||||
await mkdir(filesDir, { recursive: true });
|
||||
} else {
|
||||
console.log(`[dry-run] mkdir -p ${filesDir}`);
|
||||
}
|
||||
|
||||
const manifestChanges = [];
|
||||
let fileIndex = 0;
|
||||
|
||||
for (const change of changes) {
|
||||
if (change.status === 'deleted') {
|
||||
manifestChanges.push({ path: change.path, status: 'deleted' });
|
||||
continue;
|
||||
}
|
||||
|
||||
const deltaFilename = `${fileIndex}${backend.ext}`;
|
||||
const outFile = join(filesDir, deltaFilename);
|
||||
const prevFile = join(prev, change.path);
|
||||
const newFile = join(pend, change.path);
|
||||
|
||||
console.log(` [${change.status}] ${change.path}`);
|
||||
|
||||
if (!dry) {
|
||||
await backend.createDelta(
|
||||
change.status === 'modified' ? prevFile : null,
|
||||
newFile,
|
||||
outFile,
|
||||
);
|
||||
} else {
|
||||
console.log(`[dry-run] ${change.status === 'modified'
|
||||
? `zstd --patch-from ${prevFile} ${newFile} -o ${outFile}`
|
||||
: `zstd ${newFile} -o ${outFile}`}`);
|
||||
}
|
||||
|
||||
manifestChanges.push({
|
||||
path: change.path,
|
||||
status: change.status,
|
||||
delta: deltaFilename,
|
||||
});
|
||||
|
||||
fileIndex++;
|
||||
}
|
||||
|
||||
// ── Bundle: tar files/ → delta.tar → delta.tar.zst ──────────
|
||||
console.log('\n── Bundle deltas ──');
|
||||
// tar with -C so paths inside the archive are relative (just filenames)
|
||||
await spawn('tar', ['cf', tarFile, '-C', filesDir, '.'], { dryRun: dry });
|
||||
await spawn('zstd', [tarFile, '-o', bundleFile, '-f'], { dryRun: dry });
|
||||
if (!dry) {
|
||||
await rm(filesDir, { recursive: true });
|
||||
await rm(tarFile);
|
||||
} else {
|
||||
console.log(`[dry-run] rm -rf ${filesDir} ${tarFile}`);
|
||||
}
|
||||
|
||||
// ── Phase 5: Write manifest + atomic commit ──────────────────
|
||||
await setPhase(deltas, state, PHASES.COMMITTING, dry);
|
||||
console.log('\n── Commit delta ──');
|
||||
// TODO: atomic rename DELTAS/tmp/N → DELTAS/N
|
||||
|
||||
const manifest = {
|
||||
seq,
|
||||
timestamp: new Date().toISOString(),
|
||||
prev_seq: state.last_complete,
|
||||
backend: backendName,
|
||||
bundle: 'delta.tar.zst',
|
||||
changes: manifestChanges,
|
||||
};
|
||||
|
||||
const seqDir = join(deltas, String(seq));
|
||||
|
||||
if (!dry) {
|
||||
await writeFile(join(tmpDir, 'manifest.json'), JSON.stringify(manifest, null, 2) + '\n');
|
||||
// Atomic rename: tmp/N → N
|
||||
await rename(tmpDir, seqDir);
|
||||
console.log(` Committed to ${seqDir}`);
|
||||
} else {
|
||||
console.log(`[dry-run] write manifest to ${tmpDir}/manifest.json`);
|
||||
console.log(`[dry-run] rename ${tmpDir} → ${seqDir}`);
|
||||
}
|
||||
|
||||
// ── Phase 6: Promote PEND → PREV ────────────────────────────
|
||||
await setPhase(deltas, state, PHASES.PROMOTING, dry);
|
||||
console.log('\n── Promote PEND → PREV ──');
|
||||
// TODO: mv PEND PREV (swap)
|
||||
await rsync(['-aP', '--delete', trailingSlash(pend), trailingSlash(prev)], { dryRun: dry });
|
||||
|
||||
// ── Done ─────────────────────────────────────────────────────
|
||||
state.last_complete = seq;
|
||||
@@ -63,7 +151,7 @@ export async function runCommand(config) {
|
||||
state.phase = PHASES.IDLE;
|
||||
if (!dry) await writeState(deltas, state);
|
||||
|
||||
console.log(`\nRun complete — seq ${seq} committed.`);
|
||||
console.log(`\nRun complete — seq ${seq} committed. ${manifestChanges.length} file(s) in delta.`);
|
||||
}
|
||||
|
||||
async function setPhase(deltas, state, phase, dry) {
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* CLI args always win. Required paths are validated here.
|
||||
*/
|
||||
import { readFile } from 'fs/promises';
|
||||
import { join } from 'path';
|
||||
|
||||
const REQUIRED_PATHS = ['source', 'prev', 'pend', 'deltas'];
|
||||
const DEFAULTS = {
|
||||
@@ -25,6 +26,13 @@ export async function loadConfig(args) {
|
||||
// CLI args override file config, file config overrides defaults
|
||||
const config = { ...DEFAULTS, ...fileConfig, ...filterDefined(args) };
|
||||
|
||||
// Expand --base into --prev/--pend/--deltas, explicit flags take priority
|
||||
if (config.base) {
|
||||
config.prev ??= join(config.base, 'previous');
|
||||
config.pend ??= join(config.base, 'pending');
|
||||
config.deltas ??= join(config.base, 'deltas');
|
||||
}
|
||||
|
||||
// Guard: refuse to run if any required path is missing
|
||||
if (config.command === 'run') {
|
||||
const missing = REQUIRED_PATHS.filter(k => !config[k]);
|
||||
|
||||
56
lib/itemize.js
Normal file
56
lib/itemize.js
Normal file
@@ -0,0 +1,56 @@
|
||||
/**
|
||||
* Parse rsync --itemize-changes output into a structured change list.
|
||||
*
|
||||
* rsync itemize format: 11-character code + space + path
|
||||
*
|
||||
* Code structure: YXcstpoguax
|
||||
* Y = update type: > (transfer), * (message/delete), c (local change), . (no update), h (hard link)
|
||||
* X = file type: f (file), d (dir), L (symlink), D (device), S (special)
|
||||
* remaining chars = what changed (size, time, perms, etc.) or '+++++++++' for new
|
||||
*
|
||||
* We care about:
|
||||
* >f... = file transferred (new or modified)
|
||||
* *deleting = file deleted
|
||||
* cd... = directory (ignored for delta purposes)
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {{ status: 'added'|'modified'|'deleted', path: string }} Change
|
||||
*/
|
||||
|
||||
/**
|
||||
* Parse rsync --itemize-changes stdout into a list of file changes.
|
||||
* @param {string} output
|
||||
* @returns {Change[]}
|
||||
*/
|
||||
export function parseItemize(output) {
|
||||
const changes = [];
|
||||
|
||||
for (const raw of output.split('\n')) {
|
||||
const line = raw.trimEnd();
|
||||
if (!line) continue;
|
||||
|
||||
// Deleted files: "*deleting path/to/file"
|
||||
if (line.startsWith('*deleting ')) {
|
||||
const path = line.slice('*deleting '.length).trimStart();
|
||||
// Skip directory deletions (trailing slash)
|
||||
if (!path.endsWith('/')) {
|
||||
changes.push({ status: 'deleted', path });
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// File transfers: ">f......... path" (new or modified)
|
||||
if (line.length > 12 && line[0] === '>' && line[1] === 'f') {
|
||||
const code = line.slice(0, 11);
|
||||
const path = line.slice(12);
|
||||
const isNew = code.slice(2) === '+++++++++';
|
||||
changes.push({ status: isNew ? 'added' : 'modified', path });
|
||||
continue;
|
||||
}
|
||||
|
||||
// Everything else (dirs, symlinks, attribute-only changes) — ignore
|
||||
}
|
||||
|
||||
return changes;
|
||||
}
|
||||
71
lib/spawn.js
71
lib/spawn.js
@@ -4,17 +4,14 @@
|
||||
import { spawn } from 'child_process';
|
||||
|
||||
/**
|
||||
* Spawn a process and stream its output.
|
||||
* Spawn a process and stream its output to stdout/stderr.
|
||||
* @param {string} cmd
|
||||
* @param {string[]} args
|
||||
* @param {{ dryRun?: boolean, label?: string }} opts
|
||||
* @param {{ dryRun?: boolean }} opts
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
export async function run(cmd, args, { dryRun = false, label } = {}) {
|
||||
const display = [cmd, ...args].join(' ');
|
||||
if (label) console.log(`[${label}] ${display}`);
|
||||
else console.log(`$ ${display}`);
|
||||
|
||||
export async function run(cmd, args, { dryRun = false } = {}) {
|
||||
console.log(`$ ${[cmd, ...args].join(' ')}`);
|
||||
if (dryRun) return;
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
@@ -26,3 +23,63 @@ export async function run(cmd, args, { dryRun = false, label } = {}) {
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Spawn a process and collect its stdout into a string.
 * stderr passes straight through to the terminal; stdin is inherited.
 * Always executes — never used in a dry-run context.
 * @param {string} cmd
 * @param {string[]} args
 * @param {{ allowedExitCodes?: number[] }} opts
 * @returns {Promise<string>} captured stdout, decoded as UTF-8
 */
export async function capture(cmd, args, { allowedExitCodes = [0] } = {}) {
  console.log(`$ ${[cmd, ...args].join(' ')}`);

  return new Promise((resolve, reject) => {
    const child = spawn(cmd, args, { stdio: ['inherit', 'pipe', 'inherit'] });
    const buffers = [];

    child.stdout.on('data', (data) => buffers.push(data));
    child.on('error', reject);
    child.on('close', (exitCode) => {
      if (!allowedExitCodes.includes(exitCode)) {
        reject(new Error(`${cmd} exited with code ${exitCode}`));
        return;
      }
      resolve(Buffer.concat(buffers).toString('utf8'));
    });
  });
}
|
||||
|
||||
// rsync exit codes accepted as success.
const RSYNC_OK_CODES = [
  0, // success
  24, // partial transfer: source files vanished mid-run (acceptable)
];

// Known rsync failure codes, mapped to human-readable reasons.
const RSYNC_ERROR_CODES = {
  23: 'partial transfer due to error',
};

/**
 * Run rsync with exit code awareness: code 24 (vanished source files) is
 * tolerated, other non-zero codes reject with a descriptive error.
 * @param {string[]} args
 * @param {{ dryRun?: boolean, capture?: boolean }} opts
 * @returns {Promise<void | string>} captured stdout when `capture` is set, otherwise undefined
 */
export async function rsync(args, { dryRun = false, capture: doCapture = false } = {}) {
  console.log(`$ rsync ${args.join(' ')}`);
  if (dryRun) {
    return doCapture ? '' : undefined;
  }

  return new Promise((resolve, reject) => {
    const child = spawn('rsync', args, {
      stdio: doCapture ? ['inherit', 'pipe', 'inherit'] : 'inherit',
    });
    const collected = [];
    if (doCapture) {
      child.stdout.on('data', (data) => collected.push(data));
    }

    child.on('error', reject);
    child.on('close', (code) => {
      if (!RSYNC_OK_CODES.includes(code)) {
        const reason = RSYNC_ERROR_CODES[code] ?? `unknown error`;
        reject(new Error(`rsync exited with code ${code}: ${reason}`));
        return;
      }
      resolve(doCapture ? Buffer.concat(collected).toString('utf8') : undefined);
    });
  });
}
|
||||
|
||||
Reference in New Issue
Block a user