Path: blob/master/src/packages/file-server/zfs/archive.ts
/*
Archiving and restoring filesystems
*/

import { get, set } from "./db";
import { createSnapshot, zfsGetSnapshots } from "./snapshots";
import {
  filesystemDataset,
  filesystemArchivePath,
  filesystemArchiveFilename,
  filesystemDatasetTemp,
  filesystemMountpoint,
} from "./names";
import { exec } from "./util";
import {
  mountFilesystem,
  unmountFilesystem,
  zfsGetProperties,
} from "./properties";
import { delay } from "awaiting";
import { primaryKey, type PrimaryKey } from "./types";
import { isEqual } from "lodash";

export async function dearchiveFilesystem(
  opts: PrimaryKey & {
    // called during dearchive with status updates:
    progress?: (status: {
      // a number between 0 and 100 indicating progress
      progress: number;
      // estimated number of seconds remaining
      seconds_remaining?: number;
      // how much of the total data we have de-archived
      read?: number;
      // total amount of data to de-archive
      total?: number;
    }) => void;
  },
) {
  const start = Date.now();
  opts.progress?.({ progress: 0 });
  const pk = primaryKey(opts);
  const filesystem = get(pk);
  if (!filesystem.archived) {
    throw Error("filesystem is not archived");
  }
  const { used_by_dataset, used_by_snapshots } = filesystem;
  const total = (used_by_dataset ?? 0) + (used_by_snapshots ?? 0);
  const dataset = filesystemDataset(filesystem);
  let done = false;
  let progress = 0;
  if (opts.progress && total > 0) {
    (async () => {
      const t0 = Date.now();
      let lastProgress = 0;
      while (!done) {
        await delay(750);
        let x;
        try {
          x = await zfsGetProperties(dataset);
        } catch {
          // this is expected to fail, e.g., if filesystem doesn't exist yet,
          // so just keep polling.
          continue;
        }
        if (done) {
          return;
        }
        const read = x.used_by_dataset + x.used_by_snapshots;
        progress = Math.min(100, Math.round((read * 100) / total));
        if (progress == lastProgress) {
          continue;
        }
        lastProgress = progress;
        let seconds_remaining: number | undefined = undefined;
        if (progress > 0) {
          const rate = (Date.now() - t0) / progress;
          seconds_remaining = Math.ceil((rate * (100 - progress)) / 1000);
        }
        opts.progress?.({ progress, seconds_remaining, total, read });
        if (progress >= 100) {
          break;
        }
      }
    })();
  }

  // now we de-archive it:
  const stream = filesystemArchiveFilename(filesystem);
  await exec({
    verbose: true,
    // have to use sudo sh -c because zfs recv only supports reading from stdin:
    command: `sudo sh -c 'cat ${stream} | zfs recv ${dataset}'`,
    what: {
      ...pk,
      desc: "de-archive a filesystem via zfs recv",
    },
  });
  done = true;
  if (progress < 100) {
    opts.progress?.({
      progress: 100,
      seconds_remaining: 0,
      total,
      read: total,
    });
  }
  await mountFilesystem(filesystem);
  // mounting worked so remove the archive
  await exec({
    command: "sudo",
    args: ["rm", stream],
    what: {
      ...pk,
      desc: "removing the stream during de-archive",
    },
  });
  set({ ...pk, archived: false });
  return { milliseconds: Date.now() - start };
}
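/*
A minimal usage sketch (not part of the original module): restoring an archived
filesystem while reporting progress. The primary-key fields come from
primaryKey()/PrimaryKey in ./types; the `pk` object below is assumed to hold
whatever fields that type requires.

  import { dearchiveFilesystem } from "./archive";

  await dearchiveFilesystem({
    ...pk, // the PrimaryKey fields identifying the filesystem
    progress: ({ progress, seconds_remaining, read, total }) => {
      console.log(
        `de-archive: ${progress}%`,
        seconds_remaining != null ? `~${seconds_remaining}s left` : "",
        read != null && total != null ? `(${read} of ${total} bytes)` : "",
      );
    },
  });
*/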
export async function archiveFilesystem(fs: PrimaryKey) {
  const start = Date.now();
  const pk = primaryKey(fs);
  const filesystem = get(pk);
  if (filesystem.archived) {
    throw Error("filesystem is already archived");
  }
  // create or get most recent snapshot
  const snapshot = await createSnapshot({ ...filesystem, ifChanged: true });
  // where archive of this filesystem goes:
  const archive = filesystemArchivePath(filesystem);
  const stream = filesystemArchiveFilename(filesystem);
  await exec({
    command: "sudo",
    args: ["mkdir", "-p", archive],
    what: { ...pk, desc: "make archive target directory" },
  });

  await mountFilesystem(filesystem);
  const find = await hashFileTree({
    verbose: true,
    path: filesystemMountpoint(filesystem),
    what: { ...pk, desc: "getting sha1sum of file listing" },
  });
  // mountpoint will be used for the test below, and also there is no point in
  // archiving if we can't even unmount the filesystem
  await unmountFilesystem(filesystem);

  // make *full* zfs send
  await exec({
    verbose: true,
    // have to use sudo sh -c because zfs send only supports writing to stdout:
    command: `sudo sh -c 'zfs send -e -c -R ${filesystemDataset(filesystem)}@${snapshot} > ${stream}'`,
    what: {
      ...pk,
      desc: "zfs send of full filesystem dataset to archive it",
    },
  });

  // verify that the entire send stream is valid
  const temp = filesystemDatasetTemp(filesystem);
  try {
    await exec({
      verbose: true,
      // have to use sudo sh -c because zfs recv only supports reading from stdin:
      command: `sudo sh -c 'cat ${stream} | zfs recv ${temp}'`,
      what: {
        ...pk,
        desc: "verify the archive zfs send is valid",
      },
    });
    // Inspect the list of all files, and verify that it is identical (has the same sha1sum).
    // This should not be necessary because the recv above didn't fail, and there
    // are supposed to be checksums. But there are also some ways to corrupt a
    // stream so it reads in as empty (say), so this will definitely catch that.
    const findtest = await hashFileTree({
      verbose: true,
      path: filesystemMountpoint(filesystem), // same mountpoint due to being part of recv data
      what: { ...pk, desc: "getting sha1sum of file listing" },
    });
    if (findtest != find) {
      throw Error(
        "files in archived filesystem do not match. Refusing to archive!",
      );
    }
    // Inspect the list of snapshots, and verify they are identical as well. This is another
    // good consistency check that the stream works.
    const snapshots = await zfsGetSnapshots(temp);
    if (!isEqual(snapshots, filesystem.snapshots)) {
      throw Error(
        "snapshots in archived filesystem do not match. Refusing to archive!",
      );
    }
  } finally {
    // destroy the temporary filesystem
    await exec({
      verbose: true,
      command: "sudo",
      args: ["zfs", "destroy", "-r", temp],
      what: {
        ...pk,
        desc: "destroying temporary filesystem dataset used for testing archive stream",
      },
    });
  }

  // destroy dataset
  await exec({
    verbose: true,
    command: "sudo",
    args: ["zfs", "destroy", "-r", filesystemDataset(filesystem)],
    what: { ...pk, desc: "destroying filesystem dataset" },
  });

  // set as archived in database
  set({ ...pk, archived: true });

  return { snapshot, milliseconds: Date.now() - start };
}

// Returns a hash of the file tree. This uses the find command to get path names
// and metadata, but doesn't actually read the *contents* of any files, so it's
// reasonably fast.
async function hashFileTree({
  path,
  what,
  verbose,
}: {
  path: string;
  what?;
  verbose?;
}): Promise<string> {
  const { stdout } = await exec({
    verbose,
    command: `sudo sh -c 'cd "${path}" && find . -xdev -printf "%p %s %TY-%Tm-%Td %TH:%TM\n" | sha1sum'`,
    what,
  });
  return stdout;
}
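/*
A minimal usage sketch (not part of the original module): archiving a filesystem
and surfacing the result. Note that archiveFilesystem snapshots, streams, verifies,
and then destroys the live dataset, so callers should treat it as destructive.
As above, `pk` is assumed to hold the PrimaryKey fields identifying the filesystem.

  import { archiveFilesystem } from "./archive";

  const { snapshot, milliseconds } = await archiveFilesystem(pk);
  console.log(`archived at snapshot ${snapshot} in ${milliseconds}ms`);
*/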