/*
Archiving and restoring filesystems
*/

import { get, set } from "./db";
import { createSnapshot, zfsGetSnapshots } from "./snapshots";
import {
  filesystemDataset,
  filesystemArchivePath,
  filesystemArchiveFilename,
  filesystemDatasetTemp,
  filesystemMountpoint,
} from "./names";
import { exec } from "./util";
import {
  mountFilesystem,
  unmountFilesystem,
  zfsGetProperties,
} from "./properties";
import { delay } from "awaiting";
import { primaryKey, type PrimaryKey } from "./types";
import { isEqual } from "lodash";

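/*
Usage sketch (illustrative only; "pk" below stands for whatever PrimaryKey
fields identify the filesystem -- see ./types):

  await dearchiveFilesystem({
    ...pk,
    progress: ({ progress, seconds_remaining }) => {
      console.log(`${progress}% done, ~${seconds_remaining ?? "?"}s remaining`);
    },
  });
*/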
export async function dearchiveFilesystem(
  opts: PrimaryKey & {
    // called during dearchive with status updates:
    progress?: (status: {
      // a number between 0 and 100 indicating progress
      progress: number;
      // estimated number of seconds remaining
      seconds_remaining?: number;
      // how much of the total data we have de-archived
      read?: number;
      // total amount of data to de-archive
      total?: number;
    }) => void;
  },
) {
  const start = Date.now();
  opts.progress?.({ progress: 0 });
  const pk = primaryKey(opts);
  const filesystem = get(pk);
  if (!filesystem.archived) {
    throw Error("filesystem is not archived");
  }
  const { used_by_dataset, used_by_snapshots } = filesystem;
  const total = (used_by_dataset ?? 0) + (used_by_snapshots ?? 0);
  const dataset = filesystemDataset(filesystem);
  let done = false;
  let progress = 0;
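  // Progress reporting: while the zfs recv below runs, a background loop polls
  // the dataset's space usage every 750ms and reports
  // (space restored so far) / (expected total) via the progress callback.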
  if (opts.progress && total > 0) {
    (async () => {
      const t0 = Date.now();
      let lastProgress = 0;
      while (!done) {
        await delay(750);
        let x;
        try {
          x = await zfsGetProperties(dataset);
        } catch {
          // this is expected to fail, e.g., if the filesystem doesn't exist yet,
          // so just try again on the next iteration.
          continue;
        }
        if (done) {
          return;
        }
        const read = x.used_by_dataset + x.used_by_snapshots;
        progress = Math.min(100, Math.round((read * 100) / total));
        if (progress == lastProgress) {
          continue;
        }
        lastProgress = progress;
        let seconds_remaining: number | undefined = undefined;
        if (progress > 0) {
          const rate = (Date.now() - t0) / progress;
          seconds_remaining = Math.ceil((rate * (100 - progress)) / 1000);
        }
        opts.progress?.({ progress, seconds_remaining, total, read });
        if (progress >= 100) {
          break;
        }
      }
    })();
  }

  // now we de-archive it:
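  // The archived stream is piped back into zfs recv, which recreates the
  // dataset together with its snapshots (the archive is a full replication
  // stream created by archiveFilesystem below).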
  const stream = filesystemArchiveFilename(filesystem);
  await exec({
    verbose: true,
    // have to use sudo sh -c because zfs recv only supports reading from stdin:
    command: `sudo sh -c 'cat ${stream} | zfs recv ${dataset}'`,
    what: {
      ...pk,
      desc: "de-archive a filesystem via zfs recv",
    },
  });
  done = true;
  if (progress < 100) {
    opts.progress?.({
      progress: 100,
      seconds_remaining: 0,
      total,
      read: total,
    });
  }
  await mountFilesystem(filesystem);
  // mounting worked so remove the archive
  await exec({
    command: "sudo",
    args: ["rm", stream],
    what: {
      ...pk,
      desc: "removing the stream during de-archive",
    },
  });
  set({ ...pk, archived: false });
  return { milliseconds: Date.now() - start };
}

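/*
Archive a filesystem: snapshot it, hash its file listing, unmount it, write a
full "zfs send" replication stream to the archive path, verify that stream by
receiving it into a temporary dataset and re-checking the file-tree hash and
snapshot list, then destroy the live dataset and mark the filesystem as
archived in the database.  Rough usage (pk is a PrimaryKey):

  const { snapshot, milliseconds } = await archiveFilesystem(pk);
*/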
export async function archiveFilesystem(fs: PrimaryKey) {
  const start = Date.now();
  const pk = primaryKey(fs);
  const filesystem = get(pk);
  if (filesystem.archived) {
    throw Error("filesystem is already archived");
  }
  // create or get most recent snapshot
  const snapshot = await createSnapshot({ ...filesystem, ifChanged: true });
  // where the archive of this filesystem goes:
  const archive = filesystemArchivePath(filesystem);
  const stream = filesystemArchiveFilename(filesystem);
  await exec({
    command: "sudo",
    args: ["mkdir", "-p", archive],
    what: { ...pk, desc: "make archive target directory" },
  });

  await mountFilesystem(filesystem);
  const find = await hashFileTree({
    verbose: true,
    path: filesystemMountpoint(filesystem),
    what: { ...pk, desc: "getting sha1sum of file listing" },
  });
  // The mountpoint will be used for the verification test below; also, there is
  // no point in archiving if we can't even unmount the filesystem.
  await unmountFilesystem(filesystem);

  // make a *full* zfs send
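  // (-R includes every snapshot and property of the dataset in the stream,
  // -c sends blocks compressed as they are stored on disk, and -e uses
  // embedded-data records where possible, keeping the archive compact)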
  await exec({
    verbose: true,
    // have to use sudo sh -c because zfs send only supports writing to stdout:
    command: `sudo sh -c 'zfs send -e -c -R ${filesystemDataset(filesystem)}@${snapshot} > ${stream}'`,
    what: {
      ...pk,
      desc: "zfs send of full filesystem dataset to archive it",
    },
  });

  // verify that the entire send stream is valid
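  // by receiving it into a temporary dataset and then comparing both the
  // file-tree hash and the snapshot list against the original filesystem.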
  const temp = filesystemDatasetTemp(filesystem);
  try {
    await exec({
      verbose: true,
      // have to use sudo sh -c because zfs recv only supports reading from stdin:
      command: `sudo sh -c 'cat ${stream} | zfs recv ${temp}'`,
      what: {
        ...pk,
        desc: "verify the archive zfs send is valid",
      },
    });
    // Inspect the list of all files and verify that it is identical (has the same
    // sha1sum).  This should not be strictly necessary, since the recv above didn't
    // fail and the stream has its own checksums, but a stream can be corrupted in
    // ways that make it read in as empty (say), and this check will definitely
    // catch that.
    const findtest = await hashFileTree({
      verbose: true,
      path: filesystemMountpoint(filesystem), // same mountpoint due to being part of recv data
      what: { ...pk, desc: "getting sha1sum of file listing" },
    });
    if (findtest != find) {
      throw Error(
        "files in archived filesystem do not match. Refusing to archive!",
      );
    }
    // Inspect the list of snapshots, and verify they are identical as well. This is
    // another good consistency check that the stream works.
    const snapshots = await zfsGetSnapshots(temp);
    if (!isEqual(snapshots, filesystem.snapshots)) {
      throw Error(
        "snapshots in archived filesystem do not match. Refusing to archive!",
      );
    }
  } finally {
    // destroy the temporary filesystem
    await exec({
      verbose: true,
      command: "sudo",
      args: ["zfs", "destroy", "-r", temp],
      what: {
        ...pk,
        desc: "destroying temporary filesystem dataset used for testing archive stream",
      },
    });
  }

  // destroy dataset
  await exec({
    verbose: true,
    command: "sudo",
    args: ["zfs", "destroy", "-r", filesystemDataset(filesystem)],
    what: { ...pk, desc: "destroying filesystem dataset" },
  });

  // set as archived in database
  set({ ...pk, archived: true });

  return { snapshot, milliseconds: Date.now() - start };
}

// Returns a hash of the file tree. This uses the find command to get path names, but
// doesn't actually read the *contents* of any files, so it's reasonably fast.
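// The hash covers each entry's path, size, and modification time
// ("%p %s %TY-%Tm-%Td %TH:%TM"), so renames, size changes, and mtime changes
// all change the resulting sha1sum.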
async function hashFileTree({
  path,
  what,
  verbose,
}: {
  path: string;
  what?;
  verbose?;
}): Promise<string> {
  const { stdout } = await exec({
    verbose,
    command: `sudo sh -c 'cd "${path}" && find . -xdev -printf "%p %s %TY-%Tm-%Td %TH:%TM\n" | sha1sum'`,
    what,
  });
  return stdout;
}