// Source: GitHub repository sagemathinc/cocalc
// Path: blob/master/src/packages/sync-fs/lib/util.ts
1
import { readdir, rm, writeFile } from "fs/promises";
2
import { dirname, join } from "path";
3
import { exists } from "@cocalc/backend/misc/async-utils-node";
4
import { compressFrame } from "lz4-napi";
5
import { executeCode } from "@cocalc/backend/execute-code";
6
7
import getLogger from "@cocalc/backend/logger";
8
// Module logger: the `debug` method of the logger registered under the
// name "sync-fs:util". Every function in this file logs through it.
const log = getLogger("sync-fs:util").debug;
9
10
/**
 * Run an external command through executeCode, logging the call first.
 *
 * @param command - program to run
 * @param args - arguments for the program
 * @param options - extra fields spread into the executeCode request
 *   (e.g. `cwd`, as used elsewhere in this file)
 * @returns whatever executeCode resolves to
 */
export async function exec(command: string, args?: string[], options?) {
  // Only the last 15 arguments are logged, which keeps log lines short when
  // a long list of paths is passed.
  const loggedArgs = args?.slice(-15).join(" ");
  log("exec", command, "...", loggedArgs, options);
  const result = await executeCode({ command, args, ...options });
  return result;
}
14
15
// Produce the raw metadata listing (output of GNU find) for the directory
// tree rooted at `path`, as a single string. Returns "" if `path` does not
// exist or has no eligible top level entries.
//
// IMPORTANT: top level hidden subdirectories in path are always ignored, e.g.,
// if path is /home/user, then /home/user/.foo is ignored, but /home/user/bar/.foo
// is not ignored.
//
// Output format: one record per file, each of the form
//     <path>\0<mtime> <atime> <blocks> <size> <mode>\0\0
// Notes about the find output option to printf:
//   - We use null characters as separators because they are the ONLY character
//     that isn't allowed in a filename (besides '/')!  Filenames can have
//     newlines in them!
//     BUT -- we are assuming filenames can be encoded as utf8; if not, sync
//     will obviously not work.
//   - The find output contains more than just what is needed for mtimeDirTree;
//     it contains everything needed by websocketfs for doing stat, i.e., this
//     output is used for the metadataFile functionality of websocketfs.
//   - Just a little fact -- output from find is NOT sorted in any guaranteed way.
//   - Y2K alert -- the %.10T below truncates times to integers, and will
//     presumably fail a few hundred years from now.
export async function metadataFile({
  path,
  exclude,
}: {
  path: string;
  exclude: string[];
}): Promise<string> {
  log("metadataFile", path, exclude);
  if (!(await exists(path))) {
    return "";
  }
  // Recursively get enough metadata about all non-hidden top level path trees.
  // Listing the top level ourselves and handing those names to find is VASTLY
  // more efficient than "find . ...", especially on cocalc with its fuse
  // mounted .snapshots!
  const topPaths = (await readdir(path)).filter(
    (p) => !p.startsWith(".") && !exclude.includes(p),
  );
  if (topPaths.length === 0) {
    // With no starting points GNU find silently defaults to ".", which would
    // walk exactly the hidden top level trees we promise to ignore.
    return "";
  }
  // NOTE(review): a top level entry whose name starts with "-" would be parsed
  // by find as an option/expression instead of a starting point.  Not handled
  // here because prefixing "./" would change the %p output -- confirm whether
  // such names can occur.
  const { stdout } = await exec(
    "find",
    [
      ...topPaths,
      // This '-not -readable -prune -o ' excludes directories that we can't
      // read, since there is no possible way to sync them, and a user (not
      // root) might not be able to fix this.  See
      // https://stackoverflow.com/questions/762348/how-can-i-exclude-all-permission-denied-messages-from-find/25234419#25234419
      "-not",
      "-readable",
      "-prune",
      "-o",
      ...findExclude(exclude),
      "-printf",
      "%p\\0%.10T@ %.10A@ %b %s %M\\0\\0",
    ],
    {
      cwd: path,
    },
  );
  return stdout;
}
66
67
// Compute the map from paths to their integral mtime for the entire directory
// tree.  Keys are of the form "./<path relative to `path`>"; values are the
// integer part of the mtime printed by find.
//
// If the string metadataFile is passed in (in the format find emits for
// "%p\0<mtime> ...\0\0" records), it is parsed instead of running find again.
// Otherwise find is run here, printing only what is needed.
//
// NOTE: this could also be done with the walkdir library, but using find
// is several times faster in general.  This is *the* bottleneck, and the
// subprocess IO isn't much, so calling find as a subprocess is the right
// solution!  This is not a hack at all.
// IMPORTANT: top level hidden subdirectories in path are always ignored.
export async function mtimeDirTree({
  path,
  exclude,
  metadataFile,
}: {
  path: string;
  exclude: string[];
  metadataFile?: string;
}): Promise<{ [path: string]: number }> {
  log("mtimeDirTree", path, exclude);
  if (!(await exists(path))) {
    return {};
  }
  if (metadataFile == null) {
    const topPaths = (await readdir(path)).filter(
      (p) => !p.startsWith(".") && !exclude.includes(p),
    );
    if (topPaths.length === 0) {
      // With no starting points GNU find defaults to "." and would walk the
      // hidden top level trees, only for every record to be discarded below.
      return {};
    }
    // NOTE(review): a top level name starting with "-" would be parsed by
    // find as an option/expression rather than a path -- not handled here.
    const args = [
      ...topPaths,
      // '-not -readable -prune -o' skips anything we cannot read, since
      // unreadable paths cannot be synced anyway.
      "-not",
      "-readable",
      "-prune",
      "-o",
      ...findExclude(exclude),
      "-printf",
      "%p\\0%T@\\0\\0",
    ];
    const { stdout } = await exec("find", args, {
      cwd: path,
    });
    metadataFile = stdout;
  }

  const mtimes: { [path: string]: number } = {};
  for (const record of metadataFile.split("\0\0")) {
    if (!record) continue; // trailing empty chunk after the final separator
    const [relPath, meta] = record.split("\0");
    if (relPath.startsWith(".")) {
      // never include top level hidden paths, if they are there for some reason.
      continue;
    }
    if (meta == null) {
      // A record without the NUL between path and metadata (e.g. truncated
      // input) cannot be parsed; skip it rather than throwing.
      log(`WARNING: skipping malformed metadata record for '${relPath}'`);
      continue;
    }
    // NOTE -- GNU tar discards fractional part of timestamp, thus rounding down,
    // so this is right, since we will use tar for sending files.
    mtimes["./" + relPath] = parseInt(meta.split(" ")[0], 10);
  }
  return mtimes;
}
121
122
/**
 * Build the `find` expression arguments that filter out every excluded path:
 * for each entry, one `-not -path` test for the entry itself and one for
 * anything beneath it.
 *
 * @param exclude - paths to exclude, as they would appear in find's output
 * @returns flat argument list, six arguments per excluded path
 */
function findExclude(exclude: string[]): string[] {
  // Hidden files need no handling here: the callers run "find *" (explicit
  // top level names), not "find .".  Filtering hidden files in the expression
  // instead would also be massively slower in general.
  return exclude.flatMap((excluded) => [
    "-not",
    "-path",
    excluded,
    "-not",
    "-path",
    `${excluded}/*`,
  ]);
}
136
137
/**
 * Best-effort recursive removal of `paths`, each resolved relative to `rel`.
 *
 * A failure to remove one path is logged as a warning and does not stop the
 * remaining removals.  The caller's `paths` array is left untouched.
 *
 * @param paths - paths to delete, relative to `rel`
 * @param rel - base directory; despite being optional in the signature it is
 *   required at runtime
 * @throws Error if `rel` is undefined or empty
 */
export async function remove(paths: string[], rel?: string) {
  if (!rel) {
    throw Error("rel must be defined");
  }
  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the array the caller passed in.
  // Reverse-sorted order visits "dir/file" before "dir", because a string
  // always sorts after any of its proper prefixes.
  const ordered = [...paths].sort().reverse();
  for (const path of ordered) {
    try {
      await rm(join(rel, path), { recursive: true });
    } catch (err) {
      log(`WARNING: issue removing '${path}' -- ${err}`);
    }
  }
}
150
151
// Write a utf8 string to a file, lz4-compressed.
// The whole string is compressed in memory rather than streamed, because
// lz4-napi doesn't support streams:
//     https://github.com/antoniomuso/lz4-napi/issues/429
// That is acceptable since our actual use of this is for fairly small files
// that were sent via an api call.
export async function writeFileLz4(path: string, contents: string) {
  const raw = Buffer.from(contents);
  // lz4-napi has no docs, but compressFrame (unlike "compress") produces
  // output that the lz4 command line tool can decompress.
  const framed = await compressFrame(raw);
  await writeFile(path, framed);
}
162
163
/*
Given an array paths of relative paths (relative to my HOME directory),
the function parseCommonPrefixes outputs an array of objects

    {prefix:string; paths:string[]}

where prefix is the directory shared by all the paths in that group, and
paths is what comes after that prefix, written relative to it ("./name").
Thus if the output is x, then

    join(x[0].prefix, x[0].paths[0]), join(x[0].prefix, x[0].paths[1]), ...,
    join(x[x.length-1].prefix, x[x.length-1].paths[0]), ...

is the input paths in sorted order (up to path normalization by join).

Example:

    parseCommonPrefixes(["./a/y", "./b", "./a/x"])
    // [ { prefix: "./a", paths: ["./x", "./y"] },
    //   { prefix: ".",   paths: ["./b"] } ]

Only paths that are adjacent after sorting are grouped, so the same prefix
can appear in more than one group.  The input array is not modified.
*/
export function parseCommonPrefixes(
  paths: string[],
): { prefix: string; paths: string[] }[] {
  // sort a copy so that paths sharing a directory end up next to each other
  const sortedPaths = paths.slice().sort();
  const result: { prefix: string; paths: string[] }[] = [];

  // Express p relative to its directory.  Slicing off prefix.length characters
  // is only right when p literally begins with "<prefix>/": dirname("foo") is
  // "." and dirname("/foo") is "/", and blind slicing turned those into ".oo"
  // and ".foo".  So strip the prefix only when it is really there.
  const relativeTo = (p: string, prefix: string): string => {
    const lead = prefix.endsWith("/") ? prefix : prefix + "/";
    return "./" + (p.startsWith(lead) ? p.slice(lead.length) : p);
  };

  let i = 0;
  while (i < sortedPaths.length) {
    const commonPrefix = dirname(sortedPaths[i]);

    // extend the group over the run of consecutive paths in the same directory
    let j = i + 1;
    while (j < sortedPaths.length && dirname(sortedPaths[j]) === commonPrefix) {
      j++;
    }

    const subPaths = sortedPaths
      .slice(i, j)
      .map((p) => relativeTo(p, commonPrefix));
    result.push({ prefix: commonPrefix, paths: subPaths });

    i = j;
  }

  return result;
}
212
213