CoCalc -- util.ts

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/sync-fs/lib/util.ts
¹⁴⁹⁶ views
1
import { readdir, rm, writeFile } from "fs/promises";
2
import { dirname, join } from "path";
3
import { exists } from "@cocalc/backend/misc/async-utils-node";
4
import { compressFrame } from "lz4-napi";
5
import { executeCode } from "@cocalc/backend/execute-code";
6

7
import getLogger from "@cocalc/backend/logger";
8
const log = getLogger("sync-fs:util").debug;
9

10
// Run `command` with `args` through executeCode, logging the call first.
// Only the last 15 arguments are joined into the log line (the full list is
// still passed to executeCode).  `options` is spread into the executeCode
// request after command and args, so a key in options with the same name
// takes precedence.
export async function exec(command: string, args?: string[], options?) {
  const loggedArgs = args?.slice(-15)?.join(" ");
  log("exec", command, "...", loggedArgs, options);
  const request = { command, args, ...options };
  return await executeCode(request);
}
14

15
// Run find (with cwd = path) over the non-hidden, non-excluded top level
// entries of path and return its raw stdout.  Each record has the form
//
//     <path>\0<mtime> <atime> <blocks> <size> <mode>\0\0
//
// as produced by the -printf format below.  Returns "" if path does not
// exist or has no top level entries to scan.
//
// IMPORTANT: top level hidden subdirectories in path are always ignored, e.g.,
// if path is /home/user, then /home/user/.foo is ignored, but /home/user/bar/.foo
// is not ignored.
export async function metadataFile({
  path,
  exclude,
}: {
  path: string;
  exclude: string[];
}): Promise<string> {
  log("metadataFile", path, exclude);
  if (!(await exists(path))) {
    return "";
  }
  // Recursively get enough metadata about all non-hidden top level path trees
  // (this is VASTLY more efficient than "find . ...", especially on cocalc
  // with its fuse mounted .snapshots!)
  // Notes about the find output option to printf:
  // - We use null characters as separators because they are the ONLY character
  //   that isn't allowed in a filename (besides '/')! Filenames can have newlines
  //   in them!
  //   BUT -- we are assuming filenames can be encoded as utf8; if not, sync will
  //   obviously not work.
  // - The find output contains more than just what is needed for mtimeDirTree; it
  //   contains everything needed by websocketfs for doing stat, i.e., this output
  //   is used for the metadataFile functionality of websocketfs.
  // - Just a little fact -- output from find is NOT sorted in any guaranteed way.
  // Y2K alert -- note the %.10T below truncates times to integers, and will I
  // guess fail a few hundred years from now.
  const topPaths = (await readdir(path)).filter(
    (p) => !p.startsWith(".") && !exclude.includes(p),
  );
  if (topPaths.length === 0) {
    // Nothing to scan.  We must NOT run find without any starting point:
    // GNU find would then default to ".", walking exactly the hidden and
    // excluded top level trees that we promise to ignore (and printing them
    // with a "./" prefix that the exclude patterns do not match).
    return "";
  }
  const { stdout } = await exec(
    "find",
    [
      ...topPaths,
      // This '-not -readable -prune -o ' excludes files and directories that we
      // cannot read, since there is no possible way to sync them, and a user
      // (not root) might not be able to fix this.  See
      // https://stackoverflow.com/questions/762348/how-can-i-exclude-all-permission-denied-messages-from-find/25234419#25234419
      "-not",
      "-readable",
      "-prune",
      "-o",
      ...findExclude(exclude),
      "-printf",
      "%p\\0%.10T@ %.10A@ %b %s %M\\0\\0",
    ],
    {
      cwd: path,
    },
  );
  return stdout;
}
66

67
// Compute the map from paths to their integral mtime for the entire directory tree
// NOTE: this could also be done with the walkdir library, but using find
// is several times faster in general. This is *the* bottleneck, and the
// subprocess IO isn't much, so calling find as a subprocess is the right
// solution!  This is not a hack at all.
// IMPORTANT: top level hidden subdirectories in path are always ignored
//
// The keys of the returned map are the paths printed by find with "./"
// prepended.  If the string metadataFile is given it is parsed instead of
// running find; it must consist of records "<path>\0<mtime>[ ...]\0\0".
// Returns {} if path does not exist.
export async function mtimeDirTree({
  path,
  exclude,
  metadataFile,
}: {
  path: string;
  exclude: string[];
  metadataFile?: string;
}): Promise<{ [path: string]: number }> {
  log("mtimeDirTree", path, exclude);
  if (!(await exists(path))) {
    return {};
  }
  // If the string metadataFile is passed in (as output from metadataFile), then we use that
  // If it isn't, then we compute just what is needed here.
  if (metadataFile == null) {
    const topPaths = (await readdir(path)).filter(
      (p) => !p.startsWith(".") && !exclude.includes(p),
    );
    if (topPaths.length === 0) {
      // With no starting point GNU find defaults to ".", so it would walk
      // all the hidden top level trees only for every record to be dropped
      // by the startsWith(".") check below.  Skip the subprocess entirely.
      return {};
    }
    const { stdout } = await exec(
      "find",
      [
        ...topPaths,
        "-not", // '-not -readable -prune -o' skips anything we cannot read
        "-readable",
        "-prune",
        "-o",
        ...findExclude(exclude),
        "-printf",
        "%p\\0%T@\\0\\0",
      ],
      {
        cwd: path,
      },
    );
    metadataFile = stdout;
  }
  const mtimes: { [path: string]: number } = {};
  for (const record of metadataFile.split("\0\0")) {
    if (!record) continue; // trailing blank line of file
    const [filePath, meta] = record.split("\0");
    if (filePath.startsWith(".")) {
      // never include top level hidden paths, if they are there for some reason.
      continue;
    }
    // The mtime is the first space separated field of meta.
    // NOTE -- GNU tar discards fractional part of timestamp, thus rounding down,
    // so this is right, since we will use tar for sending files.
    mtimes["./" + filePath] = parseInt(meta.split(" ")[0], 10);
  }
  return mtimes;
}
121

122
// Build the find expression that filters out every path in `exclude`:
// for each entry, one "-not -path" test for the entry itself and one for
// everything below it (entry/*).
function findExclude(exclude: string[]): string[] {
  // We run "find *", not "find .", so no need to exclude hidden files here.
  // Also, doing it here instead of with "find *" is massively slower in general!
  return exclude.flatMap((excluded) => [
    "-not",
    "-path",
    excluded,
    "-not",
    "-path",
    `${excluded}/*`,
  ]);
}
136

137
// Recursively remove each of the given paths, interpreted relative to the
// directory `rel`.  Throws if rel is not given.  A failure to remove an
// individual path is only logged as a warning; the remaining paths are
// still processed.
export async function remove(paths: string[], rel?: string) {
  if (!rel) {
    throw Error("rel must be defined");
  }
  // Sort a COPY, so that the caller's array is not reordered as a side effect.
  // In reverse lexicographic order a path always comes before any of its
  // proper prefixes, so "a/b" is removed before its containing directory "a".
  const ordered = [...paths].sort().reverse();
  for (const path of ordered) {
    try {
      await rm(join(rel, path), { recursive: true });
    } catch (err) {
      log(`WARNING: issue removing '${path}' -- ${err}`);
    }
  }
}
150

151
// Compress the string `contents` (encoded as utf8) with lz4 and write the
// result to the file at `path`.
// This is not streaming, because lz4-napi doesn't support
// streams: https://github.com/antoniomuso/lz4-napi/issues/429
// That is fine for our actual use, which is for fairly small files
// that got sent via an api call.
export async function writeFileLz4(path: string, contents: string) {
  const raw = Buffer.from(contents);
  // lz4-napi has no docs; compressFrame is the function that produces a
  // file the lz4 command can decompress -- "compress" does not.
  const framed = await compressFrame(raw);
  await writeFile(path, framed);
}
162

163
/*
Given an array paths of relative paths (relative to my HOME directory),
the function parseCommonPrefixes outputs an array of objects

{prefix:string; paths:string[]}

where prefix is a common path prefix of all the paths, and paths is what
is after that prefix, written relative to it (starting with "./").
Thus if the output is x, then

join(x[0].prefix, x[0].paths[0]), join(x[0].prefix, x[0].paths[1]), ..., join(x[x.length-1].prefix, x[x.length-1].paths[0]), ...

is exactly the original input string[] paths, in sorted order (and
normalized by join).  The input array itself is not modified.
*/

export function parseCommonPrefixes(
  paths: string[],
): { prefix: string; paths: string[] }[] {
  // This function slices the sorted path list into runs of consecutive paths
  // having the same dirname, and for each run records that dirname as the
  // prefix together with the rest of each path rewritten to start with "./".

  // Rewrite p as a "./" path relative to prefix, where prefix = dirname(p).
  const relativeTo = (prefix: string, p: string): string => {
    const rest = p.slice(prefix.length);
    if (p.startsWith(prefix) && rest.startsWith("/")) {
      // usual case, e.g., p = "a/b", prefix = "a", rest = "/b"
      return "." + rest;
    }
    if (prefix.endsWith("/")) {
      // p is directly below the root, e.g., p = "/foo", prefix = "/"; the
      // prefix already contains the separator, so rest is just "foo".
      return "./" + rest;
    }
    // p has no directory part (e.g., "foo" or ".bashrc"), so dirname returned
    // a synthesized "." that is not literally a prefix of p; slicing it off
    // would eat the first character of the name.
    return "./" + p;
  };

  // sort (a copy of) the paths to group common prefixes
  const sortedPaths = paths.slice().sort();
  const result: { prefix: string; paths: string[] }[] = [];

  let i = 0;
  while (i < sortedPaths.length) {
    const commonPrefix = dirname(sortedPaths[i]);
    let j = i + 1;

    // count the same prefixes
    while (j < sortedPaths.length && dirname(sortedPaths[j]) === commonPrefix) {
      j++;
    }

    // slice the paths with the same prefix and remove the prefix
    const subPaths = sortedPaths
      .slice(i, j)
      .map((p) => relativeTo(commonPrefix, p));

    result.push({ prefix: commonPrefix, paths: subPaths });

    i = j;
  }

  return result;
}
212

213
Product

Resources

Company