import { readdir, rm, writeFile } from "fs/promises";
import { dirname, join } from "path";
import { exists } from "@cocalc/backend/misc/async-utils-node";
import { compressFrame } from "lz4-napi";
import { executeCode } from "@cocalc/backend/execute-code";

import getLogger from "@cocalc/backend/logger";
const log = getLogger("sync-fs:util").debug;

/**
 * Run a command through executeCode, logging the command, (at most) its last
 * 15 arguments and the options first.
 *
 * @param command - executable to run
 * @param args - command line arguments
 * @param options - extra fields spread into the executeCode call (e.g. cwd)
 * @returns whatever executeCode resolves to
 */
export async function exec(command: string, args?: string[], options?) {
  log("exec", command, "...", args?.slice(-15)?.join(" "), options);
  return await executeCode({ command, args, ...options });
}

/**
 * Produce the raw "find -printf" metadata listing for the directory tree
 * rooted at path.
 *
 * IMPORTANT: top level hidden subdirectories in path are always ignored, e.g.,
 * if path is /home/user, then /home/user/.foo is ignored, but /home/user/bar/.foo
 * is not ignored.
 *
 * @param path - directory to scan; find is run with this as its cwd
 * @param exclude - top level names to skip, also passed to find as
 *   "-not -path" patterns
 * @returns the stdout of find: records of the form
 *   "<path>\0<mtime> <atime> <blocks> <size> <mode>\0\0"; the empty string if
 *   path does not exist or has no top level entries to scan.
 */
export async function metadataFile({
  path,
  exclude,
}: {
  path: string;
  exclude: string[];
}): Promise<string> {
  log("metadataFile", path, exclude);
  if (!(await exists(path))) {
    return "";
  }
  // Recursively get enough metadata about all non-hidden top level path trees
  // (this is VASTLY more efficient
  // than "find . ...", especially on cocalc with its fuse mounted .snapshots!)
  // Notes about the find output option to printf:
  // - We use null characters as separators because they are the ONLY character
  //   that isn't allowed in a filename (besides '/')!  Filenames can have newlines
  //   in them!
  //   BUT -- we are assuming filenames can be encoded as utf8; if not, sync will
  //   obviously not work.
  // - The find output contains more than just what is needed for mtimeDirTree; it contains
  //   everything needed by websocketfs for doing stat, i.e., this output is used
  //   for the metadataFile functionality of websocketfs.
  // - Just a little fact -- output from find is NOT sorted in any guaranteed way.
  // Y2K alert -- note the %.10T below truncates times to integers, and will I guess fail a few hundred years from now.
  const topPaths = (await readdir(path)).filter(
    (p) => !p.startsWith(".") && !exclude.includes(p),
  );
  if (topPaths.length === 0) {
    // With no starting points find falls back to searching ".", which would
    // walk (and report) exactly the hidden top level trees that must be
    // ignored.  There is nothing to report, so don't run find at all.
    return "";
  }
  // NOTE(review): a top level entry whose name starts with "-" would be parsed
  // by find as part of the expression rather than as a starting point; not
  // handled here -- confirm whether such names can occur.
  const { stdout } = await exec(
    "find",
    topPaths.concat([
      // This '-not -readable -prune -o ' excludes directories that we can't read, since there is no possible
      // way to sync them, and a user (not root) might not be able to fix this.  See
      // https://stackoverflow.com/questions/762348/how-can-i-exclude-all-permission-denied-messages-from-find/25234419#25234419
      "-not",
      "-readable",
      "-prune",
      "-o",
      ...findExclude(exclude),
      "-printf",
      "%p\\0%.10T@ %.10A@ %b %s %M\\0\\0",
    ]),
    {
      cwd: path,
    },
  );
  return stdout;
}

// Compute the map from paths to their integral mtime for the entire directory tree
// NOTE: this could also be done with the walkdir library, but using find
// is several times faster in general.  This is *the* bottleneck, and the
// subprocess IO isn't much, so calling find as a subprocess is the right
// solution!
// This is not a hack at all.
// IMPORTANT: top level hidden subdirectories in path are always ignored
/**
 * Build the map "./<path>" -> integer mtime for the directory tree at path.
 *
 * @param path - directory to scan; find is run with this as its cwd
 * @param exclude - top level names to skip, also passed to find as
 *   "-not -path" patterns
 * @param metadataFile - optional listing in the record format
 *   "<path>\0<mtime> ...\0\0" (as produced by the metadataFile function);
 *   when given, it is parsed instead of running find.
 * @returns map from "./"-prefixed relative path to the mtime truncated to an
 *   integer; empty if path does not exist.
 */
export async function mtimeDirTree({
  path,
  exclude,
  metadataFile,
}: {
  path: string;
  exclude: string[];
  metadataFile?: string;
}): Promise<{ [path: string]: number }> {
  log("mtimeDirTree", path, exclude);
  if (!(await exists(path))) {
    return {};
  }
  // If the string metadataFile is passed in (as output from metadataFile), then we use that
  // If it isn't, then we compute just what is needed here.
  if (metadataFile == null) {
    const topPaths = (await readdir(path)).filter(
      (p) => !p.startsWith(".") && !exclude.includes(p),
    );
    if (topPaths.length === 0) {
      // Without starting points find would search "." -- walking every hidden
      // top level tree only for all of its output to be discarded below.
      return {};
    }
    const args = topPaths.concat([
      "-not", // '-not -readable -prune -o' - skip unreadable directories
      "-readable",
      "-prune",
      "-o",
      ...findExclude(exclude),
      "-printf",
      "%p\\0%T@\\0\\0",
    ]);
    const { stdout } = await exec("find", [...args], {
      cwd: path,
    });
    metadataFile = stdout;
  }
  const mtimes: { [path: string]: number } = {};
  for (const record of metadataFile.split("\0\0")) {
    if (!record) continue; // trailing blank record at the end of the output
    const [name, meta] = record.split("\0");
    if (!name || meta == null) {
      // malformed record (no path/metadata separator) -- skip rather than crash
      continue;
    }
    if (name.startsWith(".")) {
      // never include top level hidden paths, if they are there for some reason.
      continue;
    }
    // NOTE -- GNU tar discards fractional part of timestamp, thus rounding down,
    // so this is right, since we will use tar for sending files.
    const [mtimeField = ""] = meta.split(" ");
    const mtime = parseInt(mtimeField, 10);
    if (Number.isNaN(mtime)) {
      continue;
    }
    mtimes["./" + name] = mtime;
  }
  return mtimes;
}

/**
 * Translate the exclude list into find arguments: for every entry, exclude
 * both the entry itself and everything matching "<entry>/*".
 */
function findExclude(exclude: string[]): string[] {
  // We run "find *", not "find .", so no need to exclude hidden files here.
  // Also, doing it here instead of with "find *" is massively slower in general!
  return exclude.flatMap((path) => [
    "-not",
    "-path",
    path,
    "-not",
    "-path",
    `${path}/*`,
  ]);
}

/**
 * Recursively delete each of the given paths, resolved relative to rel.
 * Failures are logged as warnings and do not stop the remaining removals.
 *
 * @param paths - paths to delete; the caller's array is not modified
 * @param rel - directory the paths are relative to (required)
 * @throws Error if rel is not given
 */
export async function remove(paths: string[], rel?: string) {
  if (!rel) {
    throw Error("rel must be defined");
  }
  // TODO/guess -- by sorting we remove files in directory, then containing directory (?).
  // Sort a copy so that the array passed in by the caller is left untouched.
  const ordered = [...paths].sort().reverse();
  for (const path of ordered) {
    try {
      await rm(join(rel, path), { recursive: true });
    } catch (err) {
      log(`WARNING: issue removing '${path}' -- ${err}`);
    }
  }
}

// Write a utf8 string to file with lz4 compression.
// We are not using streaming because lz4-napi doesn't
// support streams: https://github.com/antoniomuso/lz4-napi/issues/429
// But our actual use of this is for files that are fairly small,
// which got sent via an api call.
export async function writeFileLz4(path: string, contents: string) {
  // lz4-napi has no docs, but compressFrame works to create a file
  // that the lz4 command can decompress, but "compress" does not.
  const compressed = await compressFrame(Buffer.from(contents));
  await writeFile(path, compressed);
}

/*
Given an array paths of relative paths (relative to my HOME directory),
the function parseCommonPrefixes outputs an array of objects

    {prefix:string; paths:string[]}

where prefix is a common path prefix of all the paths, and paths is what
is after that prefix.  Thus if the output is x, then

   join(x[0].prefix, x[0].paths[0]), join(x[0].prefix, x[0].paths[1]), ..., join(x[x.length-1].prefix, x[x.length-1].paths[0]), ...

is exactly the original input string[] paths (sorted, and up to path
normalization by join).
*/

export function parseCommonPrefixes(
  paths: string[],
): { prefix: string; paths: string[] }[] {
  // Sort a copy of the paths so that paths in the same directory are adjacent,
  // then cut the sorted list into runs having the same dirname.  Each run
  // becomes one {prefix, paths} object, where every path is rewritten as
  // "./<what comes after the prefix>".
  const sortedPaths = paths.slice().sort();
  const result: { prefix: string; paths: string[] }[] = [];

  for (const p of sortedPaths) {
    const prefix = dirname(p);
    let group = result[result.length - 1];
    if (group === undefined || group.prefix !== prefix) {
      group = { prefix, paths: [] };
      result.push(group);
    }
    // dirname() returns "." for a path with no directory part (e.g. "foo.txt")
    // and "/" for a path directly under the root; in those cases the prefix is
    // not literally followed by a separator inside p, so blindly slicing off
    // prefix.length characters would eat the start of the file name.  Only
    // strip the prefix when "<prefix>/" really is the beginning of p.
    const prefixWithSep = prefix.endsWith("/") ? prefix : prefix + "/";
    const rest = p.startsWith(prefixWithSep)
      ? p.slice(prefixWithSep.length)
      : p;
    group.paths.push("./" + rest);
  }

  return result;
}