scripts/lib/dir-scanner.ts

132 lines
3.7 KiB
TypeScript

import {open, readdir, stat} from 'fs/promises';
import {join} from 'path';
import type {Dirent} from 'node:fs';
import {Buffer} from 'buffer';
/** Locale passed to `toLocaleString` when rendering numbers for display. */
const locale = 'en-US';

/** Binary size units, expressed in bytes. */
const Kilobyte = 2 ** 10;
const Megabyte = Kilobyte ** 2;
const Gigabyte = Kilobyte ** 3;

/** Number of bytes requested from each file per read while comparing. */
const BLOCK_SIZE = Megabyte * 16;

/**
 * Module-level scratch buffers, one per file being compared.
 * They are allocated once (zero-filled) and reused by every `compareFiles`
 * call, so their contents persist between calls.
 */
const BufferA = Buffer.alloc(BLOCK_SIZE);
const BufferB = Buffer.alloc(BLOCK_SIZE);
/**
 * Renders a byte count as a short human-readable string.
 *
 * Values of at least one gigabyte, megabyte or kilobyte (binary units) are
 * scaled to that unit, printed with exactly one fraction digit and followed by
 * ` G`, ` M` or ` K`. Smaller values are printed with no fraction digits and
 * no suffix. Digit grouping is always disabled.
 *
 * @param size Size in bytes.
 * @returns The formatted number, with a unit suffix when one applies.
 */
export function formatSize(size: number) {
  // Shared number formatting: fixed fraction digits, no thousands separators.
  const render = (value: number, fractionDigits: number, unit: string) =>
    value.toLocaleString(locale, {
      minimumFractionDigits: fractionDigits,
      maximumFractionDigits: fractionDigits,
      useGrouping: false
    }) + unit;

  // Largest unit first, so each value is scaled by the biggest unit it reaches.
  if (size >= Gigabyte) return render(size / Gigabyte, 1, ' G');
  if (size >= Megabyte) return render(size / Megabyte, 1, ' M');
  if (size >= Kilobyte) return render(size / Kilobyte, 1, ' K');
  return render(size, 0, '');
}
/**
 * Lists a directory and splits its entries into directories and everything
 * else.
 *
 * @param path Directory to read.
 * @returns `dirs` holds the entries for which `isDirectory()` is true;
 *          `files` holds all remaining entries. Both keep `readdir` order.
 */
export async function dirEntries(path: string) {
  const files: Dirent[] = [];
  const dirs: Dirent[] = [];

  const listing = await readdir(path, {withFileTypes: true});
  for (const entry of listing) {
    if (entry.isDirectory()) {
      dirs.push(entry);
    } else {
      files.push(entry);
    }
  }

  return {files, dirs};
}
/**
 * Reads from `file` into `buffer` until the buffer is full or end-of-file is
 * reached, and returns how many bytes were stored.
 *
 * A single `read` may deliver fewer bytes than requested without being at
 * end-of-file, so reading continues until a zero-length read is seen.
 */
async function readFullBlock(file: Awaited<ReturnType<typeof open>>, buffer: Buffer): Promise<number> {
  let filled = 0;
  while (filled < buffer.length) {
    const {bytesRead} = await file.read(buffer, filled, buffer.length - filled);
    if (bytesRead === 0) break;
    filled += bytesRead;
  }
  return filled;
}

/**
 * Compares the contents of two files block by block.
 *
 * NOTE: the comparison uses the module-level `BufferA`/`BufferB` scratch
 * buffers, so overlapping (concurrent) calls would overwrite each other's
 * data; callers must await one comparison before starting the next.
 *
 * @param localPath  Path of the first file.
 * @param remotePath Path of the second file.
 * @param size       Expected file size in bytes. Not used by the comparison
 *                   itself; kept so existing callers keep working.
 * @returns `true` when both files contain exactly the same bytes, otherwise
 *          `false`.
 * @throws Rejects when either file cannot be opened or read.
 */
export async function compareFiles(localPath: string, remotePath: string, size: number): Promise<boolean> {
  const localFile = await open(localPath);
  // Opened inside the try block so the first handle is still closed when the
  // second open fails.
  let remoteFile: Awaited<ReturnType<typeof open>> | undefined;
  try {
    remoteFile = await open(remotePath);
    for (;;) {
      const localBytes = await readFullBlock(localFile, BufferA);
      const remoteBytes = await readFullBlock(remoteFile, BufferB);
      if (localBytes !== remoteBytes) return false;
      // Both files ended at the same offset without any differing block.
      if (localBytes === 0) return true;
      // Compare only the bytes just read: the rest of the shared buffers
      // holds leftovers from earlier blocks or earlier calls.
      if (BufferA.compare(BufferB, 0, remoteBytes, 0, localBytes) !== 0) return false;
    }
  } finally {
    // Best-effort close of whatever was opened; close errors are ignored.
    await Promise.allSettled([localFile.close(), remoteFile?.close()]);
  }
}
/**
 * Indexes the files of one or more directory trees by size, so that files
 * with identical content can be looked up in another scanner's index.
 */
export class DirScanner {
  /** Paths of all scanned files, grouped by file size in bytes. */
  private readonly bySize = new Map<number, string[]>();

  /**
   * Recursively adds every non-directory entry below `path` to the index.
   * Entries are processed one at a time; sub-directories are visited after
   * the files of their parent.
   *
   * @param path Directory to scan.
   * @throws Rejects when a directory cannot be listed or an entry cannot be
   *         stat-ed.
   */
  async addDirectory(path: string): Promise<void> {
    const entries = await dirEntries(path);
    for (const file of entries.files) {
      const filePath = join(path, file.name);
      const {size} = await stat(filePath);
      const group = this.bySize.get(size);
      if (group) {
        group.push(filePath);
      } else {
        this.bySize.set(size, [filePath]);
      }
    }
    for (const dir of entries.dirs) {
      await this.addDirectory(join(path, dir.name));
    }
  }

  /**
   * Looks up every file of this scanner in `other`.
   *
   * Files are visited in ascending order of size.
   *
   * @param other Scanner whose index is searched for copies.
   * @returns `duplicated` maps each of this scanner's files that has a copy
   *          to the path of the copy found in `other`; `unique` lists the
   *          files for which no copy was found.
   */
  async compareTo(other: DirScanner) {
    const unique: string[] = [];
    const duplicated = new Map<string, string>();

    // Numeric comparator: the default sort would order sizes as strings
    // (1, 10, 100, 2, ...).
    const groups = [...this.bySize.entries()].sort(([a], [b]) => a - b);
    for (const [, names] of groups) {
      for (const name of names) {
        const found = await other.findCopy(name);
        if (found) duplicated.set(name, found);
        else unique.push(name);
      }
    }
    return {unique, duplicated};
  }

  /**
   * Searches this scanner's index for a file with the same content as `file`.
   *
   * @param file Path of the file to look up.
   * @returns The path of the first indexed file with identical content, or
   *          `false` when there is none.
   */
  private async findCopy(file: string): Promise<string | false> {
    const fileStat = await stat(file);
    const sameSize = this.bySize.get(fileStat.size);
    if (!sameSize) return false;

    for (const existingPath of sameSize) {
      const existingStat = await stat(existingPath);
      // Skip the file itself (or a hard link to it). Inode numbers are only
      // unique per device, so the device id has to match as well.
      if (fileStat.dev === existingStat.dev && fileStat.ino === existingStat.ino) continue;
      if (await compareFiles(file, existingPath, fileStat.size)) return existingPath;
    }
    return false;
  }
}