import {open, readdir, stat} from 'fs/promises';
import type {FileHandle} from 'fs/promises';
import {join} from 'path';
import type {Dirent} from 'node:fs';
import {Buffer} from 'buffer';

/** Locale used for every number formatted by this module. */
const locale = 'en-US';

const Kilobyte = 1024;
const Megabyte = Kilobyte * 1024;
const Gigabyte = Megabyte * 1024;

/** Number of bytes requested from each file per comparison step. */
const BLOCK_SIZE = 16 * Megabyte;

// Scratch buffers shared by every compareFiles() call. They are allocated once
// at module load and overwritten on each read, so bytes beyond the most recent
// read length are leftovers from earlier reads and must never be compared.
const BufferA: Buffer = Buffer.alloc(BLOCK_SIZE);
const BufferB = Buffer.alloc(BLOCK_SIZE);

/**
 * Formats a byte count using binary units.
 *
 * Values below 1024 are printed as-is with no suffix and no forced fraction
 * digits; larger values are scaled to ' K', ' M' or ' G' and printed with
 * exactly one fraction digit. Digit grouping is disabled.
 *
 * @param size Number of bytes.
 * @returns The formatted number followed by the unit suffix (if any).
 */
export function formatSize(size: number): string {
    let value = size;
    let suffix = '';
    let digits = 0;

    if (size >= Gigabyte) {
        value = size / Gigabyte;
        suffix = ' G';
        digits = 1;
    } else if (size >= Megabyte) {
        value = size / Megabyte;
        suffix = ' M';
        digits = 1;
    } else if (size >= Kilobyte) {
        value = size / Kilobyte;
        suffix = ' K';
        digits = 1;
    }

    const text = value.toLocaleString(locale, {
        minimumFractionDigits: digits,
        maximumFractionDigits: digits,
        useGrouping: false,
    });
    return `${text}${suffix}`;
}

/**
 * Lists a directory and splits its entries into directories and everything else.
 *
 * @param path Directory to read.
 * @returns `dirs` holds the entries for which `isDirectory()` is true; `files`
 *          holds all remaining entries (not only regular files).
 */
export async function dirEntries(path: string): Promise<{files: Dirent[]; dirs: Dirent[]}> {
    const files: Dirent[] = [];
    const dirs: Dirent[] = [];
    for (const entry of await readdir(path, {withFileTypes: true})) {
        (entry.isDirectory() ? dirs : files).push(entry);
    }
    return {files, dirs};
}

/**
 * Reads from the file's current position until `buffer` is full or end of
 * file is reached.
 *
 * A single read() may deliver fewer bytes than requested, so the call is
 * repeated until nothing more can be stored or nothing more is available.
 *
 * @returns Number of bytes stored at the start of `buffer`; 0 means end of file.
 */
async function readBlock(file: FileHandle, buffer: Buffer): Promise<number> {
    let filled = 0;
    while (filled < buffer.length) {
        const {bytesRead} = await file.read(buffer, filled, buffer.length - filled);
        if (bytesRead === 0) break;
        filled += bytesRead;
    }
    return filled;
}

/**
 * Compares the contents of two files block by block.
 *
 * Uses the module-level scratch buffers, so overlapping calls would overwrite
 * each other's data; await one call before starting the next.
 *
 * @param localPath  Path of the first file.
 * @param remotePath Path of the second file.
 * @param size       Expected file size. Currently unused; kept so existing
 *                   callers keep working.
 * @returns `true` when both files contain exactly the same bytes.
 * @throws Whatever `open()` or `read()` reject with (e.g. a missing file).
 */
export async function compareFiles(localPath: string, remotePath: string, size: number): Promise<boolean> {
    let localFile: FileHandle | undefined;
    let remoteFile: FileHandle | undefined;
    try {
        // Opened inside the try so the first handle is still closed when the
        // second open() rejects.
        localFile = await open(localPath);
        remoteFile = await open(remotePath);

        for (; ;) {
            const localBytes = await readBlock(localFile, BufferA);
            const remoteBytes = await readBlock(remoteFile, BufferB);

            // One file ended before the other: the lengths differ.
            if (localBytes !== remoteBytes) return false;
            // Both files ended at the same point without any mismatch.
            if (localBytes === 0) return true;
            // Compare only the bytes just read; the tail of each buffer still
            // holds data from earlier reads.
            if (BufferA.compare(BufferB, 0, localBytes, 0, localBytes) !== 0) return false;
        }
    } finally {
        await Promise.allSettled([localFile?.close(), remoteFile?.close()]);
    }
}

/**
 * Indexes the files of one or more directory trees by size and finds
 * byte-identical copies of them in another scanner's index.
 */
export
class DirScanner {
    /** File size in bytes -> paths of all indexed files of that size. */
    private readonly bySize = new Map<number, string[]>();

    /**
     * Recursively adds every non-directory entry below `path` to the index.
     *
     * @param path Directory to scan.
     */
    async addDirectory(path: string): Promise<void> {
        const entries = await dirEntries(path);

        for (const file of entries.files) {
            const filePath = join(path, file.name);
            const {size} = await stat(filePath);
            const group = this.bySize.get(size);
            if (group) {
                group.push(filePath);
            } else {
                this.bySize.set(size, [filePath]);
            }
        }

        for (const dir of entries.dirs) {
            await this.addDirectory(join(path, dir.name));
        }
    }

    /**
     * Looks up every file of this scanner in `other`.
     *
     * Files are processed in ascending size order.
     *
     * @param other Scanner whose index is searched for copies.
     * @returns `duplicated` maps each file that has a copy to the first copy
     *          found in `other`; `unique` lists the files without any copy.
     */
    async compareTo(other: DirScanner): Promise<{unique: string[]; duplicated: Map<string, string>}> {
        const unique: string[] = [];
        const duplicated = new Map<string, string>();

        // Explicit numeric comparator: the default sort() compares the sizes
        // as strings, which would put 10 before 9.
        const sizes = [...this.bySize.keys()].sort((a, b) => a - b);
        for (const size of sizes) {
            for (const name of this.bySize.get(size) ?? []) {
                const found = await other.findCopy(name);
                if (found) {
                    duplicated.set(name, found);
                } else {
                    unique.push(name);
                }
            }
        }
        return {unique, duplicated};
    }

    /**
     * Searches the index for a file with the same content as `file`.
     *
     * @param file Path of the file to look for.
     * @returns Path of the first indexed file with identical content, or
     *          `false` when there is none. An indexed entry that is the very
     *          same file (same device and inode) is not counted as a copy.
     */
    private async findCopy(file: string): Promise<string | false> {
        const fileStat = await stat(file);
        const sameSize = this.bySize.get(fileStat.size);
        if (!sameSize) return false;

        for (const existingPath of sameSize) {
            const existingStat = await stat(existingPath);
            // Inode numbers are only unique within one device, so both
            // values have to match before the entry is skipped as "itself".
            const sameFile = fileStat.dev === existingStat.dev && fileStat.ino === existingStat.ino;
            if (sameFile) continue;
            if (await compareFiles(file, existingPath, fileStat.size)) return existingPath;
        }
        return false;
    }
}