From 76e3f8917c283367086a56331dcbb432a4e9faf3 Mon Sep 17 00:00:00 2001
From: Grief
Date: Tue, 12 Aug 2025 00:47:54 +0100
Subject: [PATCH] re-written in typescript

---
Review notes (free-form text in this position is ignored by git am):
 * lib/dir-scanner.ts: compareFiles() compares only the bytes actually read
   (the shared 16 MiB buffers keep stale data from earlier reads) and closes
   the first handle when opening the second one fails; sizes are sorted
   numerically; findCopy() matches device and inode; the Map fields are typed.
 * bin/has-copy.ts: failures are reported and exit with status 1; `delete`
   never removes both files of a mutually duplicated pair.
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy, so the bin/has-copy deletion hunk may need
   `git apply --ignore-whitespace`; the `index` hashes of the two edited
   files are stale.

 .gitignore         |   2 +
 bin/has-copy       | 206 ---------------------------------------------------------
 bin/has-copy.ts    |  48 +++++++++++
 bun.lock           |  20 +++++
 lib/dir-scanner.ts | 154 +++++++++++++++++++++++++++++++++
 package.json       |   9 ++
 tsconfig.json      |   7 ++
 7 files changed, 240 insertions(+), 206 deletions(-)
 create mode 100644 .gitignore
 delete mode 100755 bin/has-copy
 create mode 100755 bin/has-copy.ts
 create mode 100644 bun.lock
 create mode 100644 lib/dir-scanner.ts
 create mode 100644 package.json
 create mode 100644 tsconfig.json

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9f0fdca
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/.idea/
+/node_modules/
diff --git a/bin/has-copy b/bin/has-copy
deleted file mode 100755
index b54f9c0..0000000
--- a/bin/has-copy
+++ /dev/null
@@ -1,206 +0,0 @@
-#!/usr/bin/env node
-const {open, readdir, stat, rmdir} = require('node:fs/promises');
-const {join} = require('node:path');
-const {unlink} = require('node:fs/promises');
-const {Buffer} = require('node:buffer');
-
-
-const Kilobyte = 1024;
-const Megabyte = Kilobyte * 1024;
-const Gigabyte = Megabyte * 1024;
-
-const BLOCK_SIZE = 16 * Megabyte;
-
-const locale = 'en-US';
-
-/** @type {Buffer} */
-const BufferA = Buffer.alloc(BLOCK_SIZE);
-/** @type {Buffer} */
-const BufferB = Buffer.alloc(BLOCK_SIZE);
-
-
-const Terminal = Object.freeze({
-    fg: {
-        red: (text) => `\u001b[31m${text}\u001b[0m`,
-        green: (text) => `\u001b[32m${text}\u001b[0m`,
-        yellow: (text) => `\u001b[33m${text}\u001b[0m`,
-        blue: (text) => `\u001b[34m${text}\u001b[0m`,
-    }
-});
-
-/**
- * @typedef {{bySize: {[key: string]: string[]}}} Cache
- */
-
-/**
- * @param {function(typeof Terminal)|string} param
- */
-function log(param) {
-    console.log(typeof param === 'string' ? param : param(Terminal))
-}
-
-/**
- * @param {Number} size
- * @returns {*}
- */
-function formatSize(size) {
-    let suffix = '';
-    let digits = 0;
-    if (size >= Megabyte) {
-        digits = 1;
-        if (size >= Gigabyte) {
-            size /= Gigabyte;
-            suffix = ' G';
-        } else {
-            size /= Megabyte;
-            suffix = ' M';
-        }
-    } else if (size >= Kilobyte) {
-        digits = 1;
-        size /= Kilobyte;
-        suffix = ' K'
-    }
-    return `${size.toLocaleString(locale, {
-        minimumFractionDigits: digits,
-        maximumFractionDigits: digits,
-        useGrouping: false
-    })}${suffix}`
-}
-
-/**
- * @param path
- * @returns {Promise<{files: Dirent[], dirs: Dirent[]}>}
- */
-async function dirEntries(path) {
-    return (await readdir(path, {withFileTypes: true}))
-        .reduce((acc, file) => {
-            (file.isDirectory() ? acc.dirs : acc.files).push(file)
-            return acc;
-        }, {files: [], dirs: []})
-}
-
-async function compareFiles(localPath, remotePath, size) {
-    const localFile = await open(localPath),
-        remoteFile = await open(remotePath);
-    try {
-        for (let i = 0; ;) {
-            const start = process.hrtime.bigint();
-            const a = await localFile.read(BufferA, 0, BLOCK_SIZE);
-            const b = await remoteFile.read(BufferB, 0, BLOCK_SIZE);
-
-            if (a.bytesRead !== b.bytesRead || BufferA.compare(BufferB) !== 0) {
-                log(f => f.fg.yellow(` files are different`));
-                console.log();
-                return false;
-            }
-            if (a.bytesRead === 0) {
-                console.log();
-                return true;
-            }
-            const seconds = Number(process.hrtime.bigint() - start) / 1_000_000_000;
-            i += a.bytesRead;
-            process.stdout.write(`\r${(i * 100 / size).toLocaleString(locale, {
-                minimumFractionDigits: 2,
-                maximumFractionDigits: 2
-            })}% ${formatSize((a.bytesRead + b.bytesRead) / seconds)}Bytes/sec`)
-        }
-        console.log('')
-
-    } finally {
-        Promise.allSettled([localFile.close(), remoteFile.close()]);
-    }
-}
-
-/**
- * @param local
- * @param {Cache} cache
- * @param options
- * @returns {Promise}
- */
-async function compareDirs(local, cache, options) {
-    try {
-        let canRemoveDir = true;
-        const entries = await dirEntries(local)
-        loop: for (const file of entries.files) {
-            const localPath = join(local, file.name);
-            const localStat = await stat(localPath);
-
-            const sameSize = cache.bySize[localStat.size];
-            if (!sameSize) {
-                canRemoveDir = false;
-                continue;
-            }
-
-            log(f => `${f.fg.green(localPath)} (${f.fg.green(`${formatSize(localStat.size)}B`)})`)
-            for (let i = 0; i < sameSize.length; i++) {
-                const remotePath = sameSize[i];
-                const remoteStat = await stat(remotePath);
-
-
-                if (localStat.ino === remoteStat.ino) {
-                    log(f => f.fg.red('This is the same file'));
-                } else {
-                    log(f => `Comparing with ${f.fg.blue(remotePath)}`)
-                    if (await compareFiles(localPath, remotePath, localStat.size)) {
-                        if (options.delete) {
-                            log(f => ` ${f.fg.red('files are the same - deleting')}`);
-                            await unlink(localPath)
-                        }
-                        continue loop;
-                    }
-                }
-            }
-            canRemoveDir = false;
-        }
-        for (const dir of entries.dirs) {
-            if (!await compareDirs(join(local, dir.name), cache, options)) canRemoveDir = false;
-        }
-        if (canRemoveDir && options.delete) {
-            log(f => `Directory ${f.fg.yellow(local)} is empty, removed`)
-            await rmdir(local)
-        }
-        return canRemoveDir;
-    } catch (err) {
-        console.error(err);
-        return false;
-    }
-}
-
-const options = {
-    delete: false
-};
-
-if (process.argv[3] === 'delete') {
-    options.delete = true;
-}
-
-/**
- * @param path
- * @param result
- * @returns {Promise}
- */
-async function scanDir(path, result) {
-    const entries = await dirEntries(path)
-    for (const file of entries.files) {
-        let absPath = join(path, file.name);
-        let {size} = (await stat(absPath));
-        let bySize = result.bySize[size];
-        if (bySize) {
-            bySize.push(absPath);
-        } else {
-            bySize = [absPath];
-            result.bySize[size] = bySize;
-        }
-    }
-    for (const dir of entries.dirs) {
-        await scanDir(join(path, dir.name), result)
-    }
-    return result;
-}
-
-(async function () {
-    log('Scanning target directory...')
-    const scan = await scanDir(process.argv[2], {bySize: {}})
-    log('Comparing...')
-    await compareDirs(process.cwd(), scan, options);
-})();
diff --git a/bin/has-copy.ts b/bin/has-copy.ts
new file mode 100755
index 0000000..f5f79ba
--- /dev/null
+++ b/bin/has-copy.ts
@@ -0,0 +1,48 @@
+#!/usr/bin/env node
+import {unlink} from 'fs/promises';
+import {resolve} from 'path';
+import {DirScanner} from '../lib/dir-scanner.ts';
+import chalk from 'chalk';
+
+const {red, green} = chalk;
+
+const [node, script, target, param] = process.argv;
+if (!target) {
+    console.error(`Usage ${node} ${script} DIRECTORY`)
+    process.exit(1)
+}
+
+// Reports which files under the current working directory have a
+// byte-identical copy under the target directory; with a second argument of
+// `delete` the local duplicates are removed afterwards.
+(async function () {
+    const localDir = new DirScanner();
+    const targetDir = new DirScanner();
+    console.log('Scanning directories...')
+    let local = process.cwd();
+    await localDir.addDirectory(local);
+    await targetDir.addDirectory(target)
+    console.log('Comparing...');
+    const {duplicated, unique} = await localDir.compareTo(targetDir);
+
+    for (let [name, copy] of duplicated)
+        console.log(`${green('DUPLICATES:')} ${name.slice(local.length)} == ${copy.slice(target.length)}`);
+    for (let name of unique)
+        console.log(`${red('UNIQUE:')} ${name.slice(local.length)}`);
+    console.log(`Duplicated ${duplicated.size}, unique ${unique.length}`)
+
+    if (param === 'delete') {
+        // When the target overlaps the working directory two files can be each
+        // other's copy, so never delete a file whose copy is already gone.
+        const removed = new Set<string>();
+        for (const [name, copy] of duplicated) {
+            if (removed.has(resolve(copy))) continue;
+            await unlink(name)
+            removed.add(resolve(name));
+        }
+    }
+})().catch((err: unknown) => {
+    // Surface scan/compare/delete failures instead of an unhandled rejection.
+    console.error(err);
+    process.exit(1);
+});
diff --git a/bun.lock b/bun.lock
new file mode 100644
index 0000000..5ddc593
--- /dev/null
+++ b/bun.lock
@@ -0,0 +1,20 @@
+{
+  "lockfileVersion": 1,
+  "workspaces": {
+    "": {
+      "dependencies": {
+        "chalk": "^5.5.0",
+      },
+      "devDependencies": {
+        "@types/node": "^24.2.1",
+      },
+    },
+  },
+  "packages": {
+    "@types/node": ["@types/node@24.2.1", "", { "dependencies": { "undici-types": "~7.10.0" } }, "sha512-DRh5K+ka5eJic8CjH7td8QpYEV6Zo10gfRkjHCO3weqZHWDtAaSTFtl4+VMqOJ4N5jcuhZ9/l+yy8rVgw7BQeQ=="],
+
+    "chalk": ["chalk@5.5.0", "", {}, "sha512-1tm8DTaJhPBG3bIkVeZt1iZM9GfSX2lzOeDVZH9R9ffRHpmHvxZ/QhgQH/aDTkswQVt+YHdXAdS/In/30OjCbg=="],
+
+    "undici-types": ["undici-types@7.10.0", "", {}, "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag=="],
+  }
+}
diff --git a/lib/dir-scanner.ts b/lib/dir-scanner.ts
new file mode 100644
index 0000000..35c660c
--- /dev/null
+++ b/lib/dir-scanner.ts
@@ -0,0 +1,154 @@
+import {open, readdir, stat} from 'fs/promises';
+import {join} from 'path';
+import type {Dirent} from 'node:fs';
+import {Buffer} from 'buffer';
+
+
+const locale = 'en-US';
+
+const Kilobyte = 1024;
+const Megabyte = Kilobyte * 1024;
+const Gigabyte = Megabyte * 1024;
+
+const BLOCK_SIZE = 16 * Megabyte;
+
+// Scratch buffers shared by every compareFiles() call; bytes beyond the last
+// read are stale and must never take part in a comparison.
+const BufferA: Buffer = Buffer.alloc(BLOCK_SIZE);
+const BufferB = Buffer.alloc(BLOCK_SIZE);
+
+
+/**
+ * Formats a byte count with a binary unit suffix (' K', ' M' or ' G') and one
+ * fraction digit; values below one kilobyte get no suffix or fraction digits.
+ */
+export function formatSize(size: number) {
+    let suffix = '';
+    let digits = 0;
+    if (size >= Megabyte) {
+        digits = 1;
+        if (size >= Gigabyte) {
+            size /= Gigabyte;
+            suffix = ' G';
+        } else {
+            size /= Megabyte;
+            suffix = ' M';
+        }
+    } else if (size >= Kilobyte) {
+        digits = 1;
+        size /= Kilobyte;
+        suffix = ' K'
+    }
+    return `${size.toLocaleString(locale, {
+        minimumFractionDigits: digits,
+        maximumFractionDigits: digits,
+        useGrouping: false
+    })}${suffix}`
+}
+
+
+/** Reads `path` and splits its entries into directories and everything else. */
+export async function dirEntries(path: string) {
+    return (await readdir(path, {withFileTypes: true}))
+        .reduce((acc, file) => {
+            (file.isDirectory() ? acc.dirs : acc.files).push(file)
+            return acc;
+        }, {files: [] as Dirent[], dirs: [] as Dirent[]})
+}
+
+/**
+ * Compares the content of two files block by block.
+ *
+ * @param localPath first file to read
+ * @param remotePath second file to read
+ * @param size currently unused; kept so existing callers keep working
+ * @returns true when both files have identical content
+ */
+export async function compareFiles(localPath: string, remotePath: string, size: number) {
+    const localFile = await open(localPath);
+    try {
+        // Opened inside the try block so localFile is closed if this throws.
+        const remoteFile = await open(remotePath);
+        try {
+            for (; ;) {
+                const a = await localFile.read(BufferA, 0, BLOCK_SIZE);
+                const b = await remoteFile.read(BufferB, 0, BLOCK_SIZE);
+
+                if (a.bytesRead !== b.bytesRead) return false;
+                if (a.bytesRead === 0) return true;
+                // Compare only the bytes just read: the tails of the shared
+                // buffers still hold data from earlier blocks or files.
+                if (BufferA.compare(BufferB, 0, b.bytesRead, 0, a.bytesRead) !== 0) return false;
+            }
+        } finally {
+            // Best effort, as before: a failed close must not mask the result.
+            await remoteFile.close().catch(() => {});
+        }
+    } finally {
+        await localFile.close().catch(() => {});
+    }
+}
+
+/** Index of files grouped by size, used to find byte-identical copies. */
+export class DirScanner {
+    private bySize = new Map<number, string[]>();
+
+    /** Recursively adds every non-directory entry below `path` to the index. */
+    async addDirectory(path: string) {
+        const entries = await dirEntries(path)
+        for (const file of entries.files) {
+            let absPath = join(path, file.name);
+            let {size} = (await stat(absPath));
+            let bySize = this.bySize.get(size);
+            if (bySize) {
+                bySize.push(absPath);
+            } else {
+                bySize = [absPath];
+                this.bySize.set(size, bySize);
+            }
+        }
+        for (const dir of entries.dirs) {
+            await this.addDirectory(join(path, dir.name))
+        }
+    }
+
+    /**
+     * Looks up every indexed file in `other`.
+     *
+     * @returns `duplicated` maps each file that has an identical copy in
+     * `other` to that copy; `unique` lists the files without one.
+     */
+    async compareTo(other: DirScanner) {
+        const unique: string[] = [];
+        const duplicated = new Map<string, string>();
+        // Default sort() compares keys as strings; sizes need numeric order.
+        const sizes = [...this.bySize.keys()].sort((a, b) => a - b);
+        for (const size of sizes) {
+            for (const name of this.bySize.get(size) ?? []) {
+                const found = await other.findCopy(name);
+                if (found) duplicated.set(name, found);
+                else unique.push(name)
+            }
+        }
+        return {unique, duplicated};
+    }
+
+    /**
+     * Returns the path of an indexed file whose content equals `file`, or
+     * false when there is none. The file itself (same device and inode) is
+     * never reported as its own copy.
+     */
+    private async findCopy(file: string): Promise<string | false> {
+        const fileStat = await stat(file);
+        const sameSize = this.bySize.get(fileStat.size);
+        if (!sameSize) return false;
+
+        for (const existingPath of sameSize) {
+            const existingStat = await stat(existingPath);
+            // Inode numbers are unique per device only, so compare both.
+            if (fileStat.dev === existingStat.dev && fileStat.ino === existingStat.ino) continue
+            if (await compareFiles(file, existingPath, fileStat.size)) return existingPath;
+        }
+        return false;
+    }
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..c57fec9
--- /dev/null
+++ b/package.json
@@ -0,0 +1,9 @@
+{
+  "type": "module",
+  "devDependencies": {
+    "@types/node": "^24.2.1"
+  },
+  "dependencies": {
+    "chalk": "^5.5.0"
+  }
+}
diff --git a/tsconfig.json b/tsconfig.json
new file mode 100644
index 0000000..2e719ca
--- /dev/null
+++ b/tsconfig.json
@@ -0,0 +1,7 @@
+{
+  "compilerOptions": {
+    "allowImportingTsExtensions": true,
+    "target": "esnext",
+    "moduleResolution": "nodenext"
+  }
+}