From 6010b5a24c3718a3a2687aeb6ebbbd381a5b8797 Mon Sep 13 00:00:00 2001
From: Grief
Date: Mon, 13 Oct 2025 17:31:41 +0100
Subject: [PATCH] photos-diff.ts

---
 bin/photos-diff.ts | 547 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 547 insertions(+)
 create mode 100755 bin/photos-diff.ts

diff --git a/bin/photos-diff.ts b/bin/photos-diff.ts
new file mode 100755
index 0000000..a7ebad3
--- /dev/null
+++ b/bin/photos-diff.ts
@@ -0,0 +1,547 @@
+#!/usr/bin/env node
+
+import { execSync } from 'child_process';
+import { createHash } from 'crypto';
+import { readFileSync, statSync, writeFileSync } from 'fs';
+import { parseArgs } from 'util';
+import Redis from 'iovalkey';
+
+const CACHE_TTL = 3600; // 1 hour
+const CACHE_PREFIX_ANDROID = 'photo-sync:android:';
+const CACHE_PREFIX_LOCAL = 'photo-sync:local:';
+
+interface FileInfo {
+  path: string;
+  size: number;
+  hash?: string;
+}
+
+interface Results {
+  matched: number;
+  missingInBackup: FileInfo[];
+  missingOnPhone: FileInfo[];
+  duplicatesOnPhone: Record<string, string[]>;
+  duplicatesInBackup: Record<string, string[]>;
+}
+
+function parseArguments() {
+  const { values } = parseArgs({
+    options: {
+      local: { type: 'string' },
+      android: { type: 'string' },
+    },
+  });
+
+  if (!values.local || !values.android) {
+    console.error('Usage: ./photos-diff.ts --local DIR --android DIR');
+    process.exit(1);
+  }
+
+  return { localDir: values.local, androidDir: values.android };
+}
+
+async function getLocalFiles(dir: string, redis: Redis): Promise<FileInfo[]> {
+  console.log(`\nšŸ“ Scanning local directory: ${dir}`);
+
+  const output = execSync(`find "${dir}" -type f`, { encoding: 'utf-8', maxBuffer: 100 * 1024 * 1024 });
+  const files = output.trim().split('\n').filter(Boolean);
+
+  const fileInfos: FileInfo[] = [];
+  const startTime = Date.now();
+
+  for (let i = 0; i < files.length; i++) {
+    const file = files[i];
+    const cacheKey = CACHE_PREFIX_LOCAL + file;
+
+    try {
+      // Check cache first
+      const cached = await redis.get(cacheKey);
+      if (cached) {
+        const data = JSON.parse(cached);
+        fileInfos.push({ path: file, size: data.size, hash: data.hash });
+      } else {
+        // Get from filesystem
+        const stat = statSync(file);
+        const fileInfo: FileInfo = { path: file, size: stat.size };
+        fileInfos.push(fileInfo);
+
+        // Cache it
+        await redis.setex(cacheKey, CACHE_TTL, JSON.stringify({ size: stat.size }));
+      }
+
+      // Progress and ETA
+      const processed = i + 1;
+      const elapsed = Date.now() - startTime;
+      const avgTime = elapsed / processed;
+      const remaining = files.length - processed;
+      const eta = Math.round((avgTime * remaining) / 1000);
+
+      const etaStr = eta > 60
+        ? `${Math.floor(eta / 60)}m ${eta % 60}s`
+        : `${eta}s`;
+
+      process.stdout.write(`\ršŸ“ Progress: ${processed}/${files.length} files | ETA: ${etaStr} `);
+    } catch (err) {
+      console.error(`\nāŒ Error reading local file: ${file}`);
+      throw err;
+    }
+  }
+
+  console.log(`\nāœ… Found ${fileInfos.length} local files`);
+  return fileInfos;
+}
+
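+// Spawning one `adb shell` per file is the bottleneck on large galleries, so
+// the Android scan batches `stat` calls (50 paths per adb invocation) and
+// caches each size in Redis under CACHE_PREFIX_ANDROID for CACHE_TTL seconds,
+// so a re-run within the hour skips adb entirely for unchanged paths.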
+async function getAndroidFiles(dir: string, redis: Redis): Promise<FileInfo[]> {
+  console.log(`\nšŸ“± Scanning Android directory: ${dir}`);
+
+  const output = execSync(`adb shell "find '${dir}' -type f 2>/dev/null"`, {
+    encoding: 'utf-8',
+    maxBuffer: 50 * 1024 * 1024
+  });
+
+  const files = output.trim().split('\n').filter(Boolean).map(f => f.trim()).filter(Boolean);
+
+  console.log(`šŸ“Š Getting file sizes for ${files.length} files...`);
+
+  const fileInfos: FileInfo[] = [];
+  const startTime = Date.now();
+
+  const BATCH_SIZE = 50;
+  let processed = 0;
+
+  for (let i = 0; i < files.length; i += BATCH_SIZE) {
+    const batch = files.slice(i, i + BATCH_SIZE);
+
+    // Check cache first for this batch
+    const cachedResults: FileInfo[] = [];
+    const needFetch: string[] = [];
+
+    for (const file of batch) {
+      const cacheKey = CACHE_PREFIX_ANDROID + file;
+      const cached = await redis.get(cacheKey);
+
+      if (cached) {
+        const data = JSON.parse(cached);
+        cachedResults.push({ path: file, size: data.size, hash: data.hash });
+      } else {
+        needFetch.push(file);
+      }
+    }
+
+    fileInfos.push(...cachedResults);
+    processed += cachedResults.length;
+
+    // Fetch uncached files in batch
+    if (needFetch.length > 0) {
+      try {
+        // Build shell script to get all sizes in one adb call
+        const script = needFetch.map(f => `stat -c '%s' '${f}' 2>/dev/null || echo "ERROR"`).join('; ');
+        const statOutput = execSync(`adb shell "${script}"`, { encoding: 'utf-8', maxBuffer: 10 * 1024 * 1024 });
+        const sizes = statOutput.trim().split('\n');
+
+        if (sizes.length !== needFetch.length) {
+          console.error(`\nāŒ Batch size mismatch: expected ${needFetch.length}, got ${sizes.length}`);
+          throw new Error('Batch stat failed');
+        }
+
+        for (let j = 0; j < needFetch.length; j++) {
+          const file = needFetch[j];
+          const sizeStr = sizes[j].trim();
+
+          if (sizeStr === 'ERROR' || sizeStr === '') {
+            console.error(`\nāŒ Failed to get size for: ${file}`);
+            continue;
+          }
+
+          const size = parseInt(sizeStr, 10);
+
+          if (isNaN(size)) {
+            console.error(`\nāŒ Invalid size for Android file: ${file} (got: ${sizeStr})`);
+            continue;
+          }
+
+          const fileInfo: FileInfo = { path: file, size };
+          fileInfos.push(fileInfo);
+
+          // Cache it
+          await redis.setex(CACHE_PREFIX_ANDROID + file, CACHE_TTL, JSON.stringify({ size }));
+        }
+
+        processed += needFetch.length;
+      } catch (err) {
+        console.error(`\nāŒ Error in batch stat, falling back to individual calls`);
+        // Fallback to individual calls for this batch
+        for (const file of needFetch) {
+          try {
+            const stat = execSync(`adb shell "stat -c '%s' '${file}'"`, { encoding: 'utf-8' });
+            const size = parseInt(stat.trim(), 10);
+
+            if (!isNaN(size)) {
+              const fileInfo: FileInfo = { path: file, size };
+              fileInfos.push(fileInfo);
+              await redis.setex(CACHE_PREFIX_ANDROID + file, CACHE_TTL, JSON.stringify({ size }));
+            }
+          } catch (e) {
+            console.error(`\nāŒ Failed to stat: ${file}`);
+          }
+          processed++;
+        }
+      }
+    }
+
+    // Progress and ETA
+    const elapsed = Date.now() - startTime;
+    const avgTime = elapsed / processed;
+    const remaining = files.length - processed;
+    const eta = Math.round((avgTime * remaining) / 1000);
+
+    const etaStr = eta > 60
+      ? `${Math.floor(eta / 60)}m ${eta % 60}s`
+      : `${eta}s`;
+
+    process.stdout.write(`\ršŸ“Š Progress: ${processed}/${files.length} files | ETA: ${etaStr} `);
+  }
+
+  console.log(`\nāœ… Found ${fileInfos.length} Android files`);
+  return fileInfos;
+}
+
+function sha256Local(path: string): string {
+  const hash = createHash('sha256');
+  const data = readFileSync(path);
+  hash.update(data);
+  return hash.digest('hex');
+}
+
+function sha256Android(path: string): string {
+  const output = execSync(`adb shell "sha256sum '${path}'"`, { encoding: 'utf-8' });
+  const hash = output.trim().split(/\s+/)[0];
+  if (!hash || hash.length !== 64) {
+    throw new Error(`Invalid hash from Android: ${output}`);
+  }
+  return hash;
+}
+
+function groupBySize(files: FileInfo[]): Map<number, FileInfo[]> {
+  const groups = new Map<number, FileInfo[]>();
+  for (const file of files) {
+    const group = groups.get(file.size) || [];
+    group.push(file);
+    groups.set(file.size, group);
+  }
+  return groups;
+}
+
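+// Hashing is the expensive step, so calculateHashes() consults the Redis
+// cache first and only computes the missing digests: Android files are hashed
+// 20 at a time with a single `sha256sum` per adb call (with a per-file
+// fallback if the batch fails), while local files are hashed sequentially
+// with Node's crypto module. Hashes are written back to the same cache keys
+// the size scan uses, so later runs get them for free.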
+async function calculateHashes(
+  files: FileInfo[],
+  source: 'local' | 'android',
+  redis: Redis,
+  cachePrefix: string
+): Promise<void> {
+  const totalFiles = files.length;
+  if (totalFiles === 0) return;
+
+  console.log(`\nšŸ” Computing hashes for ${totalFiles} ${source} files...`);
+
+  const startTime = Date.now();
+
+  if (source === 'android') {
+    // Batch processing for Android
+    const BATCH_SIZE = 20;
+    let processed = 0;
+
+    for (let i = 0; i < files.length; i += BATCH_SIZE) {
+      const batch = files.slice(i, i + BATCH_SIZE);
+
+      // Check cache first
+      const needHash: FileInfo[] = [];
+
+      for (const file of batch) {
+        const cacheKey = cachePrefix + file.path;
+        const cached = await redis.get(cacheKey);
+
+        if (cached) {
+          const data = JSON.parse(cached);
+          if (data.hash) {
+            file.hash = data.hash;
+            processed++;
+            continue;
+          }
+        }
+        needHash.push(file);
+      }
+
+      // Hash uncached files in batch
+      if (needHash.length > 0) {
+        try {
+          // Build batch sha256sum command
+          const paths = needHash.map(f => `'${f.path}'`).join(' ');
+          const hashOutput = execSync(`adb shell "sha256sum ${paths} 2>/dev/null"`, {
+            encoding: 'utf-8',
+            maxBuffer: 10 * 1024 * 1024
+          });
+
+          const lines = hashOutput.trim().split('\n');
+
+          if (lines.length !== needHash.length) {
+            // A dropped output line would shift every later hash onto the
+            // wrong file; bail out to the per-file fallback instead of
+            // pairing lines with paths positionally.
+            throw new Error(`Hash count mismatch: expected ${needHash.length}, got ${lines.length}`);
+          }
+
+          for (let j = 0; j < needHash.length; j++) {
+            const line = lines[j].trim();
+            const parts = line.split(/\s+/);
+            const hash = parts[0];
+
+            if (hash && hash.length === 64) {
+              needHash[j].hash = hash;
+              await redis.setex(
+                cachePrefix + needHash[j].path,
+                CACHE_TTL,
+                JSON.stringify({ size: needHash[j].size, hash })
+              );
+            }
+          }
+
+          processed += needHash.length;
+        } catch (err) {
+          console.error(`\nāŒ Batch hashing failed, falling back to individual hashing`);
+          // Fallback to individual hashing
+          for (const file of needHash) {
+            try {
+              const output = execSync(`adb shell "sha256sum '${file.path}'"`, { encoding: 'utf-8' });
+              const hash = output.trim().split(/\s+/)[0];
+              if (hash && hash.length === 64) {
+                file.hash = hash;
+                await redis.setex(
+                  cachePrefix + file.path,
+                  CACHE_TTL,
+                  JSON.stringify({ size: file.size, hash })
+                );
+              }
+            } catch (e) {
+              console.error(`\nāŒ Failed to hash: ${file.path}`);
+            }
+            processed++;
+          }
+        }
+      }
+
+      // Progress and ETA
+      const elapsed = Date.now() - startTime;
+      const avgTime = elapsed / processed;
+      const remaining = totalFiles - processed;
+      const eta = Math.round((avgTime * remaining) / 1000);
+
+      const etaStr = eta > 60
+        ? `${Math.floor(eta / 60)}m ${eta % 60}s`
+        : `${eta}s`;
+
+      process.stdout.write(`\ršŸ” Progress: ${processed}/${totalFiles} files | ETA: ${etaStr} `);
+    }
+  } else {
+    // Local files - keep sequential for now
+    for (let i = 0; i < files.length; i++) {
+      const file = files[i];
+      const cacheKey = cachePrefix + file.path;
+
+      try {
+        // Check if hash is already in cache
+        const cached = await redis.get(cacheKey);
+        if (cached) {
+          const data = JSON.parse(cached);
+          if (data.hash) {
+            file.hash = data.hash;
+
+            // Progress and ETA
+            const processed = i + 1;
+            const elapsed = Date.now() - startTime;
+            const avgTime = elapsed / processed;
+            const remaining = totalFiles - processed;
+            const eta = Math.round((avgTime * remaining) / 1000);
+
+            const etaStr = eta > 60
+              ? `${Math.floor(eta / 60)}m ${eta % 60}s`
+              : `${eta}s`;
+
+            process.stdout.write(`\ršŸ” Progress: ${processed}/${totalFiles} files | ETA: ${etaStr} (cached) `);
+            continue;
+          }
+        }
+
+        // Compute hash
+        file.hash = sha256Local(file.path);
+
+        // Update cache with hash
+        await redis.setex(cacheKey, CACHE_TTL, JSON.stringify({ size: file.size, hash: file.hash }));
+
+      } catch (err) {
+        console.error(`\nāŒ Error hashing ${source} file: ${file.path}`);
+        throw err;
+      }
+
+      // Progress and ETA
+      const processed = i + 1;
+      const elapsed = Date.now() - startTime;
+      const avgTime = elapsed / processed;
+      const remaining = totalFiles - processed;
+      const eta = Math.round((avgTime * remaining) / 1000);
+
+      const etaStr = eta > 60
+        ? `${Math.floor(eta / 60)}m ${eta % 60}s`
+        : `${eta}s`;
+
+      process.stdout.write(`\ršŸ” Progress: ${processed}/${totalFiles} files | ETA: ${etaStr} `);
+    }
+  }
+
+  console.log('\nāœ… Hashing complete');
+}
+
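+// Group paths by SHA-256 digest; any digest that maps to more than one path
+// is a duplicate set. Files without a computed hash are skipped.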
+function findDuplicates(files: FileInfo[]): Record<string, string[]> {
+  const hashMap = new Map<string, string[]>();
+
+  for (const file of files) {
+    if (!file.hash) continue;
+    const paths = hashMap.get(file.hash) || [];
+    paths.push(file.path);
+    hashMap.set(file.hash, paths);
+  }
+
+  const duplicates: Record<string, string[]> = {};
+  for (const [hash, paths] of hashMap.entries()) {
+    if (paths.length > 1) {
+      duplicates[hash] = paths;
+    }
+  }
+
+  return duplicates;
+}
+
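+// Pipeline: scan both sides, group by size so the expensive hashing is only
+// done where it can change the outcome, compare the hash sets, and report.
+// Results go to stdout and results.json; the process exits with status 1 if
+// anything on the phone is missing from the backup.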
+async function main() {
+  const { localDir, androidDir } = parseArguments();
+
+  console.log('šŸš€ Starting backup verification...');
+  console.log('šŸ”Œ Connecting to Redis...');
+
+  const redis = new Redis();
+
+  redis.on('error', (err) => {
+    console.error('āŒ Redis connection error:', err);
+    process.exit(1);
+  });
+
+  try {
+    // Step 1: Collect file lists
+    const localFiles = await getLocalFiles(localDir, redis);
+    const androidFiles = await getAndroidFiles(androidDir, redis);
+
+    // Step 2: Group by size
+    console.log('\nšŸ“Š Grouping files by size...');
+    const localBySize = groupBySize(localFiles);
+    const androidBySize = groupBySize(androidFiles);
+
+    // Step 3: Determine which files need hashing
+    const localNeedHash: FileInfo[] = [];
+    const androidNeedHash: FileInfo[] = [];
+
+    for (const [size, localGroup] of localBySize.entries()) {
+      const androidGroup = androidBySize.get(size);
+
+      if (androidGroup) {
+        // Need to hash all files that have matches by size
+        localNeedHash.push(...localGroup);
+        androidNeedHash.push(...androidGroup);
+      } else {
+        // No size match on the phone: these can never match by hash, but
+        // without a hash they would never be reported in missingOnPhone or
+        // counted in duplicatesInBackup
+        localNeedHash.push(...localGroup);
+      }
+    }
+
+    // Also need to hash Android files that don't have local matches
+    for (const [size, androidGroup] of androidBySize.entries()) {
+      if (!localBySize.has(size)) {
+        androidNeedHash.push(...androidGroup);
+      }
+    }
+
+    console.log(`šŸ” ${localNeedHash.length} local + ${androidNeedHash.length} Android files need hashing`);
+
+    // Step 4: Calculate hashes
+    await calculateHashes(localNeedHash, 'local', redis, CACHE_PREFIX_LOCAL);
+    await calculateHashes(androidNeedHash, 'android', redis, CACHE_PREFIX_ANDROID);
+
+    // Step 5: Build hash maps
+    const localHashes = new Map<string, FileInfo[]>();
+    const androidHashes = new Map<string, FileInfo[]>();
+
+    for (const file of localFiles) {
+      if (!file.hash) continue;
+      const group = localHashes.get(file.hash) || [];
+      group.push(file);
+      localHashes.set(file.hash, group);
+    }
+
+    for (const file of androidFiles) {
+      if (!file.hash) continue;
+      const group = androidHashes.get(file.hash) || [];
+      group.push(file);
+      androidHashes.set(file.hash, group);
+    }
+
+    // Step 6: Find differences
+    console.log('\nšŸ” Comparing files...');
+
+    const missingInBackup: FileInfo[] = [];
+    const missingOnPhone: FileInfo[] = [];
+    let matched = 0;
+
+    // Files on Android but not in backup
+    for (const [hash, androidGroup] of androidHashes.entries()) {
+      if (!localHashes.has(hash)) {
+        missingInBackup.push(...androidGroup);
+      } else {
+        matched += androidGroup.length;
+      }
+    }
+
+    // Files in backup but not on Android
+    for (const [hash, localGroup] of localHashes.entries()) {
+      if (!androidHashes.has(hash)) {
+        missingOnPhone.push(...localGroup);
+      }
+    }
+
+    // Step 7: Find duplicates
+    const duplicatesOnPhone = findDuplicates(androidFiles);
+    const duplicatesInBackup = findDuplicates(localFiles);
+
+    // Step 8: Output results
+    const results: Results = {
+      matched,
+      missingInBackup,
+      missingOnPhone,
+      duplicatesOnPhone,
+      duplicatesInBackup,
+    };
+
+    console.log('\n' + '='.repeat(60));
+    console.log('šŸ“Š RESULTS');
+    console.log('='.repeat(60));
+    console.log(`āœ… Matched files: ${results.matched}`);
+    console.log(`āŒ Missing in backup: ${results.missingInBackup.length}`);
+    console.log(`ā„¹ļø Missing on phone: ${results.missingOnPhone.length}`);
+    console.log(`šŸ”„ Duplicates on phone: ${Object.keys(results.duplicatesOnPhone).length} hashes`);
+    console.log(`šŸ”„ Duplicates in backup: ${Object.keys(results.duplicatesInBackup).length} hashes`);
+    console.log('='.repeat(60));
+
+    console.log('\nšŸ’¾ Writing results to results.json...');
+    writeFileSync('results.json', JSON.stringify(results, null, 2));
+    console.log('āœ… Done! Check results.json for details.');
+
+    if (results.missingInBackup.length > 0) {
+      console.log('\nāš ļø WARNING: Some files are missing in backup!');
+      await redis.quit();
+      process.exit(1);
+    }
+
+    await redis.quit();
+  } catch (err) {
+    await redis.quit();
+    throw err;
+  }
+}
+
+main().catch((err) => {
+  console.error('āŒ Fatal error:', err);
+  process.exit(1);
+});