scripts/bin/photos-diff.ts

#!/usr/bin/env node
import { execSync, spawn } from 'child_process';
import { createHash } from 'crypto';
import { readFileSync, statSync, writeFileSync } from 'fs';
import { parseArgs } from 'util';
import Redis from 'iovalkey';
import { createInterface } from 'readline';
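
// Cache layout: one JSON blob per file, keyed by a source-specific prefix plus
// the file's path, holding { size, hash? }. Entries expire after an hour, so
// re-runs within that window skip the expensive stat/hash calls.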
const CACHE_TTL = 3600; // 1 hour
const CACHE_PREFIX_ANDROID = 'photo-sync:android:';
const CACHE_PREFIX_LOCAL = 'photo-sync:local:';
interface FileInfo {
  path: string;
  size: number;
  hash?: string;
}

interface Results {
  matched: number;
  missingInBackup: FileInfo[];
  duplicatesOnPhone: Record<string, string[]>;
  duplicatesInBackup: Record<string, string[]>;
}

function parseArguments() {
  const { values } = parseArgs({
    options: {
      local: { type: 'string' },
      android: { type: 'string' },
    },
  });
  if (!values.local || !values.android) {
    console.error('Usage: ./photos-diff.ts --local DIR --android DIR');
    process.exit(1);
  }
  return { localDir: values.local, androidDir: values.android };
}
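
// List every file under `dir` with find (streamed line by line), then collect
// sizes, consulting the cache before touching the filesystem.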
async function getLocalFiles(dir: string, redis: Redis): Promise<FileInfo[]> {
  console.log(`\n📁 Scanning local directory: ${dir}`);

  // Stream file paths from find instead of buffering its whole output
  const files: string[] = [];
  const find = spawn('find', [dir, '-type', 'f']);
  const rl = createInterface({ input: find.stdout });
  for await (const line of rl) {
    if (line.trim()) {
      files.push(line.trim());
    }
  }
  await new Promise((resolve, reject) => {
    find.on('close', resolve);
    find.on('error', reject);
  });

  console.log(`📊 Found ${files.length} files, getting sizes...`);
  const fileInfos: FileInfo[] = [];
  const startTime = Date.now();
  for (let i = 0; i < files.length; i++) {
    const file = files[i];
    const cacheKey = CACHE_PREFIX_LOCAL + file;
    try {
      // Check cache first
      const cached = await redis.get(cacheKey);
      if (cached) {
        const data = JSON.parse(cached);
        fileInfos.push({ path: file, size: data.size, hash: data.hash });
      } else {
        // Stat the file and cache the result
        const stat = statSync(file);
        fileInfos.push({ path: file, size: stat.size });
        await redis.setex(cacheKey, CACHE_TTL, JSON.stringify({ size: stat.size }));
      }

      // Progress and ETA
      const processed = i + 1;
      const elapsed = Date.now() - startTime;
      const avgTime = elapsed / processed;
      const remaining = files.length - processed;
      const eta = Math.round((avgTime * remaining) / 1000);
      const etaStr = eta > 60 ? `${Math.floor(eta / 60)}m ${eta % 60}s` : `${eta}s`;
      process.stdout.write(`\r📁 Progress: ${processed}/${files.length} files | ETA: ${etaStr} `);
    } catch (err) {
      console.error(`\n❌ Error reading local file: ${file}`);
      throw err;
    }
  }
  console.log(`\n✅ Found ${fileInfos.length} local files`);
  return fileInfos;
}
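
// Same listing/sizing pass for the phone, but over adb. Each adb invocation
// is a full round-trip to the device, so uncached files are stat'ed in
// batches of 50 paths per shell call.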
async function getAndroidFiles(dir: string, redis: Redis): Promise<FileInfo[]> {
  console.log(`\n📱 Scanning Android directory: ${dir}`);

  // Stream file paths from adb instead of buffering its whole output.
  // Paths are single-quoted for the device shell throughout, so this assumes
  // no path contains a single quote.
  const files: string[] = [];
  const adb = spawn('adb', ['shell', `find '${dir}' -type f 2>/dev/null`]);
  const rl = createInterface({ input: adb.stdout });
  for await (const line of rl) {
    const trimmed = line.trim();
    if (trimmed) {
      files.push(trimmed);
    }
  }
  await new Promise((resolve, reject) => {
    adb.on('close', resolve);
    adb.on('error', reject);
  });

  console.log(`📊 Getting file sizes for ${files.length} files...`);
  const fileInfos: FileInfo[] = [];
  const startTime = Date.now();
  const BATCH_SIZE = 50;
  let processed = 0;

  for (let i = 0; i < files.length; i += BATCH_SIZE) {
    const batch = files.slice(i, i + BATCH_SIZE);

    // Check cache first for this batch
    const cachedResults: FileInfo[] = [];
    const needFetch: string[] = [];
    for (const file of batch) {
      const cached = await redis.get(CACHE_PREFIX_ANDROID + file);
      if (cached) {
        const data = JSON.parse(cached);
        cachedResults.push({ path: file, size: data.size, hash: data.hash });
      } else {
        needFetch.push(file);
      }
    }
    fileInfos.push(...cachedResults);
    processed += cachedResults.length;

    // Fetch uncached files in one adb call
    if (needFetch.length > 0) {
      try {
        // Build a shell script that stats every file in the batch; failures
        // print a sentinel so output lines stay aligned with input paths
        const script = needFetch.map(f => `stat -c '%s' '${f}' 2>/dev/null || echo "ERROR"`).join('; ');
        const statOutput = execSync(`adb shell "${script}"`, { encoding: 'utf-8', maxBuffer: 10 * 1024 * 1024 });
        const sizes = statOutput.trim().split('\n');
        if (sizes.length !== needFetch.length) {
          console.error(`\n❌ Batch size mismatch: expected ${needFetch.length}, got ${sizes.length}`);
          throw new Error('Batch stat failed');
        }
        for (let j = 0; j < needFetch.length; j++) {
          const file = needFetch[j];
          const sizeStr = sizes[j].trim();
          if (sizeStr === 'ERROR' || sizeStr === '') {
            console.error(`\n❌ Failed to get size for: ${file}`);
            continue;
          }
          const size = parseInt(sizeStr, 10);
          if (isNaN(size)) {
            console.error(`\n❌ Invalid size for Android file: ${file} (got: ${sizeStr})`);
            continue;
          }
          fileInfos.push({ path: file, size });
          await redis.setex(CACHE_PREFIX_ANDROID + file, CACHE_TTL, JSON.stringify({ size }));
        }
        processed += needFetch.length;
      } catch (err) {
        console.error(`\n❌ Error in batch stat, falling back to individual calls`);
        // Fall back to one adb call per file for this batch
        for (const file of needFetch) {
          try {
            const stat = execSync(`adb shell "stat -c '%s' '${file}'"`, { encoding: 'utf-8' });
            const size = parseInt(stat.trim(), 10);
            if (!isNaN(size)) {
              fileInfos.push({ path: file, size });
              await redis.setex(CACHE_PREFIX_ANDROID + file, CACHE_TTL, JSON.stringify({ size }));
            }
          } catch (e) {
            console.error(`\n❌ Failed to stat: ${file}`);
          }
          processed++;
        }
      }
    }

    // Progress and ETA
    if (processed > 0) {
      const elapsed = Date.now() - startTime;
      const avgTime = elapsed / processed;
      const remaining = files.length - processed;
      const eta = Math.round((avgTime * remaining) / 1000);
      const etaStr = eta > 60 ? `${Math.floor(eta / 60)}m ${eta % 60}s` : `${eta}s`;
      process.stdout.write(`\r📊 Progress: ${processed}/${files.length} files | ETA: ${etaStr} `);
    }
  }
  console.log(`\n✅ Found ${fileInfos.length} Android files`);
  return fileInfos;
}
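
// Hash helpers. Local files are hashed in-process; Android files are hashed
// on the device with sha256sum so the file contents never cross the USB link.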
function sha256Local(path: string): string {
  // Reads the whole file into memory; fine for photos, heavier for long videos
  const hash = createHash('sha256');
  hash.update(readFileSync(path));
  return hash.digest('hex');
}

function sha256Android(path: string): string {
  const output = execSync(`adb shell "sha256sum '${path}'"`, { encoding: 'utf-8' });
  const hash = output.trim().split(/\s+/)[0];
  if (!hash || hash.length !== 64) {
    throw new Error(`Invalid hash from Android: ${output}`);
  }
  return hash;
}
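
// Two files can only be identical if they have the same size, so files are
// bucketed by size first and hashing is reserved for sizes that actually
// collide.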
function groupBySize(files: FileInfo[]): Map<number, FileInfo[]> {
  const groups = new Map<number, FileInfo[]>();
  for (const file of files) {
    const group = groups.get(file.size) || [];
    group.push(file);
    groups.set(file.size, group);
  }
  return groups;
}
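
// Fill in `hash` for each FileInfo, reusing cached hashes where available.
// Android files are hashed on-device in batches of 20 per adb call, with a
// per-file fallback; local files are hashed sequentially.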
async function calculateHashes(
  files: FileInfo[],
  source: 'local' | 'android',
  redis: Redis,
  cachePrefix: string
): Promise<void> {
  const totalFiles = files.length;
  if (totalFiles === 0) return;
  console.log(`\n🔐 Computing hashes for ${totalFiles} ${source} files...`);
  const startTime = Date.now();

  if (source === 'android') {
    // Batch processing for Android
    const BATCH_SIZE = 20;
    let processed = 0;
    for (let i = 0; i < files.length; i += BATCH_SIZE) {
      const batch = files.slice(i, i + BATCH_SIZE);

      // Check cache first
      const needHash: FileInfo[] = [];
      for (const file of batch) {
        const cached = await redis.get(cachePrefix + file.path);
        if (cached) {
          const data = JSON.parse(cached);
          if (data.hash) {
            file.hash = data.hash;
            processed++;
            continue;
          }
        }
        needHash.push(file);
      }

      // Hash uncached files in one adb call
      if (needHash.length > 0) {
        try {
          const paths = needHash.map(f => `'${f.path}'`).join(' ');
          const hashOutput = execSync(`adb shell "sha256sum ${paths} 2>/dev/null"`, {
            encoding: 'utf-8',
            maxBuffer: 10 * 1024 * 1024
          });
          // sha256sum prints "<hash>  <path>" per file, and files that fail
          // produce no output line, so match results back by path instead of
          // by index to avoid mis-assigning hashes
          const hashByPath = new Map<string, string>();
          for (const line of hashOutput.trim().split('\n')) {
            const m = line.trim().match(/^([0-9a-f]{64})\s+(.+)$/);
            if (m) {
              hashByPath.set(m[2], m[1]);
            }
          }
          for (const file of needHash) {
            const hash = hashByPath.get(file.path);
            if (hash) {
              file.hash = hash;
              await redis.setex(
                cachePrefix + file.path,
                CACHE_TTL,
                JSON.stringify({ size: file.size, hash })
              );
            }
          }
          processed += needHash.length;
        } catch (err) {
          console.error(`\n❌ Batch hashing failed, falling back to individual hashing`);
          for (const file of needHash) {
            try {
              file.hash = sha256Android(file.path);
              await redis.setex(
                cachePrefix + file.path,
                CACHE_TTL,
                JSON.stringify({ size: file.size, hash: file.hash })
              );
            } catch (e) {
              console.error(`\n❌ Failed to hash: ${file.path}`);
            }
            processed++;
          }
        }
      }

      // Progress and ETA
      const elapsed = Date.now() - startTime;
      const avgTime = elapsed / processed;
      const remaining = totalFiles - processed;
      const eta = Math.round((avgTime * remaining) / 1000);
      const etaStr = eta > 60 ? `${Math.floor(eta / 60)}m ${eta % 60}s` : `${eta}s`;
      process.stdout.write(`\r🔐 Progress: ${processed}/${totalFiles} files | ETA: ${etaStr} `);
    }
  } else {
    // Local files are hashed sequentially
    for (let i = 0; i < files.length; i++) {
      const file = files[i];
      const cacheKey = cachePrefix + file.path;
      let suffix = '';
      try {
        // Use the cached hash if present, otherwise compute and cache it
        const cached = await redis.get(cacheKey);
        const data = cached ? JSON.parse(cached) : null;
        if (data?.hash) {
          file.hash = data.hash;
          suffix = '(cached) ';
        } else {
          file.hash = sha256Local(file.path);
          await redis.setex(cacheKey, CACHE_TTL, JSON.stringify({ size: file.size, hash: file.hash }));
        }
      } catch (err) {
        console.error(`\n❌ Error hashing ${source} file: ${file.path}`);
        throw err;
      }

      // Progress and ETA
      const processed = i + 1;
      const elapsed = Date.now() - startTime;
      const avgTime = elapsed / processed;
      const remaining = totalFiles - processed;
      const eta = Math.round((avgTime * remaining) / 1000);
      const etaStr = eta > 60 ? `${Math.floor(eta / 60)}m ${eta % 60}s` : `${eta}s`;
      process.stdout.write(`\r🔐 Progress: ${processed}/${totalFiles} files | ETA: ${etaStr} ${suffix}`);
    }
  }
  console.log('\n✅ Hashing complete');
}

function findDuplicates(files: FileInfo[]): Record<string, string[]> {
  const hashMap = new Map<string, string[]>();
  for (const file of files) {
    if (!file.hash) continue;
    const paths = hashMap.get(file.hash) || [];
    paths.push(file.path);
    hashMap.set(file.hash, paths);
  }
  const duplicates: Record<string, string[]> = {};
  for (const [hash, paths] of hashMap.entries()) {
    if (paths.length > 1) {
      duplicates[hash] = paths;
    }
  }
  return duplicates;
}
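
// Pipeline: list both sides, bucket by size, hash only the size collisions,
// then compare the hash sets. Note that `new Redis()` connects to the client
// default (localhost:6379); adjust if your Valkey/Redis server lives elsewhere.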
async function main() {
  const { localDir, androidDir } = parseArguments();
  console.log('🚀 Starting backup verification...');
  console.log('🔌 Connecting to Redis...');
  const redis = new Redis();
  redis.on('error', (err) => {
    console.error('❌ Redis connection error:', err);
    process.exit(1);
  });

  try {
    // Step 1: Collect file lists
    const localFiles = await getLocalFiles(localDir, redis);
    const androidFiles = await getAndroidFiles(androidDir, redis);

    // Step 2: Group by size
    console.log('\n📊 Grouping files by size...');
    const localBySize = groupBySize(localFiles);
    const androidBySize = groupBySize(androidFiles);

    // Step 3: Determine which files need hashing
    const localNeedHash: FileInfo[] = [];
    const androidNeedHash: FileInfo[] = [];
    for (const [size, localGroup] of localBySize.entries()) {
      const androidGroup = androidBySize.get(size);
      if (androidGroup) {
        // Hash every file whose size appears on both sides
        localNeedHash.push(...localGroup);
        androidNeedHash.push(...androidGroup);
      }
    }
    // Android files with no local size match are definitely missing, but
    // still need hashes to be reported and checked for duplicates
    for (const [size, androidGroup] of androidBySize.entries()) {
      if (!localBySize.has(size)) {
        androidNeedHash.push(...androidGroup);
      }
    }
    // Local-only size groups with more than one file may still hide
    // duplicates inside the backup, so hash those as well
    for (const [size, localGroup] of localBySize.entries()) {
      if (!androidBySize.has(size) && localGroup.length > 1) {
        localNeedHash.push(...localGroup);
      }
    }
    console.log(`🔐 ${localNeedHash.length} local + ${androidNeedHash.length} Android files need hashing`);

    // Step 4: Calculate hashes
    await calculateHashes(localNeedHash, 'local', redis, CACHE_PREFIX_LOCAL);
    await calculateHashes(androidNeedHash, 'android', redis, CACHE_PREFIX_ANDROID);

    // Step 5: Build hash maps
    const localHashes = new Map<string, FileInfo[]>();
    const androidHashes = new Map<string, FileInfo[]>();
    for (const file of localFiles) {
      if (!file.hash) continue;
      const group = localHashes.get(file.hash) || [];
      group.push(file);
      localHashes.set(file.hash, group);
    }
    for (const file of androidFiles) {
      if (!file.hash) continue;
      const group = androidHashes.get(file.hash) || [];
      group.push(file);
      androidHashes.set(file.hash, group);
    }

    // Step 6: Find differences (files on the phone but not in the backup)
    console.log('\n🔍 Comparing files...');
    const missingInBackup: FileInfo[] = [];
    let matched = 0;
    for (const [hash, androidGroup] of androidHashes.entries()) {
      if (!localHashes.has(hash)) {
        missingInBackup.push(...androidGroup);
      } else {
        matched += androidGroup.length;
      }
    }

    // Step 7: Find duplicates
    const duplicatesOnPhone = findDuplicates(androidFiles);
    const duplicatesInBackup = findDuplicates(localFiles);

    // Step 8: Output results
    const results: Results = {
      matched,
      missingInBackup,
      duplicatesOnPhone,
      duplicatesInBackup,
    };
    console.log('\n' + '='.repeat(60));
    console.log('📊 RESULTS');
    console.log('='.repeat(60));
    console.log(`📱 Total files on phone: ${androidFiles.length}`);
    console.log(`✅ Matched in backup: ${results.matched}`);
    console.log(`❌ MISSING in backup: ${results.missingInBackup.length}`);
    if (results.missingInBackup.length > 0) {
      console.log(`\n  Missing files:`);
      results.missingInBackup.forEach(f => console.log(`   - ${f.path} (${f.size} bytes)`));
    }
    console.log(`\n🔄 Duplicates on phone: ${Object.keys(results.duplicatesOnPhone).length} groups`);
    console.log(`🔄 Duplicates in backup: ${Object.keys(results.duplicatesInBackup).length} groups`);
    console.log('='.repeat(60));

    console.log('\n💾 Writing results to results.json...');
    writeFileSync('results.json', JSON.stringify(results, null, 2));
    console.log('✅ Done! Check results.json for details.');

    if (results.missingInBackup.length > 0) {
      console.log('\n⚠ WARNING: Some files are missing in backup!');
      process.exitCode = 1;
    }
  } finally {
    await redis.quit();
  }
}

// Surface rejections explicitly instead of relying on unhandled-rejection behavior
main().catch((err) => {
  console.error('❌ Fatal error:', err);
  process.exit(1);
});