photos-diff.ts

Branch: master
Author: Grief (2025-10-13 17:31:41 +01:00)
Parent: 47761dbaa7
Commit: 6010b5a24c
1 changed file with 547 additions and 0 deletions

bin/photos-diff.ts 100755

@@ -0,0 +1,547 @@
#!/usr/bin/env node
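// photos-diff: verify an Android photo library (reached over adb) against a
// local backup directory. Files are matched by size first, then by SHA-256
// where sizes collide; stat and hash results are cached in a Redis-compatible
// store (iovalkey) so re-runs are cheap. Example invocation (paths are
// illustrative): ./bin/photos-diff.ts --local ~/photo-backup --android /sdcard/DCIM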
import { execSync } from 'child_process';
import { createHash } from 'crypto';
import { readFileSync, statSync, writeFileSync } from 'fs';
import { parseArgs } from 'util';
import Redis from 'iovalkey';
const CACHE_TTL = 3600; // 1 hour
const CACHE_PREFIX_ANDROID = 'photo-sync:android:';
const CACHE_PREFIX_LOCAL = 'photo-sync:local:';
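// Shared progress helper: average time per processed item, extrapolated over
// what's left. The division is guarded so the first write can't divide by zero.
function formatEta(startTime: number, processed: number, total: number): string {
  const elapsed = Date.now() - startTime;
  const avgMs = elapsed / Math.max(processed, 1);
  const eta = Math.round((avgMs * (total - processed)) / 1000);
  return eta > 60 ? `${Math.floor(eta / 60)}m ${eta % 60}s` : `${eta}s`;
}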
interface FileInfo {
path: string;
size: number;
hash?: string;
}
interface Results {
matched: number;
missingInBackup: FileInfo[];
missingOnPhone: FileInfo[];
duplicatesOnPhone: Record<string, string[]>;
duplicatesInBackup: Record<string, string[]>;
}
function parseArguments() {
const { values } = parseArgs({
options: {
local: { type: 'string' },
android: { type: 'string' },
},
});
if (!values.local || !values.android) {
console.error('Usage: ./photos-diff.ts --local DIR --android DIR');
process.exit(1);
}
return { localDir: values.local, androidDir: values.android };
}
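// Enumerate the backup directory with `find` and record path + size for each
// file, consulting the Redis cache before touching the filesystem.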
async function getLocalFiles(dir: string, redis: Redis): Promise<FileInfo[]> {
console.log(`\n📁 Scanning local directory: ${dir}`);
const output = execSync(`find "${dir}" -type f`, { encoding: 'utf-8', maxBuffer: 100 * 1024 * 1024 });
const files = output.trim().split('\n').filter(Boolean);
const fileInfos: FileInfo[] = [];
const startTime = Date.now();
for (let i = 0; i < files.length; i++) {
const file = files[i];
const cacheKey = CACHE_PREFIX_LOCAL + file;
try {
// Check cache first
const cached = await redis.get(cacheKey);
if (cached) {
const data = JSON.parse(cached);
fileInfos.push({ path: file, size: data.size, hash: data.hash });
} else {
// Get from filesystem
const stat = statSync(file);
const fileInfo: FileInfo = { path: file, size: stat.size };
fileInfos.push(fileInfo);
// Cache it
await redis.setex(cacheKey, CACHE_TTL, JSON.stringify({ size: stat.size }));
}
// Progress and ETA
const processed = i + 1;
process.stdout.write(`\r📁 Progress: ${processed}/${files.length} files | ETA: ${formatEta(startTime, processed, files.length)} `);
} catch (err) {
console.error(`\n❌ Error reading local file: ${file}`);
throw err;
}
}
console.log(`\n✅ Found ${fileInfos.length} local files`);
return fileInfos;
}
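// Enumerate files on the device, then stat them 50 per adb round-trip so the
// per-invocation overhead of `adb shell` is paid once per batch, not per file.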
async function getAndroidFiles(dir: string, redis: Redis): Promise<FileInfo[]> {
console.log(`\n📱 Scanning Android directory: ${dir}`);
const output = execSync(`adb shell "find '${dir}' -type f 2>/dev/null"`, {
encoding: 'utf-8',
maxBuffer: 50 * 1024 * 1024
});
const files = output.trim().split('\n').filter(Boolean).map(f => f.trim()).filter(Boolean);
console.log(`📊 Getting file sizes for ${files.length} files...`);
const fileInfos: FileInfo[] = [];
const startTime = Date.now();
const BATCH_SIZE = 50;
let processed = 0;
for (let i = 0; i < files.length; i += BATCH_SIZE) {
const batch = files.slice(i, i + BATCH_SIZE);
// Check cache first for this batch
const cachedResults: FileInfo[] = [];
const needFetch: string[] = [];
for (const file of batch) {
const cacheKey = CACHE_PREFIX_ANDROID + file;
const cached = await redis.get(cacheKey);
if (cached) {
const data = JSON.parse(cached);
cachedResults.push({ path: file, size: data.size, hash: data.hash });
} else {
needFetch.push(file);
}
}
fileInfos.push(...cachedResults);
processed += cachedResults.length;
// Fetch uncached files in batch
if (needFetch.length > 0) {
try {
// Build shell script to get all sizes in one adb call
const script = needFetch.map(f => `stat -c '%s' '${f}' 2>/dev/null || echo "ERROR"`).join('; ');
const statOutput = execSync(`adb shell "${script}"`, { encoding: 'utf-8', maxBuffer: 10 * 1024 * 1024 });
const sizes = statOutput.trim().split('\n');
if (sizes.length !== needFetch.length) {
console.error(`\n❌ Batch size mismatch: expected ${needFetch.length}, got ${sizes.length}`);
throw new Error('Batch stat failed');
}
for (let j = 0; j < needFetch.length; j++) {
const file = needFetch[j];
const sizeStr = sizes[j].trim();
if (sizeStr === 'ERROR' || sizeStr === '') {
console.error(`\n❌ Failed to get size for: ${file}`);
continue;
}
const size = parseInt(sizeStr, 10);
if (isNaN(size)) {
console.error(`\n❌ Invalid size for Android file: ${file} (got: ${sizeStr})`);
continue;
}
const fileInfo: FileInfo = { path: file, size };
fileInfos.push(fileInfo);
// Cache it
await redis.setex(CACHE_PREFIX_ANDROID + file, CACHE_TTL, JSON.stringify({ size }));
}
processed += needFetch.length;
} catch (err) {
console.error(`\n❌ Error in batch stat, falling back to individual calls`);
// Fallback to individual calls for this batch
for (const file of needFetch) {
try {
const stat = execSync(`adb shell "stat -c '%s' '${file}'"`, { encoding: 'utf-8' });
const size = parseInt(stat.trim(), 10);
if (!isNaN(size)) {
const fileInfo: FileInfo = { path: file, size };
fileInfos.push(fileInfo);
await redis.setex(CACHE_PREFIX_ANDROID + file, CACHE_TTL, JSON.stringify({ size }));
}
} catch (e) {
console.error(`\n❌ Failed to stat: ${file}`);
}
processed++;
}
}
}
// Progress and ETA
process.stdout.write(`\r📊 Progress: ${processed}/${files.length} files | ETA: ${formatEta(startTime, processed, files.length)} `);
}
console.log(`\n✅ Found ${fileInfos.length} Android files`);
return fileInfos;
}
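// Hash a local file in one read; fine for photo-sized files, though a
// streaming hash would use less memory on very large ones.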
function sha256Local(path: string): string {
const hash = createHash('sha256');
const data = readFileSync(path);
hash.update(data);
return hash.digest('hex');
}
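// Single-file device hash. Currently unused: calculateHashes batches the same
// sha256sum call, but this is the one-at-a-time equivalent.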
function sha256Android(path: string): string {
const output = execSync(`adb shell "sha256sum '${path}'"`, { encoding: 'utf-8' });
const hash = output.trim().split(/\s+/)[0];
if (!hash || hash.length !== 64) {
throw new Error(`Invalid hash from Android: ${output}`);
}
return hash;
}
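// Bucket files by byte size. Size is a cheap first-pass discriminator: only
// files whose size appears on both sides need the expensive hash.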
function groupBySize(files: FileInfo[]): Map<number, FileInfo[]> {
const groups = new Map<number, FileInfo[]>();
for (const file of files) {
const group = groups.get(file.size) || [];
group.push(file);
groups.set(file.size, group);
}
return groups;
}
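// Fill in `hash` for each entry, reading from the cache where possible.
// Android files are hashed 20 per adb call; local files sequentially. Device
// paths are single-quoted for the shell, so filenames containing a single
// quote won't hash cleanly and are logged rather than hashed.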
async function calculateHashes(
files: FileInfo[],
source: 'local' | 'android',
redis: Redis,
cachePrefix: string
): Promise<void> {
const totalFiles = files.length;
if (totalFiles === 0) return;
console.log(`\n🔐 Computing hashes for ${totalFiles} ${source} files...`);
const startTime = Date.now();
if (source === 'android') {
// Batch processing for Android
const BATCH_SIZE = 20;
let processed = 0;
for (let i = 0; i < files.length; i += BATCH_SIZE) {
const batch = files.slice(i, i + BATCH_SIZE);
// Check cache first
const needHash: FileInfo[] = [];
for (const file of batch) {
const cacheKey = cachePrefix + file.path;
const cached = await redis.get(cacheKey);
if (cached) {
const data = JSON.parse(cached);
if (data.hash) {
file.hash = data.hash;
processed++;
continue;
}
}
needHash.push(file);
}
// Hash uncached files in batch
if (needHash.length > 0) {
try {
// Build batch sha256sum command
const paths = needHash.map(f => `'${f.path}'`).join(' ');
const hashOutput = execSync(`adb shell "sha256sum ${paths} 2>/dev/null"`, {
encoding: 'utf-8',
maxBuffer: 10 * 1024 * 1024
});
const lines = hashOutput.trim().split('\n');
// Pair output lines with files by path rather than by index: when sha256sum
// fails on one file its line vanishes (stderr is discarded), so index-based
// pairing would assign hashes to the wrong files.
const byPath = new Map(needHash.map(f => [f.path, f] as const));
for (const line of lines) {
const m = line.trim().match(/^([0-9a-f]{64})\s+\*?(.+)$/);
if (!m) continue;
const file = byPath.get(m[2]);
if (!file) continue;
file.hash = m[1];
await redis.setex(
cachePrefix + file.path,
CACHE_TTL,
JSON.stringify({ size: file.size, hash: m[1] })
);
}
processed += needHash.length;
} catch (err) {
console.error(`\n❌ Batch hashing failed, falling back to individual hashing`);
// Fallback to individual hashing
for (const file of needHash) {
try {
const output = execSync(`adb shell "sha256sum '${file.path}'"`, { encoding: 'utf-8' });
const hash = output.trim().split(/\s+/)[0];
if (hash && hash.length === 64) {
file.hash = hash;
await redis.setex(
cachePrefix + file.path,
CACHE_TTL,
JSON.stringify({ size: file.size, hash })
);
}
} catch (e) {
console.error(`\n❌ Failed to hash: ${file.path}`);
}
processed++;
}
}
}
// Progress and ETA
process.stdout.write(`\r🔐 Progress: ${processed}/${totalFiles} files | ETA: ${formatEta(startTime, processed, totalFiles)} `);
}
} else {
// Local files - keep sequential for now
for (let i = 0; i < files.length; i++) {
const file = files[i];
const cacheKey = cachePrefix + file.path;
try {
// Check if hash is already in cache
const cached = await redis.get(cacheKey);
if (cached) {
const data = JSON.parse(cached);
if (data.hash) {
file.hash = data.hash;
// Progress and ETA
const processed = i + 1;
process.stdout.write(`\r🔐 Progress: ${processed}/${totalFiles} files | ETA: ${formatEta(startTime, processed, totalFiles)} (cached) `);
continue;
}
}
// Compute hash
file.hash = sha256Local(file.path);
// Update cache with hash
await redis.setex(cacheKey, CACHE_TTL, JSON.stringify({ size: file.size, hash: file.hash }));
} catch (err) {
console.error(`\n❌ Error hashing ${source} file: ${file.path}`);
throw err;
}
// Progress and ETA
const processed = i + 1;
process.stdout.write(`\r🔐 Progress: ${processed}/${totalFiles} files | ETA: ${formatEta(startTime, processed, totalFiles)} `);
}
}
console.log('\n✅ Hashing complete');
}
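// Map hash -> paths and keep only hashes that occur more than once.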
function findDuplicates(files: FileInfo[]): Record<string, string[]> {
const hashMap = new Map<string, string[]>();
for (const file of files) {
if (!file.hash) continue;
const paths = hashMap.get(file.hash) || [];
paths.push(file.path);
hashMap.set(file.hash, paths);
}
const duplicates: Record<string, string[]> = {};
for (const [hash, paths] of hashMap.entries()) {
if (paths.length > 1) {
duplicates[hash] = paths;
}
}
return duplicates;
}
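// Orchestrates the comparison. `new Redis()` with no options uses iovalkey's
// default of localhost:6379; pass options there to point it elsewhere.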
async function main() {
const { localDir, androidDir } = parseArguments();
console.log('🚀 Starting backup verification...');
console.log('🔌 Connecting to Redis...');
const redis = new Redis();
redis.on('error', (err) => {
console.error('❌ Redis connection error:', err);
process.exit(1);
});
try {
// Step 1: Collect file lists
const localFiles = await getLocalFiles(localDir, redis);
const androidFiles = await getAndroidFiles(androidDir, redis);
// Step 2: Group by size
console.log('\n📊 Grouping files by size...');
const localBySize = groupBySize(localFiles);
const androidBySize = groupBySize(androidFiles);
// Step 3: Determine which files need hashing
const localNeedHash: FileInfo[] = [];
const androidNeedHash: FileInfo[] = [];
for (const [size, localGroup] of localBySize.entries()) {
const androidGroup = androidBySize.get(size);
if (androidGroup) {
// Need to hash all files that have matches by size
localNeedHash.push(...localGroup);
androidNeedHash.push(...androidGroup);
}
}
// Also hash Android files without local size matches (missing-in-backup candidates)
for (const [size, androidGroup] of androidBySize.entries()) {
if (!localBySize.has(size)) {
androidNeedHash.push(...androidGroup);
}
}
// ...and local files without Android size matches, so they get hashes and
// surface in missingOnPhone instead of being silently skipped
for (const [size, localGroup] of localBySize.entries()) {
if (!androidBySize.has(size)) {
localNeedHash.push(...localGroup);
}
}
console.log(`🔐 ${localNeedHash.length} local + ${androidNeedHash.length} Android files need hashing`);
// Step 4: Calculate hashes
await calculateHashes(localNeedHash, 'local', redis, CACHE_PREFIX_LOCAL);
await calculateHashes(androidNeedHash, 'android', redis, CACHE_PREFIX_ANDROID);
// Step 5: Build hash maps
const localHashes = new Map<string, FileInfo[]>();
const androidHashes = new Map<string, FileInfo[]>();
for (const file of localFiles) {
if (!file.hash) continue;
const group = localHashes.get(file.hash) || [];
group.push(file);
localHashes.set(file.hash, group);
}
for (const file of androidFiles) {
if (!file.hash) continue;
const group = androidHashes.get(file.hash) || [];
group.push(file);
androidHashes.set(file.hash, group);
}
// Step 6: Find differences
console.log('\n🔍 Comparing files...');
const missingInBackup: FileInfo[] = [];
const missingOnPhone: FileInfo[] = [];
let matched = 0;
// Files on Android but not in backup
for (const [hash, androidGroup] of androidHashes.entries()) {
if (!localHashes.has(hash)) {
missingInBackup.push(...androidGroup);
} else {
matched += androidGroup.length;
}
}
// Files in backup but not on Android
for (const [hash, localGroup] of localHashes.entries()) {
if (!androidHashes.has(hash)) {
missingOnPhone.push(...localGroup);
}
}
// Step 7: Find duplicates
const duplicatesOnPhone = findDuplicates(androidFiles);
const duplicatesInBackup = findDuplicates(localFiles);
// Step 8: Output results
const results: Results = {
matched,
missingInBackup,
missingOnPhone,
duplicatesOnPhone,
duplicatesInBackup,
};
console.log('\n' + '='.repeat(60));
console.log('📊 RESULTS');
console.log('='.repeat(60));
console.log(`✅ Matched files: ${results.matched}`);
console.log(`❌ Missing in backup: ${results.missingInBackup.length}`);
console.log(` Missing on phone: ${results.missingOnPhone.length}`);
console.log(`🔄 Duplicates on phone: ${Object.keys(results.duplicatesOnPhone).length} hashes`);
console.log(`🔄 Duplicates in backup: ${Object.keys(results.duplicatesInBackup).length} hashes`);
console.log('='.repeat(60));
console.log('\n💾 Writing results to results.json...');
writeFileSync('results.json', JSON.stringify(results, null, 2));
console.log('✅ Done! Check results.json for details.');
if (results.missingInBackup.length > 0) {
console.log('\n⚠ WARNING: Some files are missing in backup!');
await redis.quit();
process.exit(1);
}
await redis.quit();
} catch (err) {
await redis.quit();
throw err;
}
}
main().catch((err) => {
console.error('\n❌ Fatal error:', err);
process.exit(1);
});