boot(); // Database connection. $connection = \Drupal::database(); $outputDir = __DIR__ . '/wisski-data-integrity-test'; // Get all WissKI tables. function getWisskiTables($connection) { $tables = []; // Query information_schema to get all WissKI tables. $database = $connection->getConnectionOptions(); $dbName = $database['database']; $result = $connection->query("SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA = :db AND TABLE_NAME LIKE 'wisski%'", [ ':db' => $dbName, ]); while ($row = $result->fetchAssoc()) { $tables[] = $row['TABLE_NAME']; } return $tables; } // Export table data: row count, checksum, sample rows. function exportTableData($connection, $tableName, $outputDir) { $data = [ 'table' => $tableName, 'timestamp' => date('Y-m-d H:i:s'), 'row_count' => 0, 'checksum' => '', 'sample_rows' => [], 'column_info' => [], ]; // Get row count. $countResult = $connection->query("SELECT COUNT(*) as cnt FROM {" . $tableName . "}"); $data['row_count'] = (int) $countResult->fetchField(); if ($data['row_count'] === 0) { $data['checksum'] = 'empty'; return $data; } // Get column information. $columnsResult = $connection->query("SHOW COLUMNS FROM {" . $tableName . "}"); $columns = []; while ($col = $columnsResult->fetchAssoc()) { $columns[] = $col['Field']; } $data['column_info'] = $columns; // Get sample rows (first 10 rows). $sampleResult = $connection->query("SELECT * FROM {" . $tableName . "} LIMIT 10"); $samples = []; while ($row = $sampleResult->fetchAssoc()) { $samples[] = $row; } $data['sample_rows'] = $samples; // Calculate checksum of all data. // For large tables, we'll use a hash of all row data concatenated. // Build checksum query - use backticks for column names. $checksumFields = []; foreach ($columns as $col) { $checksumFields[] = "COALESCE(CAST(`" . $col . "` AS CHAR), '')"; } $orderByCols = array_slice($columns, 0, min(5, count($columns))); $orderBy = implode(', ', array_map(function($col) { return "`" . $col . "`"; }, $orderByCols)); // Use CONCAT_WS for safer concatenation. $concatExpr = "CONCAT_WS('|', " . implode(', ', $checksumFields) . ")"; $checksumQuery = "SELECT MD5(GROUP_CONCAT(" . $concatExpr . " ORDER BY " . $orderBy . " SEPARATOR '|||')) as chksum FROM {" . $tableName . "}"; try { $checksumResult = $connection->query($checksumQuery); $data['checksum'] = $checksumResult->fetchField() ?: 'no_checksum'; } catch (\Exception $e) { // If checksum fails (e.g., GROUP_CONCAT too large), use row count + first row hash. $data['checksum'] = 'partial_' . md5($data['row_count'] . serialize($samples[0] ?? [])); } return $data; } // Export all WissKI tables. function exportAllTables($connection, $outputDir) { if (!is_dir($outputDir)) { mkdir($outputDir, 0755, TRUE); } $tables = getWisskiTables($connection); $allData = [ 'export_timestamp' => date('Y-m-d H:i:s'), 'tables' => [], ]; echo "Exporting data for " . count($tables) . " WissKI tables...\n"; foreach ($tables as $table) { echo " Processing {$table}...\n"; $tableData = exportTableData($connection, $table, $outputDir); $allData['tables'][$table] = $tableData; // Save individual table export. $tableFile = $outputDir . '/' . $table . '.json'; file_put_contents($tableFile, json_encode($tableData, JSON_PRETTY_PRINT)); } // Save summary. $summaryFile = $outputDir . '/before-update-summary.json'; file_put_contents($summaryFile, json_encode($allData, JSON_PRETTY_PRINT)); echo "\nExport complete! Data saved to: {$outputDir}\n"; echo "Summary: {$summaryFile}\n"; return $allData; } // Verify data integrity after update. function verifyDataIntegrity($connection, $outputDir) { $summaryFile = $outputDir . '/before-update-summary.json'; if (!file_exists($summaryFile)) { echo "ERROR: Before-update summary not found at: {$summaryFile}\n"; echo "Please run 'export' first.\n"; return FALSE; } $beforeData = json_decode(file_get_contents($summaryFile), TRUE); $issues = []; $verified = []; echo "Verifying data integrity for " . count($beforeData['tables']) . " tables...\n\n"; foreach ($beforeData['tables'] as $tableName => $beforeTableData) { echo "Checking {$tableName}...\n"; $afterTableData = exportTableData($connection, $tableName, $outputDir); // Check row count. if ($afterTableData['row_count'] !== $beforeTableData['row_count']) { $issues[] = [ 'table' => $tableName, 'issue' => 'row_count_mismatch', 'before' => $beforeTableData['row_count'], 'after' => $afterTableData['row_count'], 'difference' => $afterTableData['row_count'] - $beforeTableData['row_count'], ]; echo " ⚠️ ROW COUNT MISMATCH: Before={$beforeTableData['row_count']}, After={$afterTableData['row_count']}\n"; } else { echo " ✓ Row count matches: {$afterTableData['row_count']}\n"; } // Check checksum. if ($beforeTableData['checksum'] !== 'empty' && $afterTableData['checksum'] !== 'empty') { if ($beforeTableData['checksum'] !== $afterTableData['checksum']) { $issues[] = [ 'table' => $tableName, 'issue' => 'checksum_mismatch', 'before' => $beforeTableData['checksum'], 'after' => $afterTableData['checksum'], ]; echo " ⚠️ CHECKSUM MISMATCH: Data may have changed!\n"; } else { echo " ✓ Checksum matches\n"; } } // Compare sample rows if available. if (!empty($beforeTableData['sample_rows']) && !empty($afterTableData['sample_rows'])) { $sampleMatch = TRUE; $minSamples = min(count($beforeTableData['sample_rows']), count($afterTableData['sample_rows'])); for ($i = 0; $i < $minSamples; $i++) { if ($beforeTableData['sample_rows'][$i] !== $afterTableData['sample_rows'][$i]) { $sampleMatch = FALSE; break; } } if (!$sampleMatch) { $issues[] = [ 'table' => $tableName, 'issue' => 'sample_data_mismatch', ]; echo " ⚠️ Sample data differs!\n"; } else { echo " ✓ Sample data matches\n"; } } if (empty(array_filter($issues, function($issue) use ($tableName) { return $issue['table'] === $tableName; }))) { $verified[] = $tableName; } echo "\n"; } // Save verification report. $report = [ 'verification_timestamp' => date('Y-m-d H:i:s'), 'tables_verified' => count($verified), 'tables_with_issues' => count($issues), 'verified_tables' => $verified, 'issues' => $issues, ]; $reportFile = $outputDir . '/verification-report.json'; file_put_contents($reportFile, json_encode($report, JSON_PRETTY_PRINT)); // Print summary. echo "\n" . str_repeat('=', 60) . "\n"; echo "VERIFICATION SUMMARY\n"; echo str_repeat('=', 60) . "\n"; echo "Tables verified: " . count($verified) . "\n"; echo "Tables with issues: " . count($issues) . "\n"; if (!empty($issues)) { echo "\n⚠️ ISSUES FOUND:\n"; foreach ($issues as $issue) { echo " - {$issue['table']}: {$issue['issue']}\n"; if (isset($issue['difference'])) { echo " Difference: {$issue['difference']} rows\n"; } } echo "\nFull report: {$reportFile}\n"; return FALSE; } else { echo "\n✓ All tables verified successfully! No data loss detected.\n"; echo "Report: {$reportFile}\n"; return TRUE; } } // Main execution. $command = $argv[1] ?? 'help'; switch ($command) { case 'export': exportAllTables($connection, $outputDir); break; case 'verify': verifyDataIntegrity($connection, $outputDir); break; default: echo "Usage:\n"; echo " php test-wisski-data-integrity.php export # Export data before update\n"; echo " php test-wisski-data-integrity.php verify # Verify data after update\n"; break; }