first commit
This commit is contained in:
commit
d5c8076e2f
3 changed files with 471 additions and 0 deletions
274
test-wisski-data-integrity.php
Normal file
274
test-wisski-data-integrity.php
Normal file
|
|
@ -0,0 +1,274 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Script to test WissKI table data integrity before and after schema updates.
|
||||
*
|
||||
* Usage:
|
||||
* php test-wisski-data-integrity.php export # Export data before update
|
||||
* php test-wisski-data-integrity.php verify # Verify data after update
|
||||
*/
|
||||
|
||||
// Bootstrap Drupal.
|
||||
$autoloader = require_once __DIR__ . '/autoload.php';
|
||||
$request = \Symfony\Component\HttpFoundation\Request::createFromGlobals();
|
||||
$kernel = \Drupal\Core\DrupalKernel::createFromRequest($request, $autoloader, 'prod');
|
||||
$kernel->boot();
|
||||
|
||||
// Database connection.
|
||||
$connection = \Drupal::database();
|
||||
$outputDir = __DIR__ . '/wisski-data-integrity-test';
|
||||
|
||||
// Get all WissKI tables.
|
||||
function getWisskiTables($connection) {
|
||||
$tables = [];
|
||||
// Query information_schema to get all WissKI tables.
|
||||
$database = $connection->getConnectionOptions();
|
||||
$dbName = $database['database'];
|
||||
$result = $connection->query("SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA = :db AND TABLE_NAME LIKE 'wisski%'", [
|
||||
':db' => $dbName,
|
||||
]);
|
||||
while ($row = $result->fetchAssoc()) {
|
||||
$tables[] = $row['TABLE_NAME'];
|
||||
}
|
||||
return $tables;
|
||||
}
|
||||
|
||||
// Export table data: row count, checksum, sample rows.
|
||||
function exportTableData($connection, $tableName, $outputDir) {
|
||||
$data = [
|
||||
'table' => $tableName,
|
||||
'timestamp' => date('Y-m-d H:i:s'),
|
||||
'row_count' => 0,
|
||||
'checksum' => '',
|
||||
'sample_rows' => [],
|
||||
'column_info' => [],
|
||||
];
|
||||
|
||||
// Get row count.
|
||||
$countResult = $connection->query("SELECT COUNT(*) as cnt FROM {" . $tableName . "}");
|
||||
$data['row_count'] = (int) $countResult->fetchField();
|
||||
|
||||
if ($data['row_count'] === 0) {
|
||||
$data['checksum'] = 'empty';
|
||||
return $data;
|
||||
}
|
||||
|
||||
// Get column information.
|
||||
$columnsResult = $connection->query("SHOW COLUMNS FROM {" . $tableName . "}");
|
||||
$columns = [];
|
||||
while ($col = $columnsResult->fetchAssoc()) {
|
||||
$columns[] = $col['Field'];
|
||||
}
|
||||
$data['column_info'] = $columns;
|
||||
|
||||
// Get sample rows (first 10 rows).
|
||||
$sampleResult = $connection->query("SELECT * FROM {" . $tableName . "} LIMIT 10");
|
||||
$samples = [];
|
||||
while ($row = $sampleResult->fetchAssoc()) {
|
||||
$samples[] = $row;
|
||||
}
|
||||
$data['sample_rows'] = $samples;
|
||||
|
||||
// Calculate checksum of all data.
|
||||
// For large tables, we'll use a hash of all row data concatenated.
|
||||
// Build checksum query - use backticks for column names.
|
||||
$checksumFields = [];
|
||||
foreach ($columns as $col) {
|
||||
$checksumFields[] = "COALESCE(CAST(`" . $col . "` AS CHAR), '')";
|
||||
}
|
||||
$orderByCols = array_slice($columns, 0, min(5, count($columns)));
|
||||
$orderBy = implode(', ', array_map(function($col) {
|
||||
return "`" . $col . "`";
|
||||
}, $orderByCols));
|
||||
|
||||
// Use CONCAT_WS for safer concatenation.
|
||||
$concatExpr = "CONCAT_WS('|', " . implode(', ', $checksumFields) . ")";
|
||||
$checksumQuery = "SELECT MD5(GROUP_CONCAT(" . $concatExpr .
|
||||
" ORDER BY " . $orderBy . " SEPARATOR '|||')) as chksum FROM {" . $tableName . "}";
|
||||
|
||||
try {
|
||||
$checksumResult = $connection->query($checksumQuery);
|
||||
$data['checksum'] = $checksumResult->fetchField() ?: 'no_checksum';
|
||||
}
|
||||
catch (\Exception $e) {
|
||||
// If checksum fails (e.g., GROUP_CONCAT too large), use row count + first row hash.
|
||||
$data['checksum'] = 'partial_' . md5($data['row_count'] . serialize($samples[0] ?? []));
|
||||
}
|
||||
|
||||
return $data;
|
||||
}
|
||||
|
||||
// Export all WissKI tables.
|
||||
function exportAllTables($connection, $outputDir) {
|
||||
if (!is_dir($outputDir)) {
|
||||
mkdir($outputDir, 0755, TRUE);
|
||||
}
|
||||
|
||||
$tables = getWisskiTables($connection);
|
||||
$allData = [
|
||||
'export_timestamp' => date('Y-m-d H:i:s'),
|
||||
'tables' => [],
|
||||
];
|
||||
|
||||
echo "Exporting data for " . count($tables) . " WissKI tables...\n";
|
||||
|
||||
foreach ($tables as $table) {
|
||||
echo " Processing {$table}...\n";
|
||||
$tableData = exportTableData($connection, $table, $outputDir);
|
||||
$allData['tables'][$table] = $tableData;
|
||||
|
||||
// Save individual table export.
|
||||
$tableFile = $outputDir . '/' . $table . '.json';
|
||||
file_put_contents($tableFile, json_encode($tableData, JSON_PRETTY_PRINT));
|
||||
}
|
||||
|
||||
// Save summary.
|
||||
$summaryFile = $outputDir . '/before-update-summary.json';
|
||||
file_put_contents($summaryFile, json_encode($allData, JSON_PRETTY_PRINT));
|
||||
|
||||
echo "\nExport complete! Data saved to: {$outputDir}\n";
|
||||
echo "Summary: {$summaryFile}\n";
|
||||
|
||||
return $allData;
|
||||
}
|
||||
|
||||
// Verify data integrity after update.
|
||||
function verifyDataIntegrity($connection, $outputDir) {
|
||||
$summaryFile = $outputDir . '/before-update-summary.json';
|
||||
|
||||
if (!file_exists($summaryFile)) {
|
||||
echo "ERROR: Before-update summary not found at: {$summaryFile}\n";
|
||||
echo "Please run 'export' first.\n";
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
$beforeData = json_decode(file_get_contents($summaryFile), TRUE);
|
||||
$issues = [];
|
||||
$verified = [];
|
||||
|
||||
echo "Verifying data integrity for " . count($beforeData['tables']) . " tables...\n\n";
|
||||
|
||||
foreach ($beforeData['tables'] as $tableName => $beforeTableData) {
|
||||
echo "Checking {$tableName}...\n";
|
||||
|
||||
$afterTableData = exportTableData($connection, $tableName, $outputDir);
|
||||
|
||||
// Check row count.
|
||||
if ($afterTableData['row_count'] !== $beforeTableData['row_count']) {
|
||||
$issues[] = [
|
||||
'table' => $tableName,
|
||||
'issue' => 'row_count_mismatch',
|
||||
'before' => $beforeTableData['row_count'],
|
||||
'after' => $afterTableData['row_count'],
|
||||
'difference' => $afterTableData['row_count'] - $beforeTableData['row_count'],
|
||||
];
|
||||
echo " ⚠️ ROW COUNT MISMATCH: Before={$beforeTableData['row_count']}, After={$afterTableData['row_count']}\n";
|
||||
}
|
||||
else {
|
||||
echo " ✓ Row count matches: {$afterTableData['row_count']}\n";
|
||||
}
|
||||
|
||||
// Check checksum.
|
||||
if ($beforeTableData['checksum'] !== 'empty' && $afterTableData['checksum'] !== 'empty') {
|
||||
if ($beforeTableData['checksum'] !== $afterTableData['checksum']) {
|
||||
$issues[] = [
|
||||
'table' => $tableName,
|
||||
'issue' => 'checksum_mismatch',
|
||||
'before' => $beforeTableData['checksum'],
|
||||
'after' => $afterTableData['checksum'],
|
||||
];
|
||||
echo " ⚠️ CHECKSUM MISMATCH: Data may have changed!\n";
|
||||
}
|
||||
else {
|
||||
echo " ✓ Checksum matches\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Compare sample rows if available.
|
||||
if (!empty($beforeTableData['sample_rows']) && !empty($afterTableData['sample_rows'])) {
|
||||
$sampleMatch = TRUE;
|
||||
$minSamples = min(count($beforeTableData['sample_rows']), count($afterTableData['sample_rows']));
|
||||
for ($i = 0; $i < $minSamples; $i++) {
|
||||
if ($beforeTableData['sample_rows'][$i] !== $afterTableData['sample_rows'][$i]) {
|
||||
$sampleMatch = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!$sampleMatch) {
|
||||
$issues[] = [
|
||||
'table' => $tableName,
|
||||
'issue' => 'sample_data_mismatch',
|
||||
];
|
||||
echo " ⚠️ Sample data differs!\n";
|
||||
}
|
||||
else {
|
||||
echo " ✓ Sample data matches\n";
|
||||
}
|
||||
}
|
||||
|
||||
if (empty(array_filter($issues, function($issue) use ($tableName) {
|
||||
return $issue['table'] === $tableName;
|
||||
}))) {
|
||||
$verified[] = $tableName;
|
||||
}
|
||||
|
||||
echo "\n";
|
||||
}
|
||||
|
||||
// Save verification report.
|
||||
$report = [
|
||||
'verification_timestamp' => date('Y-m-d H:i:s'),
|
||||
'tables_verified' => count($verified),
|
||||
'tables_with_issues' => count($issues),
|
||||
'verified_tables' => $verified,
|
||||
'issues' => $issues,
|
||||
];
|
||||
|
||||
$reportFile = $outputDir . '/verification-report.json';
|
||||
file_put_contents($reportFile, json_encode($report, JSON_PRETTY_PRINT));
|
||||
|
||||
// Print summary.
|
||||
echo "\n" . str_repeat('=', 60) . "\n";
|
||||
echo "VERIFICATION SUMMARY\n";
|
||||
echo str_repeat('=', 60) . "\n";
|
||||
echo "Tables verified: " . count($verified) . "\n";
|
||||
echo "Tables with issues: " . count($issues) . "\n";
|
||||
|
||||
if (!empty($issues)) {
|
||||
echo "\n⚠️ ISSUES FOUND:\n";
|
||||
foreach ($issues as $issue) {
|
||||
echo " - {$issue['table']}: {$issue['issue']}\n";
|
||||
if (isset($issue['difference'])) {
|
||||
echo " Difference: {$issue['difference']} rows\n";
|
||||
}
|
||||
}
|
||||
echo "\nFull report: {$reportFile}\n";
|
||||
return FALSE;
|
||||
}
|
||||
else {
|
||||
echo "\n✓ All tables verified successfully! No data loss detected.\n";
|
||||
echo "Report: {$reportFile}\n";
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
// Main execution.
|
||||
$command = $argv[1] ?? 'help';
|
||||
|
||||
switch ($command) {
|
||||
case 'export':
|
||||
exportAllTables($connection, $outputDir);
|
||||
break;
|
||||
|
||||
case 'verify':
|
||||
verifyDataIntegrity($connection, $outputDir);
|
||||
break;
|
||||
|
||||
default:
|
||||
echo "Usage:\n";
|
||||
echo " php test-wisski-data-integrity.php export # Export data before update\n";
|
||||
echo " php test-wisski-data-integrity.php verify # Verify data after update\n";
|
||||
break;
|
||||
}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue