From d5c8076e2fe1c9db3d5417879341accdd6b87c60 Mon Sep 17 00:00:00 2001 From: Robert Nasarek Date: Tue, 9 Dec 2025 09:29:17 +0100 Subject: [PATCH] first commit --- WISSKI-UPDATE-TESTING.md | 122 +++++++++++++++ test-wisski-data-integrity.php | 274 +++++++++++++++++++++++++++++++++ test-wisski-update.sh | 75 +++++++++ 3 files changed, 471 insertions(+) create mode 100644 WISSKI-UPDATE-TESTING.md create mode 100644 test-wisski-data-integrity.php create mode 100755 test-wisski-update.sh diff --git a/WISSKI-UPDATE-TESTING.md b/WISSKI-UPDATE-TESTING.md new file mode 100644 index 0000000..5c02039 --- /dev/null +++ b/WISSKI-UPDATE-TESTING.md @@ -0,0 +1,122 @@ +# WissKI Table Update Data Integrity Testing + +This directory contains scripts to test data integrity when updating WissKI tables. The scripts ensure that no data is lost or truncated during schema updates. + +## Files + +- `test-wisski-data-integrity.php` - PHP script to export and verify data +- `test-wisski-update.sh` - Bash wrapper script for the complete testing process + +## Usage + +### Option 1: Automated Testing (Recommended) + +Run the complete test process (export → update → verify): + +```bash +./test-wisski-update.sh +``` + +This script will: +1. Export all WissKI table data (row counts, checksums, sample rows) +2. Prompt you to run Drupal updates +3. Verify data integrity after updates +4. 
Generate a detailed report + +### Option 2: Manual Step-by-Step + +#### Step 1: Export Data Before Update + +```bash +php test-wisski-data-integrity.php export +``` + +This creates a directory `wisski-data-integrity-test/` with: +- `before-update-summary.json` - Summary of all tables +- Individual table JSON files with row counts, checksums, and sample data + +#### Step 2: Run Drupal Updates + +```bash +drush updatedb -y +``` + +Or use the Drupal admin interface: `/update.php` + +#### Step 3: Verify Data Integrity + +```bash +php test-wisski-data-integrity.php verify +``` + +This compares the current state with the exported data and reports: +- Row count changes +- Checksum mismatches (data changes) +- Sample data differences + +## Output + +All test results are saved in `wisski-data-integrity-test/`: + +- `before-update-summary.json` - Complete export before update +- `verification-report.json` - Detailed verification results +- Individual table JSON files for detailed inspection + +## What Gets Tested + +For each WissKI table (all tables with `wisski` prefix): + +1. **Row Count** - Ensures no rows are lost or added unexpectedly +2. **Data Checksum** - MD5 hash of all table data to detect any changes +3. **Sample Rows** - First 10 rows compared to detect data corruption +4. **Column Information** - Column names are recorded in each table's export file for manual inspection (`verify` does not compare them) + +## Interpreting Results + +### ✓ Success +- All row counts match +- All checksums match +- No data loss detected + +### ⚠️ Issues Detected +- **Row count mismatch**: Rows were added or deleted +- **Checksum mismatch**: Data content changed (may be expected for schema updates) +- **Sample data mismatch**: First rows differ (investigate for corruption) + +## Important Notes + +1. **Backup First**: Always back up your database before running updates +2. **Schema Changes**: Some checksum changes may be expected if column types change (e.g., VARCHAR to TEXT) +3. 
**Large Tables**: For very large tables, checksum calculation may fall back to partial checksums
+4. **Empty Tables**: Empty tables are marked with checksum 'empty' and skipped in checksum comparison
+
+## Troubleshooting
+
+If verification fails:
+
+1. Check `verification-report.json` for detailed information
+2. Compare individual table JSON files before/after
+3. Review Drupal update logs for errors
+4. Check if schema changes are expected (column type changes, etc.)
+
+## Example Output
+
+```
+Exporting data for 450 WissKI tables...
+ Processing wisski_entity_map...
+ Processing wisski_calling_bundles...
+ ...
+
+✓ Data export complete.
+
+Verifying data integrity for 450 tables...
+ Checking wisski_entity_map...
+ ✓ Row count matches: 1234
+ ✓ Checksum matches
+ ✓ Sample data matches
+
+==========================================
+✓ SUCCESS: All data verified successfully!
+==========================================
+```
+
diff --git a/test-wisski-data-integrity.php b/test-wisski-data-integrity.php
new file mode 100644
index 0000000..bc1504b
--- /dev/null
+++ b/test-wisski-data-integrity.php
@@ -0,0 +1,274 @@
+boot();
+
+// Database connection.
+$connection = \Drupal::database();
+$outputDir = __DIR__ . '/wisski-data-integrity-test';
+
+/**
+ * Returns the names of all WissKI tables, i.e. tables matching 'wisski%'.
+ *
+ * The lookup goes through the schema API instead of querying
+ * information_schema directly: findTables() matches the pattern against the
+ * Drupal-managed tables and returns their names WITHOUT the configured table
+ * prefix. That is the form every other query in this script needs, because
+ * they all wrap the name in {} and {} adds the prefix again. Raw
+ * information_schema names would be missed (prefix in front of 'wisski') or
+ * double-prefixed on a site that uses a table prefix.
+ *
+ * @param object $connection
+ *   The Drupal database connection; only schema() is used.
+ *
+ * @return string[]
+ *   Unprefixed table names as a sorted list, so that exports and console
+ *   output have a stable order.
+ */
+function getWisskiTables($connection) {
+  // findTables() returns an array keyed by table name; callers expect a list.
+  $tables = array_values($connection->schema()->findTables('wisski%'));
+  sort($tables, SORT_STRING);
+  return $tables;
+}
+
+// Export table data: row count, checksum, sample rows.
+/**
+ * Collects integrity data for one table.
+ *
+ * Gathers the row count, the column names, up to 10 sample rows and an MD5
+ * checksum over the content of the whole table. The SQL is MySQL/MariaDB
+ * specific (SHOW COLUMNS, GROUP_CONCAT, backtick quoting).
+ *
+ * @param object $connection
+ *   Database connection; only query() is used.
+ * @param string $tableName
+ *   Table name. It is wrapped in {} for every query.
+ * @param string $outputDir
+ *   Not used by this function; kept so existing callers keep working.
+ *
+ * @return array
+ *   Array with the keys table, timestamp, row_count, checksum, sample_rows
+ *   and column_info. checksum is 'empty' for a table without rows, an MD5 hex
+ *   string when the full checksum could be computed, or 'partial_<md5>' (row
+ *   count plus first sample row) when it could not.
+ */
+function exportTableData($connection, $tableName, $outputDir) {
+  $data = [
+    'table' => $tableName,
+    'timestamp' => date('Y-m-d H:i:s'),
+    'row_count' => 0,
+    'checksum' => '',
+    'sample_rows' => [],
+    'column_info' => [],
+  ];
+  $table = '{' . $tableName . '}';
+
+  // Get row count.
+  $countResult = $connection->query("SELECT COUNT(*) as cnt FROM " . $table);
+  $data['row_count'] = (int) $countResult->fetchField();
+
+  // Get column information. This happens before the empty-table shortcut so
+  // that the column list is recorded for empty tables as well.
+  $columns = [];
+  $columnsResult = $connection->query("SHOW COLUMNS FROM " . $table);
+  while ($col = $columnsResult->fetchAssoc()) {
+    $columns[] = $col['Field'];
+  }
+  $data['column_info'] = $columns;
+
+  if ($data['row_count'] === 0) {
+    $data['checksum'] = 'empty';
+    return $data;
+  }
+
+  // The sample and the checksum share one explicit ordering (the first five
+  // columns). Without ORDER BY the server is free to return rows in another
+  // order after a table rebuild, which would show up as a sample mismatch
+  // although no data changed. Rows that tie on all five columns can still
+  // swap places.
+  $orderBy = implode(', ', array_map('_wisskiQuoteIdentifier', array_slice($columns, 0, 5)));
+
+  // Get sample rows (first 10 rows in that order).
+  $sampleResult = $connection->query("SELECT * FROM " . $table . " ORDER BY " . $orderBy . " LIMIT 10");
+  $samples = [];
+  while ($row = $sampleResult->fetchAssoc()) {
+    $samples[] = $row;
+  }
+  $data['sample_rows'] = $samples;
+
+  // Calculate checksum of all data: MD5 over every row, each row being its
+  // columns cast to text and joined with '|', rows joined with '|||'.
+  // NULL and '' are not distinguished by this encoding.
+  $checksumFields = array_map(function ($col) {
+    return "COALESCE(CAST(" . _wisskiQuoteIdentifier($col) . " AS CHAR), '')";
+  }, $columns);
+  $concatExpr = "CONCAT_WS('|', " . implode(', ', $checksumFields) . ")";
+  $checksumQuery = "SELECT MD5(GROUP_CONCAT(" . $concatExpr .
+    " ORDER BY " . $orderBy . " SEPARATOR '|||')) as chksum FROM " . $table;
+
+  $checksum = NULL;
+  try {
+    // GROUP_CONCAT() silently cuts its result at group_concat_max_len, which
+    // defaults to 1024 bytes. With the default the "whole table" checksum
+    // would only cover the first kilobyte, so raise the limit for this
+    // session first.
+    $connection->query("SET SESSION group_concat_max_len = 4294967295");
+    $checksum = $connection->query($checksumQuery)->fetchField();
+  }
+  catch (\Exception $e) {
+    // Handled by the fallback below.
+    $checksum = NULL;
+  }
+
+  if (!$checksum) {
+    // No usable result (query failed, or the server returned NULL because
+    // the concatenated data was too large). Fall back to row count + first
+    // row hash instead of a constant that would always compare as equal.
+    $checksum = 'partial_' . md5($data['row_count'] . serialize($samples[0] ?? []));
+  }
+  $data['checksum'] = $checksum;
+
+  return $data;
+}
+
+/**
+ * Quotes a column name with backticks, doubling any embedded backtick.
+ */
+function _wisskiQuoteIdentifier($name) {
+  return '`' . str_replace('`', '``', $name) . '`';
+}
+
+/**
+ * Encodes data as pretty-printed JSON for the export and report files.
+ *
+ * Invalid UTF-8 (for example binary column values in sample rows) is replaced
+ * instead of making json_encode() fail, so a single odd value cannot turn the
+ * whole file into an empty one. Sample comparison of such values is therefore
+ * lossy; the SQL checksum still covers the real bytes.
+ */
+function _wisskiJsonEncode($data) {
+  return json_encode($data, JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE | JSON_PARTIAL_OUTPUT_ON_ERROR);
+}
+
+/**
+ * Writes data as JSON to a file and throws \RuntimeException on failure.
+ */
+function _wisskiWriteJson($file, $data) {
+  $json = _wisskiJsonEncode($data);
+  if ($json === FALSE || file_put_contents($file, $json) === FALSE) {
+    throw new \RuntimeException("Unable to write JSON file: {$file}");
+  }
+}
+
+/**
+ * Exports integrity data for all WissKI tables into the output directory.
+ *
+ * Writes one '<table>.json' per table and 'before-update-summary.json' with
+ * the data of all tables.
+ *
+ * @param object $connection
+ *   Database connection.
+ * @param string $outputDir
+ *   Target directory; created if it does not exist.
+ *
+ * @return array
+ *   The summary: export_timestamp and tables (table name => table data).
+ *
+ * @throws \RuntimeException
+ *   When the directory cannot be created or a file cannot be written. A
+ *   missing or partial export must not go unnoticed, because the later
+ *   verification relies on it.
+ */
+function exportAllTables($connection, $outputDir) {
+  if (!is_dir($outputDir) && !mkdir($outputDir, 0755, TRUE) && !is_dir($outputDir)) {
+    throw new \RuntimeException("Unable to create output directory: {$outputDir}");
+  }
+
+  $tables = getWisskiTables($connection);
+  $allData = [
+    'export_timestamp' => date('Y-m-d H:i:s'),
+    'tables' => [],
+  ];
+
+  echo "Exporting data for " . count($tables) . " WissKI tables...\n";
+
+  foreach ($tables as $table) {
+    echo " Processing {$table}...\n";
+    $tableData = exportTableData($connection, $table, $outputDir);
+    $allData['tables'][$table] = $tableData;
+
+    // Save individual table export.
+    _wisskiWriteJson($outputDir . '/' . $table . '.json', $tableData);
+  }
+
+  // Save summary.
+  $summaryFile = $outputDir . '/before-update-summary.json';
+  _wisskiWriteJson($summaryFile, $allData);
+
+  echo "\nExport complete! Data saved to: {$outputDir}\n";
+  echo "Summary: {$summaryFile}\n";
+
+  return $allData;
+}
+
+/**
+ * Compares the current table data with the exported before-update summary.
+ *
+ * For every table in the summary the row count, the checksum (unless one side
+ * is 'empty') and the sample rows are compared. The result is printed and
+ * written to 'verification-report.json' in the output directory.
+ *
+ * @param object $connection
+ *   Database connection.
+ * @param string $outputDir
+ *   Directory that contains 'before-update-summary.json'.
+ *
+ * @return bool
+ *   TRUE when no issue was found, FALSE when issues were found or the summary
+ *   is missing or unreadable.
+ */
+function verifyDataIntegrity($connection, $outputDir) {
+  $summaryFile = $outputDir . '/before-update-summary.json';
+
+  if (!file_exists($summaryFile)) {
+    echo "ERROR: Before-update summary not found at: {$summaryFile}\n";
+    echo "Please run 'export' first.\n";
+    return FALSE;
+  }
+
+  $beforeData = json_decode(file_get_contents($summaryFile), TRUE);
+  if (!is_array($beforeData) || !isset($beforeData['tables']) || !is_array($beforeData['tables'])) {
+    echo "ERROR: Before-update summary is not valid: {$summaryFile}\n";
+    echo "Please run 'export' again.\n";
+    return FALSE;
+  }
+
+  $issues = [];
+  $verified = [];
+  // Keyed by table name, so a table with several issues is counted once.
+  $tablesWithIssues = [];
+
+  echo "Verifying data integrity for " . count($beforeData['tables']) . " tables...\n\n";
+
+  foreach ($beforeData['tables'] as $tableName => $beforeTableData) {
+    echo "Checking {$tableName}...\n";
+
+    try {
+      $afterTableData = exportTableData($connection, $tableName, $outputDir);
+    }
+    catch (\Exception $e) {
+      // Typically the table was dropped or renamed by the update. Record it
+      // and continue, so the remaining tables are still checked and the
+      // report is still written.
+      $issues[] = [
+        'table' => $tableName,
+        'issue' => 'table_unreadable',
+        'error' => $e->getMessage(),
+      ];
+      $tablesWithIssues[$tableName] = TRUE;
+      echo " ⚠️ TABLE UNREADABLE: " . $e->getMessage() . "\n\n";
+      continue;
+    }
+
+    $hasIssue = FALSE;
+
+    // Check row count.
+    if ($afterTableData['row_count'] !== $beforeTableData['row_count']) {
+      $issues[] = [
+        'table' => $tableName,
+        'issue' => 'row_count_mismatch',
+        'before' => $beforeTableData['row_count'],
+        'after' => $afterTableData['row_count'],
+        'difference' => $afterTableData['row_count'] - $beforeTableData['row_count'],
+      ];
+      $hasIssue = TRUE;
+      echo " ⚠️ ROW COUNT MISMATCH: Before={$beforeTableData['row_count']}, After={$afterTableData['row_count']}\n";
+    }
+    else {
+      echo " ✓ Row count matches: {$afterTableData['row_count']}\n";
+    }
+
+    // Check checksum.
+    if ($beforeTableData['checksum'] !== 'empty' && $afterTableData['checksum'] !== 'empty') {
+      if ($beforeTableData['checksum'] !== $afterTableData['checksum']) {
+        $issues[] = [
+          'table' => $tableName,
+          'issue' => 'checksum_mismatch',
+          'before' => $beforeTableData['checksum'],
+          'after' => $afterTableData['checksum'],
+        ];
+        $hasIssue = TRUE;
+        echo " ⚠️ CHECKSUM MISMATCH: Data may have changed!\n";
+      }
+      else {
+        echo " ✓ Checksum matches\n";
+      }
+    }
+
+    // Compare sample rows if available. The "before" rows went through a
+    // JSON round trip, so the fresh rows are normalised the same way;
+    // otherwise value types that JSON does not preserve would be reported as
+    // differences.
+    $beforeSamples = $beforeTableData['sample_rows'];
+    $afterSamples = json_decode(_wisskiJsonEncode($afterTableData['sample_rows']), TRUE);
+    if (!empty($beforeSamples) && !empty($afterSamples)) {
+      $sampleMatch = TRUE;
+      $minSamples = min(count($beforeSamples), count($afterSamples));
+      for ($i = 0; $i < $minSamples; $i++) {
+        if ($beforeSamples[$i] !== $afterSamples[$i]) {
+          $sampleMatch = FALSE;
+          break;
+        }
+      }
+      if (!$sampleMatch) {
+        $issues[] = [
+          'table' => $tableName,
+          'issue' => 'sample_data_mismatch',
+        ];
+        $hasIssue = TRUE;
+        echo " ⚠️ Sample data differs!\n";
+      }
+      else {
+        echo " ✓ Sample data matches\n";
+      }
+    }
+
+    if ($hasIssue) {
+      $tablesWithIssues[$tableName] = TRUE;
+    }
+    else {
+      $verified[] = $tableName;
+    }
+
+    echo "\n";
+  }
+
+  // Save verification report.
+  $report = [
+    'verification_timestamp' => date('Y-m-d H:i:s'),
+    'tables_verified' => count($verified),
+    'tables_with_issues' => count($tablesWithIssues),
+    'verified_tables' => $verified,
+    'issues' => $issues,
+  ];
+
+  $reportFile = $outputDir . '/verification-report.json';
+  try {
+    _wisskiWriteJson($reportFile, $report);
+  }
+  catch (\RuntimeException $e) {
+    // The verification result itself is still valid and printed below.
+    echo "WARNING: " . $e->getMessage() . "\n";
+  }
+
+  // Print summary.
+  echo "\n" . str_repeat('=', 60) . "\n";
+  echo "VERIFICATION SUMMARY\n";
+  echo str_repeat('=', 60) . "\n";
+  echo "Tables verified: " . count($verified) . "\n";
+  echo "Tables with issues: " . count($tablesWithIssues) . "\n";
+
+  if (!empty($issues)) {
+    echo "\n⚠️ ISSUES FOUND:\n";
+    foreach ($issues as $issue) {
+      echo " - {$issue['table']}: {$issue['issue']}\n";
+      if (isset($issue['difference'])) {
+        echo " Difference: {$issue['difference']} rows\n";
+      }
+    }
+    echo "\nFull report: {$reportFile}\n";
+    return FALSE;
+  }
+  else {
+    echo "\n✓ All tables verified successfully! No data loss detected.\n";
+    echo "Report: {$reportFile}\n";
+    return TRUE;
+  }
+}
+
+// Main execution.
+$command = $argv[1] ?? 'help';
+
+switch ($command) {
+  case 'export':
+    exportAllTables($connection, $outputDir);
+    break;
+
+  case 'verify':
+    // Report the result through the exit status, so that calling scripts can
+    // tell a failed verification from a successful one.
+    exit(verifyDataIntegrity($connection, $outputDir) ? 0 : 1);
+
+  default:
+    echo "Usage:\n";
+    echo " php test-wisski-data-integrity.php export # Export data before update\n";
+    echo " php test-wisski-data-integrity.php verify # Verify data after update\n";
+    break;
+}
+
diff --git a/test-wisski-update.sh b/test-wisski-update.sh
new file mode 100755
index 0000000..885d0bc
--- /dev/null
+++ b/test-wisski-update.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+# Script to test WissKI table updates with data integrity checks.
+# This script exports data before update, runs the update, and verifies data integrity.
+
+# Abort on the first failing command. Commands whose failure is expected and
+# handled (the verification step) must capture their status explicitly.
+set -e
+
+# Run from the directory of this script so the relative PHP script path and
+# the output directory resolve regardless of the caller's working directory.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+OUTPUT_DIR="$SCRIPT_DIR/wisski-data-integrity-test"
+
+echo "=========================================="
+echo "WissKI Table Update Data Integrity Test"
+echo "=========================================="
+echo ""
+
+# Step 1: Export data before update.
+echo "Step 1: Exporting data BEFORE update..."
+echo "----------------------------------------"
+php test-wisski-data-integrity.php export
+
+if [ ! -f "$OUTPUT_DIR/before-update-summary.json" ]; then
+  echo "ERROR: Failed to export data before update!"
+  exit 1
+fi
+
+echo ""
+echo "✓ Data export complete. Backup saved to: $OUTPUT_DIR"
+echo ""
+read -r -p "Press Enter to continue with the update, or Ctrl+C to cancel..."
+
+# Step 2: Run Drupal updates.
+echo ""
+echo "Step 2: Running Drupal updates..."
+echo "----------------------------------------"
+
+# Check if drush is available.
+if command -v drush &> /dev/null; then
+  echo "Running: drush updatedb -y"
+  drush updatedb -y
+else
+  echo "Drush not found. Please run Drupal updates manually:"
+  echo " drush updatedb -y"
+  echo ""
+  read -r -p "Press Enter after you have run the updates, or Ctrl+C to cancel..."
+fi
+
+# Step 3: Verify data integrity.
+echo ""
+echo "Step 3: Verifying data integrity AFTER update..."
+echo "----------------------------------------"
+
+# Capture the exit status with `||`: under `set -e` a bare failing command
+# would terminate the script right here, and the warning below could never be
+# printed.
+VERIFICATION_EXIT=0
+php test-wisski-data-integrity.php verify || VERIFICATION_EXIT=$?
+
+echo ""
+if [ "$VERIFICATION_EXIT" -eq 0 ]; then
+  echo "=========================================="
+  echo "✓ SUCCESS: All data verified successfully!"
+  echo "=========================================="
+else
+  echo "=========================================="
+  echo "⚠️ WARNING: Issues detected during verification!"
+  echo "Check the verification report for details."
+  echo "=========================================="
+fi
+
+echo ""
+echo "Test results saved to: $OUTPUT_DIR"
+echo ""
+
+exit "$VERIFICATION_EXIT"
+