first commit

This commit is contained in:
Robert Nasarek 2025-12-09 09:29:17 +01:00
commit d5c8076e2f
3 changed files with 471 additions and 0 deletions

122
WISSKI-UPDATE-TESTING.md Normal file
View file

@ -0,0 +1,122 @@
# WissKI Table Update Data Integrity Testing
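This directory contains scripts to test data integrity when updating WissKI tables. The scripts check that no data is lost or truncated during schema updates. Both scripts must be located in the Drupal web root (next to `autoload.php`), because the PHP script bootstraps Drupal from its own directory.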
## Files
- `test-wisski-data-integrity.php` - PHP script to export and verify data
- `test-wisski-update.sh` - Bash wrapper script for the complete testing process
## Usage
### Option 1: Automated Testing (Recommended)
Run the complete test process (export → update → verify):
```bash
./test-wisski-update.sh
```
This script will:
1. Export all WissKI table data (row counts, checksums, sample rows)
2. Prompt you to run Drupal updates
3. Verify data integrity after updates
4. Generate a detailed report
### Option 2: Manual Step-by-Step
#### Step 1: Export Data Before Update
```bash
php test-wisski-data-integrity.php export
```
This creates a directory `wisski-data-integrity-test/` with:
- `before-update-summary.json` - Summary of all tables
- Individual table JSON files with row counts, checksums, and sample data
#### Step 2: Run Drupal Updates
```bash
drush updatedb -y
```
Or use the Drupal admin interface: `/update.php`
#### Step 3: Verify Data Integrity
```bash
php test-wisski-data-integrity.php verify
```
This compares the current state with the exported data and reports:
- Row count changes
- Checksum mismatches (data changes)
- Sample data differences
## Output
All test results are saved in `wisski-data-integrity-test/`:
- `before-update-summary.json` - Complete export before update
- `verification-report.json` - Detailed verification results
- Individual table JSON files for detailed inspection
## What Gets Tested
For each WissKI table (all tables with `wisski` prefix):
1. **Row Count** - Ensures no rows are lost or added unexpectedly
2. **Data Checksum** - MD5 hash of all table data to detect any changes
3. **Sample Rows** - First 10 rows compared to detect data corruption
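4. **Column Information** - Column names are recorded in each table's JSON export so that schema changes can be compared manually (they are not checked automatically)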
## Interpreting Results
### ✓ Success
- All row counts match
- All checksums match
- No data loss detected
### ⚠️ Issues Detected
- **Row count mismatch**: Rows were added or deleted
- **Checksum mismatch**: Data content changed (may be expected for schema updates)
- **Sample data mismatch**: First rows differ (investigate for corruption)
## Important Notes
1. **Backup First**: Always backup your database before running updates
2. **Schema Changes**: Some checksum changes may be expected if column types change (e.g., VARCHAR to TEXT)
3. **Large Tables**: If the checksum query fails for a very large table, the script falls back to a partial checksum (prefixed `partial_`) built from the row count and the first sample row only
4. **Empty Tables**: Empty tables are marked with checksum 'empty' and skipped in checksum comparison
## Troubleshooting
If verification fails:
1. Check `verification-report.json` for detailed information
2. Compare individual table JSON files before/after
3. Review Drupal update logs for errors
4. Check if schema changes are expected (column type changes, etc.)
## Example Output
```
Exporting data for 450 WissKI tables...
Processing wisski_entity_map...
Processing wisski_calling_bundles...
...
✓ Data export complete.
Verifying data integrity for 450 tables...
Checking wisski_entity_map...
✓ Row count matches: 1234
✓ Checksum matches
✓ Sample data matches
==========================================
✓ SUCCESS: All data verified successfully!
==========================================
```

View file

@ -0,0 +1,274 @@
<?php
/**
* @file
* Script to test WissKI table data integrity before and after schema updates.
*
* Usage:
* php test-wisski-data-integrity.php export # Export data before update
* php test-wisski-data-integrity.php verify # Verify data after update
*/
// Refuse to run through a web server. This file has to live next to Drupal's
// autoload.php (i.e. inside the web root), so without this guard it would be
// reachable over HTTP and could bootstrap Drupal for anonymous visitors.
if (!in_array(PHP_SAPI, ['cli', 'phpdbg'], TRUE)) {
  http_response_code(403);
  exit("This script must be run from the command line.\n");
}

// Bootstrap Drupal. autoload.php is resolved relative to this script's own
// directory, and the kernel is booted in the 'prod' environment.
$autoloader = require_once __DIR__ . '/autoload.php';
$request = \Symfony\Component\HttpFoundation\Request::createFromGlobals();
$kernel = \Drupal\Core\DrupalKernel::createFromRequest($request, $autoloader, 'prod');
$kernel->boot();

// Database connection shared by all functions below.
$connection = \Drupal::database();

// Directory (next to this script) that receives the JSON exports and reports.
$outputDir = __DIR__ . '/wisski-data-integrity-test';
/**
 * Lists all tables whose name starts with "wisski".
 *
 * The lookup goes through Drupal's schema API instead of a hand-written
 * information_schema query. The returned names are the unprefixed base names,
 * which is what the "{table}" placeholders used by the other functions in this
 * script expect; raw information_schema names would be prefixed a second time
 * on sites that configure a database table prefix.
 *
 * @param object $connection
 *   The Drupal database connection (must provide schema()).
 *
 * @return string[]
 *   Alphabetically sorted list of matching table names.
 */
function getWisskiTables($connection) {
  // findTables() returns an array keyed by table name; callers want a list.
  $tables = array_values($connection->schema()->findTables('wisski%'));
  // Sort so exports and console output are stable between runs.
  sort($tables, SORT_STRING);
  return $tables;
}
/**
 * Builds an integrity snapshot (row count, checksum, sample rows) of a table.
 *
 * Differences to a naive GROUP_CONCAT based checksum:
 * - The checksum is an aggregate of per-row hashes (XOR of both MD5 halves
 *   plus a CRC32 sum). GROUP_CONCAT output is silently truncated at
 *   group_concat_max_len (1024 bytes by default), which would make the hash
 *   cover only the beginning of the table.
 * - The aggregate is independent of row order, so no ORDER BY is required.
 *   The CRC32 sum is included because identical rows cancel out under XOR.
 * - Sample rows are selected with an explicit ORDER BY so that the same rows
 *   are returned before and after an update.
 *
 * NOTE: checksums are only comparable between snapshots produced by the same
 * version of this function; re-run the export after changing it.
 *
 * @param object $connection
 *   The Drupal database connection (MySQL/MariaDB syntax is used).
 * @param string $tableName
 *   Unprefixed table name; it is wrapped in {} for Drupal's prefixing.
 * @param string $outputDir
 *   Unused here; kept so existing callers keep working.
 *
 * @return array
 *   Keys: table, timestamp, row_count (int), checksum (string; 'empty' for
 *   empty tables, 'partial_…' if the checksum query failed), sample_rows
 *   (up to 10 rows), column_info (list of column names).
 */
function exportTableData($connection, $tableName, $outputDir) {
  $data = [
    'table' => $tableName,
    'timestamp' => date('Y-m-d H:i:s'),
    'row_count' => 0,
    'checksum' => '',
    'sample_rows' => [],
    'column_info' => [],
  ];
  $table = '{' . $tableName . '}';

  // Get column information first so that it is recorded for empty tables too.
  $columns = [];
  $columnsResult = $connection->query("SHOW COLUMNS FROM " . $table);
  while ($col = $columnsResult->fetchAssoc()) {
    $columns[] = $col['Field'];
  }
  $data['column_info'] = $columns;

  // Get row count.
  $countResult = $connection->query("SELECT COUNT(*) AS cnt FROM " . $table);
  $data['row_count'] = (int) $countResult->fetchField();
  if ($data['row_count'] === 0) {
    $data['checksum'] = 'empty';
    return $data;
  }

  // Quote identifiers; a backtick inside a name has to be doubled.
  $quotedColumns = array_map(static function ($col) {
    return '`' . str_replace('`', '``', $col) . '`';
  }, $columns);

  // Get sample rows (first 10 rows in a deterministic order). Ordering by all
  // columns means rows that tie are identical, so the sample is stable.
  $orderBy = implode(', ', $quotedColumns);
  $sampleResult = $connection->query("SELECT * FROM " . $table . " ORDER BY " . $orderBy . " LIMIT 10");
  $samples = [];
  while ($row = $sampleResult->fetchAssoc()) {
    $samples[] = $row;
  }
  $data['sample_rows'] = $samples;

  // Per-row text representation: every column cast to CHAR, NULL mapped to ''.
  $checksumFields = array_map(static function ($quoted) {
    return "COALESCE(CAST(" . $quoted . " AS CHAR), '')";
  }, $quotedColumns);
  $rowExpr = "CONCAT_WS('|', " . implode(', ', $checksumFields) . ")";

  // XOR one 64-bit half of each row's MD5 over all rows, as 16 hex digits.
  $xorHalf = static function ($offset) use ($rowExpr) {
    return "LPAD(HEX(BIT_XOR(CAST(CONV(SUBSTRING(MD5(" . $rowExpr . "), " . $offset . ", 16), 16, 10) AS UNSIGNED))), 16, '0')";
  };
  $checksumQuery = "SELECT CONCAT(" . $xorHalf(1) . ", " . $xorHalf(17) . ", '-', SUM(CRC32(" . $rowExpr . "))) AS chksum FROM " . $table;

  try {
    $checksumResult = $connection->query($checksumQuery);
    $data['checksum'] = $checksumResult->fetchField() ?: 'no_checksum';
  }
  catch (\Exception $e) {
    // If the checksum query fails, fall back to row count + first row hash.
    $data['checksum'] = 'partial_' . md5($data['row_count'] . serialize($samples[0] ?? []));
  }
  return $data;
}
/**
 * Exports a snapshot of every WissKI table to JSON files.
 *
 * Writes one "<table>.json" file per table plus "before-update-summary.json"
 * into $outputDir (created if missing). Directory creation, JSON encoding and
 * file writes are checked: an unchecked json_encode() failure would write an
 * empty file that only breaks later, during verification.
 *
 * @param object $connection
 *   The Drupal database connection.
 * @param string $outputDir
 *   Directory that receives the export files.
 *
 * @return array
 *   The summary structure: export_timestamp and tables (keyed by table name).
 *
 * @throws \RuntimeException
 *   If the directory cannot be created or a file cannot be encoded/written.
 */
function exportAllTables($connection, $outputDir) {
  // The second is_dir() covers the case where the directory appeared in
  // between the first check and mkdir().
  if (!is_dir($outputDir) && !mkdir($outputDir, 0755, TRUE) && !is_dir($outputDir)) {
    throw new \RuntimeException("Unable to create output directory: {$outputDir}");
  }

  // Encodes and writes one JSON document, failing loudly on any error.
  // Invalid UTF-8 (e.g. binary column content in sample rows) is substituted
  // instead of making the whole document unencodable.
  $writeJson = static function ($file, array $payload) {
    $json = json_encode($payload, JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE);
    if ($json === FALSE) {
      throw new \RuntimeException("Unable to encode JSON for {$file}: " . json_last_error_msg());
    }
    if (file_put_contents($file, $json) === FALSE) {
      throw new \RuntimeException("Unable to write file: {$file}");
    }
  };

  $tables = getWisskiTables($connection);
  $allData = [
    'export_timestamp' => date('Y-m-d H:i:s'),
    'tables' => [],
  ];
  echo "Exporting data for " . count($tables) . " WissKI tables...\n";
  foreach ($tables as $table) {
    echo " Processing {$table}...\n";
    $tableData = exportTableData($connection, $table, $outputDir);
    $allData['tables'][$table] = $tableData;
    // Save individual table export.
    $writeJson($outputDir . '/' . $table . '.json', $tableData);
  }

  // Save summary.
  $summaryFile = $outputDir . '/before-update-summary.json';
  $writeJson($summaryFile, $allData);
  echo "\nExport complete! Data saved to: {$outputDir}\n";
  echo "Summary: {$summaryFile}\n";
  return $allData;
}
/**
 * Compares the current table state with the exported "before" snapshot.
 *
 * For every table in before-update-summary.json the row count, the checksum
 * (unless either side is 'empty') and the overlapping sample rows are
 * compared. Results are printed and written to verification-report.json.
 *
 * A table that can no longer be read (for example because it was dropped) is
 * recorded as an issue instead of aborting the whole run.
 *
 * @param object $connection
 *   The Drupal database connection.
 * @param string $outputDir
 *   Directory containing the export; also receives the report.
 *
 * @return bool
 *   TRUE if no issue was found, FALSE otherwise (including a missing or
 *   unreadable summary file).
 */
function verifyDataIntegrity($connection, $outputDir) {
  $summaryFile = $outputDir . '/before-update-summary.json';
  if (!file_exists($summaryFile)) {
    echo "ERROR: Before-update summary not found at: {$summaryFile}\n";
    echo "Please run 'export' first.\n";
    return FALSE;
  }
  $beforeData = json_decode((string) file_get_contents($summaryFile), TRUE);
  if (!is_array($beforeData) || !isset($beforeData['tables']) || !is_array($beforeData['tables'])) {
    echo "ERROR: Before-update summary is empty or not valid JSON: {$summaryFile}\n";
    echo "Please run 'export' again.\n";
    return FALSE;
  }

  $issues = [];
  $verified = [];
  // Set of table names with at least one issue (name => TRUE).
  $tablesWithIssues = [];

  echo "Verifying data integrity for " . count($beforeData['tables']) . " tables...\n\n";
  foreach ($beforeData['tables'] as $tableName => $beforeTableData) {
    echo "Checking {$tableName}...\n";
    $issuesBeforeTable = count($issues);

    try {
      $afterTableData = exportTableData($connection, $tableName, $outputDir);
    }
    catch (\Exception $e) {
      $issues[] = [
        'table' => $tableName,
        'issue' => 'table_unreadable',
        'message' => $e->getMessage(),
      ];
      $tablesWithIssues[$tableName] = TRUE;
      echo " ⚠️ TABLE UNREADABLE: {$e->getMessage()}\n\n";
      continue;
    }

    // The "before" data went through a JSON round trip when it was stored;
    // put the fresh data through the same conversion so both sides are
    // represented identically before strict comparison.
    $encoded = json_encode($afterTableData, JSON_INVALID_UTF8_SUBSTITUTE);
    if ($encoded !== FALSE) {
      $afterTableData = json_decode($encoded, TRUE);
    }

    // Check row count.
    if ($afterTableData['row_count'] !== $beforeTableData['row_count']) {
      $issues[] = [
        'table' => $tableName,
        'issue' => 'row_count_mismatch',
        'before' => $beforeTableData['row_count'],
        'after' => $afterTableData['row_count'],
        'difference' => $afterTableData['row_count'] - $beforeTableData['row_count'],
      ];
      echo " ⚠️ ROW COUNT MISMATCH: Before={$beforeTableData['row_count']}, After={$afterTableData['row_count']}\n";
    }
    else {
      echo " ✓ Row count matches: {$afterTableData['row_count']}\n";
    }

    // Check checksum. Empty tables carry the marker 'empty' instead of a
    // hash; a change to or from empty already shows up as row count mismatch.
    if ($beforeTableData['checksum'] !== 'empty' && $afterTableData['checksum'] !== 'empty') {
      if ($beforeTableData['checksum'] !== $afterTableData['checksum']) {
        $issues[] = [
          'table' => $tableName,
          'issue' => 'checksum_mismatch',
          'before' => $beforeTableData['checksum'],
          'after' => $afterTableData['checksum'],
        ];
        echo " ⚠️ CHECKSUM MISMATCH: Data may have changed!\n";
      }
      else {
        echo " ✓ Checksum matches\n";
      }
    }

    // Compare sample rows if available (only the rows present on both sides).
    if (!empty($beforeTableData['sample_rows']) && !empty($afterTableData['sample_rows'])) {
      $sampleMatch = TRUE;
      $minSamples = min(count($beforeTableData['sample_rows']), count($afterTableData['sample_rows']));
      for ($i = 0; $i < $minSamples; $i++) {
        if ($beforeTableData['sample_rows'][$i] !== $afterTableData['sample_rows'][$i]) {
          $sampleMatch = FALSE;
          break;
        }
      }
      if (!$sampleMatch) {
        $issues[] = [
          'table' => $tableName,
          'issue' => 'sample_data_mismatch',
        ];
        echo " ⚠️ Sample data differs!\n";
      }
      else {
        echo " ✓ Sample data matches\n";
      }
    }

    // The table is clean if this iteration did not add any issue.
    if (count($issues) === $issuesBeforeTable) {
      $verified[] = $tableName;
    }
    else {
      $tablesWithIssues[$tableName] = TRUE;
    }
    echo "\n";
  }

  // Save verification report. 'tables_with_issues' counts distinct tables;
  // 'issue_count' is the number of individual findings.
  $report = [
    'verification_timestamp' => date('Y-m-d H:i:s'),
    'tables_verified' => count($verified),
    'tables_with_issues' => count($tablesWithIssues),
    'issue_count' => count($issues),
    'verified_tables' => $verified,
    'issues' => $issues,
  ];
  $reportFile = $outputDir . '/verification-report.json';
  $reportJson = json_encode($report, JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE);
  if ($reportJson === FALSE || file_put_contents($reportFile, $reportJson) === FALSE) {
    echo "WARNING: Could not write report to: {$reportFile}\n";
  }

  // Print summary.
  echo "\n" . str_repeat('=', 60) . "\n";
  echo "VERIFICATION SUMMARY\n";
  echo str_repeat('=', 60) . "\n";
  echo "Tables verified: " . count($verified) . "\n";
  echo "Tables with issues: " . count($tablesWithIssues) . "\n";
  if (!empty($issues)) {
    echo "\n⚠️ ISSUES FOUND:\n";
    foreach ($issues as $issue) {
      echo " - {$issue['table']}: {$issue['issue']}\n";
      if (isset($issue['difference'])) {
        echo " Difference: {$issue['difference']} rows\n";
      }
    }
    echo "\nFull report: {$reportFile}\n";
    return FALSE;
  }

  echo "\n✓ All tables verified successfully! No data loss detected.\n";
  echo "Report: {$reportFile}\n";
  return TRUE;
}
// Main execution: dispatch on the first command line argument.
$command = $argv[1] ?? 'help';
switch ($command) {
  case 'export':
    exportAllTables($connection, $outputDir);
    break;

  case 'verify':
    // Turn the verification result into the process exit status. Without
    // this the script always exits 0 and the calling shell script cannot
    // tell a failed verification from a successful one.
    exit(verifyDataIntegrity($connection, $outputDir) ? 0 : 1);

  default:
    echo "Usage:\n";
    echo " php test-wisski-data-integrity.php export # Export data before update\n";
    echo " php test-wisski-data-integrity.php verify # Verify data after update\n";
    break;
}

75
test-wisski-update.sh Executable file
View file

@ -0,0 +1,75 @@
#!/bin/bash
# Script to test WissKI table updates with data integrity checks.
# This script exports data before update, runs the update, and verifies data integrity.
#
# Exit status: 0 if verification succeeded, otherwise the exit status of the
# verification step (or 1 if the export or the update failed).

# Abort on any unhandled command failure.
set -e

# Run from the directory containing this script, where the PHP script lives.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
OUTPUT_DIR="$SCRIPT_DIR/wisski-data-integrity-test"

echo "=========================================="
echo "WissKI Table Update Data Integrity Test"
echo "=========================================="
echo ""

# Step 1: Export data before update.
echo "Step 1: Exporting data BEFORE update..."
echo "----------------------------------------"
php test-wisski-data-integrity.php export
if [ ! -f "$OUTPUT_DIR/before-update-summary.json" ]; then
    echo "ERROR: Failed to export data before update!"
    exit 1
fi
echo ""
echo "✓ Data export complete. Backup saved to: $OUTPUT_DIR"
echo ""
read -r -p "Press Enter to continue with the update, or Ctrl+C to cancel..."

# Step 2: Run Drupal updates.
echo ""
echo "Step 2: Running Drupal updates..."
echo "----------------------------------------"
# Check if drush is available.
if command -v drush &> /dev/null; then
    echo "Running: drush updatedb -y"
    # Report the failure explicitly instead of letting 'set -e' end the
    # script without any message.
    if ! drush updatedb -y; then
        echo "ERROR: drush updatedb failed!"
        exit 1
    fi
else
    echo "Drush not found. Please run Drupal updates manually:"
    echo " drush updatedb -y"
    echo ""
    read -r -p "Press Enter after you have run the updates, or Ctrl+C to cancel..."
fi

# Step 3: Verify data integrity.
echo ""
echo "Step 3: Verifying data integrity AFTER update..."
echo "----------------------------------------"
# Capture the exit status with '||': under 'set -e' a plain failing command
# would terminate the script before the status could be inspected.
VERIFICATION_EXIT=0
php test-wisski-data-integrity.php verify || VERIFICATION_EXIT=$?
echo ""
if [ "$VERIFICATION_EXIT" -eq 0 ]; then
    echo "=========================================="
    echo "✓ SUCCESS: All data verified successfully!"
    echo "=========================================="
else
    echo "=========================================="
    echo "⚠️ WARNING: Issues detected during verification!"
    echo "Check the verification report for details."
    echo "=========================================="
fi
echo ""
echo "Test results saved to: $OUTPUT_DIR"
echo ""
exit "$VERIFICATION_EXIT"