#!/bin/bash
#
# Comprehensive diagnostics report.
#
# Inspects the Docker services this script knows about (Traefik, GitLab,
# MariaDB, PostgreSQL, Nextcloud, Mailcow, ...) and prints a colourised
# health report followed by a summary of failed checks and warnings.
#
# Reads:
#   /var/deploy/core/.env             (sourced; expected to define DOMAIN)
#   /var/deploy/mailcow/mailcow.conf  (MAILCOW_HOSTNAME and feature flags)
#
# Exit status: 1 when Docker or the Docker Compose plugin is missing,
# otherwise 0 once the report has been printed.
#
# `set -e` is intentionally NOT enabled: most probes are expected to fail on
# an unhealthy system and the report must keep going.

# Colors for output (escape sequences are expanded by `echo -e`).
green='\033[0;32m'
yellow='\033[1;33m'
red='\033[0;31m'
blue='\033[0;34m'
noColor='\033[0m'

# Mailcow installation directory (holds mailcow.conf and the compose project).
mailcowDir="/var/deploy/mailcow"

# Counters consumed by printSummary.
failedChecks=0
warnedChecks=0

# Print a section header.
# Arguments: $1 - section title
printSection() {
  echo -e "\n${blue}--- $1 ---${noColor}"
}

# Print a pass/fail line for a check and record failures.
# Arguments: $1 - status; "0", "true", "OK" and "healthy" count as success
#            $2 - message to print
# Returns:   0 on success status, 1 otherwise
checkStatus() {
  local status="$1"
  local message="$2"

  case "$status" in
    0 | true | OK | healthy)
      echo -e "${green}✓${noColor} $message"
      return 0
      ;;
  esac

  failedChecks=$((failedChecks + 1))
  echo -e "${red}✗${noColor} $message"
  return 1
}

# Print a warning line and record it.
# Arguments: $1 - message to print
warnStatus() {
  local message="$1"
  warnedChecks=$((warnedChecks + 1))
  echo -e "${yellow}⚠${noColor} $message"
}

# Print a red failure message and record it as a failed check.
# Arguments: $1 - message to print
failMessage() {
  failedChecks=$((failedChecks + 1))
  echo -e "${red}$1${noColor}"
}

# Run `docker compose` inside the Mailcow project directory.
# Runs in a subshell so the caller's working directory is untouched; a missing
# directory is a silent failure (non-zero status, no output).
# Arguments: passed through to `docker compose`
mailcowCompose() {
  (cd "$mailcowDir" 2>/dev/null && docker compose "$@")
}

# Print one "service<TAB>state" line per Mailcow container.
# `ps -a` is required because plain `ps` lists running containers only, which
# would make any "running vs. total" comparison meaningless. The jq filter
# accepts both output shapes of `--format json`: a single JSON array or one
# JSON object per line.
mailcowServiceStates() {
  mailcowCompose ps -a --format json 2>/dev/null \
    | jq -r 'if type == "array" then .[] else . end | [.Service, .State] | @tsv' 2>/dev/null
}

# Print the value of a KEY=value setting from mailcow.conf (first match only).
# Prints nothing when the file or the key is missing.
# Arguments: $1 - setting name
readMailcowSetting() {
  grep -m1 "^${1}=" "${mailcowDir}/mailcow.conf" 2>/dev/null | cut -d= -f2-
}

# Report whether a container matching the given name is listed by `docker ps`.
# NOTE: this is a substring match against the whole `docker ps` listing, so
# e.g. "nextcloud" also matches a "nextcloud-redis" container.
# Arguments: $1 - service name
# Returns:   0 if found, 1 otherwise
checkService() {
  local service="$1"

  echo -e "${yellow}Checking if ${service} is running...${noColor}"
  # -F / --: treat the name as a literal string, never as a regex or option.
  if docker ps | grep -qF -- "$service"; then
    echo -e "${green}${service} is running.${noColor}"
    return 0
  fi

  failMessage "${service} is not running."
  return 1
}

# Probe a TCP port with nc (5 second timeout).
# Arguments: $1 - label used in messages
#            $2 - port
#            $3 - host (default: localhost)
# Returns:   0 if the connection succeeds, 1 otherwise
checkConnectivity() {
  local service="$1"
  local port="$2"
  local host="${3:-localhost}"

  echo -e "${yellow}Checking connectivity to ${service} on ${host}:${port}...${noColor}"
  if nc -z -v -w5 "$host" "$port" 2>/dev/null; then
    echo -e "${green}Connection to ${service} on ${host}:${port} successful.${noColor}"
    return 0
  fi

  failMessage "Cannot connect to ${service} on ${host}:${port}."
  return 1
}

# Source the core .env file and make sure DOMAIN is set.
# Globals:  DOMAIN (written; falls back to example.com)
# Returns:  0 if the .env file exists, 1 otherwise
loadCoreDomain() {
  local coreEnvFile="/var/deploy/core/.env"

  if [[ ! -f "$coreEnvFile" ]]; then
    failMessage "Core .env file not found."
    DOMAIN="example.com"
    return 1
  fi

  # shellcheck disable=SC1090
  source "$coreEnvFile"
  # The file may exist without defining DOMAIN; later checks interpolate it.
  if [[ -z "${DOMAIN:-}" ]]; then
    warnStatus "DOMAIN is not set in ${coreEnvFile}, falling back to example.com"
    DOMAIN="example.com"
  fi
  echo -e "${yellow}Domain configuration:${noColor} ${DOMAIN}"
  return 0
}

# Read MAILCOW_HOSTNAME from mailcow.conf.
# Globals:  mailcowHostname (written; empty when mailcow.conf is missing)
# Returns:  0 if mailcow.conf exists, 1 otherwise
loadMailcowHostname() {
  if [[ -f "${mailcowDir}/mailcow.conf" ]]; then
    mailcowHostname=$(readMailcowSetting "MAILCOW_HOSTNAME")
    return 0
  fi

  mailcowHostname=""
  return 1
}

# Check the Traefik container, its published ports and its ACME storage.
# Returns: 1 if the container is not running, 0 otherwise
checkTraefik() {
  echo -e "${yellow}Checking Traefik configuration...${noColor}"
  checkService "traefik" || return 1

  # checkConnectivity already records the failure; the extra line only adds
  # a Traefik-specific hint.
  checkConnectivity "Traefik HTTP" 80 \
    || echo -e "${red}Traefik HTTP port not accessible.${noColor}"
  checkConnectivity "Traefik HTTPS" 443 \
    || echo -e "${red}Traefik HTTPS port not accessible.${noColor}"
  checkConnectivity "Traefik SSH" 2424 \
    || echo -e "${red}Traefik SSH port not accessible.${noColor}"

  echo -e "${yellow}Checking Traefik certificates...${noColor}"
  if docker exec traefik ls -la /certificates/acme.json >/dev/null 2>&1; then
    echo -e "${green}Traefik certificates found.${noColor}"
  else
    failMessage "Traefik certificates not found."
  fi
  return 0
}

# Check the GitLab container, two gitlab.rb settings and SSH reachability.
# Globals:  DOMAIN (read)
# Returns:  1 if the container is not running, 0 otherwise
checkGitlab() {
  echo -e "${yellow}Checking GitLab configuration...${noColor}"
  checkService "gitlab" || return 1

  if docker exec gitlab grep -q "external_url" /etc/gitlab/gitlab.rb; then
    echo -e "${green}GitLab external URL is configured.${noColor}"
  else
    failMessage "GitLab external URL is not configured."
  fi

  if docker exec gitlab grep -q "gitlab_shell_ssh_port" /etc/gitlab/gitlab.rb; then
    echo -e "${green}GitLab SSH port is configured.${noColor}"
  else
    failMessage "GitLab SSH port is not configured."
  fi

  echo -e "${yellow}Checking GitLab SSH connection...${noColor}"
  # BatchMode prevents password prompts; ConnectTimeout keeps an unreachable
  # host from stalling the whole report.
  if ssh -T -p 2424 \
    -o StrictHostKeyChecking=no \
    -o BatchMode=yes \
    -o ConnectTimeout=10 \
    "git@gitlab.${DOMAIN}" &>/dev/null; then
    echo -e "${green}GitLab SSH connection successful.${noColor}"
  else
    # Not counted as a failed check: this is the normal outcome on a machine
    # without a registered SSH key.
    echo -e "${red}GitLab SSH connection failed. This is expected if you haven't set up SSH keys yet.${noColor}"
    echo -e "${yellow}Try: ssh -vT git@gitlab.${DOMAIN} -p 2424${noColor}"
  fi
  return 0
}

# Check the database containers. Always returns 0.
checkDatabases() {
  echo -e "${yellow}Checking database services...${noColor}"

  if checkService "mariadb"; then
    echo -e "${green}MariaDB is running.${noColor}"
  else
    echo -e "${red}MariaDB is not running.${noColor}"
  fi

  if checkService "postgres"; then
    echo -e "${green}PostgreSQL is running.${noColor}"
  else
    echo -e "${red}PostgreSQL is not running.${noColor}"
  fi
  return 0
}

# Check the Nextcloud containers, `occ status`, Redis and DB collation hints.
# Returns: 1 on the first failed check, 0 otherwise
checkNextcloud() {
  echo -e "${yellow}Checking Nextcloud configuration...${noColor}"
  checkService "nextcloud" || return 1
  checkService "nextcloud-redis" || return 1
  checkService "nextcloud-reverse-proxy" || return 1

  echo -e "${yellow}Checking Nextcloud status...${noColor}"
  # Fetch `occ status` once and reuse it for every check below.
  local occStatus
  occStatus=$(docker exec nextcloud php /var/www/html/occ status 2>&1)

  if grep -q "installed: true" <<<"$occStatus"; then
    echo -e "${green}Nextcloud is installed and operational.${noColor}"
    grep -E "version|maintenance" <<<"$occStatus" | sed 's/^/ /'
  else
    failMessage "Nextcloud is not properly installed."
    return 1
  fi

  echo -e "${yellow}Checking Redis connectivity...${noColor}"
  if docker exec nextcloud-redis redis-cli ping 2>&1 | grep -q "PONG"; then
    echo -e "${green}Redis is responding.${noColor}"
  else
    failMessage "Redis is not responding."
    return 1
  fi

  echo -e "${yellow}Checking database collation...${noColor}"
  local collationCheck
  collationCheck=$(grep -i "collation" <<<"$occStatus")
  if [[ -n "$collationCheck" ]]; then
    echo -e "${yellow}Database collation version mismatch detected.${noColor}"
    echo -e "${yellow}Run: ./nextcloud-maintenance.sh collation${noColor}"
  else
    echo -e "${green}Database collation is up to date.${noColor}"
  fi
  return 0
}

# Run checkService for every known core service. Always returns 0.
checkAllServices() {
  echo -e "${yellow}Checking all services...${noColor}"

  local services=("traefik" "gitlab" "mariadb" "postgres" "adminer"
    "nextcloud" "onlyoffice" "openproject" "hedgedoc" "drupal")
  local service
  for service in "${services[@]}"; do
    checkService "$service"
  done
  return 0
}

# Section 1: Traefik and Mailcow container states.
checkMailcowServices() {
  printSection "1. Service Health Status"
  echo -e "${yellow}Checking Docker services...${noColor}"

  local traefikStatus="FAIL"
  if docker ps --filter "name=traefik" --format "{{.Status}}" | grep -q "Up"; then
    traefikStatus="OK"
  fi
  checkStatus "$traefikStatus" "Traefik is running"

  # Everything below parses `docker compose ps` JSON and therefore needs jq.
  if ! command -v jq >/dev/null 2>&1; then
    warnStatus "jq not found, skipping Mailcow service state checks"
    return 0
  fi

  # Query compose once and reuse the result for all checks.
  local -a stateLines=()
  mapfile -t stateLines < <(mailcowServiceStates)

  local totalMailcow=${#stateLines[@]}
  local mailcowServices=0
  local line
  for line in "${stateLines[@]}"; do
    if [[ "${line##*$'\t'}" == "running" ]]; then
      mailcowServices=$((mailcowServices + 1))
    fi
  done

  if ((totalMailcow > 0 && mailcowServices == totalMailcow)); then
    checkStatus "OK" "All Mailcow services running (${mailcowServices}/${totalMailcow})"
  else
    checkStatus "FAIL" "Some Mailcow services not running (${mailcowServices}/${totalMailcow})"
  fi

  local criticalServices=("nginx-mailcow" "postfix-mailcow" "dovecot-mailcow"
    "mysql-mailcow" "acme-mailcow" "watchdog-mailcow")
  local service
  for service in "${criticalServices[@]}"; do
    # A service counts as running if any of its containers is running.
    if printf '%s\n' "${stateLines[@]}" | grep -qxF -- "${service}"$'\t'"running"; then
      checkStatus "OK" "${service} is running"
    else
      checkStatus "FAIL" "${service} is not running"
    fi
  done
}

# Section 2: Traefik ACME setup, Mailcow certificate files and live HTTPS.
# Globals: mailcowHostname (read)
checkSslAndCerts() {
  printSection "2. SSL/TLS Configuration"
  echo -e "${yellow}Traefik SSL Configuration:${noColor}"

  # Arguments of PID 1 inside the container, one per line.
  local traefikCmdline
  traefikCmdline=$(docker exec traefik cat /proc/1/cmdline 2>/dev/null | tr '\0' '\n')

  local traefikHttpChallenge="FAIL"
  local traefikTlsChallenge="FAIL"
  if grep -q "httpchallenge" <<<"$traefikCmdline"; then
    traefikHttpChallenge="OK"
  fi
  if grep -q "tlschallenge" <<<"$traefikCmdline"; then
    traefikTlsChallenge="OK"
  fi
  checkStatus "$traefikHttpChallenge" "Traefik HTTP challenge configured"
  checkStatus "$traefikTlsChallenge" "Traefik TLS-ALPN-01 challenge configured"

  local acmeFile="FAIL"
  if docker exec traefik test -f /certificates/acme.json >/dev/null 2>&1; then
    acmeFile="OK"
  fi
  checkStatus "$acmeFile" "Traefik acme.json exists"

  # NOTE(review): assumes the certificate resolver is named "le" - confirm
  # against the Traefik configuration.
  local certCount
  certCount=$(docker exec traefik cat /certificates/acme.json 2>/dev/null \
    | jq -r '.le.Certificates | length' 2>/dev/null)
  # jq prints nothing (and still exits 0) on empty input, and is absent on
  # some hosts; normalise anything non-numeric to 0 before comparing.
  [[ "$certCount" =~ ^[0-9]+$ ]] || certCount=0
  if ((certCount > 0)); then
    checkStatus "OK" "Traefik has ${certCount} certificate(s) stored"
  else
    checkStatus "FAIL" "Traefik has no certificates"
  fi

  local certFile="${mailcowDir}/data/assets/ssl/cert.pem"
  local keyFile="${mailcowDir}/data/assets/ssl/key.pem"
  local mailcowCert="FAIL"
  local mailcowKey="FAIL"
  [[ -f "$certFile" ]] && mailcowCert="OK"
  [[ -f "$keyFile" ]] && mailcowKey="OK"
  checkStatus "$mailcowCert" "Mailcow certificate file exists"
  checkStatus "$mailcowKey" "Mailcow private key exists"

  if [[ -f "$certFile" ]]; then
    local certExpiry
    local certExpiryEpoch
    local currentEpoch
    local daysUntilExpiry

    certExpiry=$(openssl x509 -in "$certFile" -noout -enddate 2>/dev/null | cut -d= -f2)
    # An empty date must be rejected explicitly: `date -d ""` succeeds.
    if [[ -z "$certExpiry" ]] \
      || ! certExpiryEpoch=$(date -d "$certExpiry" +%s 2>/dev/null); then
      checkStatus "FAIL" "Could not determine certificate expiry date"
    else
      currentEpoch=$(date +%s)
      daysUntilExpiry=$(((certExpiryEpoch - currentEpoch) / 86400))
      # Compare epochs for "expired" so a certificate with less than a day
      # left is reported as expiring, not as already expired.
      if ((certExpiryEpoch <= currentEpoch)); then
        checkStatus "FAIL" "Certificate expired on ${certExpiry}"
      elif ((daysUntilExpiry > 30)); then
        checkStatus "OK" "Certificate valid for ${daysUntilExpiry} more days (expires: ${certExpiry})"
      else
        warnStatus "Certificate expires in ${daysUntilExpiry} days (expires: ${certExpiry})"
      fi
    fi
  fi

  if [[ -n "$mailcowHostname" ]]; then
    local httpsResponse="FAIL"
    local certChain="FAIL"

    # --max-time keeps an unreachable host from stalling the report.
    if curl -sI --max-time 10 "https://${mailcowHostname}" 2>&1 | head -1 | grep -q "HTTP"; then
      httpsResponse="OK"
    fi
    if openssl s_client -connect "${mailcowHostname}:443" \
      -servername "${mailcowHostname}" </dev/null 2>/dev/null \
      | openssl x509 -noout -issuer 2>/dev/null \
      | grep -q "Let's Encrypt"; then
      certChain="OK"
    fi

    checkStatus "$httpsResponse" "HTTPS connectivity to ${mailcowHostname}"
    checkStatus "$certChain" "Certificate issued by Let's Encrypt"
  else
    warnStatus "MAILCOW_HOSTNAME not found, skipping HTTPS checks"
  fi
}

# Section 3: mailcow.conf feature flags and the certdumper container.
checkMailcowConfig() {
  printSection "3. Mailcow Configuration"

  if [[ -f "${mailcowDir}/mailcow.conf" ]]; then
    checkStatus "OK" "mailcow.conf exists"

    local skipLe
    local useWatchdog
    local httpRedirect

    skipLe=$(readMailcowSetting "SKIP_LETS_ENCRYPT")
    if [[ "$skipLe" == "n" ]]; then
      checkStatus "OK" "Let's Encrypt enabled in mailcow"
    else
      warnStatus "Let's Encrypt disabled in mailcow (expected with Traefik)"
    fi

    useWatchdog=$(readMailcowSetting "USE_WATCHDOG")
    if [[ "$useWatchdog" == "y" ]]; then
      checkStatus "OK" "Watchdog enabled"
    else
      warnStatus "Watchdog disabled"
    fi

    httpRedirect=$(readMailcowSetting "HTTP_REDIRECT")
    if [[ "$httpRedirect" == "y" ]]; then
      checkStatus "OK" "HTTP to HTTPS redirect enabled"
    else
      warnStatus "HTTP to HTTPS redirect disabled"
    fi
  else
    checkStatus "FAIL" "mailcow.conf not found"
  fi

  if ! command -v jq >/dev/null 2>&1; then
    warnStatus "jq not found, skipping certdumper check"
  elif mailcowServiceStates | grep -qxF -- $'certdumper\trunning'; then
    checkStatus "OK" "Certdumper service running (syncs Traefik certs to mailcow)"
  else
    checkStatus "FAIL" "Certdumper service not running"
  fi
}

# Section 4: scan the last 50 acme-mailcow log lines for validation failures.
# Skipped when mailcow.conf sets SKIP_LETS_ENCRYPT=y.
checkAcmeLogs() {
  printSection "4. SSL Challenge Status"

  local skipLe
  skipLe=$(readMailcowSetting "SKIP_LETS_ENCRYPT")
  if [[ "$skipLe" == "y" ]]; then
    checkStatus "OK" "Mailcow ACME disabled (SKIP_LETS_ENCRYPT=y)"
    return 0
  fi

  # grep -c always prints a number (0 when nothing matches).
  local acmeErrors
  acmeErrors=$(mailcowCompose logs --tail 50 acme-mailcow 2>&1 \
    | grep -ci "HTTP validation failed")
  if ((acmeErrors > 0)); then
    warnStatus "Found ${acmeErrors} HTTP validation failures in mailcow ACME logs"
    warnStatus "Expected if autodiscover/autoconfig subdomains are handled by Traefik"
  else
    checkStatus "OK" "No HTTP validation failures in mailcow ACME logs"
  fi
}

# Print the closing summary based on the recorded failures and warnings.
# Globals: failedChecks, warnedChecks (read)
printSummary() {
  printSection "Summary"

  if ((failedChecks == 0)); then
    echo -e "${green}✓${noColor} Services: Most services are running"
    echo -e "${green}✓${noColor} SSL/TLS: Certificates are valid and properly configured"
    echo -e "${green}✓${noColor} Mailcow: Configuration appears correct"
  else
    echo -e "${red}✗${noColor} ${failedChecks} check(s) failed; review the report above"
  fi
  if ((warnedChecks > 0)); then
    echo -e "${yellow}⚠${noColor} ${warnedChecks} warning(s) reported"
  fi

  echo ""
  echo -e "${blue}Health check completed.${noColor}"
}

# Entry point: verify prerequisites, print system info, run every check.
main() {
  echo -e "${blue}========================================${noColor}"
  echo -e "${blue} Comprehensive Diagnostics Report${noColor}"
  echo -e "${blue}========================================${noColor}"

  echo -e "${yellow}Checking Docker and Docker Compose installation...${noColor}"
  if command -v docker >/dev/null 2>&1; then
    echo -e "${green}Docker is installed: $(docker --version)${noColor}"
  else
    echo -e "${red}Docker is not installed.${noColor}"
    exit 1
  fi

  if docker compose version >/dev/null 2>&1; then
    echo -e "${green}Docker Compose plugin is installed: $(docker compose version)${noColor}"
  else
    echo -e "${red}Docker Compose plugin is not installed.${noColor}"
    exit 1
  fi

  if ! command -v jq >/dev/null 2>&1; then
    warnStatus "jq not found, some checks will be skipped"
  fi

  echo -e "${yellow}Checking system resources...${noColor}"
  echo -e "${yellow}CPU:${noColor} $(grep -c '^processor' /proc/cpuinfo) cores"
  echo -e "${yellow}Memory:${noColor} $(free -h | awk '/Mem/ {print $2}')"
  echo -e "${yellow}Disk space:${noColor} $(df -h / | awk 'NR==2 {print $2}')"

  loadCoreDomain
  loadMailcowHostname

  printSection "Core Service Diagnostics"
  checkTraefik
  echo ""
  checkGitlab
  echo ""
  checkDatabases
  echo ""
  checkNextcloud
  echo ""
  checkAllServices
  echo ""

  checkMailcowServices
  checkSslAndCerts
  checkMailcowConfig
  checkAcmeLogs
  printSummary
}

main "$@"