15.3. Testing and Debugging the Project#
15.3.1. Testing Strategy#
A production system requires comprehensive testing at multiple levels.
15.3.1.1. Unit Testing#
Test individual functions in isolation with mock data.
Test Framework:
# test-lib-functions.sh
source ../lib/logging.sh
source ../lib/arrays.sh
test_array_contains() {
  # Exercise array_contains for both a hit and a miss.
  local -a fruits=("apple" "banana" "cherry")
  # Test: element exists
  local mark="✗"
  if array_contains fruits "banana"; then
    mark="✓"
  fi
  echo "$mark array_contains: element exists"
  # Test: element not exists
  mark="✗"
  if ! array_contains fruits "grape"; then
    mark="✓"
  fi
  echo "$mark array_contains: element not exists"
}
test_parse_metrics() {
  # extract_cpu_usage should report user+system time: 85 + 10 = 95 here.
  local sample="user:85 system:10 idle:5"
  local want=95
  local got
  got=$(extract_cpu_usage "$sample")
  if [[ "$got" -eq "$want" ]]; then
    echo "✓ parse_metrics: CPU extraction"
  else
    echo "✗ parse_metrics: got $got, expected $want"
  fi
}
run_all_tests() {
  # Execute every unit test in sequence, regardless of individual outcomes.
  local t
  for t in test_array_contains test_parse_metrics; do
    "$t"
  done
  # ... more tests
}
Test Categories:
String manipulation (parsing, formatting)
Array operations (sorting, filtering)
Database operations (insert, query, update)
Alert rules (threshold evaluation)
File handling (permissions, contents)
15.3.1.2. Integration Testing#
Test components working together with realistic data.
Setup Mocks:
setup_mock_database() {
  # Seed a throwaway SQLite database: apply the schema, then insert one
  # sample row per metric table.
  local db="/tmp/test-metrics.db"
  sqlite3 "$db" < schema.sql
  # Insert sample data
  sqlite3 "$db" <<EOF
INSERT INTO cpu_metrics VALUES (1234567890, 75.5);
INSERT INTO memory_metrics VALUES (1234567890, 88.2);
INSERT INTO disk_metrics VALUES (1234567890, '/', 92.1);
EOF
}
setup_mock_logs() {
  # Write a fixed auth.log fixture: two failed logins and one success.
  # Quoted 'EOF' keeps the content literal (no expansion).
  local log_dir="/tmp/mock-logs"
  mkdir -p "$log_dir"
  cat > "$log_dir/auth.log" << 'EOF'
Dec 19 10:15:23 host sshd[1234]: Failed password for admin from 192.168.1.100
Dec 19 10:15:30 host sshd[1235]: Failed password for admin from 192.168.1.100
Dec 19 10:15:45 host sshd[1236]: Accepted password for user from 192.168.1.101
EOF
}
test_integration_collection_to_alerting() {
  # End-to-end: seed DB → collect metrics → evaluate alerts → expect a
  # WARNING row (the 88.2% memory sample should trip the threshold).
  local db="/tmp/test-metrics.db"
  setup_mock_database
  # Run metrics collector
  ./src/metrics-collector.sh --db "$db"
  # Run alert engine
  ./src/alert-engine.sh --db "$db"
  # Check if alert was triggered
  local alert_count
  alert_count=$(sqlite3 "$db" "SELECT COUNT(*) FROM alerts WHERE level='WARNING'")
  if [[ "$alert_count" -gt 0 ]]; then
    echo "✓ Integration: Memory alert triggered for 88.2% usage"
  else
    echo "✗ Integration: Expected alert not triggered"
  fi
}
15.3.1.3. System Testing#
Test against real system data and dependencies.
Environment Setup:
setup_test_environment() {
  # Sandbox the system: point every component at /tmp paths and lower the
  # CPU threshold so alerts fire easily during tests.
  export TEST_MODE=1 \
         CONFIG_PATH=/tmp/test-config \
         LOG_PATH=/tmp/test-logs \
         DB_PATH=/tmp/test-data.db
  mkdir -p "$CONFIG_PATH" "$LOG_PATH"
  # Copy default configs, then relax the CPU threshold for testing
  cp config/* "$CONFIG_PATH"
  sed -i 's/CPU_THRESHOLD=85/CPU_THRESHOLD=50/' \
    "$CONFIG_PATH/thresholds.conf"
}
test_with_real_system_metrics() {
  # Collect metrics from the real host and verify rows landed in the DB.
  setup_test_environment
  # Run collector against real system
  ./src/metrics-collector.sh
  # Verify database has data
  local row_count
  row_count=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM cpu_metrics")
  if [[ "$row_count" -gt 0 ]]; then
    echo "✓ System test: Metrics collected successfully"
  else
    # BUG FIX: the original had no else branch, so a collection failure
    # produced no output and the test silently "passed".
    echo "✗ System test: No metrics collected"
    return 1
  fi
}
test_alert_threshold_triggers() {
  # Drive CPU load with stress-ng, collect metrics repeatedly, then expect
  # a CRITICAL alert row in the database.
  if ! command -v stress-ng &>/dev/null; then
    # Fail loudly instead of backgrounding a nonexistent command.
    echo "✗ System test: stress-ng not installed"
    return 1
  fi
  # Artificially create high load
  stress-ng --cpu 4 --timeout 30s &
  local stress_pid=$!
  # Run collector multiple times while the load is high
  local i
  for i in {1..5}; do
    sleep 5
    ./src/metrics-collector.sh
  done
  # BUG FIX: the original never reaped the background job, so stress-ng
  # could outlive the test as an orphan. Wait for it to finish.
  wait "$stress_pid" 2>/dev/null
  # Verify alert was triggered
  local critical_alerts
  critical_alerts=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM alerts WHERE level='CRITICAL'")
  if [[ "$critical_alerts" -gt 0 ]]; then
    echo "✓ System test: CPU alert triggered under load"
  else
    echo "✗ System test: Expected CPU alert not triggered"
  fi
}
15.3.2. Debugging Tools and Techniques#
15.3.2.1. Debug Mode#
Enable verbose output to trace execution:
# Run with debug flag
./src/metrics-collector.sh --debug
# Inside scripts, use:
set -x # Trace mode: print each command (after expansion) as it executes
set -v # Verbose mode: echo each script line as it is read, before expansion
15.3.2.2. Logging Strategy#
Implement structured logging with levels:
log() {
  # Append "[timestamp] [LEVEL] message" to $LOG_FILE; when DEBUG_MODE=1,
  # mirror the same line to stderr so it is visible on the console.
  local level="$1"
  local msg="$2"
  local ts
  ts=$(date '+%Y-%m-%d %H:%M:%S')
  local entry="[$ts] [$level] $msg"
  echo "$entry" >> "$LOG_FILE"
  if [[ "$DEBUG_MODE" == "1" ]]; then
    echo "$entry" >&2
  fi
}
# Level helpers around log().
# BUG FIX: the original `[[ … ]] && log …` form made log_debug return 1
# whenever DEBUG_MODE != 1, which aborts any caller running under `set -e`.
# The guard now returns 0 explicitly when debug output is suppressed.
log_debug()   { [[ "$DEBUG_MODE" == "1" ]] || return 0; log "DEBUG" "$@"; }
log_info()    { log "INFO" "$@"; }
log_warning() { log "WARN" "$@"; }
log_error()   { log "ERROR" "$@"; }
15.3.2.3. Common Issues and Solutions#
Issue 1: Database Locks
# Problem: "database is locked" errors
# Solution: Set timeout and use WAL mode
sqlite3 "$DB_PATH" "PRAGMA journal_mode=WAL;"
sqlite3 "$DB_PATH" "PRAGMA busy_timeout=5000;"
Issue 2: Process Doesn’t Start
# Debug: Check prerequisites
check_dependencies() {
  # Verify required external commands exist; log and fail on the first miss.
  # Generalized: commands may be passed as arguments; with no arguments the
  # project's core dependencies are checked (backward compatible).
  local -a deps=("$@")
  (( ${#deps[@]} )) || deps=("sqlite3" "curl" "awk")
  local cmd  # was an implicit global in the original
  for cmd in "${deps[@]}"; do
    if ! command -v "$cmd" &> /dev/null; then
      log_error "Missing required command: $cmd"
      return 1
    fi
  done
}
# Run before main logic
check_dependencies || exit 1
Issue 3: Unexpected Alert Triggers
# Debug: Log the values causing alert
debug_alert_evaluation() {
  # Log both inputs and the outcome of the threshold comparison.
  # bc -l handles the float comparison that bash arithmetic cannot.
  local usage="$1"
  local limit="$2"
  log_debug "CPU check: usage=$usage, threshold=$limit"
  local met
  met=$(echo "$usage > $limit" | bc -l)
  if (( met )); then
    log_debug "Alert condition MET"
  else
    log_debug "Alert condition NOT met"
  fi
}
15.3.2.4. Validation Checklist#
Before deploying to production, verify:
validate_system() {
  # Pre-deployment smoke checks. Prints one status line per check plus a
  # summary; returns 0 only when every critical check passed.
  local ok=0
  local bad=0
  # Check 1: Database initialization
  if sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM sqlite_master" &>/dev/null; then
    echo "✓ Database initialized"
    (( ok += 1 ))
  else
    echo "✗ Database not initialized"
    (( bad += 1 ))
  fi
  # Check 2: Configuration files readable
  if [[ -r "$CONFIG_PATH/monitoring-system.conf" ]]; then
    echo "✓ Configuration accessible"
    (( ok += 1 ))
  else
    echo "✗ Configuration not accessible"
    (( bad += 1 ))
  fi
  # Check 3: Required directories writable
  if [[ -w "$LOG_PATH" ]]; then
    echo "✓ Log directory writable"
    (( ok += 1 ))
  else
    echo "✗ Log directory not writable"
    (( bad += 1 ))
  fi
  # Check 4: External services reachable. A failure here is informational
  # only and deliberately does not count toward the failure total.
  if curl -s -m 5 "https://api.slack.com/api/api.test" >/dev/null 2>&1; then
    echo "✓ Slack API reachable"
    (( ok += 1 ))
  else
    echo "✗ Slack API unreachable (non-critical)"
  fi
  echo "Validation: $ok passed, $bad failed"
  [[ "$bad" -eq 0 ]]
}
15.3.3. Continuous Testing#
Automated Test Suite:
#!/bin/bash
# run-tests.sh - Execute all tests
run_unit_tests() {
  # Unit level: library function tests.
  echo "Running unit tests..."
  bash tests/test-lib-functions.sh
}

run_integration_tests() {
  # Integration level: components wired together.
  echo "Running integration tests..."
  bash tests/integration-tests.sh
}

run_system_tests() {
  # System level: against the real host.
  echo "Running system tests..."
  bash tests/system-tests.sh
}

run_all() {
  # Run each suite in order, stopping at the first failure.
  local suite
  for suite in run_unit_tests run_integration_tests run_system_tests; do
    "$suite" || return 1
  done
  echo "All tests passed! ✓"
}

run_all "$@"
Run tests on every commit:
# .git/hooks/pre-commit
#!/bin/bash
bash tests/run-tests.sh || exit 1
15.3.4. Debugging in Production#
When issues arise in production, use these techniques:
15.3.4.1. Enable Debug Logging#
# In your main script:
if [[ "${DEBUG_MODE:-0}" == "1" ]]; then
set -x # Print each command
fi
# Run with debug enabled:
DEBUG_MODE=1 bash script.sh
# Output will show:
# + source lib/logging.sh
# + init_database
# + sqlite3 /var/lib/myapp/metrics.db
# [CREATE TABLE...]
15.3.4.2. Trace Execution Flow#
# Add entry/exit logging to functions
my_function() {
  # Template: wrap a function body with entry/exit breadcrumbs for tracing.
  local self="${FUNCNAME[0]}"
  log_debug "→ Entering $self"
  # Function logic
  local rc=$?   # status of the last command of the body above
  log_debug "← Exiting $self (exit code: $rc)"
  return "$rc"
}
# This creates a breadcrumb trail showing execution order
15.3.4.3. Check State at Each Step#
# After each operation, verify state.
# FIX: "$DB_PATH" is now quoted — the original's unquoted expansion breaks
# on paths with spaces or glob characters (ShellCheck SC2086).
log_debug "Database file: $(ls -lh "$DB_PATH")"
log_debug "Recent alerts: $(sqlite3 "$DB_PATH" 'SELECT COUNT(*) FROM alerts;')"
log_debug "Disk usage: $(df -h / | tail -1)"
15.3.4.4. Use Assertion Functions#
assert() {
  # Run the given command with its arguments; on failure, log the full
  # command line and return 1 so the caller can react.
  if ! "$@"; then
    # FIX: "$*" joins all arguments into one word for the message; the
    # original's "$@" inside a string splits into multiple log_error
    # arguments, dropping everything after the first (ShellCheck SC2145).
    log_error "Assertion failed: $*"
    return 1
  fi
}
# Usage: pass a plain command and its arguments.
# FIX: `[[` is shell *syntax*, not a command, so it cannot be forwarded
# through "$@" — the original `assert [[ -f … ]]` always failed with
# "[[: command not found". Use the `test` builtin instead.
assert test -f "$CONFIG_FILE" || { echo "Config missing"; exit 1; }
assert command -v sqlite3 &>/dev/null || { echo "sqlite3 required"; exit 1; }
15.3.5. Performance Profiling#
Measure how long operations take:
#!/bin/bash
# Measure operation timing
time_operation() {
  # Run a command, log how long it took in milliseconds, and forward the
  # command's exit status to the caller.
  local label="$1"
  shift
  local t0 t1 rc
  t0=$(date +%s%N)   # nanoseconds since epoch (GNU date)
  "$@"
  rc=$?
  t1=$(date +%s%N)
  log_info "$label completed in $(( (t1 - t0) / 1000000 ))ms"
  return "$rc"
}
# Usage:
time_operation "Metrics Collection" ./src/metrics-collector.sh
time_operation "Alert Evaluation" ./src/alert-engine.sh
# Output:
# Metrics Collection completed in 245ms
# Alert Evaluation completed in 127ms
15.3.6. Pre-Deployment Checklist#
Before deploying to production:
#!/bin/bash
# pre-deployment-check.sh
check_prerequisites() {
  # Pre-deployment gate: verify interpreter version, required commands,
  # configuration permissions, and database initialization.
  echo "Checking prerequisites..."
  local checks_passed=0
  local checks_failed=0
  # Check Bash version (4.0+ needed for associative arrays etc.)
  if (( BASH_VERSINFO[0] >= 4 )); then
    # FIX: print the full version; ${BASH_VERSION%%.*} showed only the
    # major number (e.g. "5") under a "Bash version" label.
    echo "✓ Bash version: $BASH_VERSION"
    ((checks_passed++))
  else
    echo "✗ Bash 4.0+ required (you have: $BASH_VERSION)"
    ((checks_failed++))
  fi
  # Check required commands
  local cmd  # was an implicit global in the original
  for cmd in sqlite3 curl awk sed; do
    if command -v "$cmd" &>/dev/null; then
      echo "✓ Command available: $cmd"
      ((checks_passed++))
    else
      echo "✗ Command not found: $cmd"
      ((checks_failed++))
    fi
  done
  # Check file permissions: config readable and its directory writable
  if [[ -r "$CONFIG_FILE" ]] && [[ -w "${CONFIG_FILE%/*}" ]]; then
    echo "✓ Configuration accessible"
    ((checks_passed++))
  else
    echo "✗ Configuration not accessible"
    ((checks_failed++))
  fi
  # Check database initialization
  if sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM sqlite_master;" &>/dev/null; then
    echo "✓ Database initialized"
    ((checks_passed++))
  else
    echo "✗ Database not initialized"
    ((checks_failed++))
  fi
  echo
  echo "Results: $checks_passed passed, $checks_failed failed"
  # Overall status: success only if nothing failed
  [[ "$checks_failed" -eq 0 ]]
}
check_prerequisites
#!/bin/bash
# Example: Running and interpreting tests
# Test execution with verbose output
run_tests_verbose() {
  # Run the validation suite, mirroring output to a results file via tee.
  echo "Running tests with verbose output..."
  bash tests/test-validation.sh 2>&1 | tee /tmp/test-results.txt
  # BUG FIX: $? after `cmd | tee` is tee's status (effectively always 0),
  # so the original reported PASSED even when the suite failed.
  # PIPESTATUS[0] is the test script's real exit status; it must be read
  # immediately, before any other command overwrites it.
  local test_result=${PIPESTATUS[0]}
  if [[ $test_result -eq 0 ]]; then
    echo "All tests PASSED ✓"
  else
    echo "Some tests FAILED ✗"
    echo "Review test output above"
  fi
  return $test_result
}
# Parse test results
parse_test_results() {
  # Summarize a test-output file: lines starting with ✓ are passes,
  # lines starting with ✗ are failures.
  local results_file="$1"
  local passed failed total
  passed=$(grep -c "^✓" "$results_file")   # grep -c instead of grep | wc -l
  failed=$(grep -c "^✗" "$results_file")
  total=$((passed + failed))
  echo "Test Summary:"
  echo " Passed: $passed"
  echo " Failed: $failed"
  echo " Total: $total"
  if (( total > 0 )); then
    # Pass counts via -v instead of interpolating shell variables into the
    # awk program text.
    echo " Success Rate: $(awk -v p="$passed" -v t="$total" 'BEGIN {printf "%.1f%%", (p / t) * 100}')"
  else
    # BUG FIX: the original divided by zero inside awk (fatal error) when
    # the results file contained no test lines.
    echo " Success Rate: n/a"
  fi
}
# Run and analyze
run_tests_verbose
parse_test_results /tmp/test-results.txt
(Note: if you paste one of these shell scripts into a Python interpreter or notebook cell, Python will reject it at the first shell-specific line with an error such as `SyntaxError: invalid syntax`. Run the examples with `bash`, e.g. `bash run-tests.sh`, not with a Python kernel.)
15.3.7. Writing Effective Tests#
15.3.7.1. Unit Test Example#
#!/bin/bash
# tests/test-validation.sh
source lib/validation.sh
source lib/logging.sh
LOG_FILE="/tmp/test.log"
# Test counter
TESTS_PASSED=0
TESTS_FAILED=0
# Test helper
assert_success() {
  # Evaluate a command string; a zero exit status counts as a pass,
  # anything else as a failure. Updates the global counters.
  local name="$1"
  local cmd="$2"
  local mark="✗"
  if eval "$cmd"; then
    mark="✓"
    ((TESTS_PASSED++))
  else
    ((TESTS_FAILED++))
  fi
  echo "$mark $name"
}
assert_failure() {
  # Evaluate a command string; a NON-zero exit status counts as a pass
  # (the command is expected to fail). Updates the global counters.
  local name="$1"
  local cmd="$2"
  local mark="✗"
  if ! eval "$cmd"; then
    mark="✓"
    ((TESTS_PASSED++))
  else
    ((TESTS_FAILED++))
  fi
  echo "$mark $name"
}
# Run tests
test_validate_not_empty() {
  # Empty vs non-empty values.
  assert_failure "Empty string fails" "validate_not_empty 'VAR' ''"
  assert_success "Non-empty string passes" "validate_not_empty 'VAR' 'value'"
}

test_validate_integer() {
  # Accept whole numbers only — reject words and floats.
  assert_success "Integer passes" "validate_integer 'NUM' '42'"
  assert_failure "Non-integer fails" "validate_integer 'NUM' 'abc'"
  assert_failure "Float fails" "validate_integer 'NUM' '3.14'"
}

test_validate_directory() {
  # Directory existence checks.
  assert_success "Existing dir passes" "validate_directory '/tmp'"
  assert_failure "Nonexistent dir fails" "validate_directory '/nonexistent'"
}

main() {
  # Run every test group, then summarize; exit status reflects failures.
  echo "Running validation tests..."
  local group
  for group in test_validate_not_empty test_validate_integer test_validate_directory; do
    "$group"
  done
  echo
  echo "Results: $TESTS_PASSED passed, $TESTS_FAILED failed"
  [[ "$TESTS_FAILED" -eq 0 ]]
}

main "$@"
15.3.7.2. Integration Test Example#
#!/bin/bash
# tests/integration-test.sh
# Test complete workflow
test_complete_workflow() {
  # End-to-end smoke test: collect metrics → verify DB → alerts → report.
  echo "Testing complete workflow..."
  # Setup: isolated scratch area ($$ keeps parallel runs from colliding)
  local test_dir="/tmp/integration-test-$$"
  mkdir -p "$test_dir"
  export LOG_PATH="$test_dir/logs"
  export DATA_PATH="$test_dir/data"
  mkdir -p "$LOG_PATH" "$DATA_PATH"
  local rc=0
  # Run collectors (use `if ! cmd` instead of inspecting $? afterwards)
  if ! ./src/metrics-collector.sh; then
    echo "✗ Metrics collector failed"
    rc=1
  elif [[ ! -f "$DATA_PATH/metrics.db" ]]; then
    # Verify data was collected
    echo "✗ Metrics database not created"
    rc=1
  else
    echo "✓ Metrics database created"
    # Run alert engine, then generate report
    if ! ./src/alert-engine.sh; then
      echo "✗ Alert engine failed"
      rc=1
    elif ! ./src/report-generator.sh; then
      echo "✗ Report generator failed"
      rc=1
    fi
  fi
  # BUG FIX: the original only cleaned up on the success path, leaking
  # $test_dir on every failure. Clean up unconditionally.
  rm -rf -- "$test_dir"
  if (( rc == 0 )); then
    echo "✓ Complete workflow test passed"
  fi
  return $rc
}
test_complete_workflow