15.3. Testing and Debugging the Project#
15.3.1. Testing Strategy#
A production system requires comprehensive testing at multiple levels.
15.3.1.1. Unit Testing#
Test individual functions in isolation with mock data.
Test Framework:
# test-lib-functions.sh
source ../lib/logging.sh
source ../lib/arrays.sh
test_array_contains() {
  # Exercise array_contains for both a hit and a miss.
  local -a fruits=("apple" "banana" "cherry")
  # Test: element exists
  local mark="✗"
  if array_contains fruits "banana"; then
    mark="✓"
  fi
  echo "$mark array_contains: element exists"
  # Test: element not exists
  mark="✗"
  if ! array_contains fruits "grape"; then
    mark="✓"
  fi
  echo "$mark array_contains: element not exists"
}
test_parse_metrics() {
  # extract_cpu_usage should report user+system time: 85 + 10 = 95 here.
  local sample="user:85 system:10 idle:5"
  local want=95
  local got
  got=$(extract_cpu_usage "$sample")
  if [[ "$got" -eq "$want" ]]; then
    echo "✓ parse_metrics: CPU extraction"
  else
    echo "✗ parse_metrics: got $got, expected $want"
  fi
}
run_all_tests() {
  # Execute every unit test in sequence, regardless of individual outcomes.
  local t
  for t in test_array_contains test_parse_metrics; do
    "$t"
  done
  # ... more tests
}
Test Categories:
String manipulation (parsing, formatting)
Array operations (sorting, filtering)
Database operations (insert, query, update)
Alert rules (threshold evaluation)
File handling (permissions, contents)
15.3.1.2. Integration Testing#
Test components working together with realistic data.
Setup Mocks:
setup_mock_database() {
  # Seed a throwaway SQLite database: apply the schema, then insert one
  # sample row per metric table.
  local db="/tmp/test-metrics.db"
  sqlite3 "$db" < schema.sql
  # Insert sample data
  sqlite3 "$db" <<EOF
INSERT INTO cpu_metrics VALUES (1234567890, 75.5);
INSERT INTO memory_metrics VALUES (1234567890, 88.2);
INSERT INTO disk_metrics VALUES (1234567890, '/', 92.1);
EOF
}
setup_mock_logs() {
  # Write a fixed auth.log fixture: two failed logins and one success.
  # Quoted 'EOF' keeps the content literal (no expansion).
  local log_dir="/tmp/mock-logs"
  mkdir -p "$log_dir"
  cat > "$log_dir/auth.log" << 'EOF'
Dec 19 10:15:23 host sshd[1234]: Failed password for admin from 192.168.1.100
Dec 19 10:15:30 host sshd[1235]: Failed password for admin from 192.168.1.100
Dec 19 10:15:45 host sshd[1236]: Accepted password for user from 192.168.1.101
EOF
}
test_integration_collection_to_alerting() {
  # End-to-end: seed DB → collect metrics → evaluate alerts → expect a
  # WARNING row (the 88.2% memory sample should trip the threshold).
  local db="/tmp/test-metrics.db"
  setup_mock_database
  # Run metrics collector
  ./src/metrics-collector.sh --db "$db"
  # Run alert engine
  ./src/alert-engine.sh --db "$db"
  # Check if alert was triggered
  local alert_count
  alert_count=$(sqlite3 "$db" "SELECT COUNT(*) FROM alerts WHERE level='WARNING'")
  if [[ "$alert_count" -gt 0 ]]; then
    echo "✓ Integration: Memory alert triggered for 88.2% usage"
  else
    echo "✗ Integration: Expected alert not triggered"
  fi
}
15.3.1.3. System Testing#
Test against real system data and dependencies.
Environment Setup:
setup_test_environment() {
  # Sandbox the system: point every component at /tmp paths and lower the
  # CPU threshold so alerts fire easily during tests.
  export TEST_MODE=1 \
         CONFIG_PATH=/tmp/test-config \
         LOG_PATH=/tmp/test-logs \
         DB_PATH=/tmp/test-data.db
  mkdir -p "$CONFIG_PATH" "$LOG_PATH"
  # Copy default configs, then relax the CPU threshold for testing
  cp config/* "$CONFIG_PATH"
  sed -i 's/CPU_THRESHOLD=85/CPU_THRESHOLD=50/' \
    "$CONFIG_PATH/thresholds.conf"
}
test_with_real_system_metrics() {
  # Collect metrics from the real host and verify rows landed in the DB.
  setup_test_environment
  # Run collector against real system
  ./src/metrics-collector.sh
  # Verify database has data
  local row_count
  row_count=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM cpu_metrics")
  if [[ "$row_count" -gt 0 ]]; then
    echo "✓ System test: Metrics collected successfully"
  else
    # BUG FIX: the original had no else branch, so a collection failure
    # produced no output and the test silently "passed".
    echo "✗ System test: No metrics collected"
    return 1
  fi
}
test_alert_threshold_triggers() {
  # Drive CPU load with stress-ng, collect metrics repeatedly, then expect
  # a CRITICAL alert row in the database.
  if ! command -v stress-ng &>/dev/null; then
    # Fail loudly instead of backgrounding a nonexistent command.
    echo "✗ System test: stress-ng not installed"
    return 1
  fi
  # Artificially create high load
  stress-ng --cpu 4 --timeout 30s &
  local stress_pid=$!
  # Run collector multiple times while the load is high
  local i
  for i in {1..5}; do
    sleep 5
    ./src/metrics-collector.sh
  done
  # BUG FIX: the original never reaped the background job, so stress-ng
  # could outlive the test as an orphan. Wait for it to finish.
  wait "$stress_pid" 2>/dev/null
  # Verify alert was triggered
  local critical_alerts
  critical_alerts=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM alerts WHERE level='CRITICAL'")
  if [[ "$critical_alerts" -gt 0 ]]; then
    echo "✓ System test: CPU alert triggered under load"
  else
    echo "✗ System test: Expected CPU alert not triggered"
  fi
}
15.3.2. Debugging Tools and Techniques#
15.3.2.1. Debug Mode#
Enable verbose output to trace execution:
# Run with debug flag
./src/metrics-collector.sh --debug
# Inside scripts, use:
set -x # Trace mode: print each command (after expansion) as it executes
set -v # Verbose mode: echo each script line as it is read, before expansion
15.3.2.2. Logging Strategy#
Implement structured logging with levels:
log() {
  # Append "[timestamp] [LEVEL] message" to $LOG_FILE; when DEBUG_MODE=1,
  # mirror the same line to stderr so it is visible on the console.
  local level="$1"
  local msg="$2"
  local ts
  ts=$(date '+%Y-%m-%d %H:%M:%S')
  local entry="[$ts] [$level] $msg"
  echo "$entry" >> "$LOG_FILE"
  if [[ "$DEBUG_MODE" == "1" ]]; then
    echo "$entry" >&2
  fi
}
# Level helpers around log().
# BUG FIX: the original `[[ … ]] && log …` form made log_debug return 1
# whenever DEBUG_MODE != 1, which aborts any caller running under `set -e`.
# The guard now returns 0 explicitly when debug output is suppressed.
log_debug()   { [[ "$DEBUG_MODE" == "1" ]] || return 0; log "DEBUG" "$@"; }
log_info()    { log "INFO" "$@"; }
log_warning() { log "WARN" "$@"; }
log_error()   { log "ERROR" "$@"; }
15.3.2.3. Common Issues and Solutions#
Issue 1: Database Locks
# Problem: "database is locked" errors
# Solution: Set timeout and use WAL mode
sqlite3 "$DB_PATH" "PRAGMA journal_mode=WAL;"
sqlite3 "$DB_PATH" "PRAGMA busy_timeout=5000;"
Issue 2: Process Doesn’t Start
# Debug: Check prerequisites
check_dependencies() {
  # Verify required external commands exist; log and fail on the first miss.
  # Generalized: commands may be passed as arguments; with no arguments the
  # project's core dependencies are checked (backward compatible).
  local -a deps=("$@")
  (( ${#deps[@]} )) || deps=("sqlite3" "curl" "awk")
  local cmd  # was an implicit global in the original
  for cmd in "${deps[@]}"; do
    if ! command -v "$cmd" &> /dev/null; then
      log_error "Missing required command: $cmd"
      return 1
    fi
  done
}
# Run before main logic
check_dependencies || exit 1
Issue 3: Unexpected Alert Triggers
# Debug: Log the values causing alert
debug_alert_evaluation() {
  # Log both inputs and the outcome of the threshold comparison.
  # bc -l handles the float comparison that bash arithmetic cannot.
  local usage="$1"
  local limit="$2"
  log_debug "CPU check: usage=$usage, threshold=$limit"
  local met
  met=$(echo "$usage > $limit" | bc -l)
  if (( met )); then
    log_debug "Alert condition MET"
  else
    log_debug "Alert condition NOT met"
  fi
}
15.3.2.4. Validation Checklist#
Before deploying to production, verify:
validate_system() {
  # Pre-deployment smoke checks. Prints one status line per check plus a
  # summary; returns 0 only when every critical check passed.
  local ok=0
  local bad=0
  # Check 1: Database initialization
  if sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM sqlite_master" &>/dev/null; then
    echo "✓ Database initialized"
    (( ok += 1 ))
  else
    echo "✗ Database not initialized"
    (( bad += 1 ))
  fi
  # Check 2: Configuration files readable
  if [[ -r "$CONFIG_PATH/monitoring-system.conf" ]]; then
    echo "✓ Configuration accessible"
    (( ok += 1 ))
  else
    echo "✗ Configuration not accessible"
    (( bad += 1 ))
  fi
  # Check 3: Required directories writable
  if [[ -w "$LOG_PATH" ]]; then
    echo "✓ Log directory writable"
    (( ok += 1 ))
  else
    echo "✗ Log directory not writable"
    (( bad += 1 ))
  fi
  # Check 4: External services reachable. A failure here is informational
  # only and deliberately does not count toward the failure total.
  if curl -s -m 5 "https://api.slack.com/api/api.test" >/dev/null 2>&1; then
    echo "✓ Slack API reachable"
    (( ok += 1 ))
  else
    echo "✗ Slack API unreachable (non-critical)"
  fi
  echo "Validation: $ok passed, $bad failed"
  [[ "$bad" -eq 0 ]]
}
15.3.3. Continuous Testing#
Automated Test Suite:
#!/bin/bash
# run-tests.sh - Execute all tests
run_unit_tests() {
  # Unit level: library function tests.
  echo "Running unit tests..."
  bash tests/test-lib-functions.sh
}

run_integration_tests() {
  # Integration level: components wired together.
  echo "Running integration tests..."
  bash tests/integration-tests.sh
}

run_system_tests() {
  # System level: against the real host.
  echo "Running system tests..."
  bash tests/system-tests.sh
}

run_all() {
  # Run each suite in order, stopping at the first failure.
  local suite
  for suite in run_unit_tests run_integration_tests run_system_tests; do
    "$suite" || return 1
  done
  echo "All tests passed! ✓"
}

run_all "$@"
Run tests on every commit:
# .git/hooks/pre-commit
#!/bin/bash
bash tests/run-tests.sh || exit 1
15.3.4. Debugging in Production#
When issues arise in production, use these techniques:
15.3.4.1. Enable Debug Logging#
# In your main script:
if [[ "${DEBUG_MODE:-0}" == "1" ]]; then
set -x # Print each command
fi
# Run with debug enabled:
DEBUG_MODE=1 bash script.sh
# Output will show:
# + source lib/logging.sh
# + init_database
# + sqlite3 /var/lib/myapp/metrics.db
# [CREATE TABLE...]
15.3.4.2. Trace Execution Flow#
# Add entry/exit logging to functions
my_function() {
  # Template: wrap a function body with entry/exit breadcrumbs for tracing.
  local self="${FUNCNAME[0]}"
  log_debug "→ Entering $self"
  # Function logic
  local rc=$?   # status of the last command of the body above
  log_debug "← Exiting $self (exit code: $rc)"
  return "$rc"
}
# This creates a breadcrumb trail showing execution order
15.3.4.3. Check State at Each Step#
# After each operation, verify state.
# FIX: "$DB_PATH" is now quoted — the original's unquoted expansion breaks
# on paths with spaces or glob characters (ShellCheck SC2086).
log_debug "Database file: $(ls -lh "$DB_PATH")"
log_debug "Recent alerts: $(sqlite3 "$DB_PATH" 'SELECT COUNT(*) FROM alerts;')"
log_debug "Disk usage: $(df -h / | tail -1)"
15.3.4.4. Use Assertion Functions#
assert() {
  # Run the given command with its arguments; on failure, log the full
  # command line and return 1 so the caller can react.
  if ! "$@"; then
    # FIX: "$*" joins all arguments into one word for the message; the
    # original's "$@" inside a string splits into multiple log_error
    # arguments, dropping everything after the first (ShellCheck SC2145).
    log_error "Assertion failed: $*"
    return 1
  fi
}
# Usage: pass a plain command and its arguments.
# FIX: `[[` is shell *syntax*, not a command, so it cannot be forwarded
# through "$@" — the original `assert [[ -f … ]]` always failed with
# "[[: command not found". Use the `test` builtin instead.
assert test -f "$CONFIG_FILE" || { echo "Config missing"; exit 1; }
assert command -v sqlite3 &>/dev/null || { echo "sqlite3 required"; exit 1; }
15.3.5. Performance Profiling#
Measure how long operations take:
#!/bin/bash
# Measure operation timing
time_operation() {
  # Run a command, log how long it took in milliseconds, and forward the
  # command's exit status to the caller.
  local label="$1"
  shift
  local t0 t1 rc
  t0=$(date +%s%N)   # nanoseconds since epoch (GNU date)
  "$@"
  rc=$?
  t1=$(date +%s%N)
  log_info "$label completed in $(( (t1 - t0) / 1000000 ))ms"
  return "$rc"
}
# Usage:
time_operation "Metrics Collection" ./src/metrics-collector.sh
time_operation "Alert Evaluation" ./src/alert-engine.sh
# Output:
# Metrics Collection completed in 245ms
# Alert Evaluation completed in 127ms
15.3.6. Pre-Deployment Checklist#
Before deploying to production:
#!/bin/bash
# pre-deployment-check.sh
check_prerequisites() {
  # Pre-deployment gate: verify interpreter version, required commands,
  # configuration permissions, and database initialization.
  echo "Checking prerequisites..."
  local checks_passed=0
  local checks_failed=0
  # Check Bash version (4.0+ needed for associative arrays etc.)
  if (( BASH_VERSINFO[0] >= 4 )); then
    # FIX: print the full version; ${BASH_VERSION%%.*} showed only the
    # major number (e.g. "5") under a "Bash version" label.
    echo "✓ Bash version: $BASH_VERSION"
    ((checks_passed++))
  else
    echo "✗ Bash 4.0+ required (you have: $BASH_VERSION)"
    ((checks_failed++))
  fi
  # Check required commands
  local cmd  # was an implicit global in the original
  for cmd in sqlite3 curl awk sed; do
    if command -v "$cmd" &>/dev/null; then
      echo "✓ Command available: $cmd"
      ((checks_passed++))
    else
      echo "✗ Command not found: $cmd"
      ((checks_failed++))
    fi
  done
  # Check file permissions: config readable and its directory writable
  if [[ -r "$CONFIG_FILE" ]] && [[ -w "${CONFIG_FILE%/*}" ]]; then
    echo "✓ Configuration accessible"
    ((checks_passed++))
  else
    echo "✗ Configuration not accessible"
    ((checks_failed++))
  fi
  # Check database initialization
  if sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM sqlite_master;" &>/dev/null; then
    echo "✓ Database initialized"
    ((checks_passed++))
  else
    echo "✗ Database not initialized"
    ((checks_failed++))
  fi
  echo
  echo "Results: $checks_passed passed, $checks_failed failed"
  # Overall status: success only if nothing failed
  [[ "$checks_failed" -eq 0 ]]
}
check_prerequisites
#!/bin/bash
# Example: Running and interpreting tests
# Test execution with verbose output
run_tests_verbose() {
  # Run the validation suite, mirroring output to a results file via tee.
  echo "Running tests with verbose output..."
  bash tests/test-validation.sh 2>&1 | tee /tmp/test-results.txt
  # BUG FIX: $? after `cmd | tee` is tee's status (effectively always 0),
  # so the original reported PASSED even when the suite failed.
  # PIPESTATUS[0] is the test script's real exit status; it must be read
  # immediately, before any other command overwrites it.
  local test_result=${PIPESTATUS[0]}
  if [[ $test_result -eq 0 ]]; then
    echo "All tests PASSED ✓"
  else
    echo "Some tests FAILED ✗"
    echo "Review test output above"
  fi
  return $test_result
}
# Parse test results
parse_test_results() {
  # Summarize a test-output file: lines starting with ✓ are passes,
  # lines starting with ✗ are failures.
  local results_file="$1"
  local passed failed total
  passed=$(grep -c "^✓" "$results_file")   # grep -c instead of grep | wc -l
  failed=$(grep -c "^✗" "$results_file")
  total=$((passed + failed))
  echo "Test Summary:"
  echo " Passed: $passed"
  echo " Failed: $failed"
  echo " Total: $total"
  if (( total > 0 )); then
    # Pass counts via -v instead of interpolating shell variables into the
    # awk program text.
    echo " Success Rate: $(awk -v p="$passed" -v t="$total" 'BEGIN {printf "%.1f%%", (p / t) * 100}')"
  else
    # BUG FIX: the original divided by zero inside awk (fatal error) when
    # the results file contained no test lines.
    echo " Success Rate: n/a"
  fi
}
# Run and analyze
run_tests_verbose
parse_test_results /tmp/test-results.txt
(Note: if you paste one of these shell scripts into a Python interpreter or notebook cell, Python will reject it at the first shell-specific line with an error such as `SyntaxError: invalid syntax`. Run the examples with `bash`, e.g. `bash run-tests.sh`, not with a Python kernel.)
15.3.7. Writing Effective Tests#
15.3.7.1. Unit Test Example#
#!/bin/bash
# tests/test-validation.sh
source lib/validation.sh
source lib/logging.sh
LOG_FILE="/tmp/test.log"
# Test counter
TESTS_PASSED=0
TESTS_FAILED=0
# Test helper
assert_success() {
  # Evaluate a command string; a zero exit status counts as a pass,
  # anything else as a failure. Updates the global counters.
  local name="$1"
  local cmd="$2"
  local mark="✗"
  if eval "$cmd"; then
    mark="✓"
    ((TESTS_PASSED++))
  else
    ((TESTS_FAILED++))
  fi
  echo "$mark $name"
}
assert_failure() {
  # Evaluate a command string; a NON-zero exit status counts as a pass
  # (the command is expected to fail). Updates the global counters.
  local name="$1"
  local cmd="$2"
  local mark="✗"
  if ! eval "$cmd"; then
    mark="✓"
    ((TESTS_PASSED++))
  else
    ((TESTS_FAILED++))
  fi
  echo "$mark $name"
}
# Run tests
test_validate_not_empty() {
  # Empty vs non-empty values.
  assert_failure "Empty string fails" "validate_not_empty 'VAR' ''"
  assert_success "Non-empty string passes" "validate_not_empty 'VAR' 'value'"
}

test_validate_integer() {
  # Accept whole numbers only — reject words and floats.
  assert_success "Integer passes" "validate_integer 'NUM' '42'"
  assert_failure "Non-integer fails" "validate_integer 'NUM' 'abc'"
  assert_failure "Float fails" "validate_integer 'NUM' '3.14'"
}

test_validate_directory() {
  # Directory existence checks.
  assert_success "Existing dir passes" "validate_directory '/tmp'"
  assert_failure "Nonexistent dir fails" "validate_directory '/nonexistent'"
}

main() {
  # Run every test group, then summarize; exit status reflects failures.
  echo "Running validation tests..."
  local group
  for group in test_validate_not_empty test_validate_integer test_validate_directory; do
    "$group"
  done
  echo
  echo "Results: $TESTS_PASSED passed, $TESTS_FAILED failed"
  [[ "$TESTS_FAILED" -eq 0 ]]
}

main "$@"
15.3.7.2. Integration Test Example#
#!/bin/bash
# tests/integration-test.sh
# Test complete workflow
test_complete_workflow() {
  # End-to-end smoke test: collect metrics → verify DB → alerts → report.
  echo "Testing complete workflow..."
  # Setup: isolated scratch area ($$ keeps parallel runs from colliding)
  local test_dir="/tmp/integration-test-$$"
  mkdir -p "$test_dir"
  export LOG_PATH="$test_dir/logs"
  export DATA_PATH="$test_dir/data"
  mkdir -p "$LOG_PATH" "$DATA_PATH"
  local rc=0
  # Run collectors (use `if ! cmd` instead of inspecting $? afterwards)
  if ! ./src/metrics-collector.sh; then
    echo "✗ Metrics collector failed"
    rc=1
  elif [[ ! -f "$DATA_PATH/metrics.db" ]]; then
    # Verify data was collected
    echo "✗ Metrics database not created"
    rc=1
  else
    echo "✓ Metrics database created"
    # Run alert engine, then generate report
    if ! ./src/alert-engine.sh; then
      echo "✗ Alert engine failed"
      rc=1
    elif ! ./src/report-generator.sh; then
      echo "✗ Report generator failed"
      rc=1
    fi
  fi
  # BUG FIX: the original only cleaned up on the success path, leaking
  # $test_dir on every failure. Clean up unconditionally.
  rm -rf -- "$test_dir"
  if (( rc == 0 )); then
    echo "✓ Complete workflow test passed"
  fi
  return $rc
}
test_complete_workflow