10.4. Process Monitoring & Performance#
10.4.1. Real-World Example: Performance Alert System#
#!/bin/bash
# Alert on performance issues
check_performance() {
local cpu_threshold=80
local mem_threshold=85
local disk_threshold=90
local load_threshold=4
# CPU check
local cpu_load=$(uptime | awk -F'load average:' '{print $2}' | awk '{printf "%d", $1}')
if [[ $cpu_load -gt $load_threshold ]]; then
echo "🔴 CRITICAL: High CPU load ($cpu_load)"
fi
# Memory check
local mem_used=$(free | awk 'NR==2 {printf "%d", $3/$2*100}')
if [[ $mem_used -gt $mem_threshold ]]; then
echo "🟠WARNING: Memory usage at ${mem_used}%"
echo " Top memory process: $(ps aux --sort=-%mem | head -2 | tail -1 | awk '{print $11, $4"%"}')"
fi
# Disk check
local disk_used=$(df -h / | awk 'NR==2 {print $5}' | sed 's/%//')
if [[ $disk_used -gt $disk_threshold ]]; then
echo "🟠WARNING: Root filesystem at ${disk_used}%"
echo " Largest dirs: $(du -sh /* 2>/dev/null | sort -hr | head -3 | awk '{print $2}')"
fi
# Zombie check
local zombies=$(ps aux | grep -c "[Z]")
if [[ $zombies -gt 0 ]]; then
echo "🟡 NOTE: $zombies zombie processes"
fi
}
# Run checks
check_performance
10.4.1.1. Crontab Integration#
# Add to crontab to run every 5 minutes
*/5 * * * * /usr/local/bin/check_performance.sh | mail -s "System Alert" admin@example.com
10.4.2. Performance Bottleneck Identification#
10.4.2.1. Identifying Bottleneck Type#
#!/bin/bash
# Comprehensive performance analysis
analyze_performance() {
echo "=== System Performance Analysis ==="
# Check CPU saturation
local load=$(uptime | awk -F'load average:' '{print $2}' | awk '{print $1}')
local cpus=$(nproc)
local cpu_pct=$(echo "$load / $cpus * 100" | bc)
echo "CPU: Load $load / $cpus cores (~${cpu_pct}%)"
[[ $(echo "$load > $cpus" | bc) -eq 1 ]] && echo " âš CPU bound"
# Check memory pressure
local free=$(free | awk 'NR==2 {print $7}')
local total=$(free | awk 'NR==2 {print $2}')
local mem_pct=$(echo "100 - ($free/$total*100)" | bc)
echo "Memory: ${mem_pct}% used"
[[ $(echo "$mem_pct > 85" | bc) -eq 1 ]] && echo " âš Memory pressure"
# Check disk I/O
local io=$(iostat -x 1 2 | tail -1 | awk '{print $NF}')
echo "Disk util: ${io}%"
[[ $(echo "$io > 80" | bc) -eq 1 ]] && echo " âš I/O bound"
# Check for zombies
local zombies=$(ps aux | grep -c "Z")
[[ $zombies -gt 0 ]] && echo " âš Found $zombies zombie processes"
}
analyze_performance
10.4.2.2. Reading iostat for I/O Analysis#
# Install sysstat
apt-get install sysstat
# Show I/O statistics
iostat -x 1 2
# Key columns:
# %util: Device utilization (0-100%)
# r/s, w/s: Reads/writes per second
# rkB/s, wkB/s: Read/write throughput
10.4.3. Load Average and CPU Usage#
10.4.3.1. Understanding Load Average#
Load average = average number of processes in run queue over time period.
# Show load average
uptime
# output: ... load average: 0.45, 0.32, 0.28
# Load for 1, 5, 15 minutes: 0.45, 0.32, 0.28
10.4.3.2. Load Average Interpretation#
Single-core system: Load=1 means 100% CPU usage
Dual-core system: Load=2 means 100% CPU usage
Load > CPU count = Queue of waiting processes
# Check CPU count
nproc
# or
lscpu | grep "^CPU(s)"
# Interpret load
uptime | awk '{
load=$NF
cpus=$(nproc)
print "CPUs: " cpus
print "Load: " load
if (load > cpus)
print "System is CPU bound"
else if (load > cpus * 0.8)
print "Approaching saturation"
else
print "System healthy"
}'
10.4.3.3. CPU Usage per Process#
# Get top CPU consumers
ps aux --sort=-%cpu | head -10
# Monitor CPU usage over time
while true; do
echo "$(date): $(ps aux --sort=-%cpu | head -2 | tail -1 | awk '{print $11, $3"%"}')"
sleep 5
done
10.4.4. htop: Enhanced Process Monitor#
htop is a more user-friendly alternative to top:
10.4.4.1. htop Features#
# Install (may need to install first)
apt-get install htop # Debian/Ubuntu
brew install htop # macOS
# Run htop
htop
10.4.4.2. htop Advantages#
Color-coded output - Easier to read
Mouse support - Click to select, scroll
Tree view - Show process hierarchy (
tkey)Better sorting - Click headers to sort
Process filtering - Easy user/search filters
Per-CPU view - By default shows all CPUs
Easier killing - Select process, press
k
10.4.4.3. htop vs. top#
Feature |
top |
htop |
|---|---|---|
Included by default |
Yes |
No |
Color output |
Limited |
Full |
Mouse support |
No |
Yes |
User-friendly |
Moderate |
High |
Sorting |
Keyboard shortcuts |
Click headers |
Memory display |
Confusing |
Clear |
CPU usage |
Aggregate |
Per-core |
10.4.5. Interactive top Commands#
While running top, press keys to control it:
Key |
Action |
|---|---|
|
Help screen |
|
Quit |
|
Select columns to display |
|
Sort by different column |
|
Filter by username |
|
Filter by PID |
|
Kill a process (sends SIGTERM) |
|
Force kill (sends SIGKILL) |
|
Renice (change priority) |
|
Toggle CPU info (per-CPU vs. aggregate) |
|
Toggle memory display format |
|
Toggle memory display mode |
|
Toggle color/monochrome |
|
Toggle full command line |
|
Change refresh interval |
10.4.5.1. Example Session#
# Start top
top
# Press 'M' to sort by memory
# Press 'u' to filter by user
# Press 'k' to kill a process (prompts for PID)
# Press 'q' to quit
10.4.6. The top Command#
top provides real-time system and process monitoring:
10.4.6.1. Basic top Usage#
# Interactive process monitor
top
# Non-interactive (10 iterations then exit)
top -n 10
# Monitor specific process
top -p 1234
# Sort by memory usage
top -o %MEM
# Refresh every 1 second (default is 3)
top -d 1
10.4.6.2. Understanding top Output#
System Summary (top lines):
Load average (1, 5, 15 minute)
Number of running/sleeping/stopped processes
CPU usage by state (user, system, idle, wait)
Memory usage (total, used, free, buffers)
Process List Columns:
PID: Process IDUSER: Process ownerPR: Priority (0-39, lower = higher priority)VIRT: Virtual memory usedRES: Physical memory (RSS)S: Process state (R/S/Z/T)%CPU: CPU usage percentage%MEM: Memory usage percentageTIME: CPU time usedCOMMAND: Process command