10.4. Process Monitoring & Performance#

10.4.1. Real-World Example: Performance Alert System#

#!/bin/bash

# Alert on performance issues

# Print one alert line per resource that exceeds its threshold.
#
# Checks the 1-minute load average, memory usage, root filesystem usage and
# zombie processes. Prints nothing when every check is within limits.
#
# Environment (optional integer overrides):
#   PERF_LOAD_THRESHOLD  1-minute load average limit       (default: 4)
#   PERF_MEM_THRESHOLD   used-memory limit in percent      (default: 85)
#   PERF_DISK_THRESHOLD  root filesystem limit in percent  (default: 90)
# Outputs:
#   Alert lines on stdout.
check_performance() {
  local load_threshold=${PERF_LOAD_THRESHOLD:-4}
  local mem_threshold=${PERF_MEM_THRESHOLD:-85}
  local disk_threshold=${PERF_DISK_THRESHOLD:-90}
  local cpu_load mem_used disk_used zombies

  # CPU check: integer part of the 1-minute load average. The separator regex
  # also accepts the "load averages:" spelling, and LC_ALL=C keeps "." as the
  # decimal point so the value parses as a number.
  cpu_load=$(LC_ALL=C uptime \
    | awk -F'load averages?: *' '{ split($2, avg, /[ ,]+/); printf "%d", avg[1] }')
  if [[ ${cpu_load:-0} -gt $load_threshold ]]; then
    echo "🔴 CRITICAL: High CPU load ($cpu_load)"
  fi

  # Memory check: used / total from the "Mem:" row, as a whole percentage.
  mem_used=$(free | awk 'NR==2 && $2 > 0 { printf "%d", $3 * 100 / $2 }')
  if [[ ${mem_used:-0} -gt $mem_threshold ]]; then
    echo "🟠 WARNING: Memory usage at ${mem_used}%"
    echo "  Top memory process: $(ps aux --sort=-%mem | awk 'NR==2 { print $11, $4"%" }')"
  fi

  # Disk check: -P forces one output line per filesystem, so the capacity is
  # always field 5 of line 2 even when the device name is long.
  disk_used=$(df -P / | awk 'NR==2 { sub(/%/, "", $5); print $5 }')
  if [[ ${disk_used:-0} -gt $disk_threshold ]]; then
    echo "🟠 WARNING: Root filesystem at ${disk_used}%"
    echo "  Largest dirs: $(du -sh /* 2>/dev/null | sort -hr | head -3 | awk '{print $2}')"
  fi

  # Zombie check: look only at the process-state column. Grepping whole
  # "ps aux" lines for "Z" also matches the header ("VSZ") and any command
  # containing a capital Z, so it never reports zero.
  zombies=$(ps -eo stat= | awk '/^Z/ { n++ } END { print n + 0 }')
  if [[ ${zombies:-0} -gt 0 ]]; then
    echo "🟡 NOTE: $zombies zombie processes"
  fi
}

# Run checks
check_performance

10.4.1.1. Crontab Integration#

# Add to crontab to run every 5 minutes.
# The script's stdout is piped to mail, addressed to admin@example.com with
# the subject "System Alert".
# NOTE(review): mail is invoked on every run, including runs where the script
# printed no alerts; whether an empty message is then sent depends on the mail
# implementation — confirm, or guard the pipe on non-empty output.
*/5 * * * * /usr/local/bin/check_performance.sh | mail -s "System Alert" admin@example.com

10.4.2. Performance Bottleneck Identification#

10.4.2.1. Identifying Bottleneck Type#

#!/bin/bash

# Comprehensive performance analysis

# Print a short report on CPU, memory, disk I/O and zombie processes, flagging
# each resource that looks saturated.
#
# Requires: uptime, nproc, free, ps, awk; iostat (sysstat) for the disk section.
# Outputs:  report lines on stdout.
analyze_performance() {
  local load cpus cpu_pct mem_pct io zombies

  echo "=== System Performance Analysis ==="

  # Check CPU saturation: 1-minute load average relative to the core count.
  # split() drops the comma that uptime prints after each value; the arithmetic
  # is done in awk because bc truncates to integers at its default scale.
  load=$(LC_ALL=C uptime \
    | awk -F'load averages?: *' '{ split($2, avg, /[ ,]+/); print avg[1] }')
  cpus=$(nproc)
  cpu_pct=$(awk -v load="$load" -v cpus="$cpus" \
    'BEGIN { printf "%d", (cpus + 0 > 0) ? load * 100 / cpus : 0 }')

  echo "CPU: Load $load / $cpus cores (~${cpu_pct}%)"
  if awk -v load="$load" -v cpus="$cpus" 'BEGIN { exit !(load + 0 > cpus + 0) }'; then
    echo "  ⚠ CPU bound"
  fi

  # Check memory pressure: share of total memory that is not "available"
  # (field 7 of the "Mem:" row), as a whole percentage.
  mem_pct=$(free | awk 'NR==2 && $2 > 0 { printf "%d", ($2 - $7) * 100 / $2 }')

  echo "Memory: ${mem_pct}% used"
  if [[ ${mem_pct:-0} -gt 85 ]]; then
    echo "  ⚠ Memory pressure"
  fi

  # Check disk I/O: highest %util among the devices of the last iostat report.
  # The first report covers the time since boot, and the output ends with a
  # blank line, so "tail -1" cannot be used to pick the value.
  if command -v iostat >/dev/null 2>&1; then
    io=$(LC_ALL=C iostat -x 1 2 | awk '
      /%util/ {
        # Device header: locate the %util column and restart the maximum so
        # that only rows of the most recent report are counted.
        for (i = 1; i <= NF; i++) if ($i == "%util") col = i
        max = 0
        next
      }
      col && NF >= col && $col + 0 > max { max = $col + 0 }
      END { printf "%.2f", max }
    ')
    echo "Disk util: ${io}%"
    if awk -v util="$io" 'BEGIN { exit !(util + 0 > 80) }'; then
      echo "  ⚠ I/O bound"
    fi
  else
    echo "Disk util: unavailable (iostat not found; install sysstat)"
  fi

  # Check for zombies using the state column only; matching "Z" anywhere in
  # "ps aux" output also counts the header line ("VSZ").
  zombies=$(ps -eo stat= | awk '/^Z/ { n++ } END { print n + 0 }')
  if [[ ${zombies:-0} -gt 0 ]]; then
    echo "  ⚠ Found $zombies zombie processes"
  fi
}

analyze_performance

10.4.2.2. Reading iostat for I/O Analysis#

# Install sysstat
apt-get install sysstat

# Show I/O statistics
iostat -x 1 2

# Key columns:
# %util: Device utilization (0-100%)
# r/s, w/s: Reads/writes per second
# rkB/s, wkB/s: Read/write throughput

10.4.3. Load Average and CPU Usage#

10.4.3.1. Understanding Load Average#

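Load average = the average number of processes that are running or waiting to run (on Linux, also those in uninterruptible sleep, e.g. waiting on disk I/O) over a time period.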

# Show load average
uptime
# output: ... load average: 0.45, 0.32, 0.28

# Load for 1, 5, 15 minutes: 0.45, 0.32, 0.28

10.4.3.2. Load Average Interpretation#

  • Single-core system: Load=1 means 100% CPU usage

  • Dual-core system: Load=2 means 100% CPU usage

  • Load > CPU count = Queue of waiting processes

# Check CPU count
nproc
# or
lscpu | grep "^CPU(s)"

# Interpret load: compare the 1-minute load average with the CPU count.
# The core count is passed in with -v because awk cannot run $(nproc) itself
# (inside awk that expression is a field reference), and the load is taken
# from the text after "load average:" because $NF is the 15-minute average.
# Outputs: the CPU count, the load, and a one-line verdict on stdout.
interpret_load() {
  LC_ALL=C uptime | awk -v cpus="$(nproc)" -F'load averages?: *' '{
    split($2, avg, /[ ,]+/)
    load = avg[1]
    print "CPUs: " cpus
    print "Load: " load
    if (load + 0 > cpus + 0)
      print "System is CPU bound"
    else if (load + 0 > cpus * 0.8)
      print "Approaching saturation"
    else
      print "System healthy"
  }'
}

interpret_load

10.4.3.3. CPU Usage per Process#

# Get top CPU consumers
ps aux --sort=-%cpu | head -10

# Monitor CPU usage over time.
# Every 5 seconds, print a timestamp and the top entry of the CPU-sorted
# process list. The loop has no exit condition; stop it with Ctrl-C.
while true; do
  # head -2 | tail -1 keeps the first line after the ps header; awk then
  # prints fields 11 and 3 with a "%" suffix (COMMAND and %CPU in `ps aux`).
  echo "$(date): $(ps aux --sort=-%cpu | head -2 | tail -1 | awk '{print $11, $3"%"}')"
  sleep 5
done

10.4.4. htop: Enhanced Process Monitor#

htop is a more user-friendly alternative to top:

10.4.4.1. htop Features#

# Install (may need to install first)
apt-get install htop  # Debian/Ubuntu
brew install htop      # macOS

# Run htop
htop

10.4.4.2. htop Advantages#

  1. Color-coded output - Easier to read

  2. Mouse support - Click to select, scroll

  3. Tree view - Show process hierarchy (t key)

  4. Better sorting - Click headers to sort

  5. Process filtering - Easy user/search filters

  6. Per-CPU view - By default shows all CPUs

  7. Easier killing - Select process, press k

10.4.4.3. htop vs. top#

Feature

top

htop

Included by default

Yes

No

Color output

Limited

Full

Mouse support

No

Yes

User-friendly

Moderate

High

Sorting

Keyboard shortcuts

Click headers

Memory display

Confusing

Clear

CPU usage

Aggregate

Per-core

10.4.5. Interactive top Commands#

While running top, press keys to control it:

Key

Action

h

Help screen

q

Quit

f

Select columns to display

F or O

Sort by different column

u

Filter by username

p

Filter by PID

k

Kill a process (sends SIGTERM)

9

Force kill (sends SIGKILL); not a standalone key: enter 9 as the signal at the prompt after pressing k

r

Renice (change priority)

1

Toggle CPU info (per-CPU vs. aggregate)

t

Toggle task/CPU summary display

m

Toggle memory display mode

z

Toggle color/monochrome

c

Toggle full command line

d or s

Change refresh interval

10.4.5.1. Example Session#

# Start top
top

# Press 'M' to sort by memory
# Press 'u' to filter by user
# Press 'k' to kill a process (prompts for PID)
# Press 'q' to quit

10.4.6. The top Command#

top provides real-time system and process monitoring:

10.4.6.1. Basic top Usage#

# Interactive process monitor
top

# Run 10 iterations then exit (add -b for non-interactive batch output)
top -n 10

# Monitor specific process
top -p 1234

# Sort by memory usage
top -o %MEM

# Refresh every 1 second (default is 3)
top -d 1

10.4.6.2. Understanding top Output#

System Summary (top lines):

  • Load average (1, 5, 15 minute)

  • Number of running/sleeping/stopped processes

  • CPU usage by state (user, system, idle, wait)

  • Memory usage (total, used, free, buffers)

Process List Columns:

  • PID: Process ID

  • USER: Process owner

  • PR: Priority (0-39, lower = higher priority)

  • VIRT: Virtual memory used

  • RES: Physical memory (RSS)

  • S: Process state (R/S/Z/T)

  • %CPU: CPU usage percentage

  • %MEM: Memory usage percentage

  • TIME: CPU time used

  • COMMAND: Process command