#!/usr/bin/sh
#
# Preventive maintenance (health check) script for AIX hosts.
#
# This script is written by Beyan Zhang of Curvature.
# And is only applicable to AIX hosts.
# Send an email to '[email protected]' for any help.
#
# When executed, a log file named "<hostname>_<date>" is appended to in
# $log_dir ('/' by default); everything shown on the terminal through
# `logit` is also written there.
#
# Six sections are reported, in this order:
#   HARDWARE INFORMATION, PERFORMANCE, HARDWARE STATUS,
#   FILESYSTEM STATUS, NETWORK INFORMATION, ERROR LOG.
# A breakpoint (press_enter) is placed before every section.
# Delete those calls if an unattended run is needed.
#
# NOTE: the script relies on the AIX/ksh builtin `echo` interpreting
# "\t", "\n" and "\c", and on AIX-only tools (prtconf, lsdev, lsvg, lspv,
# lsps, bootinfo, bootlist, sysdumpdev, errpt, `grep -p`).

host=$(hostname)
date=$(date +%Y-%m-%d_%H:%M)
file_name="${host}_${date}"

# Directory for storing log files.
# Modify it, if needed.
log_dir="/"

# Pause until the operator presses Enter, then reset the terminal.
function press_enter {
  echo "\nPress any key to continue, or CTRL-c to abort."
  read k
  reset
}

# Copy stdin to stdout and append it to the log file.
function logit {
  tee -a "$log_dir/$file_name"
}

# Usage: hardware_status hdisk0
# Prints the lsdev status column of the matching device(s),
# such as "Available", "Defined"...
function hardware_status {
  lsdev -C | grep -e "$1" | awk '{print $2}'
}

# Usage: df_over_threshold <df column number> <limit>
# Prints "<TAB>mount point<TAB>percentage" for every `df -g` row whose
# percentage in the given column is numeric and >= limit.
# Rows such as /proc, which report "-" instead of a number, are skipped.
function df_over_threshold {
  df -g | awk -v col="$1" -v limit="$2" 'NR > 1 {
    pct = $col
    sub(/%/, "", pct)
    if (pct ~ /^[0-9]+$/ && pct + 0 >= limit + 0)
      print "\t" $7 "\t" $col
  }'
}

# Prints the summary errpt listing without its header line.
# errpt prints nothing at all when the error log is empty.
function errpt_entries {
  errpt | sed '1d'
}

# Usage: errpt_field <error id> <anchored label pattern>
# Prints the value of the first "Label: value" line of `errpt -aj <id>`
# that matches the pattern; all blanks are removed from the value.
function errpt_field {
  errpt -aj "$1" | grep -i -w -e "$2" | head -1 | xargs |
    sed 's/ //g' | awk -F ":" '{print $2}'
}

# Usage: date_switch MMDDhhmmYY
# Converts an errpt timestamp into the more readable "20YY-MM-DD hh:mm".
function date_switch {
  typeset year month day hour minute
  year=$(echo "$1" | cut -c '9-10')
  month=$(echo "$1" | cut -c '1-2')
  day=$(echo "$1" | cut -c '3-4')
  hour=$(echo "$1" | cut -c '5-6')
  minute=$(echo "$1" | cut -c '7-8')
  echo "20${year}-${month}-${day} ${hour}:${minute}"
}

# ---------------------------------------------------------------------
# Hardware info, include hostname SN...
# ---------------------------------------------------------------------
press_enter
echo "HARDWARE INFORMATION" | logit
prtconf | grep -i -e "System Model" -e "Machine Serial Number" \
  -e "Processor Type" -e "number of processor" \
  -e "cpu type" -e "kernel type" -e "lpar info" \
  -e "memory size" -e "good memory size" \
  -e "firmware version" | logit
echo "Hostname: \c" | logit
hostname | logit
echo "Number of disks: \c" | logit
lsdev -Cc disk | wc -l | awk '{print $1}' | logit
echo "Number of local disks: \c" | logit
lsdev -Cc disk | grep -i -e "sas" -e "scsi" | wc -l |
  awk '{print $1}' | logit
echo "Local disks list: \c" | logit
lsdev -Cc disk | grep -i -e "sas" -e "scsi" |
  awk '{print $1}' | xargs |
  sed 's/ /, /g' | logit
echo "Uptime: \c" | logit
uptime | awk '{print $3}' |
  sed -e 's/,//g' -e 's/:/hours /' -e 's/$/minutes/' | logit
echo "Oslevel: \c" | logit
oslevel -s | logit
echo | logit

# ---------------------------------------------------------------------
# Performance
# ---------------------------------------------------------------------
press_enter
echo "PERFORMANCE" | logit
echo "CPU idle: $(sar -u 5 1 | tail -1 | awk '{print $5}' | xargs)%" | logit
# vmstat reports free memory in 4 KB pages, bootinfo -r reports real
# memory in KB, hence pages * 4 * 100 / KB = pages * 400 / KB percent.
free_memory_4k=$(vmstat 5 1 | tail -1 | awk '{print $4}' | xargs)
memory=$(bootinfo -r | xargs)
echo "Memory free: $(($free_memory_4k*400/$memory))%" | logit
echo | logit

# ---------------------------------------------------------------------
# Hardware status, such as disk health, adapter health...
# ---------------------------------------------------------------------
press_enter
echo "HARDWARE STATUS" | logit
for category in Disk Processor Memory Adapter Tape Cdrom Interface
do
  echo "$category status: " | logit
  # lsdev class names are lower case; network interfaces use class "if".
  class=$(echo "$category" | tr 'A-Z' 'a-z')
  if [ "$class" = 'interface' ]; then
    class="if"
  fi
  lsdev -Cc "$class" | awk '{print "\t"$1": "$2}' | logit
done
echo | logit

# ---------------------------------------------------------------------
# Filesystem status, such as Volume Group, Physical Volume...
# ---------------------------------------------------------------------
press_enter
echo "FILESYSTEM STATUS" | logit
echo "Filesystem Usage:" | logit
echo "Filesystem %Used %Iused Mounted_on" |
  awk '{printf "\t%-15s %6s %8s %-30s\n", $1, $2, $3, $4}' | logit
df -g | grep -v -i "filesystem" |
  awk '{printf "\t%-15s %6s %8s %-30s\n", $1, $4, $6, $7}' | logit

# Column 4 of `df -g` is %Used, column 6 is %Iused.
echo "Filesystem used more than 80%:" | logit
df_over_threshold 4 80 | logit
echo "Filesystem inode used more than 80%:" | logit
df_over_threshold 6 80 | logit

echo "VG list: \c" | logit
lsvg | xargs | sed 's/ /, /g' | logit
echo "Actived VG list: \c" | logit
lsvg -o | xargs | sed 's/ /, /g' | logit

echo "Logic Volumes list and status:" | logit
for vg in $(lsvg -o)
do
  echo "$vg:" | logit
  # `lsvg -l` prints the VG name and a column header before the LV rows.
  # An LV whose LP count ($3) equals its PP count ($4) has a single copy.
  lsvg -l "$vg" | awk 'NR > 2 {
    if ($3 == $4)
      mirror = "Unmirrored"
    else
      mirror = "Mirrored"
    printf "\t%-10s %-11s %-14s %-10s\n", $1, mirror, $6, $7
  }' | logit
done

echo "Physical Volumes list and status:" | logit
lspv | awk '{printf "\t%-10s %-15s %-10s\n", $1, $3, $4}' | logit

echo "VG free capability:" | logit
for vg in $(lsvg -o)
do
  echo "\t$vg: \c" | logit
  # "FREE PPs: n (m megabytes)" -> m
  echo $(lsvg "$vg" | grep -i "free pps" |
    awk -F "(" '{print $2}' |
    awk '{print $1}')\\c | logit
  echo "MB" | logit
done

echo "Boot information:" | logit
echo "\tBoot order: \c" | logit
bootlist -m normal -o | xargs | sed 's/ /; /g' | logit
echo "\tLast time boot from: \c" | logit
bootinfo -b | logit

echo "Pagint usage:" | logit
echo "\tTotal paging space: \c" | logit
lsps -s | grep "MB" | awk '{print $1}' | logit
echo "\tCurrent paging used: \c" | logit
lsps -s | grep "%" | awk '{print $2}' | logit

echo "System dump device:" | logit
sysdumpdev -l | sed 's/^/ /' | logit
echo | logit

# ---------------------------------------------------------------------
# Network information: interface addresses and routing table.
# ---------------------------------------------------------------------
press_enter
echo "NETWORK INFORMATION" | logit
for interface in $(ifconfig -a | grep -i flags |
  awk '{print $1}' | sed 's/://g')
do
  echo "$interface:" | logit
  ifconfig "$interface" | grep -e "inet.*netmask" |
    awk '{print "\taddress:\t"$2"\n\t"$3":\t"$4"\n\t"$5":\t"$6}' |
    logit
done
echo "Route information:" | logit
netstat -rn | grep -v -i -E "internet|table" | sed '/^$/d' |
  awk '{printf "\t%-15s %-15s %-6s %-6s %6s %-6s\n", $1, $2, $3, $4, $5, $6}' |
  logit
echo | logit

# ---------------------------------------------------------------------
# Error log analysis (errpt).
# ---------------------------------------------------------------------
press_enter
echo "ERROR LOG" | logit
num_of_errpt=$(errpt_entries | wc -l | xargs)
echo "Number of error logs: $num_of_errpt times" | logit
# Entries are grouped by the first four words of their description
# (fields 6-9 of the summary listing).
num_of_diff_errpt=$(errpt_entries |
  awk '{print $6" "$7" "$8" "$9}' |
  sort -n | uniq -c | wc -l | xargs)
echo "Number of diffenent error logs: $num_of_diff_errpt times" |
  logit
# echo "Briefly:" | logit
# errpt | grep -v -i "description" | \
# awk '{print $6" "$7" "$8" "$9" "$10" "$11" "$12}' | \
# sed 's/^/;/' | sort -n | uniq -c | sort -rk 1 | \
# awk -F " ;" '{print "\t"$2":"$1" times"}' | \
# sed 's/ *: */: /g' | logit

# One "w1 w2 w3 w4;" item per distinct description, most frequent first.
lines=$(errpt_entries |
  awk '{print $6" "$7" "$8" "$9}' |
  sed 's/$/;/' | sort -n | uniq -c | sort -rk 1 |
  awk '{print $2" "$3" "$4" "$5" "$6" "$7" "$8}' |
  sed 's/ *; */;/g')

i=1
while [ "$i" -le "$num_of_diff_errpt" ]
do
  # $lines is deliberately unquoted: the items are joined on one line and
  # then split again on ";" so that the i-th item can be selected.
  description=$(echo $lines | tr ";" "\n" | sed -n "${i}p" |
    sed 's/^ //g')
  echo "------------ERROR $i------------" | logit
  echo "Description: $description" | logit

  # errpt lists the newest entry first.
  last_time=$(errpt | grep -w -e "$description" |
    head -1 | awk '{print $2}')
  first_time=$(errpt | grep -w -e "$description" |
    tail -1 | awk '{print $2}')
  echo "Recently report: $(date_switch "$last_time")" | logit
  echo "First time report: $(date_switch "$first_time")" | logit
  echo "Repeat times: $(errpt | grep -w -e "$description" |
    wc -l | xargs)" | logit

  error_id=$(errpt | grep -w -e "$description" | head -1 |
    awk '{print $1}')
  echo "ID: $error_id" | logit

  error_type=$(errpt_field "$error_id" "^type")
  # The explanation is rebuilt from scratch for every entry so that text
  # from a previous entry can never leak into this one.
  type_note=""
  case "$error_type" in
    'INFO')
      type_note="The error log entry is informational "
      type_note="${type_note}and was not the result of an error."
      ;;
    'UNKN')
      type_note="It is not possible to determine the "
      type_note="${type_note}severity of the error."
      ;;
    'TEMP')
      type_note="Condition that was recovered from after "
      type_note="${type_note}a number of unsuccessful attempts."
      ;;
    'PERM')
      type_note="Condition that could not be recovered from. "
      type_note="${type_note}Maybe you have a defective hardware "
      type_note="${type_note}device or software module."
      ;;
    'PERF')
      type_note="The performance of the device or component "
      type_note="${type_note}has degraded to below an acceptable level."
      ;;
    'PEND')
      type_note="The loss of availability of "
      type_note="${type_note}a device or component is imminent."
      ;;
    *)
      type_note="Unknow!!!"
      ;;
  esac
  echo "Type: $error_type ($type_note)" | logit

  error_class=$(errpt_field "$error_id" "^class")
  class_note=""
  case "$error_class" in
    'H')
      class_note="Hardware"
      ;;
    'S')
      class_note="Software"
      ;;
    'O')
      class_note="Information messages."
      ;;
    'U')
      class_note="Undetermined (for example, a network)."
      ;;
    *)
      class_note="Unknow!!!"
      ;;
  esac
  echo "Class: $error_class ($class_note)" | logit

  error_resource_name=$(errpt_field "$error_id" "^resource name")
  echo "Resource Name: $error_resource_name" | logit
  error_resource_class=$(errpt_field "$error_id" "^resource class")
  echo "Resource Class: $error_resource_class" | logit
  error_resource_type=$(errpt_field "$error_id" "^resource type")
  echo "Resource Type: $error_resource_type" | logit

  # Unlike the fields above, the last matching "location" line is used.
  error_resource_location=$(errpt -aj "$error_id" |
    grep -i -w -e "location" |
    tail -1 | xargs | sed 's/ //g' |
    awk -F ":" '{print $2}')
  echo "Location: $error_resource_location" | logit

  # grep -p (AIX) prints whole paragraphs containing a match.
  error_causes=$(errpt -aj "$error_id" | grep -p -i " causes" |
    sort -rn | uniq | grep -v -i "causes" |
    sed -e '/^$/d' -e 's/$/;/g' | xargs)
  echo "Failure causes: $error_causes" | logit

  error_des=$(errpt -aj "$error_id" | grep -i -e "^description:" |
    tail -1 | xargs | awk -F ":" '{print $2}')
  echo "Detail: $error_des" | logit

  error_fru=$(errpt -aj "$error_id" | grep -i -w "fru:" |
    tail -1 | awk '{print $4}' | xargs)
  echo "Possible FRUs: $error_fru" | logit

  i=$(($i + 1))
done
Preventive Maintenance Script for AIX Hosts
#!/usr/bin/sh # # This script is written by Beyan Zhang of Curvature. # And is only applicable to AIX hosts. # Send an email to '[email protected]' for any help. # When executed, a log file will be recorded to '/' by default. # There are 6 items to check, such as 'HARDWARE INFORMATION', # 'HARDWARE […]