北肙

当你不能够再拥有,唯一可以做的,就是令自己不要忘记。

Preventive Maintenance Script for AIX Hosts

#!/usr/bin/sh # # This script is written by Beyan Zhang of Curvature. # And is only applicable to AIX hosts. # Send an email to '[email protected]' for any help. # When executed, a log file will be recorded to '/' by default. # There are 6 items to check, such as 'HARDWARE INFORMATION',  # 'HARDWARE […]
#!/usr/bin/sh
#
# This script is written by Beyan Zhang of Curvature.
# And is only applicable to AIX hosts.
# Send an email to '[email protected]' for any help.
# When executed, a log file will be recorded to '/' by default.
# There are 6 items to check, such as 'HARDWARE INFORMATION', 
# 'HARDWARE STATUS', 'PERFORMANCE', 'FILESYSTEM STATUS', 
# 'NETWORK STATUS', and 'ERROR LOG ANALYSIS'. 
# The script pauses (see press_enter) before each of these items.
# Remove the press_enter calls if the pauses are not needed.
# The log file is named "<hostname>_<YYYY-MM-DD_HH:MM>", computed once here.
host=$(hostname)
date=$(date +%Y-%m-%d_%H:%M)
file_name="${host}_${date}"
# Directory that receives the log file.
# Change it here if "/" is not a suitable place.
log_dir="/"
# Pause until the operator presses Enter, then run `reset`.
# `read` only returns once a full line has been entered, so the prompt
# asks for Enter rather than for "any key".
function press_enter
{
	printf '\nPress Enter to continue, or CTRL-c to abort.\n'
	# -r keeps backslashes in the typed line literal; the value stored in
	# k is not examined here.
	read -r k
	# NOTE(review): presumably reinitialises the terminal before the next
	# section is printed - confirm this is still wanted.
	reset
}
# Write a copy of log to target file.
# Passes stdin through to stdout and appends the same text to
# "$log_dir/$file_name" (both are globals set near the top of the script).
function logit
{
	# Quoted so that a directory or file name containing spaces or glob
	# characters still refers to exactly one file.
	tee -a "$log_dir/$file_name"
}
# Usage: hardware_status hdisk0
# Status of single hardware, such as "Available", "Defined"...
# Prints the second column of the `lsdev -C` line whose first column is
# exactly $1; prints nothing when no device has that name.
function hardware_status
{
	# Compare the name field exactly: a substring match on "hdisk1" would
	# also pick up hdisk10 and any line that merely mentions hdisk1.
	lsdev -C | awk -v dev="$1" '$1 == dev {print $2}'
}
# Hardware info, include hostname SN...
# Section 1: selected prtconf lines, host name, disk counts, uptime and
# OS level. Every line goes to the screen and to the log through logit.
# The echo "...\c" lines rely on the shell's echo treating \c as
# "no trailing newline", so the value printed next lands on the same line.
press_enter
echo "HARDWARE INFORMATION" | logit
prtconf | grep -i -e "System Model" -e "Machine Serial Number" \
        -e "Processor Type" -e "number of processor" \
        -e "cpu type" -e "kernel type" -e "lpar info" \
        -e "memory size" -e "good memory size" \
        -e "firmware version" | logit
echo "Hostname: \c" | logit
hostname | logit
echo "Number of disks: \c" | logit
lsdev -Cc disk | wc -l | awk '{print $1}' | logit
# "Local" disks are the lsdev disk lines that mention sas or scsi.
echo "Number of local disks: \c" | logit
lsdev -Cc disk | grep -i -e "sas" -e "scsi" | wc -l | \
        awk '{print $1}' | logit
echo "Local disks list: \c" | logit
lsdev -Cc disk | grep -i -e "sas" -e "scsi" | \
	awk '{print $1}' | xargs | \
	sed 's/ /, /g' | logit
echo "Uptime: \c" | logit
# Keep everything between "up" and the user count, so a leading
# "N days," part survives when the host has been up for more than a day;
# an h:mm part is then spelled out as "<h>hours <m>minutes".
uptime | sed -e 's/^.*up  *//' \
	-e 's/,  *[0-9][0-9]*  *users*.*$//' \
	-e 's/  */ /g' \
	-e 's/\([0-9][0-9]*\):\([0-9][0-9]*\)/\1hours \2minutes/' | logit
echo "Oslevel: \c" | logit
oslevel -s | logit
echo | logit
# Performance
# Section 2: one 5-second sar sample and one 5-second vmstat sample.
press_enter
echo "PERFORMANCE" | logit
# Field 5 of the last sar line is reported as the idle percentage.
echo "CPU idle: $(sar -u 5 1 | tail -1 | awk '{print $5}' | xargs)%" | logit
# Field 4 of the last vmstat line, treated as free 4 KB pages
# (per the variable name - TODO confirm against the local vmstat layout).
free_memory_4k=$(vmstat 5 1 | tail -1 | awk '{print $4}' | xargs)
# NOTE(review): the *400 factor below assumes bootinfo -r reports KB
# (4 KB per page * 100 for percent) - confirm.
memory=$(bootinfo -r | xargs)
echo "Memory free: $(($free_memory_4k*400/$memory))%" | logit
echo | logit
# Hardware status, such as disk health, adapter health...
# Section 3: for each device class, print the first two columns
# (name and state) of every line that `lsdev -Cc <class>` returns.
press_enter
echo "HARDWARE STATUS" | logit
for I in Disk Processor Memory Adapter Tape Cdrom Interface
do
	echo "$I status: " | logit
	# The lsdev class is the lower-cased label, except that "Interface"
	# is queried as class "if". A case statement avoids the non-POSIX
	# "==" test operator and leaves the loop variable untouched.
	case "$I" in
		Interface)
			dev_class="if"
			;;
		*)
			dev_class=`echo "$I" | tr 'A-Z' 'a-z'`
			;;
	esac
	lsdev -Cc "$dev_class" | awk '{print "\t"$1": "$2}' | logit
done
echo | logit
# Filesystem status, such as Volume Group, Physical Volume...
# Section 4 starts with a usage table built from columns 1, 4, 6 and 7
# of `df -g`, under a header row printed with the same column widths.
press_enter
echo "FILESYSTEM STATUS" | logit
echo "Filesystem Usage:" | logit
printf "\t%-15s %6s %8s %-30s\n" \
	"Filesystem" "%Used" "%Iused" "Mounted_on" | logit
# Any df line containing "filesystem" (case-insensitive) is left out;
# that is how the df header row is dropped.
df -g | awk 'tolower($0) !~ /filesystem/ {
	printf "\t%-15s %6s %8s %-30s\n", $1, $4, $6, $7
}' | logit
# List mount points whose space usage (df column 4) or inode usage
# (df column 6) is 80% or more, as "<tab>mount<tab>percent".
# Each list is produced in a single pass over `df -g`, keyed on the
# percentage column itself:
#   - rows whose percentage is not a number (for example "-") are skipped
#     without discarding filesystems that merely have "-" in their name;
#   - a row is printed once, and only for its own percentage value.
echo "Filesystem used more than 80%:" | logit
df -g | awk 'NR > 1 {
	pct = $4
	sub(/%/, "", pct)
	if (pct ~ /^[0-9][0-9]*$/ && pct + 0 >= 80)
		print "\t" $7 "\t" $4
}' | logit
echo "Filesystem inode used more than 80%:" | logit
df -g | awk 'NR > 1 {
	pct = $6
	sub(/%/, "", pct)
	if (pct ~ /^[0-9][0-9]*$/ && pct + 0 >= 80)
		print "\t" $7 "\t" $6
}' | logit
# Volume groups (all, then varied-on ones from `lsvg -o`), followed by
# one line per logical volume of every varied-on volume group.
echo "VG list: \c" | logit
lsvg | xargs | sed 's/ /, /g' | logit
echo "Actived VG list: \c" | logit
lsvg -o | xargs | sed 's/ /, /g' | logit
echo "Logic Volumes list and status:" | logit
for I in `lsvg -o`
do
	echo "$I:" | logit
	# One pass over `lsvg -l`: only rows whose 3rd and 4th columns are
	# both numbers are logical volumes, which skips the "<vg>:" line
	# and the column-header line. Equal values in those two columns are
	# reported as "Unmirrored", anything else as "Mirrored"; columns 1,
	# 6 and 7 are printed alongside. Every row uses the same layout.
	lsvg -l "$I" | awk '$3 ~ /^[0-9][0-9]*$/ && $4 ~ /^[0-9][0-9]*$/ {
		mirror = ($3 + 0 == $4 + 0) ? "Unmirrored" : "Mirrored"
		printf "\t%-10s %-11s %-14s %-10s\n", $1, mirror, $6, $7
	}' | logit
done
# Physical volumes: columns 1, 3 and 4 of every `lspv` line.
# A single pass prints each disk exactly once; filtering the full list
# by name per disk would repeat rows whenever one name is a prefix of
# another (hdisk1 / hdisk10).
echo "Physical Volumes list and status:" | logit
lspv | awk '{printf "\t%-10s %-15s %-10s\n", $1, $3, $4}' | logit
# Free space per varied-on volume group, printed as "<tab>vg: <n>MB".
echo "VG free capability:" | logit
for I in $(lsvg -o)
do
	echo "\t$I: \c" | logit
	# On the "free pps" line of lsvg, take the first word after the
	# opening parenthesis; \c keeps "MB" on the same output line.
	echo $(lsvg $I | awk -F "(" 'tolower($0) ~ /free pps/ {
		split($2, part, " ")
		print part[1]
	}')\\c | logit
	echo "MB" | logit
done
# Boot list and last boot device, paging-space summary, and the
# system dump device settings (indented by eight spaces).
echo "Boot information:" | logit
echo "\tBoot order: \c" | logit
bootlist -m normal -o | xargs | sed 's/ /; /g' | logit
echo "\tLast time boot from: \c" | logit
bootinfo -b | logit
echo "Paging usage:" | logit
# From `lsps -s`: the first field of the line containing "MB" is the
# total, the second field of the line containing "%" is the usage.
echo "\tTotal paging space: \c" | logit
lsps -s | grep "MB" | awk '{print $1}' | logit
echo "\tCurrent paging used: \c" | logit
lsps -s | grep "%" | awk '{print $2}' | logit
echo "System dump device:" | logit
sysdumpdev -l | sed 's/^/        /' | logit
echo | logit
# Section 5: per-interface address lines, then the routing table.
press_enter
echo "NETWORK INFORMATION" | logit
# Interface names are the first word, colons removed, of every
# `ifconfig -a` line that contains "flags" (case-insensitive).
for I in $(ifconfig -a | awk 'tolower($0) ~ /flags/ {
	name = $1
	gsub(/:/, "", name)
	print name
}')
do
	echo "$I:" | logit
	# For each "inet ... netmask" line print field 2 as the address and
	# fields 3/4 and 5/6 as label/value pairs.
	ifconfig $I | awk '/inet.*netmask/ {
		print "\taddress:\t"$2"\n\t"$3":\t"$4"\n\t"$5":\t"$6
	}' | logit
done
echo "Route information:" | logit
# Drop empty lines and lines containing "internet" or "table"
# (case-insensitive), then print the first six columns.
netstat -rn | awk 'tolower($0) !~ /internet|table/ && $0 != "" {
	printf "\t%-15s %-15s %-6s %-6s %6s %-6s\n", $1, $2, $3, $4, $5, $6
}' | logit
echo | logit
# Section 6: error-log totals. Sets num_of_diff_errpt and lines, which
# the per-error loop further down reads.
press_enter
echo "ERROR LOG" | logit
# Drop the first errpt line (the column header) so that only real
# entries are counted.
num_of_errpt=`errpt | sed '1d' | wc -l | xargs`
echo "Number of error logs: $num_of_errpt times" | logit
# Distinct errors are identified by fields 6-9 of each errpt line
# (the first four words of the description column); the header line is
# removed by filtering out lines that contain "description".
num_of_diff_errpt=`errpt | grep -v -i "description" | \
	awk '{print $6" "$7" "$8" "$9}' | \
	sort -n | uniq -c | wc -l | xargs`
echo "Number of different error logs: $num_of_diff_errpt times" | \
	logit
# echo "Briefly:" | logit
# errpt | grep -v -i "description" | \
#	awk '{print $6" "$7" "$8" "$9" "$10" "$11" "$12}' | \
#	sed 's/^/;/' | sort -n | uniq -c | sort -rk 1 | \
#	awk -F " ;" '{print "\t"$2":"$1" times"}' | \
#	sed 's/ *: */: /g' | logit
# lines holds the distinct descriptions, each terminated by ";",
# ordered by the `sort -rk 1` applied to the uniq -c output (count
# first); the count column itself is dropped by the last awk.
lines=`errpt | grep -v -i "description" | \
        awk '{print $6" "$7" "$8" "$9}' | \
        sed 's/$/;/' | sort -n | uniq -c | sort -rk 1 | \
	awk '{print $2" "$3" "$4" "$5" "$6" "$7" "$8}' | \
	sed 's/ *; */;/g'`
# Make an errpt timestamp more readable.
# Usage: date_switch MMDDhhmmYY
# Prints "20YY-MM-DD hh:mm" on stdout, slicing $1 by fixed character
# positions: 1-2 month, 3-4 day, 5-6 hour, 7-8 minute, 9-10 year.
function date_switch
{
	# One awk call does all the slicing: $1 is passed quoted (no word
	# splitting or globbing) and no shell variables are set, so nothing
	# leaks into the caller's environment.
	printf '%s\n' "$1" | awk '{
		printf "20%s-%s-%s %s:%s\n", \
			substr($0, 9, 2), substr($0, 1, 2), substr($0, 3, 2), \
			substr($0, 5, 2), substr($0, 7, 2)
	}'
}
# Detailed report for each distinct error description.
# Reads num_of_diff_errpt (loop bound) and lines (";"-terminated
# descriptions) prepared earlier, and uses date_switch and logit.
# For entry number i it prints: the description, newest and oldest
# timestamp, repeat count, error ID, then fields extracted from
# `errpt -aj <ID>` (type, class, resource name/class/type, location,
# failure causes, detail text and FRU).
i=1;
while [ "$i" -le "$num_of_diff_errpt" ]
do
	# Pick the i-th ";"-separated description out of $lines.
	description=`echo $lines | tr ";" "\n" | sed -n "$i"p | \
		sed 's/^ //g'`
	echo "------------ERROR $i------------" | logit
	echo "Description: $description" | logit
	# The first matching errpt line is used as the most recent report
	# and the last matching line as the earliest.
	last_time=`errpt | grep -w "$description" | \
		head -1 | awk '{print $2}'`
	first_time=`errpt | grep -w "$description" | \
		tail -1 | awk '{print $2}'`
	echo "Recently report: `date_switch "$last_time"`" | logit
	echo "First time report: `date_switch "$first_time"`" | logit
	echo "Repeat times: `errpt | grep -w "$description" | \
		wc -l | xargs`" | logit
	error_id=`errpt | grep -w "$description" | head -1 | \
		awk '{print $1}'`
	echo "ID: $error_id" | logit
	error_type=`errpt -aj "$error_id" | \
		grep -i -e "^type" | head -1 | xargs | \
		sed 's/ //g' | awk -F ":" '{print $2}'`
	# Clear the explanation parts first: most types set only ps1/ps2,
	# so without this a ps3 (or ps2) left over from an earlier error
	# would be appended to this error's "Type:" line.
	ps1=""
	ps2=""
	ps3=""
	case "$error_type" in
		'INFO')
		ps1="The error log entry is informational "
		ps2="and was not the result of an error."
		;;
		'UNKN')
		ps1="It is not possible to determine the "
		ps2="severity of the error."
		;;
		'TEMP')
		ps1="Condition that was recovered from after "
		ps2="a number of unsuccessful attempts."
		;;
		'PERM')
		ps1="Condition that could not be recovered from. "
		ps2="Maybe you have a defective hardware "
		ps3="device or software module."
		;;
		'PERF')
		ps1="The performance of the device or component "
		ps2="has degraded to below an acceptable level."
		;;
		'PEND')
		ps1="The loss of availability of "
		ps2="a device or component is imminent."
		;;
		*)
		ps1="Unknown"
		;;
	esac
	echo "Type: $error_type ($ps1$ps2$ps3)" | logit
	error_class=`errpt -aj "$error_id" | \
		grep -i -e "^class" | head -1 | xargs | \
		sed 's/ //g' | awk -F ":" '{print $2}'`
	case "$error_class" in
		'H')
		ps1="Hardware"
		;;
		'S')
		ps1="Software"
		;;
		'O')
		ps1="Information messages."
		;;
		'U')
		ps1="Undetermined (for example, a network)."
		;;
		*)
		ps1="Unknown"
		;;
	esac
	echo "Class: $error_class ($ps1)" | logit
	# Each field below is the text after the first ":" on the matching
	# line of the detailed report, with all spaces removed.
	error_resource_name=`errpt -aj "$error_id" | \
		grep -i -w -e "^resource name" | \
		head -1 | xargs | sed 's/ //g' | \
		awk -F ":" '{print $2}'`
	echo "Resource Name: $error_resource_name" | logit
	error_resource_class=`errpt -aj "$error_id" | \
		grep -i -w -e "^resource class" | \
		head -1 | xargs | sed 's/ //g' | \
		awk -F ":" '{print $2}'`
	echo "Resource Class: $error_resource_class" | logit
	error_resource_type=`errpt -aj "$error_id" | \
		grep -i -w -e "^resource type" | \
		head -1 | xargs | sed 's/ //g' | \
		awk -F ":" '{print $2}'`
	echo "Resource Type: $error_resource_type" | logit
	# Location is taken from the last line that mentions "location".
	error_resource_location=`errpt -aj "$error_id" | \
		grep -i -w -e "location" | \
		tail -1 | xargs | sed 's/ //g' | \
		awk -F ":" '{print $2}'`
	echo "Location: $error_resource_location" | logit
	# grep -p selects whole paragraphs containing " causes"; the heading
	# lines and blank lines are then dropped and the remaining distinct
	# lines are joined, each ending in ";".
	error_causes=`errpt -aj "$error_id" | grep -p -i " causes" | \
		sort -rn | uniq | grep -v -i "causes" | \
		sed -e '/^$/d' -e 's/$/;/g' | xargs`
	echo "Failure causes: $error_causes" | logit
	error_des=`errpt -aj "$error_id" | grep -i -e "^description:" | \
		tail -1 | xargs | awk -F ":" '{print $2}'`
	echo "Detail: $error_des" | logit
	# Fourth word of the last line containing the word "fru:".
	error_fru=`errpt -aj "$error_id" | grep -i -w "fru:" | \
		tail -1 | awk '{print $4}' | xargs`
	echo "Possible FRUs: $error_fru" | logit
	i=$(($i+1));
done

Leave a Reply

Your email address will not be published. Required fields are marked *