`
wsql
  • 浏览: 11800550 次
  • 性别: Icon_minigender_1
  • 来自: 深圳
文章分类
社区版块
存档分类
最新评论

使用shell脚本进行服务器系统监控——进程监控

 
阅读更多

#!/bin/ksh
#
# Global configuration and shared state for the process monitor.

# The event switches are forced to UPPERCASE on assignment, so a
# lower-case 'y' is treated the same as 'Y'.
typeset -u RUN_PRE_EVENT
typeset -u RUN_STARTUP_EVENT
typeset -u RUN_POST_EVENT

RUN_PRE_EVENT='N'      # A 'Y' will execute, anything else will not
RUN_STARTUP_EVENT='Y'  # A 'Y' will execute, anything else will not
RUN_POST_EVENT='Y'     # A 'Y' will execute, anything else will not

# Event log; created if it does not exist yet (or is empty).
LOGFILE="/tmp/proc_status.log"
[[ ! -s "$LOGFILE" ]] && touch "$LOGFILE"

SCRIPT_NAME=$(basename "$0")

# Terminal that receives the on-screen messages. When stdin is not a
# terminal, tty prints "not a tty" and exits non-zero; using that text as
# a redirection target would break every "> $TTY" in the script, so fall
# back to /dev/null in that case.
TTY=$(tty) || TTY=/dev/null

INTERVAL="1"  # Seconds between sampling
JOBS=

####################################################
############# DEFINE FUNCTIONS HERE ################
####################################################

# Print the usage/help text to standard output.
#
# Globals:  SCRIPT_NAME (read) - name shown in the usage lines
# Returns:  status of the last printf (0 on success)
#
# printf is used instead of echo so that the newline and tab escapes are
# rendered the same way regardless of how the shell's echo treats
# backslash sequences.
usage ()
{
    printf '\n\n\t*****USAGE ERROR*****\n'
    printf '\n\nUSAGE: %s seconds process\n' "$SCRIPT_NAME"
    printf '\nWill monitor the specified process for the\n'
    printf 'specified number of seconds.\n'
    printf '\nUSAGE: %s [-s|-S seconds] [-m|-M minutes]\n' "$SCRIPT_NAME"
    printf ' [-h|-H hours] [-d|-D days] [-p|-P process]\n\n'
    printf '\nWill monitor the specified process for number of\n'
    printf 'seconds specified within -s seconds, -m minutes,\n'
    # This line starts with a hyphen, so it is passed as data, not as the
    # format string (which printf could mistake for an option).
    printf '%s\n' '-h hours and -d days. Any combination of command'
    printf 'switches can be used.\n\n'
    printf '\nEXAMPLE: %s 300 dtcalc\n' "$SCRIPT_NAME"
    printf '\n\nEXAMPLE: %s -m 5 -p dtcalc\n' "$SCRIPT_NAME"
    printf '\nBoth examples will monitor the dtcalc process\n'
    printf 'for 5 minutes. Can specify days, hours, minutes\n'
    printf 'and seconds, using -d, -h, -m and -s\n\n\n'
}

####################################################

trap_exit ()
{
# set -x # Uncommant to debug this function
# Log an ending time for process monitoring
echo "INTERRUPT: Program Received an Interrupt...EXITING..." > $TTY
echo "INTERRUPT: Program Received an Interrupt...EXITING..." >> $LOGFILE
TIMESTAMP=$(date +%D@%T) # Get a new time stamp...
echo "MON_STOPPED: Monitoring for $PROCESS ended ==> $TIMESTAMP/n" /
>> $TTY
echo "MON_STOPPED: Monitoring for $PROCESS ended ==> $TIMESTAMP/n" /
>> $LOGFILE
echo "LOGFILE: All Events are Logged ==> $LOGFILE /n" > $TTY

# Kill all functions
JOBS=$(jobs -p)
if [[ ! -z $JOBS && $JOBS != '' && $JOBS != '0' ]]
then
kill $(jobs -p) 2>/dev/null 1>&2
fi
return 2
}

####################################################

# User hook: runs BEFORE the monitored process starts.
#
# Replace the ':' placeholder with the commands to execute. The exit
# status of the last command before the PRE_RC assignment is stored in
# the global PRE_RC and returned to the caller.
pre_event_script ()
{
    :   # No-op placeholder so the function body is never empty;
        # remove it once real commands are added.

    PRE_RC=$?
    return "$PRE_RC"
}

####################################################
# User hook: runs WHEN, or AS, the monitored process starts.
#
# Replace the ':' placeholder with the commands to execute. The exit
# status of the last command before the STARTUP_RC assignment is stored
# in the global STARTUP_RC and returned to the caller.
startup_event_script ()
{
    :   # No-op placeholder so the function body is never empty;
        # remove it once real commands are added.

    STARTUP_RC=$?
    return "$STARTUP_RC"
}

####################################################


# User hook: runs AFTER the monitored process ends.
#
# Replace the ':' placeholder with the commands to execute. The exit
# status of the last command before the POST_RC assignment is stored in
# the global POST_RC and returned to the caller.
post_event_script ()
{
    :   # No-op placeholder so the function body is never empty;
        # remove it once real commands are added.

    POST_RC=$?
    return "$POST_RC"
}

####################################################

# Classify a single string and print the classification on stdout.
#
# Arguments: $1 - the string to classify (exactly one argument required)
# Globals:   C_STRING (written)
# Outputs:   one of
#              ERROR     - called with a number of arguments other than 1
#              POS_INT   - digits only (integer >= 0)
#              NEG_INT   - one leading hyphen followed by digits only
#              LOW_CASE  - lower case letters only
#              UP_CASE   - upper case letters only
#              MIX_CASE  - letters only, both cases present
#              UNKNOWN   - anything else, including the empty string
#
# Only plain (non-extended) shell patterns are used. The case arms are
# order dependent: each arm relies on the earlier arms having already
# filtered out the strings they match.
test_string ()
{
    if (( $# != 1 ))
    then
        echo 'ERROR'
        return
    fi

    C_STRING=$1

    case $C_STRING in
        '' | - | -*[!0-9]*)
            # Empty, a lone hyphen, or a hyphen followed by any non-digit
            echo 'UNKNOWN'
            ;;
        -*)
            # A hyphen followed by digits only
            echo 'NEG_INT'
            ;;
        *[!0-9a-zA-Z]*)
            # Contains something that is neither a digit nor a letter
            echo 'UNKNOWN'
            ;;
        *[0-9]*[a-zA-Z]* | *[a-zA-Z]*[0-9]*)
            # Digits mixed with letters
            echo 'UNKNOWN'
            ;;
        *[0-9]*)
            # Only digits remain possible here
            echo 'POS_INT'
            ;;
        *[a-z]*[A-Z]* | *[A-Z]*[a-z]*)
            echo 'MIX_CASE'
            ;;
        *[a-z]*)
            echo 'LOW_CASE'
            ;;
        *)
            # Non-empty, letters only, no lower case letter
            echo 'UP_CASE'
            ;;
    esac
}

####################################################

# Watch the target process and report start/stop/count changes.
#
# Intended to run as a co-process: before every sample it reads one line
# from standard input into BREAK, and returns as soon as that line is 'Y'.
#
# Globals read:    RUN, RUN_PRE_EVENT, RUN_STARTUP_EVENT, RUN_POST_EVENT,
#                  PROCESS, SCRIPT_NAME, INTERVAL, TTY, LOGFILE
# Globals written: RUN, RC, BREAK, PROC_COUNT, LAST_COUNT, TIMESTAMP,
#                  PID_LIST
# Calls:           pre_event_script, startup_event_script,
#                  post_event_script (each only when its RUN_*_EVENT
#                  switch is 'Y')
# Outputs:         status messages to $TTY and $LOGFILE; the WAITING
#                  message goes to standard output
# Returns:         3 when 'Y' is read from standard input; otherwise the
#                  function loops forever, alternating between the
#                  "running" (RUN=Y) and "waiting" (RUN=N) states.
#
# The process list is obtained by filtering `ps -ef` for $PROCESS while
# excluding the grep itself and this script.
proc_watch ()
{
    # set -x # Uncomment to debug this function

    while : # Loop Forever!!
    do
        case $RUN in
        'Y')
            # The target is running: run the startup event, then sample
            # until no matching process is left.

            if [[ $RUN_STARTUP_EVENT = 'Y' ]]
            then
                echo "STARTUP EVENT: Executing Startup Event Script..." > "$TTY"
                echo "STARTUP EVENT: Executing Startup Event Script..." >> "$LOGFILE"

                startup_event_script # USER DEFINED FUNCTION!!!
                RC=$?
                if (( RC == 0 ))
                then
                    echo "SUCCESS: Startup Event Script Completed RC - ${RC}" \
                        > "$TTY"
                    echo "SUCCESS: Startup Event Script Completed RC - ${RC}" \
                        >> "$LOGFILE"
                else
                    echo "FAILURE: Startup Event Script FAILED RC - ${RC}" \
                        > "$TTY"
                    echo "FAILURE: Startup Event Script FAILED RC - ${RC}" \
                        >> "$LOGFILE"
                fi
            fi

            # Reset the counters. typeset -i (what ksh's `integer` is an
            # alias for) also strips the padding wc -l may emit.
            typeset -i PROC_COUNT='-1'
            typeset -i LAST_COUNT='-1'

            # Loop until the process(es) end(s)

            until (( PROC_COUNT == 0 ))
            do
                # BREAK arrives from the parent through the co-process
                # pipe; 'Y' means a program interrupt or timeout took
                # place, so leave both the loop and the function.

                read BREAK
                if [[ $BREAK = 'Y' ]]
                then
                    return 3
                fi

                PROC_COUNT=$(ps -ef | grep -v "grep $PROCESS" \
                    | grep -v "$SCRIPT_NAME" \
                    | grep "$PROCESS" | wc -l) >/dev/null 2>&1

                if (( LAST_COUNT > 0 && LAST_COUNT != PROC_COUNT ))
                then
                    # The Process Count has Changed...
                    TIMESTAMP=$(date +%D@%T)

                    # Get a list of the PID of all of the processes
                    PID_LIST=$(ps -ef | grep -v "grep $PROCESS" \
                        | grep -v "$SCRIPT_NAME" \
                        | grep "$PROCESS" | awk '{print $2}')

                    echo "PROCESS COUNT: $PROC_COUNT $PROCESS Processes Running ==> $TIMESTAMP" \
                        >> "$LOGFILE" &
                    echo "PROCESS COUNT: $PROC_COUNT $PROCESS Processes Running ==> $TIMESTAMP" \
                        > "$TTY"

                    # $PID_LIST is intentionally unquoted so the PIDs are
                    # printed on one line separated by single spaces.
                    echo ACTIVE PIDS: $PID_LIST >> "$LOGFILE" &
                    echo ACTIVE PIDS: $PID_LIST > "$TTY"
                fi
                LAST_COUNT=$PROC_COUNT
                sleep $INTERVAL # Needed to reduce CPU load!
            done

            RUN='N' # Turn the RUN Flag Off

            TIMESTAMP=$(date +%D@%T)
            echo "ENDING PROCESS: $PROCESS END time ==> $TIMESTAMP" \
                >> "$LOGFILE" &
            echo "ENDING PROCESS: $PROCESS END time ==> $TIMESTAMP" \
                > "$TTY"

            # This will run the post_event_script, which is a function

            if [[ $RUN_POST_EVENT = 'Y' ]]
            then
                echo "POST EVENT: Executing Post Event Script..." \
                    > "$TTY"
                echo "POST EVENT: Executing Post Event Script..." \
                    >> "$LOGFILE"

                post_event_script # USER DEFINED FUNCTION!!!
                RC=$?
                if (( RC == 0 ))
                then
                    echo "SUCCESS: Post Event Script Completed RC - ${RC}" \
                        > "$TTY"
                    echo "SUCCESS: Post Event Script Completed RC - ${RC}" \
                        >> "$LOGFILE"
                else
                    echo "FAILURE: Post Event Script FAILED RC - ${RC}" \
                        > "$TTY"
                    echo "FAILURE: Post Event Script FAILED RC - ${RC}" \
                        >> "$LOGFILE"
                fi
            fi
            ;;

        'N')
            # The target is not running: run the pre event, then sample
            # until at least one matching process shows up.

            if [[ $RUN_PRE_EVENT = 'Y' ]]
            then
                echo "PRE EVENT: Executing Pre Event Script..." > "$TTY"
                echo "PRE EVENT: Executing Pre Event Script..." >> "$LOGFILE"

                pre_event_script # USER DEFINED FUNCTION!!!
                RC=$?
                if (( RC == 0 ))
                then
                    echo "SUCCESS: Pre Event Script Completed RC - ${RC}" \
                        > "$TTY"
                    echo "SUCCESS: Pre Event Script Completed RC - ${RC}" \
                        >> "$LOGFILE"
                else
                    echo "FAILURE: Pre Event Script FAILED RC - ${RC}" \
                        > "$TTY"
                    echo "FAILURE: Pre Event Script FAILED RC - ${RC}" \
                        >> "$LOGFILE"
                fi
            fi

            echo "WAITING: Waiting for $PROCESS to startup...Monitoring..."

            typeset -i PROC_COUNT='-1' # Initialize to a fake value

            # Loop until at least one process starts

            until (( PROC_COUNT > 0 ))
            do
                # Same BREAK protocol as in the 'Y' state above.

                read BREAK
                if [[ $BREAK = 'Y' ]]
                then
                    return 3
                fi

                PROC_COUNT=$(ps -ef | grep -v "grep $PROCESS" \
                    | grep -v "$SCRIPT_NAME" | grep "$PROCESS" | wc -l) \
                    >/dev/null 2>&1

                sleep $INTERVAL # Needed to reduce CPU load!
            done

            RUN='Y' # Turn the RUN Flag On

            TIMESTAMP=$(date +%D@%T)

            PID_LIST=$(ps -ef | grep -v "grep $PROCESS" \
                | grep -v "$SCRIPT_NAME" \
                | grep "$PROCESS" | awk '{print $2}')

            if (( PROC_COUNT == 1 ))
            then
                echo "START PROCESS: $PROCESS START time ==> $TIMESTAMP" \
                    >> "$LOGFILE" &
                echo ACTIVE PIDS: $PID_LIST >> "$LOGFILE" &
                echo "START PROCESS: $PROCESS START time ==> $TIMESTAMP" \
                    > "$TTY"
                echo ACTIVE PIDS: $PID_LIST > "$TTY"
            elif (( PROC_COUNT > 1 ))
            then
                echo "START PROCESS: $PROC_COUNT $PROCESS Processes Started: START time ==> $TIMESTAMP" \
                    >> "$LOGFILE" &
                echo ACTIVE PIDS: $PID_LIST >> "$LOGFILE" &
                echo "START PROCESS: $PROC_COUNT $PROCESS Processes Started: START time ==> $TIMESTAMP" \
                    > "$TTY"
                echo ACTIVE PIDS: $PID_LIST > "$TTY"
            fi
            ;;
        esac
    done
}

####################################################
############## START OF MAIN #######################
####################################################

### SET A TRAP ####

# On signals 1, 2, 3 and 15: set BREAK to 'Y', pass it to the co-process
# (print -p writes to the co-process pipe), run trap_exit and exit.
# Errors are discarded because the co-process may not exist yet when the
# signal arrives.
trap 'BREAK=Y; print -p "$BREAK" 2>/dev/null; trap_exit 2>/dev/null; exit 0' 1 2 3 15

BREAK='N' # The BREAK variable is used in the co-process proc_watch
PROCESS=
PROCESS_ID=$$
typeset -i TOTAL_SECONDS=0 # typeset -i is what ksh's `integer` aliases

# Check command line arguments: between 2 and 10 are accepted

if (( $# > 10 || $# < 2 ))
then
    usage
    exit 1
fi

# Initialize every time component to zero so the totals below are always
# computed from defined values, whichever calling form is used.

HOURS=0
MINUTES=0
SECS=0
DAYS=0

# Short form: exactly two arguments, neither starting with a hyphen,
# meaning "seconds process"

if [[ ($# -eq 2) && ($1 != -*) && ($2 != -*) ]]
then
    if [[ $(test_string "$1") = 'POS_INT' ]]
    then
        TOTAL_SECONDS=$1
        PROCESS=$2
    else
        usage
        exit 1
    fi
else
    # getopts does not care what arguments it gets, so do a quick sanity
    # check first: the argument count is already known to be between 2
    # and 10, and the first argument must start with a hyphen.

    if [[ $1 != -* ]]
    then
        usage
        exit 1
    fi

    # Use getopts to parse the command line arguments. Each $OPTARG for
    # hours, minutes, seconds and days must be a non-negative integer
    # according to test_string.

    while getopts ":h:H:m:M:s:S:d:D:P:p:" TIMED 2>/dev/null
    do
        case $TIMED in
        h|H)
            if [[ $(test_string "$OPTARG") != 'POS_INT' ]]
            then
                usage
                exit 1
            fi
            (( HOURS = $OPTARG * 3600 )) # 3600 seconds per hour
            ;;
        m|M)
            if [[ $(test_string "$OPTARG") != 'POS_INT' ]]
            then
                usage
                exit 1
            fi
            (( MINUTES = $OPTARG * 60 )) # 60 seconds per minute
            ;;
        s|S)
            if [[ $(test_string "$OPTARG") != 'POS_INT' ]]
            then
                usage
                exit 1
            fi
            SECS="$OPTARG" # seconds are seconds
            ;;
        d|D)
            if [[ $(test_string "$OPTARG") != 'POS_INT' ]]
            then
                usage
                exit 1
            fi
            (( DAYS = $OPTARG * 86400 )) # 86400 seconds per day
            ;;
        p|P)
            PROCESS=$OPTARG # process can be anything
            ;;
        \?|:|*)
            # Unknown option (?), missing option argument (:), or
            # anything else: USAGE ERROR
            usage
            exit 1
            ;;
        esac
    done
fi

# We need to make sure that we have a process - sanity check

if [[ -z "$PROCESS" ]]
then
    usage
    exit 1
fi

# Check to see that TOTAL_SECONDS was not previously set

if (( TOTAL_SECONDS == 0 ))
then
    # Add everything together if anything is > 0

    if [[ $SECS -gt 0 || $MINUTES -gt 0 || $HOURS -gt 0 \
        || $DAYS -gt 0 ]]
    then
        (( TOTAL_SECONDS = SECS + MINUTES + HOURS + DAYS ))
    fi
    SECONDS_LEFT=$TOTAL_SECONDS # Set the countdown variable...

fi

# Last Sanity Check!

if (( TOTAL_SECONDS <= 0 )) || [[ -z "$PROCESS" ]]
then
    # Either There are No Seconds to Count or the
    # $PROCESS Variable is Null...USAGE ERROR...

    usage
    exit 1
fi

# Show the matching processes that are running right now. The list is
# `ps -ef` filtered for $PROCESS, excluding the grep itself and this
# script.

printf '\nCurrently running %s processes:\n\n' "$PROCESS" > "$TTY"
ps -ef | grep -v "grep $PROCESS" | grep -v "$SCRIPT_NAME" \
    | grep "$PROCESS" > "$TTY"

PROC_RC=$? # Status of the final grep: 0 means at least one match

echo > "$TTY" # Send a blank line to the screen

(( PROC_RC != 0 )) && printf '\nThere are no %s processes running\n\n' "$PROCESS"

if (( PROC_RC == 0 )) # The Target Process(es) is/are running...
then
    RUN='Y' # Set the RUN flag to true, or yes.

    # Integer type strips out the "padding" for display
    typeset -i PROC_COUNT

    PROC_COUNT=$(ps -ef | grep -v "grep $PROCESS" \
        | grep -v "$SCRIPT_NAME" | grep "$PROCESS" | wc -l) >/dev/null 2>&1
    if (( PROC_COUNT == 1 ))
    then
        printf 'The %s process is currently running...Monitoring...\n\n' \
            "$PROCESS"
    elif (( PROC_COUNT > 1 ))
    then
        printf 'There are %s %s processes currently running...Monitoring...\n\n' \
            "$PROC_COUNT" "$PROCESS"
    fi
else
    echo "The $PROCESS process is not currently running...monitoring..."
    RUN='N' # Set the RUN flag to false, or no.
fi

TIMESTAMP=$(date +%D@%T) # Time that this script started monitoring

# Get a list of the currently active process IDs

PID_LIST=$(ps -ef | grep -v "grep $PROCESS" \
    | grep -v "$SCRIPT_NAME" \
    | grep "$PROCESS" | awk '{print $2}')

echo "MON_STARTED: Monitoring for $PROCESS began ==> $TIMESTAMP" \
    | tee -a "$LOGFILE"
# $PID_LIST is intentionally unquoted so the PIDs are printed on one line
echo ACTIVE PIDS: $PID_LIST | tee -a "$LOGFILE"


proc_watch |& # Create a Background Co-Process!!
WATCH_PID=$! # Get the process ID of the last background job!

# Start the Count Down! One BREAK value is written to the co-process per
# second; proc_watch reads one value before each of its samples.

integer SECONDS_LEFT=$TOTAL_SECONDS

while (( SECONDS_LEFT > 0 ))
do
    print -p $BREAK 2>/dev/null

    (( SECONDS_LEFT = SECONDS_LEFT - 1 ))
    sleep 1 # Seconds Countdown
done

# Finished - Normal Timeout Exit...

TIMESTAMP=$(date +%D@%T) # Get a new time stamp...
printf 'MON_STOPPED: Monitoring for %s ended ==> %s\n\n' \
    "$PROCESS" "$TIMESTAMP" | tee -a "$LOGFILE"

printf 'LOGFILE: All Events are Logged ==> %s \n\n' "$LOGFILE"

# Tell the proc_watch function to break out of the loop and die

BREAK='Y'
print -p $BREAK 2>/dev/null

# Also signal the co-process directly, in case it has not read the BREAK
# value yet.
kill $WATCH_PID 2>/dev/null

exit 0

# End of Script

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics