Initial
This commit is contained in:
115
roles/server/files/nrpe/check_hddtemp
Normal file
115
roles/server/files/nrpe/check_hddtemp
Normal file
@@ -0,0 +1,115 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# USAGE:
|
||||
# ./check_hddtemp.sh <device> <warn> <crit>
|
||||
# Nagios script to get the temperature of a HDD from hddtemp
|
||||
#
|
||||
# You may have to let nagios run this script as root
|
||||
# This is how the sudoers file looks in my debian system:
|
||||
# nagios ALL=(root) NOPASSWD:/usr/lib/nagios/plugins/check_hddtemp
|
||||
#
|
||||
# Version 1.0
|
||||
# Another note: install the hddtemp package as well
|
||||
|
||||
|
||||
# Nagios plugin exit statuses, listed in ascending order of severity.
OK=0 WARNING=1 CRITICAL=2 UNKNOWN=3
|
||||
|
||||
# Print the one-line invocation synopsis on stdout.
usage() {
  printf '%s\n' "Usage: ./check_hddtemp <device> <warn> <crit>"
}
|
||||
|
||||
#######################################
# Abort unless the script runs with root privileges.
# The numeric effective UID is compared instead of the account name, so a
# UID-0 account that is not literally called "root" is accepted and an empty
# or multi-word name can no longer break the test expression.
# Globals:  UNKNOWN (read; falls back to 3 when unset)
# Outputs:  an UNKNOWN status line on stdout when not running as root
# Exits:    UNKNOWN when the effective UID is not 0
#######################################
check_root() {
  if [ "$(id -u)" != "0" ]; then
    echo "UNKNOWN: please make sure script is running as root"
    exit "${UNKNOWN:-3}"
  fi
}
|
||||
#######################################
# Verify that exactly three arguments (<device> <warn> <crit>) were passed.
# A wrong argument count is a plugin misuse, so it is reported as UNKNOWN;
# exiting with OK would make Nagios display the broken check as healthy.
# Globals:    UNKNOWN (read; falls back to 3 when unset)
# Arguments:  the script's own positional parameters
# Outputs:    the usage line on stdout when the count is wrong
# Exits:      UNKNOWN when the argument count is not 3
#######################################
check_arg() {
  if [ "$#" -ne 3 ]; then
    usage
    exit "${UNKNOWN:-3}"
  fi
}
|
||||
#######################################
# Verify that $DEVICE names a block special file.
# The expansion is quoted: unquoted, an empty value collapses the test to
# `[ ! -b ]` (which is false, so the check passes) and a value containing
# spaces makes `[` fail with a syntax error that is silently ignored.
# Globals:  DEVICE (read), UNKNOWN (read; falls back to 3 when unset)
# Outputs:  an UNKNOWN status line on stdout when the check fails
# Exits:    UNKNOWN when $DEVICE is not a block device
#######################################
check_device() {
  if [ ! -b "$DEVICE" ]; then
    echo "UNKNOWN: $DEVICE is not a block special file"
    exit "${UNKNOWN:-3}"
  fi
}
|
||||
#######################################
# Validate the thresholds: both must be integers and CRIT must be strictly
# larger than WARN.
# The explicit integer check matters because `[ x -ge y ]` with a
# non-numeric operand only prints an error and evaluates as false, which
# previously let invalid thresholds pass unnoticed.
# Globals:  WARN, CRIT (read), UNKNOWN (read; falls back to 3 when unset)
# Outputs:  an UNKNOWN status line on stdout when validation fails
# Exits:    UNKNOWN on non-integer thresholds or when WARN >= CRIT
#######################################
check_warn_vs_crit() {
  local threshold
  for threshold in "$WARN" "$CRIT"; do
    # Strip one optional leading minus sign, then require digits only.
    case "${threshold#-}" in
      '' | *[!0-9]*)
        echo "UNKNOWN: WARN and CRIT values must be integers"
        exit "${UNKNOWN:-3}"
        ;;
    esac
  done

  if [ "$WARN" -ge "$CRIT" ]; then
    echo "UNKNOWN: WARN value may not be greater than or equal the CRIT value"
    exit "${UNKNOWN:-3}"
  fi
}
|
||||
|
||||
#######################################
# Run every pre-flight check in order: privileges, argument count, device
# type, threshold sanity. Each check exits the script itself on failure.
# Arguments:  the script's positional parameters, forwarded verbatim
#######################################
init() {
  check_root
  # "$@" keeps each argument intact; an unquoted $* would re-split arguments
  # containing whitespace and drop empty ones, falsifying the argument count.
  check_arg "$@"
  check_device
  check_warn_vs_crit
}
|
||||
|
||||
#######################################
# Query hddtemp for the drive temperature and store it in $HEAT.
# The binary is invoked as `hddtemp -n <device>` (options first, as in the
# documented synopsis). The result must consist of digits only: the former
# pattern `[0-9]*` also accepted values such as "35abc" or multi-line
# output, which later broke the numeric comparisons.
# Globals:  HDDTEMP, DEVICE (read); HEAT (written);
#           UNKNOWN (read; falls back to 3 when unset)
# Outputs:  an UNKNOWN status line on stdout on failure
# Exits:    UNKNOWN when hddtemp is not executable or yields no number
#######################################
get_hddtemp() {
  # Quoted so that an empty $HDDTEMP fails the test instead of passing it.
  if [ ! -x "$HDDTEMP" ]; then
    echo "UNKNOWN: cannot execute $HDDTEMP"
    exit "${UNKNOWN:-3}"
  fi

  HEAT=$("$HDDTEMP" -n "$DEVICE")

  case "$HEAT" in
    '' | *[!0-9]*)
      echo "UNKNOWN: Could not get temperature from: $DEVICE"
      exit "${UNKNOWN:-3}"
      ;;
  esac
}
|
||||
#######################################
# Compare $HEAT against the thresholds, print the status line and exit with
# the matching Nagios status:
#   HEAT <  WARN          -> OK
#   WARN <= HEAT <  CRIT  -> WARNING
#   HEAT >= CRIT          -> CRITICAL
# Non-integer input is rejected up front with an explicit UNKNOWN message;
# before, it was only caught indirectly after all three numeric tests had
# failed with shell errors.
# Globals:  HEAT, WARN, CRIT, DEVICE (read);
#           OK, WARNING, CRITICAL, UNKNOWN (read; fall back to 0/1/2/3)
# Outputs:  exactly one status line on stdout
# Exits:    always, with the status described above
#######################################
check_heat() {
  local value
  for value in "$HEAT" "$WARN" "$CRIT"; do
    # Strip one optional leading minus sign, then require digits only.
    case "${value#-}" in
      '' | *[!0-9]*)
        echo "UNKNOWN: temperature and thresholds must be integers (temp='$HEAT' warn='$WARN' crit='$CRIT')"
        exit "${UNKNOWN:-3}"
        ;;
    esac
  done

  if [ "$HEAT" -lt "$WARN" ]; then
    echo "OK: Temperature is below warn threshold ($DEVICE is $HEAT)"
    exit "${OK:-0}"
  elif [ "$HEAT" -lt "$CRIT" ]; then
    echo "WARNING: Temperature is above warn threshold ($DEVICE is $HEAT)"
    exit "${WARNING:-1}"
  else
    echo "CRITICAL: Temperature is above crit threshold ($DEVICE is $HEAT)"
    exit "${CRITICAL:-2}"
  fi
}
|
||||
|
||||
# -- Main -- #
|
||||
|
||||
# Absolute path of the hddtemp binary used to read the drive temperature.
HDDTEMP=/usr/sbin/hddtemp

# Positional arguments: <device> <warn> <crit>.
DEVICE=$1
WARN=$2
CRIT=$3

# "$@" forwards the arguments unchanged so the argument-count check sees
# exactly what the caller passed (an unquoted $* would re-split them).
init "$@"
get_hddtemp
check_heat
|
||||
36
roles/server/files/nrpe/check_md_raid
Normal file
36
roles/server/files/nrpe/check_md_raid
Normal file
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Created by Sebastian Grewe, Jammicron Technology
|
||||
#
|
||||
|
||||
# File describing the state of the kernel's md (software RAID) arrays.
# MDSTAT may be overridden from the environment, e.g. for testing.
MDSTAT=${MDSTAT:-/proc/mdstat}

# Without a readable mdstat nothing can be checked. Report UNKNOWN instead
# of a CRITICAL line with empty counts.
if [ ! -r "$MDSTAT" ]; then
  echo "UNKNOWN - cannot read $MDSTAT"
  exit 3
fi

# Number of raid arrays (lines starting with "md").
RAID_DEVICES=$(grep -c '^md' "$MDSTAT")

# Number of degraded arrays: a member-status field such as [U_] or [_U]
# that contains at least one "_" (missing member) and only U/_ characters.
RAID_STATUS=$(grep -c '\[[U_]*_[U_]*\]' "$MDSTAT")

# Progress (4th field, e.g. "12.6%") of every array that is currently
# recovering or resyncing, joined with single spaces. Lines without a 4th
# field (e.g. "resync=DELAYED") are skipped.
RAID_RECOVER=$(awk '/recovery/ && $4 != "" { printf "%s%s", sep, $4; sep = " " }' "$MDSTAT")
RAID_RESYNC=$(awk '/resync/ && $4 != "" { printf "%s%s", sep, $4; sep = " " }' "$MDSTAT")

# Decide the status. A running recovery or resync is reported as WARNING
# and takes precedence over the degraded count.
if [[ -n "$RAID_RECOVER" ]]; then
  STATUS="WARNING - Checked $RAID_DEVICES arrays, recovering : $RAID_RECOVER"
  EXIT=1
elif [[ -n "$RAID_RESYNC" ]]; then
  STATUS="WARNING - Checked $RAID_DEVICES arrays, resync : $RAID_RESYNC"
  EXIT=1
elif [[ "$RAID_STATUS" == "0" ]]; then
  # No degraded array found.
  STATUS="OK - Checked $RAID_DEVICES arrays."
  EXIT=0
else
  # Everything else is treated as critical, better safe than sorry.
  STATUS="CRITICAL - Checked $RAID_DEVICES arrays, $RAID_STATUS have FAILED"
  EXIT=2
fi

# Print the status line and quit with the matching Nagios exit code.
echo "$STATUS"
exit "$EXIT"
|
||||
116
roles/server/files/nrpe/check_mem
Normal file
116
roles/server/files/nrpe/check_mem
Normal file
@@ -0,0 +1,116 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Plugin to check system memory
|
||||
# by hugme (nagios@hugme.org)
|
||||
# You can find my checks here: https://github.com/hugme/Nag_checks
|
||||
# Nagios script to check memory usage on linux server
|
||||
# version 1.2.0
|
||||
#
|
||||
##########################################################
|
||||
|
||||
MEMINFO="/proc/meminfo"
|
||||
|
||||
##########################################################
|
||||
# We call them functions because they're fun
|
||||
##########################################################
|
||||
|
||||
# Write the plugin banner, usage synopsis and option summary to stdout.
print_help() {
  printf '%s\n' \
    'Linux Memory Plugin for Nagios' \
    'Copyright (c) hugme (nagios@hugme.org)' \
    'Version: 1.2.0' \
    'Last Modified: 10-07-2014' \
    'License: This software can be used for free unless I meet you, then you owe me lunch.' \
    '' \
    'Usage: check_linux_memory -w [warning %] -c [critical %]' \
    '' \
    'Options:' \
    '-w [0-99] = Your warning %. 20 means 20% of your memory can remain before a warning alarm. Do not use the % sign.' \
    '-c [0-99] = Your critical %. 10 means 10% of your memory can remain before a critical alarm. Do not use the % sign.' \
    '-d [K,M,G,T] = divider K=kilobytes, M=megabytes, G=gigabytes, T=terabytes' \
    '-f = Include cached memory as free memory when calculating your percentage free' \
    ''
}
|
||||
|
||||
# Report an invalid option or value, show the help text and exit with the
# Nagios UNKNOWN status (3).
#   $1 - description of what was invalid; printed after the word "Invalid"
# printf is used because the treatment of "\n" by echo differs between
# shells (some expand it, others print the two characters literally).
invalid_type() {
  printf '\nInvalid %s\n\n' "$1"
  print_help
  exit 3
}
|
||||
|
||||
##############################################
|
||||
## Suck in the user input
|
||||
##############################################
|
||||
|
||||
|
||||
# Parse the command-line options into WARN, CRIT, DIV and FC.
# Parsing stops at the first empty argument and unknown options are
# ignored, as before. An option that needs a value but is the last
# argument is now reported through invalid_type: previously it caused an
# extra `shift` with nothing left to shift, which some shells treat as a
# fatal error and others ignore silently.
parse_args() {
  while test -n "$1"; do
    case "$1" in
      --help | -h)
        print_help
        exit 0
        ;;
      -w | -c | -d)
        [ "$#" -ge 2 ] || invalid_type "Option: $1 requires a value"
        case "$1" in
          -w) WARN="$2" ;;
          -c) CRIT="$2" ;;
          -d) DIV="$2" ;;
        esac
        shift
        ;;
      -f)
        FC=1
        ;;
    esac
    shift
  done
}

parse_args "$@"
|
||||
|
||||
##############################################
|
||||
## Set the defaults if needed
|
||||
##############################################
|
||||
|
||||
# Fall back to the defaults for every setting that is unset or empty:
# warn at 20% free, critical at 10% free, report in megabytes, and do not
# count cache as free memory.
: "${WARN:=20}" "${CRIT:=10}" "${DIV:=M}" "${FC:=0}"
|
||||
|
||||
##############################################
|
||||
## Check user input
|
||||
##############################################
|
||||
|
||||
# Validate WARN and CRIT: digits only, both below 100, and CRIT not larger
# than WARN (both are "percent free" values, so the warning level is the
# higher one). Every violation is reported through invalid_type.
# The digit test uses a shell pattern instead of `tr -d [:digit:]`, whose
# unquoted bracket expression could be expanded by the shell into a
# matching file name in the current directory before tr ever saw it.
check_thresholds() {
  case "$WARN" in
    '' | *[!0-9]*) invalid_type "Warning: Warning value can only contain numbers" ;;
  esac
  case "$CRIT" in
    '' | *[!0-9]*) invalid_type "Critical: Critical value can only contain numbers" ;;
  esac
  [ "$WARN" -ge 100 ] && invalid_type "Warning: Warning must be smaller than 100%"
  [ "$CRIT" -ge 100 ] && invalid_type "Critical: Critical must be smaller than 100%"
  [ "$CRIT" -gt "$WARN" ] && invalid_type "Critical: Your Warning must be Higher than your Critical"
  return 0
}

check_thresholds
|
||||
|
||||
# Translate the unit letter into the divisor applied to the kB values read
# from the memory info file: K keeps kB, M divides by 1024, G by 1024^2,
# T by 1024^3. Any other letter is rejected with an explicit message
# (previously the error text was just "Invalid " followed by nothing).
case "$DIV" in
  k | K) DIVNUM=1 ;;
  m | M) DIVNUM=1024 ;;
  g | G) DIVNUM=1048576 ;;
  t | T) DIVNUM=1073741824 ;;
  *) invalid_type "Divider: Divider must be one of K, M, G or T" ;;
esac
|
||||
|
||||
# The check cannot run without the memory info file. That is a plugin
# failure, so exit with the Nagios UNKNOWN status (3); the previous exit
# status 1 would have been displayed as a memory WARNING.
if [ ! -f "${MEMINFO:-/proc/meminfo}" ]; then
  echo "Your Memory info file seems to be missing"
  exit 3
fi
|
||||
|
||||
##############################################
|
||||
## Do the work
|
||||
## Pull the memory file into awk
|
||||
## grab the lines we need
|
||||
## Print the information
|
||||
##############################################
|
||||
|
||||
# Compute the percentage of free memory with awk and build the report.
# awk prints "<exit code> <status line>"; the shell splits the two below.
#  - The file named by $MEMINFO is read (falling back to /proc/meminfo),
#    so the path that was checked for existence is the one that is parsed.
#  - With fc=1, cached and buffer memory are counted as free.
#  - A missing or zero MemTotal yields UNKNOWN instead of a division by zero.
RESULT=$(awk -v warn="$WARN" -v crit="$CRIT" -v div="$DIV" -v divnum="$DIVNUM" -v fc="$FC" '
  /^MemTotal:/ { tot = $2 }
  /^MemFree:/  { free = $2 }
  /^Buffers:/  { buff = $2 }
  /^Cached:/   { cache = $2 }
  /^Active:/   { active = $2 }
  /^Inactive:/ { inactive = $2 }
  END {
    if (tot <= 0) {
      print "3 MEMORY UNKNOWN - could not determine total memory"
      exit
    }
    if (fc == 1) { free = free + cache + buff }
    freeperct = free / tot * 100
    if (freeperct > warn)      { result = "OK";       xit = "0" }
    else if (freeperct > crit) { result = "WARNING";  xit = "1" }
    else                       { result = "CRITICAL"; xit = "2" }
    print xit" MEMORY "result" - "freeperct"% Free - Total:"tot/divnum div" Active:"active/divnum div" Inactive:"inactive/divnum div" Buffers:"buff/divnum div" Cached:"cache/divnum div" |Free="freeperct";"warn";"crit";0 Active="active";0;0;0 Inactive="inactive";0;0;0 Buffers="buff";0;0;0 Cached="cache";0;0;0"
  }' "${MEMINFO:-/proc/meminfo}")

# awk itself failed (e.g. unreadable file): there is nothing to report.
if [ -z "$RESULT" ]; then
  echo "MEMORY UNKNOWN - could not parse ${MEMINFO:-/proc/meminfo}"
  exit 3
fi

# Everything after the first space is the status line, the first word is
# the exit code.
printf '%s\n' "${RESULT#* }"
exit "${RESULT%% *}"
|
||||
48
roles/server/files/nrpe/check_nfs
Normal file
48
roles/server/files/nrpe/check_nfs
Normal file
@@ -0,0 +1,48 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
## FILE: check_nfs.sh
|
||||
##
|
||||
## DESCRIPTION: This is a nagios compatible script that checks NFS mounts against what
|
||||
## should be mounted in /etc/fstab and if there is a stale mount.
|
||||
##
|
||||
## AUTHOR: Dennis Ruzeski (denniruz@gmail.com)
|
||||
##
|
||||
## Creation Date: 1/23/2013
|
||||
##
|
||||
## Last Modified: 1/25/2013
|
||||
##
|
||||
## VERSION: 1.0
|
||||
##
|
||||
## USAGE: ./check_nfs.sh
|
||||
## This version takes no arguments
|
||||
##
|
||||
## TODO: Improve the check for stale mounts, add command line arguments to provide the ability to
|
||||
## check mount status, stale mounts, and latency separately.
|
||||
#
|
||||
# Collect the mount point (2nd field) of every non-comment /etc/fstab entry
# whose filesystem type (3rd field) is nfs or nfs4. Matching on the type
# field avoids picking up unrelated lines that merely contain "nfs"
# somewhere, e.g. in a device path or mount point name.
declare -a nfs_mounts=()
while IFS= read -r mount_point; do
  nfs_mounts+=("$mount_point")
done < <(awk '!/^[[:space:]]*#/ && $3 ~ /^nfs4?$/ { print $2 }' /etc/fstab)

declare -a MNT_STATUS=()   # per-mount "OK: ..." messages
declare -a SFH_STATUS=()   # per-mount "ERROR: ..." messages

for mount_point in "${nfs_mounts[@]}"; do
  # Ask for the filesystem type with a 3 second limit: a stale mount can
  # block stat indefinitely, so the answer is read with a timeout.
  fs_type=
  IFS= read -r -t 3 fs_type < <(stat -f -c '%T' -- "$mount_point" 2>/dev/null)
  if (( $? > 128 )); then
    # read reports a timeout with an exit status greater than 128.
    SFH_STATUS+=("ERROR: ${mount_point} might be stale.")
  elif [[ "$fs_type" != nfs ]]; then
    MNT_STATUS_ERR=1
    SFH_STATUS+=("ERROR: ${mount_point} is not properly mounted.")
  elif ! read -r -t 3 _ < <(stat -t -- "$mount_point" 2>/dev/null); then
    # No stat output within 3 seconds, or stat failed.
    SFH_STATUS+=("ERROR: ${mount_point} might be stale.")
  else
    MNT_STATUS+=("OK: ${mount_point} is ok.")
  fi
done

# One summary line first, followed by one detail line per failing mount.
if (( ${#SFH_STATUS[@]} > 0 )); then
  RETVAL=2
  echo "CRITICAL - NFS mounts may be stale or unavailable"
  printf '%s\n' "${SFH_STATUS[@]}"
else
  RETVAL=0
  echo "OK - NFS mounts are not reporting any errors"
fi

exit "${RETVAL}"
|
||||
|
||||
7
roles/server/files/nrpe/logrotate
Normal file
7
roles/server/files/nrpe/logrotate
Normal file
@@ -0,0 +1,7 @@
|
||||
# Rotation policy for the NRPE log file.
/var/log/nrpe.log {
    # Rotate once per day and keep three rotated logs.
    daily
    rotate 3
    # Compress rotated logs.
    compress
    # A missing log file is not an error.
    missingok
    # Do not rotate an empty log.
    notifempty
}
|
||||
Reference in New Issue
Block a user