|
@@ -0,0 +1,863 @@
+#!/bin/bash
+#########################################################################
+# Script: check_equallogic #
+# Author: Claudio Kuenzler www.claudiokuenzler.com #
+# Purpose: Monitor Dell Equallogic with Nagios #
+# Description: Checks Dell Equallogic via SNMP. #
+# Can be used to query status and performance info #
+# Tested on: Check the following web page for compatibility matrix: #
+# claudiokuenzler.com/nagios-plugins/check_equallogic.php #
+# License: GPLv2 #
+# History: #
+# 20091109 Started Script programming checks: #
+# health, disk, raid, uptime, ps, info #
+# 20091112 Added ethif, conn #
+# 20091118 Added diskusage #
+# 20091119 Bugfix on Outputs (removed Pipes) #
+# 20091121 Public Release #
+# 20091204 Bugfix (removed IP addresses) #
+# 20091206 Bugfix (removed SNMP community names) #
+# 20091222 Fixed raid, ps, health and diskusage checks when multiple #
+# member devices exist. Mathias Sundman <mathias@openvpn.se> #
+# 20100112 Successful tests on PS5000XV - thanks to Scott Sawin #
+# 20100209 Compatibility matrix now on website (see Tested on above) #
+# 20100416 Beta Testing for rewritten ethif check #
+# 20100420 Corrected ethif output, finished new ethif check #
+# 20100526 Using proper order of snmpwalk command, thanks Roland Ripoll #
+# 20100531 Added perfdata for diskusage and connections, thx to Benoit #
+# 20100630 Corrected perfdata output (+added thresholds), thx Christian #
+# 20100809 Fixed conn type -> total of all eql members of group #
+# 20101026 /bin/sh back to /bin/bash (Ubuntu problems with /bin/dash) #
+# 20101026 Bugfix snmpwalk (vqe instead of vq), thanks Fabio Panigatti #
+# 20101102 Added fan #
+# 20101202 Added volumes (checks utilization of all volumes) #
+# 20110315 Bugfix in fan warning, diskusage output changed #
+# 20110323 Mysteriously disappeared temp check type added again #
+# 20110328 Beta Testing for etherrors check by Martin Conzelmann #
+# 20110404 Added thresholds to etherrors check by Martin Conzelmann #
+# 20110404 Bugfix in volumes check #
+# 20110407 New temp check - more information in output. M. Conzelmann #
+# 20110725 New disk check by Amir Shakoor (Bugfixes by Claudio Kuenzler)#
+# 20110804 New poolusage check by Chris Funderburg and Markus Becker #
+# 20110808 New vol check - checks single volume for utilization #
+# 20111013 Bugfix in vol check for similar vol names by Matt White #
+# 20111031 Bugfix in ethif check for int response by Francois Borlet #
+# 20120104 Bugfix in temp check if only one controller available #
+# 20120104 Bugfix in info check if only one controller available #
+# 20120123 Bugfix in volumes check #
+# 20120125 Added perfdata in volumes check, volume names now w/o quotes #
+# 20120319 Added poolconn check by Erwin Bleeker #
+# 20120330 Rewrite of poolusage (orig is now: memberusage) by E. Bleeker#
+# 20120405 Bugfix in poolusage to show result without thresholds #
+# 20120430 Added snapshots type by Roland Penner #
+# 20120503 Rewrite of info check (Fix for multiple members, FW Check) #
+# 20120815 Added percentage of raid rebuild when raid reconstructing #
+# 20120821 Minor bugfix in vol/volumes check (added space in perfdata) #
+#########################################################################
+# Usage: ./check_equallogic -H host -C community -t type [-v volume] [-w warning] [-c critical]
+#########################################################################
+help="check_equallogic (c) 2009-2012 Claudio Kuenzler (published under GPL licence)\n
+Usage: ./check_equallogic -H host -C community -t type [-v volume] [-w warning] [-c critical]\n
+Options:\n-H Hostname\n-C SNMP-Community name (at least read-only)\n-t Type to check, see list below\n-v Name of volume to check\n-w Warning Threshold\n-c Critical Threshold\n
+Requirements: snmpwalk, awk, grep, wc\n
+types:\nconn -> Checks total number of ISCSI connections (if no thresholds are given, outputs information)
+disk -> Checks Status of all disks
+diskusage -> Checks the actual usage of the defined raid (if no thresholds are given, outputs information)
+etherrors -> Checks ethernet interfaces for ethernet packet errors (thresholds possible)
+ethif -> Checks ethernet interfaces (if no thresholds are given, outputs information)
+fan -> Status of Fans
+health -> Overall health status of Equallogic device
+info -> Shows some Information and checks for same firmware version
+memberusage -> Shows disk utilisation of all members of the same group (if no thresholds are given, outputs information)
+poolconn -> Check highest number of ISCSI connections per pool (if no thresholds are given, outputs information)
+poolusage -> Checks utilization of pools (if no thresholds are given, outputs information)
+ps -> Checks Power Supply/Supplies
+raid -> Checks RAID status
+snapshots -> Checks Snapshot Reserve status (warning level is taken from the equallogic volume config, critical level can be set with -c )
+temp -> Checks Temperature sensors
+uptime -> Shows uptime
+vol -> Checks a single volume, must be used with -v option (if no thresholds are given, outputs information)
+volumes -> Checks utilization of all ISCSI volumes (if no thresholds are given, outputs information)"
+# The help text above is printed with "echo -e", which expands its \n escapes. Exit codes used by every check follow.
+STATE_OK=0 # exit code used when a check result is OK
+STATE_WARNING=1 # exit code used when a check result is WARNING
+STATE_CRITICAL=2 # exit code used when a check result is CRITICAL
+STATE_UNKNOWN=3 # exit code used when the result cannot be determined
+PATH=/usr/local/bin:/usr/bin:/bin # fixed search path: external tools are resolved only from these directories
+
+for cmd in snmpwalk awk grep wc [
+do
+ if ! `which ${cmd} 1>/dev/null`
+ then
+ echo "UNKNOWN: ${cmd} does not exist, please check if command exists and PATH is correct"
+ exit ${STATE_UNKNOWN}
+ fi
+done
+
+# Check for people who need help - aren't we all nice ;-)
+#########################################################################
+if [ "${1}" = "--help" -o "${#}" = "0" ];
+ then
+ echo -e "${help}";
+ exit 1;
+fi
+
+# Parse the command line. -H, -C and -t are mandatory; -v, -w and -c are optional. Usage errors exit UNKNOWN.
+#########################################################################
+while getopts "H:C:t:v:w:c:" Input
+do
+  case "${Input}" in
+    H) host=${OPTARG};;
+    C) community=${OPTARG};;
+    t) type=${OPTARG};;
+    v) volume=${OPTARG};;
+    w) warning=${OPTARG};;
+    c) critical=${OPTARG};;
+    *) echo "Wrong option given. Please use options -H for host, -C for SNMP-Community, -t for type, -v for volume, -w for warning and -c for critical"
+       exit ${STATE_UNKNOWN}
+       ;;
+  esac
+done
+if [[ -z "${host}" || -z "${community}" || -z "${type}" ]]; then echo "UNKNOWN: options -H (host), -C (community) and -t (type) are required"; exit ${STATE_UNKNOWN}; fi
+
+# Check Different Types
+#########################################################################
+ case ${type} in
+ health)
+   # The walk may return several values (multiple members); tally 3=critical, 2=warning, 1=ok, 0=unknown
+   healthstatus=$(snmpwalk -v 2c -O vqe -c "${community}" "${host}" 1.3.6.1.4.1.12740.2.1.5.1.1)
+   s_crit=0; s_warn=0; s_ok=0; s_unknown=0
+   for s in ${healthstatus}; do
+     if [ "$s" = "3" ]; then s_crit=$((s_crit + 1)); fi
+     if [ "$s" = "2" ]; then s_warn=$((s_warn + 1)); fi
+     if [ "$s" = "1" ]; then s_ok=$((s_ok + 1)); fi
+     if [ "$s" = "0" ]; then s_unknown=$((s_unknown + 1)); fi
+   done
+   if [ $s_crit -gt 0 ]; then echo "OVERALL HEALTH CRITICAL"; exit ${STATE_CRITICAL}; fi
+   if [ $s_warn -gt 0 ]; then echo "OVERALL HEALTH WARNING"; exit ${STATE_WARNING}; fi
+   if [ $s_unknown -gt 0 ]; then echo "OVERALL HEALTH UNKNOWN"; exit ${STATE_UNKNOWN}; fi
+   if [ $s_ok -gt 0 ]; then echo "OVERALL HEALTH OK"; exit ${STATE_OK}; fi
+   # Nothing recognisable came back (e.g. SNMP timeout): never end silently with exit 0
+   echo "OVERALL HEALTH UNKNOWN - no health status received via SNMP"; exit ${STATE_UNKNOWN}
+   ;;
+
+ temp)
+   # Walk sensor names, current readings and the two limit columns; the four
+   # arrays are matched by index. Blanks in names become '_' (and quotes are
+   # removed) so that every name stays a single word / array element.
+   declare -a sensornames=($(snmpwalk -v 2c -O vqe -c "${community}" "${host}" .1.3.6.1.4.1.12740.2.1.6.1.2 | tr ' ' '_' | tr -d '"' ))
+   declare -a sensortemp=($(snmpwalk -v 2c -O vqe -c "${community}" "${host}" .1.3.6.1.4.1.12740.2.1.6.1.3 | awk -F : '{print $1}' | tr '\n' ' '))
+   declare -a sensortemp_min=($(snmpwalk -v 2c -O vqe -c "${community}" "${host}" .1.3.6.1.4.1.12740.2.1.6.1.8 | awk -F : '{print $1}' | tr '\n' ' '))
+   declare -a sensortemp_max=($(snmpwalk -v 2c -O vqe -c "${community}" "${host}" .1.3.6.1.4.1.12740.2.1.6.1.6 | awk -F : '{print $1}' | tr '\n' ' '))
+   declare -a sensorfinalcrit=()
+   perfdata=""
+   for c in "${!sensornames[@]}"
+   do
+     t_cur=${sensortemp[$c]}; t_min=${sensortemp_min[$c]}; t_max=${sensortemp_max[$c]}
+     # Only sensors with a positive numeric reading are evaluated
+     if [[ "${t_cur}" =~ ^[0-9]+$ ]] && [ "${t_cur}" -gt 0 ]
+     then
+       perfdata="${perfdata} ${sensornames[$c]}=${t_cur};${t_min};${t_max}"
+       # CRITICAL when the reading lies outside the [min, max] range
+       if [ "${t_cur}" -gt "${t_max}" ] || [ "${t_cur}" -lt "${t_min}" ]
+       then
+         sensorfinalcrit[${c}]="${sensornames[$c]} => ${t_cur}"
+       fi
+     fi
+   done
+
+   # Strip the single leading blank. The former "echo $perfdata | cut -f 2-" lost
+   # the first sensor, because the unquoted echo had already dropped that blank.
+   perfdata=${perfdata# }
+
+   # No evaluated sensor means the SNMP query failed - never report OK then
+   if [ -z "${perfdata}" ]
+   then echo "UNKNOWN No usable temperature readings received via SNMP"; exit ${STATE_UNKNOWN}
+   elif [ ${#sensorfinalcrit[@]} -gt 0 ]
+   then echo "CRITICAL Sensor: ${sensorfinalcrit[*]} | $perfdata"; exit ${STATE_CRITICAL}
+   else echo "All Sensors OK | $perfdata"; exit ${STATE_OK}
+   fi
+   ;;
+
+ diskold)
+   # Legacy disk check: walk the status column once (-O vqe = numeric enums) and count exact state values
+   diskstatus=$(snmpwalk -v 2c -O vqe -c "${community}" "${host}" 1.3.6.1.4.1.12740.3.1.1.1.8)
+   diskstatusok=$(grep -c -x 1 <<< "${diskstatus}")
+   diskstatusspare=$(grep -c -x 2 <<< "${diskstatus}")
+   diskstatusfailed=$(grep -c -x 3 <<< "${diskstatus}")
+   diskstatusoff=$(grep -c -x 4 <<< "${diskstatus}")
+   diskstatusaltsig=$(grep -c -x 5 <<< "${diskstatus}")
+   diskstatustoosmall=$(grep -c -x 6 <<< "${diskstatus}")
+   diskstatushistfailures=$(grep -c -x 7 <<< "${diskstatus}")
+   diskstatusunsupported=$(grep -c -x 8 <<< "${diskstatus}")
+   disksumcritical=$(( diskstatusfailed + diskstatustoosmall + diskstatushistfailures + diskstatusunsupported ))
+   disksumwarning=$(( diskstatusoff + diskstatusaltsig ))
+   if [ ${disksumcritical} -gt 0 ]; then echo "DISK CRITICAL ${disksumcritical} disk(s) in critical state"; exit ${STATE_CRITICAL}; fi
+   if [ ${disksumwarning} -gt 0 ]; then echo "DISK WARNING $disksumwarning disk(s) in warning state"; exit ${STATE_WARNING}; fi
+   if [ $(( diskstatusok + diskstatusspare )) -eq 0 ]; then echo "DISK UNKNOWN no disk status received via SNMP"; exit ${STATE_UNKNOWN}; fi
+   echo "DISK OK ${diskstatusok} disks OK ${diskstatusspare} disks spare"; exit ${STATE_OK}
+   ;;
|