#!/bin/bash
# Nagios plugin to do a Novell eDirectory partition continuity check
# Written by Jesse Pretorius, jesse.pretorius@gmail.com
# Version 1.2, 20 July 2011
# Project location: www.monitoringexchange.org
#
# Changelog:
#   v1.2 : Added error handling for situation where all partitions have errors thanks to input from Magnus Felix
#   v1.1 : Added specific critical and unknown errors thanks to input from Magnus Felix

# Exit status reported to Nagios; starts at 0 (OK) and is overwritten by the
# checks further down when a problem is detected.
errorlvl=0

# Scratch file that receives the ndsrepair output.
if ! tmpfile=$(mktemp); then
  echo "UNKNOWN: Unable to create a temporary file."
  exit 3
fi
# Remove the scratch file on every exit path. usage() exits before the
# regular clean-up at the end of the script is reached, which would
# otherwise leave the file behind.
trap 'rm -f -- "$tmpfile"' EXIT

# Default thresholds in hours; overridden by the -w and -c options.
warning=2
critical=6

### Beginning of functions ###

# Print the help text (plugin description, option summary and a sample
# invocation) to stdout, then terminate the script with exit status 3.
usage() {
  # One argument per output line. %b expands the \n and \t escapes in each
  # argument the same way "echo -e" does, and the format adds the newline.
  printf '%b\n' \
    "\nNagios plugin to do a Novell eDirectory partition continuity check" \
    "Written by Jesse Pretorius, jesse.pretorius@gmail.com" \
    "Version 1.2, 20 July 2011" \
    "Project location: www.monitoringexchange.org" \
    "\nUsage:" \
    "\t$0 <options>" \
    "\nOptions:" \
    "\t[-w warning]\tThe number of hours old the oldest partition sync age must be to produce a warning state. Default: 2" \
    "\t[-c critical]\tThe number of hours old the oldest partition sync age must be to produce a critical state. Default: 6" \
    "\nSample:" \
    "\t$0 -w 5 -c 10\n"
  exit 3
}

### End of functions ###

# Parse the command line: -w and -c override the default thresholds, anything
# else prints the usage text.
while getopts w:c: OPTIONS
do
  case "$OPTIONS" in
    w ) warning=$OPTARG ;;
    c ) critical=$OPTARG ;;
    * ) usage ;;
  esac
done

# Both thresholds must be non-negative whole numbers. Without this check a
# value such as "abc" makes the numeric comparisons fail with an error, which
# skips the validation below and lets the later threshold tests fall through
# to an OK result.
for threshold in "$warning" "$critical"; do
  case "$threshold" in
    '' | *[!0-9]* )
      echo -e "\nThe warning and critical levels must be whole numbers of hours!"
      usage
      # usage already exits with status 3; this is only a safety net.
      exit 3
      ;;
  esac
done

# Force base 10 so a value with leading zeros (e.g. "08") is not treated as an
# invalid octal number when the thresholds are later used in $(( )) arithmetic.
warning=$((10#$warning))
critical=$((10#$critical))

if [ "$critical" -le "$warning" ]; then
  echo -e "\nThe critical level must be higher than the warning level!"
  usage
  # usage already exits with status 3; this is only a safety net.
  exit 3
fi

# Locate the ndsrepair binary: check the two known install locations first,
# then fall back to searching PATH.
if [ -e /opt/novell/eDirectory/bin/ndsrepair ]; then
  NDSREPAIR="/opt/novell/eDirectory/bin/ndsrepair"
elif [ -e /usr/bin/ndsrepair ]; then
  NDSREPAIR="/usr/bin/ndsrepair"
else
  NDSREPAIR=$(command -v ndsrepair)
fi

# Run "ndsrepair -E" and save stdout and stderr to the temporary file.
# If no binary was found there is nothing to run, so use the shell's
# "command not found" status (127) directly.
if [ -n "$NDSREPAIR" ]; then
  "$NDSREPAIR" -E > "$tmpfile" 2>&1
  ndsrepairerrorlvl=$?
else
  ndsrepairerrorlvl=127
fi

# Translate a failing ndsrepair exit status into a plugin result.
case "$ndsrepairerrorlvl" in
  0)
    # Success: the captured output is analysed below.
    ;;
  5)
    output="CRITICAL: Unable to connect to eDirectory!"
    errorlvl=2
    ;;
  8)
    output="UNKNOWN: The ndsrepair module is already loaded."
    errorlvl=3
    ;;
  9)
    output="CRITICAL: eDirectory is in a DEFUNCT state!"
    errorlvl=2
    ;;
  127)
    output="UNKNOWN: The ndsrepair binary could not be found."
    errorlvl=3
    ;;
  *)
    output="UNKNOWN: Failed to run ndsrepair successfully! $(cat "$tmpfile")"
    errorlvl=3
    ;;
esac

if [ "$ndsrepairerrorlvl" -eq 0 ]; then
  # Every "All servers" line carries a date (field 7, MM-DD-YYYY) and a time
  # (field 8, HH:MM:SS). Rewrite each as "YYYYMMDD HH MM SS" so that a plain
  # sort puts the oldest timestamp first.
  oldest_part_date_time=$(
    awk '/All servers/ {
           split($7, d, "-")
           split($8, t, ":")
           print d[3] d[1] d[2], t[1], t[2], t[3]
         }' "$tmpfile" | sort | head -n 1
  )

  # If there are errors in all partitions then there is no "All servers" line
  # and therefore no value in $oldest_part_date_time
  if [ -z "$oldest_part_date_time" ]; then
    output="UNKNOWN: No partitions are able to completely synchronise!"
    errorlvl=3
  else
    read -r oldest_part_date oldest_part_hours oldest_part_minutes oldest_part_seconds <<< "$oldest_part_date_time"
    # A missing time component counts as zero.
    oldest_part_hours=${oldest_part_hours:-0}
    oldest_part_minutes=${oldest_part_minutes:-0}
    oldest_part_seconds=${oldest_part_seconds:-0}

    date_re='^[0-9]{8}$'
    num_re='^[0-9]+$'

    # Refuse to calculate with anything that is not a well-formed timestamp;
    # otherwise the arithmetic below would fail or produce a meaningless age.
    # date's own error text is discarded because the plugin reports its own.
    if [[ ! "$oldest_part_date" =~ $date_re ||
          ! "$oldest_part_hours" =~ $num_re ||
          ! "$oldest_part_minutes" =~ $num_re ||
          ! "$oldest_part_seconds" =~ $num_re ]] ||
       ! oldest_part_midnight_unix=$(date --date "$oldest_part_date" +%s 2>/dev/null); then
      output="UNKNOWN: Unable to parse the oldest partition synchronisation time from the ndsrepair output!"
      errorlvl=3
    else
      # Calculate how old the oldest date is. The 10# prefix forces base 10 so
      # zero-padded values such as "08" are not rejected as invalid octal.
      current_date_unix=$(date +%s)
      oldest_part_date_unix=$(( oldest_part_midnight_unix
                                + 10#$oldest_part_hours * 3600
                                + 10#$oldest_part_minutes * 60
                                + 10#$oldest_part_seconds ))
      diff_unix=$(( current_date_unix - oldest_part_date_unix ))

      # Break the magnitude of the difference into h/m/s so that none of the
      # three components is negative when the timestamp lies in the future.
      abs_diff_unix=$(( diff_unix < 0 ? -diff_unix : diff_unix ))
      oldest_part_age_hours=$(( abs_diff_unix / 3600 ))
      oldest_part_age_minutes=$(( abs_diff_unix % 3600 / 60 ))
      oldest_part_age_seconds=$(( abs_diff_unix % 60 ))
      oldest_part_age="${oldest_part_age_hours}h${oldest_part_age_minutes}m${oldest_part_age_seconds}s"

      # Work out whether the oldest_part_age is above any thresholds and set the output accordingly
      if [ "$oldest_part_age_hours" -ge "$warning" ]; then
        if [ "$oldest_part_age_hours" -ge "$critical" ]; then
          output="CRITICAL: The oldest partition continuity age is ${oldest_part_age} old!"
          errorlvl=2
        else
          output="WARNING: The oldest partition continuity age is ${oldest_part_age} old!"
          errorlvl=1
        fi
      else
        output="OK: The oldest partition continuity age is ${oldest_part_age} old."
        errorlvl=0
      fi

      # Add the performance data to the output: the signed age in seconds,
      # followed by the warning and critical thresholds converted to seconds.
      output="$output | 'oldest_partition_age'=$diff_unix;$(($warning*3600));$(($critical*3600));"
    fi
  fi
fi

# Clean up the temp file, output the result and the error level
if [ -n "$tmpfile" ]; then
  rm -f -- "$tmpfile"
fi
# $output is deliberately left unquoted so that multi-line ndsrepair output is
# folded onto a single line. Pathname expansion is switched off first so that
# characters such as "*" in that output are printed literally.
set -f
echo $output
exit "$errorlvl"
