#!/usr/bin/env python
#
#   Copyright Hari Sekhon 2007
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
# 

"""Nagios plugin to test the status of all arrays on all Lsi MegaRaid
controllers on the local machine. Uses the megarc.bin program written by Lsi to
get the status of all arrays on all local Lsi MegaRaid controllers. Expects the
megarc.bin program to be in the same directory as this plugin"""

__version__ = 0.8

import os
import sys
import commands
from optparse import OptionParser

# Standard Nagios return codes
OK       = 0
WARNING  = 1
CRITICAL = 2
UNKNOWN  = 3

SRCDIR   = os.path.dirname(sys.argv[0])
BIN      = SRCDIR + "/megarc.bin"
MEGADEV  = "/dev/megadev0"


def end(status, message):
    """exits the plugin with first arg as the return code and the second
    arg as the message to output"""
        
    if status == OK:
        print "RAID OK: %s" % message
        sys.exit(OK)
    elif status == WARNING:
        print "RAID WARNING: %s" % message
        sys.exit(WARNING)
    elif status == CRITICAL:
        print "RAID CRITICAL: %s" % message
        sys.exit(CRITICAL)
    else:
        print "UNKNOWN: %s" % message
        sys.exit(UNKNOWN)


def make_megadev(devicenode):
    """Creates the device node needed for the Lsi utility to work
    (usually /dev/megadev0)"""

    try:
        devices = open("/proc/devices", "r")
        lines   = devices.read()
        devices.close()
    except IOError, error:
        end(UNKNOWN, "Error reading /proc/devices while trying to create " \
                   + "device node '%s' - %s" % (devicenode, error))
    device = ""
    for line in lines.split("\n"):
        line = line.split()
        if len(line) > 1:
            major_number = line[0]
            device       = line[1]
            if device == "megadev":
                break

    if device != "megadev":
        end(UNKNOWN, "Unable to create device node /dev/megadev0. Megadev " \
                   + "not found in /proc/devices. Please make sure you have " \
                   + "an Lsi MegaRaid card detected by your kernel first")
    cmd = "mknod /dev/megadev0 c %s 2" % major_number
    print >> sys.stderr, "running in shell: %s" % cmd
    try:
        result, output = commands.getstatusoutput(cmd) 
        if result != 0:
            end(UNKNOWN, "Error making device node '%s' - %s" \
                                                        % (devicenode, output))
        print >> sys.stderr, "%s" % output
        print >> sys.stderr, "now continuing with raid checks..."
    except OSError, error:
        end(UNKNOWN, "Error making '%s' device node - %s" % (devicenode, error))


if os.geteuid() != 0:
    end(UNKNOWN, "You must be root to run this plugin")

if not os.path.exists(BIN):
    end(UNKNOWN, "Lsi MegaRaid utility '%s' was not found" % BIN)

if not os.access(BIN, os.X_OK):
    end(UNKNOWN, "Lsi MegaRaid utility '%s' is not executable" % BIN)

if not os.path.exists(MEGADEV):
    print >> sys.stderr, "Megaraid device node not found (possible first " \
                       + "run?), creating it now..." 
    make_megadev(MEGADEV)


def run(args):
    """run megarc.bin util with passed in args and return output"""
    if args == "" or args == None:
        print "UNKNOWN: internal python error",
        print "- no cmd supplied for Lsi MegaRaid utility"
        sys.exit(UNKNOWN)
    cmd = "%s %s -nolog" % (BIN, args)
    result, output = commands.getstatusoutput(cmd)
    lines = output.split("\n")
    if result != 0:
        if lines[0][-25:] == "No such file or directory":
            end(UNKNOWN, "Cannot find Lsi MegaRaid utility '%s'" % BIN)
        elif len(lines) == 0:
            end(UNKNOWN, "No output from Lsi MegaRaid utility")
        elif len(lines) < 13:
            print >> sys.stderr, "Error running '%s':" % cmd
            print >> sys.stderr, "%s" % output
            end(UNKNOWN, "Output from Lsi MegaRaid utility is too short, "
                       + "please inspect code")
        else:
            end(UNKNOWN, "Error using MegaRaid utility - %s" \
                                                    % output.replace("\n", "|"))
    
    return lines


def get_controllers(verbosity):
    """finds and returns a list of all controllers on the local machine"""

    lines = run("-AllAdpInfo")

    if lines[11].strip() == "No Adapters Found":
        end(WARNING, "No LSI adapters were found on this machine")

    controllers = []
    controller_lines = lines[12:]
    for line in controller_lines:
        try:
            controller = int(line.split("\t")[1])
        except OSError,error:
            end(UNKNOWN, "Exception occurred in code - %s" % str(error))
        controllers.append(controller)

    if len(controllers) == 0:
        end(WARNING, "No LSI controllers were found on this machine")

    if verbosity >= 2:
        print "Found %s controller(s)" % len(controllers)
   
    return controllers


def test_raid(verbosity, no_summary=False):
    """tests all raid arrays on all Lsi controllers found on local machine
    and returns status code"""

    status = OK 
    message = ""
    number_arrays = 0
    non_optimal_arrays = 0
    controllers = get_controllers(verbosity)
    number_controllers = len(controllers)
    for controller in controllers:
        detailed_output = run("-dispCfg -a%s" % controller )
        if verbosity >= 3:
            for line in detailed_output:
                print "%s" % line
            print
        array_details = {}
        for line in detailed_output:
            if "Status:" in line:
                state = line.split(":")[-1][1:-1]
                logical_drive = line.split()[3][:-1]
                array_details[logical_drive] = [state]  
            if "RaidLevel:" in line:
                raid_level = line.split()[3]
                array_details[logical_drive].append(raid_level)
       
        if len(array_details) == 0:
            message += "No arrays found on controller %s. " % controller
            if status == OK:
                status = WARNING
            continue

        array_keys = array_details.keys()
        array_keys.sort()
        number_arrays += len(array_keys)

        for drive in array_keys:
            state = array_details[drive][0]
            if state != "OPTIMAL":
                non_optimal_arrays += 1
                raid_level = array_details[drive][1]
                # The Array number here is incremented by one because of the
                # inconsistent way that the LSI tools count arrays. 
                # This brings it back in line with the view in the bios 
                # and from megamgr.bin where the array counting starts at 
                # 1 instead of 0
                message += 'Array %s status is "%s"' % (int(drive)+1, state)
                message += '(Raid-%s on adapter %s), ' \
                                                  % (raid_level, controller)
                status = CRITICAL


    message = add_status_summary(status, \
                          message, \
                          non_optimal_arrays)

    message = message.rstrip(" ")
    message = message.rstrip(",")

    if not no_summary:
        message = add_checked_summary(message, \
                              number_arrays, \
                              number_controllers)
    return status, message


def add_status_summary(status, message, non_optimal_arrays):
    """Add initial summary information on the overall state of the arrays"""

    if status == OK:
        message += "All arrays OK"
    else:
        if non_optimal_arrays == 1:
            message = "%s array not OK - " % non_optimal_arrays \
                    + message
        else:
            message = "%s arrays not OK - " % non_optimal_arrays \
                    + message
       
    return message


def add_checked_summary(message, number_arrays, number_controllers):
    """ Adds ending summary information on how many arrays were checked"""

    message += " [%s array" % number_arrays
    if number_arrays != 1:
        message += "s"
    message += " checked on %s controller" % number_controllers

    if number_controllers == 1:
        message += "]"
    else:
        message += "s]"
    
    return message
    

def main():
    """parses args and calls func to test raid arrays"""

    parser = OptionParser()
    parser.add_option( "-n",
                       "--no-summary",
                       action="store_true",
                       dest="no_summary",
                       help="Do not display the number of arrays " \
                          + "checked. By default the number of arrays " \
                          + "checked are printed at the end of the " \
                          + "line. This is useful information and helps to " \
                          + "know that they are detected properly")

    parser.add_option(  "-v",
                        "--verbose",
                        action="count",
                        dest="verbosity",
                        help="Verbose mode. Good for testing plugin. By \
default only one result line is printed as per Nagios standards")

    parser.add_option( "-V",
                        "--version",
                        action = "store_true",
                        dest = "version",
                        help = "Print version number and exit" )

    (options, args) = parser.parse_args()

    no_summary = options.no_summary
    verbosity  = options.verbosity
    version    = options.version

    if args:
        parser.print_help()
        sys.exit(UNKNOWN)

    if version:
        print __version__
        sys.exit(OK)

    result, message = test_raid(verbosity, no_summary)

    end(result, message)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print "Caught Control-C..."
        sys.exit(CRITICAL)
