Nagios + SMS = TRUE?
Mark Ferlatte
ferlatte at cryptio.net
Fri Sep 26 23:56:22 CEST 2003
James Turnbull said on Fri, Sep 26, 2003 at 10:59:18AM +1000:
> > 5 pages in 5 minutes; any more than that, we don't send them, and just
> > send a "Too many pages" page, which everyone understands to mean "Bad
> > things have happened".
>
> This is a great idea - we have just started looking at this ourselves.
> Would you be averse to sharing the script?
Not at all. You'll have to hack it into shape for your installation; it
definitely falls into the hack category.
If I get a chance, I'm going to clean it up so that I can use it in other
places, but that seems less likely with each day. :)
M
-------------- next part --------------
#!/usr/bin/python
import getopt
import smtplib
import traceback
import os
import sys
import re
import time
from types import *
#####################################
#
# send_mail_throttled.py
#
#
# This script is a mailer script that "throttles" mail sent to devices
# which we don't want to flood with mail, such as cell phones.
#
# Here's the design:
#
# If we get 5 pages (gMaxMessages) in a five minute window (gMinTime):
# - block all pages (accumulating them to a web page is not implemented yet)
# - send a page saying that we're stopping pages
# - wait 5 minutes (gBlockedTimeout) before clearing the history and
#   restarting paging.
#
# We need to maintain a history on a per-e-mail level
#
#
#
class WriteLog(object):
    """File-like wrapper that copies everything written to a log file.

    Every chunk passed to write() is appended to the log file and then
    forwarded unchanged to the wrapped stream, so output still reaches its
    original destination while a persistent copy is kept.

    Parameters:
        stream:   the file-like object to forward output to.
        log_path: path of the log file, opened in append mode. Defaults to
                  the location the script has always used.

    Raises:
        IOError/OSError from open() if the log file cannot be opened.
    """

    def __init__(self, stream, log_path="/var/tmp/spt_log"):
        self.stream = stream
        self.mLogFile = open(log_path, "a")

    def write(self, text):
        """Append text to the log file, then forward it to the stream."""
        self.mLogFile.write(text)
        self.stream.write(text)

    def flush(self):
        """Flush both the log file and the wrapped stream."""
        # The log file has its own buffer; without this, log lines could be
        # held back even after the caller asked for a flush.
        self.mLogFile.flush()
        self.stream.flush()
# From here on, everything printed is also appended to the WriteLog log file.
sys.stdout = WriteLog(sys.stdout)
# Throttling settings read by main(). Times are in seconds because they are
# compared against differences of time.time() values.
# Number of most recent send times kept in the history; reaching this many
# within gMinTime triggers the block.
gMaxMessages = 5
# The last gMaxMessages messages must span more than this for paging to
# continue.
gMinTime = 300
# How long a block lasts before the history is cleared and paging resumes.
gBlockedTimeout = 300
def main():
    """Deliver, suppress, or replace one notification for one recipient.

    Command line (all options optional):
        --to ADDR  --subj SUBJECT  --mesg MESSAGE

    The recipient's history file (/var/tmp/<ADDR>.mailhist) holds one send
    time per line, plus a "<time>Blocked" marker while paging is suspended.

    Behavior:
      * no history file: send the message;
      * blocked, and the block is at most gBlockedTimeout seconds old:
        drop the message;
      * blocked, and the block is older: clear the history and continue;
      * fewer than gMaxMessages recent messages, or the last gMaxMessages
        span more than gMinTime seconds: send the message;
      * otherwise: write a block marker and send a single
        "Too many pages" notice instead of the message.

    Any exception is reported with a traceback on stderr and swallowed, so
    the script itself never terminates with an unhandled error.
    """
    try:
        print("Time: %s" % time.ctime(time.time()))
        # The mailing-list archive this script was recovered from rewrites
        # "@" as " at "; the addresses below are restored to valid form.
        from_addr = 'nagios@localhost'
        to_addr = 'root@localhost'
        subj = 'No subject'
        mesg = 'No message'
        long_options = ['to=', 'subj=', 'mesg=']
        opts, args = getopt.getopt(sys.argv[1:], '', long_options)
        print(str(sys.argv))
        for o, a in opts:
            if o == '--to':
                to_addr = a
            elif o == '--subj':
                subj = a
            elif o == '--mesg':
                mesg = a
            else:
                # getopt rejects unknown options itself; this is a guard
                # against long_options and this chain drifting apart.
                raise ValueError("Unknown argument " + o)
        hist_filename = "/var/tmp/%s.mailhist" % to_addr

        # Clean up the subject and message to get rid of excess verbiage.
        subj = _abbreviate(subj)
        # The message arrives with literal backslash-n sequences; turn them
        # into real newlines before abbreviating.
        mesg = _abbreviate(mesg.replace("\\n", "\n"))

        if not os.path.exists(hist_filename):
            # No history yet: send_mail_log creates the file after sending.
            print("No history file %s!" % hist_filename)
            print("Sending page!")
            send_mail_log(from_addr, to_addr, subj, mesg)
            return

        mail_times, block_time = _read_history(hist_filename)
        now = int(time.time())

        if block_time:
            block_age = now - block_time
            print("Blocked %d seconds ago!" % block_age)
            if block_age > gBlockedTimeout:
                # It's been long enough, start sending pages again.
                print("Clearing block, starting mail again")
                mail_times = []
            else:
                print("Ignoring blocked message!")
                return

        # Treat this message as sent now and keep only the newest entries.
        mail_times.append(now)
        mail_times = mail_times[-gMaxMessages:]
        print(str(mail_times))

        if len(mail_times) < gMaxMessages:
            print("Less than %d alerts, sending!" % gMaxMessages)
            # Store everything except the current message; send_mail_log
            # appends the current send time itself.
            _save_history(hist_filename, mail_times[:-1])
            send_mail_log(from_addr, to_addr, subj, mesg)
            return

        total_time = mail_times[-1] - mail_times[0]
        print("%d messages in %d seconds!" % (gMaxMessages, total_time))
        if total_time > gMinTime:
            _save_history(hist_filename, mail_times[:-1])
            send_mail_log(from_addr, to_addr, subj, mesg)
            return

        # Uh oh, too many. Record the block and send the warning alert.
        print("Too many messages!")
        # Not best-effort: if the marker cannot be written, the failure must
        # surface rather than sending this notice on every later call.
        hist_file = open(hist_filename, 'w')
        try:
            hist_file.write("%dBlocked\n" % now)
        finally:
            hist_file.close()
        send_mail_log(from_addr,
                      to_addr,
                      "",
                      "Too many pages, blocking for 5 minutes!")
    except Exception:
        traceback.print_exc()


def _abbreviate(text):
    """Return text with the long Nagios state words shortened."""
    replacements = (("ACKNOWLEDGEMENT", "ACK"),
                    ("CRITICAL", "CRIT"),
                    ("WARNING", "WARN"))
    for long_word, short_word in replacements:
        text = text.replace(long_word, short_word)
    return text


def _read_history(hist_filename):
    """Parse a history file and return (mail_times, block_time).

    mail_times is the list of integer send times found, in file order.
    block_time is the time from the last "<time>Blocked" line, or 0 when
    the file has none. Lines matching neither form are ignored.
    """
    time_re = re.compile(r"^(\d+)$")
    block_re = re.compile(r"^(\d+)Blocked")
    mail_times = []
    block_time = 0
    hist_file = open(hist_filename, 'r')
    try:
        for line in hist_file:
            time_match = time_re.match(line)
            if time_match:
                mail_times.append(int(time_match.group(1)))
                continue
            block_match = block_re.match(line)
            if block_match:
                # We've been blocked for pager volume.
                print("Blocked!")
                block_time = int(block_match.group(1))
    finally:
        hist_file.close()
    return mail_times, block_time


def _save_history(hist_filename, mail_times):
    """Overwrite the history file with mail_times, one per line.

    Best effort: if the file cannot be opened, the history is left as it is
    so that the page itself still goes out.
    """
    try:
        hist_file = open(hist_filename, 'w')
    except (IOError, OSError):
        return
    try:
        hist_file.writelines(["%d\n" % mail_time for mail_time in mail_times])
    finally:
        hist_file.close()
def send_mail_log(from_addr, to_addr, subject, body):
    """Send a message, then append the send time to the recipient's history.

    The message is sent first through send_mail(); if that raises, nothing
    is recorded. The current time is then appended as one line of whole
    seconds to /var/tmp/<to_addr>.mailhist.

    Parameters:
        from_addr: sender address.
        to_addr:   recipient address; also used to build the history path.
        subject:   subject line.
        body:      message text.

    Recording is best effort: if the history file cannot be opened, the
    function returns without raising. A failure while writing propagates.
    """
    send_mail(from_addr, to_addr, subject, body)
    hist_filename = "/var/tmp/%s.mailhist" % to_addr
    try:
        hist_file = open(hist_filename, 'a')
    except (IOError, OSError):
        # The mail has already gone out; a missing history entry only makes
        # the throttle slightly more permissive.
        return
    try:
        # %d truncates the float timestamp to whole seconds.
        hist_file.write("%d\n" % time.time())
    finally:
        hist_file.close()
def send_mail(from_addr, to_addrs, subject, body, smtp_host='mail'):
    """
    Send mail using SMTP

    Parameters:
        from_addr: sender address, used for the envelope and From: header.
        to_addrs:  one address as a string, or a list/tuple of addresses.
                   Any other type leaves the To: header empty.
        subject:   text of the Subject: header.
        body:      message text, appended after the headers unchanged.
        smtp_host: host name of the SMTP server to connect to.

    Raises:
        Whatever smtplib raises on connection or delivery failure.
    """
    # basestring exists only on Python 2, where it also covers unicode.
    try:
        string_types = (basestring,)
    except NameError:
        string_types = (str,)

    if isinstance(to_addrs, string_types):
        to_string = to_addrs
    elif isinstance(to_addrs, (list, tuple)):
        to_string = ', '.join(to_addrs)
    else:
        to_string = ''

    fields = {
        'from_addr': from_addr,
        'to_string': to_string,
        'subject': subject,
    }
    msg = "From: %(from_addr)s\r\nTo: %(to_string)s\r\n"\
          "Subject: %(subject)s\r\n\r\n" % fields
    msg += body

    server = smtplib.SMTP(smtp_host)
    try:
        server.sendmail(from_addr, to_addrs, msg)
    finally:
        # Close the connection even when delivery fails.
        server.quit()
# Process a notification only when run as a script, so that importing this
# module does not send mail.
if __name__ == "__main__":
    main()
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 187 bytes
Desc: not available
URL: <https://www.monitoring-lists.org/archive/users/attachments/20030926/2089e672/attachment.sig>
More information about the Users
mailing list