[Nagiosplug-help] Host monitoring
Grant Lowe
glowe at sbcglobal.net
Mon Oct 27 21:42:24 CET 2008
Hi Marc,
Still receiving the notifications and still can't explain it. Here's the data you asked for.
First, the host definition:
define host {
host_name blarney
alias blarney
display_name Blarney
address 172.20.8.215
hostgroups solaris-servers
check_command check-host-alive
initial_state o
max_check_attempts 5
check_interval 3
retry_interval 3600
active_checks_enabled 0
passive_checks_enabled 1
check_period 24x7
obsess_over_host 0
check_freshness 0
event_handler_enabled 0
flap_detection_enabled 0
flap_detection_options o,d,u
process_perf_data 1
retain_status_information 1
retain_nonstatus_information 0
contacts glowe
notification_interval 300
notification_period 24x7
notification_options d,u,r,f,s
notifications_enabled 1
# stalking_options
}
Here are the service definitions:
define service{
host_name blarney
hostgroup_name solaris-servers
service_description Ping
check_command check_ping!200.0,20%!600.0,60%
max_check_attempts 5
notification_interval 60
check_period 24x7
}
define service{
host_name blarney
hostgroup_name solaris-servers
service_description Secure Shell
check_command check_ssh
max_check_attempts 5
notification_interval 60
check_period 24x7
}
define service{
host_name blarney
hostgroup_name solaris-servers
service_description SMTP
check_command check_smtp
max_check_attempts 5
notification_interval 60
check_period 24x7
}
define service{
host_name blarney
hostgroup_name solaris-servers
service_description POP
check_command check_pop
max_check_attempts 5
notification_interval 60
check_period 24x7
}
define service{
host_name blarney
hostgroup_name solaris-servers
service_description Check ROOT Disk Free Space
check_command check_nrpe!check_root
max_check_attempts 5
notification_interval 60
check_period 24x7
}
define service{
host_name blarney
hostgroup_name solaris-servers
service_description Check USR Disk Free Space
check_command check_nrpe!check_usr
max_check_attempts 5
notification_interval 60
check_period 24x7
}
define service{
host_name blarney
hostgroup_name solaris-servers
service_description Check VAR Disk Free Space
check_command check_nrpe!check_var
max_check_attempts 5
notification_interval 60
check_period 24x7
}
define service{
host_name blarney
hostgroup_name solaris-servers
service_description Check Swap Space
check_command check_nrpe!check_swap
max_check_attempts 5
notification_interval 60
check_period 24x7
}
define service{
host_name blarney
hostgroup_name solaris-servers
service_description Check Remote Mail Queue
check_command check_nrpe!check_mailq
max_check_attempts 5
notification_interval 60
check_period 24x7
}
define service{
host_name blarney
hostgroup_name solaris-servers
service_description Check Remote Number of Processes
check_command check_nrpe!check_remote_procs
max_check_attempts 5
notification_interval 60
check_period 24x7
}
define service{
host_name blarney
hostgroup_name solaris-servers
service_description Check Remote Zombie Processes
check_command check_nrpe!check_zombie_procs
max_check_attempts 5
notification_interval 60
check_period 24x7
}
Here's the template:
define contact{
name generic-contact ; The name of this contact template
service_notification_period 24x7 ; service notifications can be sent anytime
host_notification_period 24x7 ; host notifications can be sent anytime
service_notification_options w,u,c,r,f,s ; send notifications for all service states, flapping events, and scheduled downtime events
host_notification_options d,u,r,f,s ; send notifications for all host states, flapping events, and scheduled downtime events
service_notification_commands notify-service-by-email ; send service notifications via email
host_notification_commands notify-host-by-email ; send host notifications via email
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL CONTACT, JUST A TEMPLATE!
}
# Generic host definition template - This is NOT a real host, just a template!
define host{
name generic-host ; The name of this host template
notifications_enabled 1 ; Host notifications are enabled
event_handler_enabled 1 ; Host event handler is enabled
flap_detection_enabled 1 ; Flap detection is enabled
failure_prediction_enabled 1 ; Failure prediction is enabled
process_perf_data 1 ; Process performance data
retain_status_information 1 ; Retain status information across program restarts
retain_nonstatus_information 1 ; Retain non-status information across program restarts
notification_period 24x7 ; Send host notifications at any time
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
}
define host{
name solaris-servers ; The name of this host template
use generic-host ; This template inherits other values from the generic-host template
check_period 24x7 ; By default, Solaris hosts are checked round the clock
check_interval 5 ; Actively check the host every 5 minutes
retry_interval 1 ; Schedule host check retries at 1 minute intervals
max_check_attempts 10 ; Check each Solaris host 10 times (max)
check_command check-host-alive ; Default command to check Solaris hosts
notification_period workhours ; Solaris admins hate to be woken up, so we only notify during the day
; Note that the notification_period variable is being overridden from
; the value that is inherited from the generic-host template!
notification_interval 120 ; Resend notifications every 2 hours
notification_options d,u,r ; Only send notifications for specific host states
contact_groups solaris-admins ; Notifications get sent to the admins by default
hostgroups solaris-server
register 0
}
Here are the logs for today:
/usr/local/nagios/share/ignoramus/CFG> cat /usr/local/nagios/var/nagios.log
[1225090800] CURRENT HOST STATE: blarney;UP;HARD;1;PING OK - Packet loss = 0%, RTA = 0.65 ms
[1225090800] CURRENT SERVICE STATE: blarney;Check ROOT Disk Free Space;OK;HARD;1;DISK OK - free space: / 797 MB (40% inode=81%):
[1225090800] CURRENT SERVICE STATE: blarney;Check Remote Mail Queue;OK;HARD;1;OK: mailq is empty
[1225090800] CURRENT SERVICE STATE: blarney;Check Remote Number of Processes;CRITICAL;HARD;5;PROCS CRITICAL: 1341 processes
[1225090800] CURRENT SERVICE STATE: blarney;Check Remote Zombie Processes;OK;HARD;1;PROCS OK: 1 process with STATE = Z
[1225090800] CURRENT SERVICE STATE: blarney;Check Swap Space;OK;HARD;1;SWAP OK - 100% free (16352 MB out of 16381 MB)
[1225090800] CURRENT SERVICE STATE: blarney;Check USR Disk Free Space;OK;HARD;1;DISK OK - free space: /usr 583 MB (29% inode=84%):
[1225090800] CURRENT SERVICE STATE: blarney;Check VAR Disk Free Space;OK;HARD;1;DISK OK - free space: /var 2036 MB (50% inode=95%):
[1225090800] CURRENT SERVICE STATE: blarney;POP;OK;HARD;1;POP OK - 0.012 second response time on port 110 [+OK Qpopper (version 4.0.5) at blarney starting.]
[1225090800] CURRENT SERVICE STATE: blarney;Ping;OK;HARD;1;PING OK - Packet loss = 0%, RTA = 1.27 ms
[1225090800] CURRENT SERVICE STATE: blarney;SMTP;OK;HARD;1;SMTP OK - 0.011 sec. response time
[1225090800] CURRENT SERVICE STATE: blarney;Secure Shell;OK;HARD;1;SSH OK - OpenSSH_4.3 (protocol 1.99)
[1225115621] SERVICE ALERT: blarney;Check Remote Zombie Processes;CRITICAL;SOFT;1;PROCS CRITICAL: 41 processes with STATE = Z
[1225115681] SERVICE ALERT: blarney;Check Remote Zombie Processes;CRITICAL;SOFT;2;PROCS CRITICAL: 32 processes with STATE = Z
[1225115741] SERVICE ALERT: blarney;Check Remote Zombie Processes;CRITICAL;SOFT;3;PROCS CRITICAL: 36 processes with STATE = Z
[1225115801] SERVICE ALERT: blarney;Check Remote Zombie Processes;CRITICAL;SOFT;4;PROCS CRITICAL: 19 processes with STATE = Z
[1225115861] SERVICE ALERT: blarney;Check Remote Zombie Processes;CRITICAL;HARD;5;PROCS CRITICAL: 29 processes with STATE = Z
[1225115861] SERVICE NOTIFICATION: glowe;blarney;Check Remote Zombie Processes;CRITICAL;notify-host-by-email;PROCS CRITICAL: 29 processes with STATE = Z
[1225119461] SERVICE NOTIFICATION: glowe;blarney;Check Remote Zombie Processes;CRITICAL;notify-host-by-email;PROCS CRITICAL: 43 processes with STATE = Z
[1225122761] SERVICE ALERT: blarney;Check Remote Zombie Processes;OK;HARD;5;PROCS OK: 2 processes with STATE = Z
[1225122761] SERVICE NOTIFICATION: glowe;blarney;Check Remote Zombie Processes;OK;notify-host-by-email;PROCS OK: 2 processes with STATE = Z
nagios-test /usr/local/nagios/share/ignoramus/CFG>
I think that's what you want.
Please let me know if you need any more data.
Thanks so much!
----- Original Message ----
From: Marc Powell <marc at ena.com>
To: nagios-user Mailinglist <nagios-users at lists.sourceforge.net>
Sent: Monday, October 27, 2008 7:26:27 AM
Subject: Re: [Nagios-users] [Nagiosplug-help] Host monitoring
On Oct 24, 2008, at 10:45 AM, Grant Lowe wrote:
> Hi Marc,
>
> This is still happening. Any more thoughts or ideas?
You're still receiving notifications? Please post the current host/
service config (including template) as well as the NOTIFICATION log
entry. Verify that you do not have multiple nagios daemons running
concurrently.
--
Marc
-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/
_______________________________________________
Nagios-users mailing list
Nagios-users at lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nagios-users
::: Please include Nagios version, plugin version (-v) and OS when reporting any issue.
::: Messages without supporting info will risk being sent to /dev/null
-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/
_______________________________________________
Nagios-users mailing list
Nagios-users at lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nagios-users
::: Please include Nagios version, plugin version (-v) and OS when reporting any issue.
::: Messages without supporting info will risk being sent to /dev/null
More information about the Users mailing list