Nagios 3.0 hanging (10/19 CVS)

Shad L. Lords slords at lordsfam.net
Mon Oct 22 16:27:49 CEST 2007


I've had a few instances where Nagios is still running but fails to run 
checks or process anything.  I noticed it this morning and did a quick 
strace of the process to see what it was trying to do (see below).  I hope 
this will be of use to someone.

-Shad


waitpid(-1, NULL, WNOHANG)              = -1 ECHILD (No child processes)
time(NULL)                              = 1193063042
gettimeofday({1193063042, 429378}, NULL) = 0
nanosleep({0, 250000000}, NULL)         = 0
time(NULL)                              = 1193063042
waitpid(-1, NULL, WNOHANG)              = -1 ECHILD (No child processes)
time(NULL)                              = 1193063042
gettimeofday({1193063042, 684641}, NULL) = 0
nanosleep({0, 250000000}, NULL)         = 0
time(NULL)                              = 1193063042
waitpid(-1, NULL, WNOHANG)              = -1 ECHILD (No child processes)
time(NULL)                              = 1193063042
gettimeofday({1193063042, 936662}, NULL) = 0
nanosleep({0, 250000000}, NULL)         = 0
time(NULL)                              = 1193063043
waitpid(-1, NULL, WNOHANG)              = -1 ECHILD (No child processes)
gettimeofday({1193063043, 188323}, NULL) = 0
time(NULL)                              = 1193063043
open("/var/spool/nagios", O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_DIRECTORY) = -1 EMFILE (Too many open files)
open("/var/log/nagios/nagios.log", O_RDWR|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = -1 EMFILE (Too many open files)
time(NULL)                              = 1193063043
gettimeofday({1193063043, 190670}, NULL) = 0
time(NULL)                              = 1193063043
waitpid(-1, NULL, WNOHANG)              = -1 ECHILD (No child processes)
gettimeofday({1193063043, 191882}, NULL) = 0
gettimeofday({1193063043, 192386}, NULL) = 0
open("/var/log/nagios/nagios.tmpfQ1rfM", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE, 0600) = -1 EMFILE (Too many open files)
open("/var/log/nagios/nagios.log", O_RDWR|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = -1 EMFILE (Too many open files)
gettimeofday({1193063043, 193845}, NULL) = 0
time(NULL)                              = 1193063043
gettimeofday({1193063043, 194616}, NULL) = 0
time(NULL)                              = 1193063043
waitpid(-1, NULL, WNOHANG)              = -1 ECHILD (No child processes)
gettimeofday({1193063043, 195878}, NULL) = 0
gettimeofday({1193063043, 196376}, NULL) = 0
time(NULL)                              = 1193063043
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=2427, ...}) = 0
gettimeofday({1193063043, 197869}, NULL) = 0
time(NULL)                              = 1193063043
time(NULL)                              = 1193063043
time(NULL)                              = 1193063043
gettimeofday({1193063043, 199799}, NULL) = 0
umask(077)                              = 02
open("/var/spool/nagios/checkVC2i95", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE, 0600) = -1 EMFILE (Too many open files)
umask(02)                               = 077
gettimeofday({1193063043, 201991}, NULL) = 0
time(NULL)                              = 1193063043
open("/usr/lib/nagios/plugins/check_latency", O_RDONLY|O_LARGEFILE) = -1 EMFILE (Too many open files)
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xb7fe3708) = -1 ENOMEM (Cannot allocate memory)
open("/var/log/nagios/nagios.log", O_RDWR|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = -1 EMFILE (Too many open files)
time(NULL)                              = 1193063043
time(NULL)                              = 1193063043
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=2427, ...}) = 0
gettimeofday({1193063043, 207687}, NULL) = 0
gettimeofday({1193063043, 208214}, NULL) = 0
gettimeofday({1193063043, 208596}, NULL) = 0
time(NULL)                              = 1193063043
waitpid(-1, NULL, WNOHANG)              = -1 ECHILD (No child processes)
time(NULL)                              = 1193063043
gettimeofday({1193063043, 210367}, NULL) = 0
nanosleep({0, 250000000}, NULL)         = 0
time(NULL)                              = 1193063043
waitpid(-1, NULL, WNOHANG)              = -1 ECHILD (No child processes)
time(NULL)                              = 1193063043
gettimeofday({1193063043, 465052}, NULL) = 0
nanosleep({0, 250000000}, NULL)         = 0
time(NULL)                              = 1193063043
waitpid(-1, NULL, WNOHANG)              = -1 ECHILD (No child processes)
time(NULL)                              = 1193063043
gettimeofday({1193063043, 721250}, NULL) = 0
nanosleep({0, 250000000}, NULL)         = 0
time(NULL)                              = 1193063043
waitpid(-1, NULL, WNOHANG)              = -1 ECHILD (No child processes)
time(NULL)                              = 1193063043
gettimeofday({1193063043, 976644}, NULL) = 0
nanosleep({0, 250000000}, NULL)         = 0
time(NULL)                              = 1193063044
waitpid(-1, NULL, WNOHANG)              = -1 ECHILD (No child processes)
time(NULL)                              = 1193063044
gettimeofday({1193063044, 229116}, NULL) = 0
nanosleep({0, 250000000}, NULL)         = 0
time(NULL)                              = 1193063044
waitpid(-1, NULL, WNOHANG)              = -1 ECHILD (No child processes)
time(NULL)                              = 1193063044
gettimeofday({1193063044, 485005}, NULL) = 0
nanosleep({0, 250000000}, NULL)         = 0
time(NULL)                              = 1193063044
waitpid(-1, NULL, WNOHANG)              = -1 ECHILD (No child processes)
time(NULL)                              = 1193063044
gettimeofday({1193063044, 741063}, NULL) = 0
nanosleep({0, 250000000}, NULL)         = 0
time(NULL)                              = 1193063044



-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/




More information about the Developers mailing list