Nagios 3.0 hanging (10/19 CVS)
Shad L. Lords
slords at lordsfam.net
Mon Oct 22 16:27:49 CEST 2007
I've had a few instances where Nagios is running but fails to run
checks or process anything. I noticed it this morning and did a quick
strace of the process to see what it was trying to do (see below). I hope
this will be of use to someone.
-Shad
waitpid(-1, NULL, WNOHANG) = -1 ECHILD (No child processes)
time(NULL) = 1193063042
gettimeofday({1193063042, 429378}, NULL) = 0
nanosleep({0, 250000000}, NULL) = 0
time(NULL) = 1193063042
waitpid(-1, NULL, WNOHANG) = -1 ECHILD (No child processes)
time(NULL) = 1193063042
gettimeofday({1193063042, 684641}, NULL) = 0
nanosleep({0, 250000000}, NULL) = 0
time(NULL) = 1193063042
waitpid(-1, NULL, WNOHANG) = -1 ECHILD (No child processes)
time(NULL) = 1193063042
gettimeofday({1193063042, 936662}, NULL) = 0
nanosleep({0, 250000000}, NULL) = 0
time(NULL) = 1193063043
waitpid(-1, NULL, WNOHANG) = -1 ECHILD (No child processes)
gettimeofday({1193063043, 188323}, NULL) = 0
time(NULL) = 1193063043
open("/var/spool/nagios", O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_DIRECTORY) = -1
EMFILE (Too many open files)
open("/var/log/nagios/nagios.log", O_RDWR|O_CREAT|O_APPEND|O_LARGEFILE,
0666) = -1 EMFILE (Too many open files)
time(NULL) = 1193063043
gettimeofday({1193063043, 190670}, NULL) = 0
time(NULL) = 1193063043
waitpid(-1, NULL, WNOHANG) = -1 ECHILD (No child processes)
gettimeofday({1193063043, 191882}, NULL) = 0
gettimeofday({1193063043, 192386}, NULL) = 0
open("/var/log/nagios/nagios.tmpfQ1rfM", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE,
0600) = -1 EMFILE (Too many open files)
open("/var/log/nagios/nagios.log", O_RDWR|O_CREAT|O_APPEND|O_LARGEFILE,
0666) = -1 EMFILE (Too many open files)
gettimeofday({1193063043, 193845}, NULL) = 0
time(NULL) = 1193063043
gettimeofday({1193063043, 194616}, NULL) = 0
time(NULL) = 1193063043
waitpid(-1, NULL, WNOHANG) = -1 ECHILD (No child processes)
gettimeofday({1193063043, 195878}, NULL) = 0
gettimeofday({1193063043, 196376}, NULL) = 0
time(NULL) = 1193063043
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=2427, ...}) = 0
gettimeofday({1193063043, 197869}, NULL) = 0
time(NULL) = 1193063043
time(NULL) = 1193063043
time(NULL) = 1193063043
gettimeofday({1193063043, 199799}, NULL) = 0
umask(077) = 02
open("/var/spool/nagios/checkVC2i95", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE,
0600) = -1 EMFILE (Too many open files)
umask(02) = 077
gettimeofday({1193063043, 201991}, NULL) = 0
time(NULL) = 1193063043
open("/usr/lib/nagios/plugins/check_latency", O_RDONLY|O_LARGEFILE) = -1
EMFILE (Too many open files)
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0xb7fe3708) = -1 ENOMEM (Cannot allocate memory)
open("/var/log/nagios/nagios.log", O_RDWR|O_CREAT|O_APPEND|O_LARGEFILE,
0666) = -1 EMFILE (Too many open files)
time(NULL) = 1193063043
time(NULL) = 1193063043
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=2427, ...}) = 0
gettimeofday({1193063043, 207687}, NULL) = 0
gettimeofday({1193063043, 208214}, NULL) = 0
gettimeofday({1193063043, 208596}, NULL) = 0
time(NULL) = 1193063043
waitpid(-1, NULL, WNOHANG) = -1 ECHILD (No child processes)
time(NULL) = 1193063043
gettimeofday({1193063043, 210367}, NULL) = 0
nanosleep({0, 250000000}, NULL) = 0
time(NULL) = 1193063043
waitpid(-1, NULL, WNOHANG) = -1 ECHILD (No child processes)
time(NULL) = 1193063043
gettimeofday({1193063043, 465052}, NULL) = 0
nanosleep({0, 250000000}, NULL) = 0
time(NULL) = 1193063043
waitpid(-1, NULL, WNOHANG) = -1 ECHILD (No child processes)
time(NULL) = 1193063043
gettimeofday({1193063043, 721250}, NULL) = 0
nanosleep({0, 250000000}, NULL) = 0
time(NULL) = 1193063043
waitpid(-1, NULL, WNOHANG) = -1 ECHILD (No child processes)
time(NULL) = 1193063043
gettimeofday({1193063043, 976644}, NULL) = 0
nanosleep({0, 250000000}, NULL) = 0
time(NULL) = 1193063044
waitpid(-1, NULL, WNOHANG) = -1 ECHILD (No child processes)
time(NULL) = 1193063044
gettimeofday({1193063044, 229116}, NULL) = 0
nanosleep({0, 250000000}, NULL) = 0
time(NULL) = 1193063044
waitpid(-1, NULL, WNOHANG) = -1 ECHILD (No child processes)
time(NULL) = 1193063044
gettimeofday({1193063044, 485005}, NULL) = 0
nanosleep({0, 250000000}, NULL) = 0
time(NULL) = 1193063044
waitpid(-1, NULL, WNOHANG) = -1 ECHILD (No child processes)
time(NULL) = 1193063044
gettimeofday({1193063044, 741063}, NULL) = 0
nanosleep({0, 250000000}, NULL) = 0
time(NULL) = 1193063044
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
More information about the Developers
mailing list