ndo2db problems on solaris 10 (ndoutils 1.4b7)
Michael Prochaska
michael at prochas.net
Wed Feb 27 16:14:06 CET 2008
hi!
> Funny you should mention this as we just found a fix for Solaris for
> ndoutils 1.4b3. Note that in the accept call 11 lines up from the
> bottom there is an EINTR error from accept. We've patched the call
> around the accept so that an EINTR causes a retry and this appears to
> work around the problem. See the patch attached. My guess is that this
> occurs because the signal is received at the same time that the parent
> gets a result on accept, so accept returns with this error rather than
> handling the child signal first.
>
Thanks very much for the patch. It works partially: the process doesn't
die anymore, but I have further problems writing to the database.
nagios.log:
[1204124795] Nagios 3.0rc2 starting... (PID=10288)
[1204124795] Local time is Wed Feb 27 16:06:35 CET 2008
[1204124795] LOG VERSION: 2.0
[1204124795] ndomod: NDOMOD 1.4b7 (10-31-2007) Copyright (c) 2005-2007
Ethan Galstad (nagios at nagios.org)
[1204124795] ndomod: Successfully connected to data sink. 0 queued items
to flush.
[1204124795] Event broker module '/usr/local/nagios/ndo/ndomod.o'
initialized successfully.
[1204124795] ndomod: Error writing to data sink! Some output may get lost...
[1204124795] Finished daemonizing... (New PID=10291)
[1204124811] ndomod: Successfully reconnected to data sink! 0 items lost,
253 queued items to flush.
[1204124811] ndomod: Error writing to data sink! Some output may get
lost. 236 queued items to flush.
[1204124827] ndomod: Successfully reconnected to data sink! 0 items lost,
316 queued items to flush.
[1204124827] ndomod: Error writing to data sink! Some output may get
lost. 299 queued items to flush.
[1204124836] Caught SIGTERM, shutting down...
[1204124836] Successfully shutdown... (PID=10291)
[1204124836] ndomod: Shutdown complete.
[1204124836] Event broker module '/usr/local/nagios/ndo/ndomod.o'
deinitialized successfully.
truss:
root@nagios_1 # truss -f -p 10003
10003: accept(5, 0xFFBFF554, 0xFFBFF564, SOV_DEFAULT) (sleeping...)
10003: accept(5, 0xFFBFF554, 0xFFBFF564, SOV_DEFAULT) = 6
10003: fork1() = 10289
10289: fork1() (returning as child ...) = 10003
10289: getpid() = 10289 [10003]
10003: lwp_sigmask(SIG_SETMASK, 0x00000000, 0x00000000) = 0xFFBFFEFF
[0x0000FFFF]
10289: lwp_self() = 1
10003: close(6) = 0
10289: lwp_sigmask(SIG_SETMASK, 0x00000000, 0x00000000) = 0xFFBFFEFF
[0x0000FFFF]
10289: llseek(3, 0, SEEK_CUR) = 0
10289: close(3) = 0
10289: open("/usr/local/nagios/var/ndo2db.debug",
O_RDWR|O_APPEND|O_CREAT, 0666) = 3
10289: sigaction(SIGQUIT, 0xFFBFED80, 0xFFBFEE20) = 0
10289: sigaction(SIGTERM, 0xFFBFED80, 0xFFBFEE20) = 0
10289: sigaction(SIGINT, 0xFFBFED80, 0xFFBFEE20) = 0
10289: sigaction(SIGSEGV, 0xFFBFED80, 0xFFBFEE20) = 0
10289: sigaction(SIGFPE, 0xFFBFED80, 0xFFBFEE20) = 0
10289: open("/etc/netconfig", O_RDONLY|O_LARGEFILE) = 7
10289: fcntl(7, F_DUPFD, 0x00000100) Err#22 EINVAL
10289: read(7, " # p r a g m a i d e n".., 1024) = 1024
10289: read(7, " t s t p i _ c".., 1024) = 215
10289: read(7, 0x000400F8, 1024) = 0
10289: lseek(7, 0, SEEK_SET) = 0
10289: read(7, " # p r a g m a i d e n".., 1024) = 1024
10289: read(7, " t s t p i _ c".., 1024) = 215
10289: read(7, 0x000400F8, 1024) = 0
10289: close(7) = 0
10289: open("/dev/udp", O_RDONLY) = 7
10289: ioctl(7, SIOCGLIFNUM, 0xFFBFEBD4) = 0
10289: close(7) = 0
10289: getuid() = 100 [100]
10289: getuid() = 100 [100]
10289: door_info(4, 0xFFBFE8E0) = 0
10289: door_call(4, 0xFFBFE988) = 0
10289: sigaction(SIGPIPE, 0xFFBFEC40, 0xFFBFECE0) = 0
10289: so_socket(PF_INET, SOCK_STREAM, IPPROTO_IP, "", SOV_DEFAULT) = 7
10289: brk(0x00041B10) = 0
10289: brk(0x00045B10) = 0
10289: fcntl(7, F_SETFL, (no flags)) = 0
10289: fcntl(7, F_GETFL) = 2
10289: connect(7, 0xFFBFED20, 16, SOV_DEFAULT) = 0
10289: setsockopt(7, SOL_SOCKET, SO_RCVTIMEO, 0xFFBFE1B8, 8, SOV_DEFAULT)
Err#99 ENOPROTOOPT
10289: setsockopt(7, SOL_SOCKET, SO_SNDTIMEO, 0xFFBFE1B8, 8, SOV_DEFAULT)
Err#99 ENOPROTOOPT
10289: brk(0x00045B10) = 0
10289: brk(0x00047B10) = 0
10289: setsockopt(7, ip, 3, 0xFFBFE29C, 4, SOV_DEFAULT) = 0
10289: setsockopt(7, tcp, TCP_NODELAY, 0xFFBFE298, 4, SOV_DEFAULT) = 0
10289: setsockopt(7, SOL_SOCKET, SO_KEEPALIVE, 0xFFBFE30C, 4,
SOV_DEFAULT) = 0
10289: read(7, " 4\0\0\0\n 5 . 0 . 5 1\0".., 16384) = 56
10289: brk(0x00047B10) = 0
10289: brk(0x00049B10) = 0
10289: brk(0x00049B10) = 0
10289: brk(0x0004BB10) = 0
10289: stat64("/usr/local/mysql/share/mysql/charsets/Index.xml",
0xFFBFDB08) = 0
10289: brk(0x0004BB10) = 0
10289: brk(0x0004FB10) = 0
10289: open64("/usr/local/mysql/share/mysql/charsets/Index.xml",
O_RDONLY) = 8
10289: read(8, " < ? x m l v e r s i o".., 18173) = 18173
10289: close(8) = 0
10289: brk(0x0004FB10) = 0
10289: brk(0x00051B10) = 0
10289: brk(0x00051B10) = 0
10289: brk(0x00053B10) = 0
10289: write(7, " C\0\001\rA2\0\0\0\0\0 @".., 71) = 71
10289: read(7, " W\0\002FF1504 # 2 8 0 0".., 16384) = 91
10289: shutdown(7, SHUT_RDWR, SOV_DEFAULT) = 0
10289: close(7) = 0
10289: getpid() = 10289 [10003]
10289: open("/proc/10289/psinfo", O_RDONLY) = 7
10289: read(7, "02\0\0\0\0\0\001\0\0 ( 1".., 336) = 336
10289: close(7) = 0
10289: fstat(-1, 0xFFBFE140) Err#9 EBADF
10289: open("/dev/conslog", O_WRONLY) = 7
10289: fcntl(7, F_SETFD, 0x00000001) = 0
10289: fstat(7, 0xFFBFE140) = 0
10289: fstat(7, 0xFFBFEBA0) = 0
10289: time() = 1204124795
10289: open("/usr/share/lib/zoneinfo/Europe/Vienna", O_RDONLY) = 8
10289: fstat64(8, 0xFFBFDFD0) = 0
10289: read(8, " T Z i f\0\0\0\0\0\0\0\0".., 801) = 801
10289: close(8) = 0
10289: getpid() = 10289 [10003]
10289: putmsg(7, 0xFFBFE258, 0xFFBFE24C, 0) = 0
10289: open("/var/run/syslog_door", O_RDONLY) = 8
10289: door_info(8, 0xFFBFE190) = 0
10289: getpid() = 10289 [10003]
10289: door_call(8, 0xFFBFE178) = 0
10289: close(8) = 0
10289: read(6, "\n\n H E L L O\n P R O T".., 511) = 511
10289: Incurred fault #6, FLTBOUNDS %pc = 0xFF20738C
10289: siginfo: SIGSEGV SEGV_MAPERR addr=0x44415441
10289: Received signal #11, SIGSEGV [caught]
10289: siginfo: SIGSEGV SEGV_MAPERR addr=0x44415441
10289: schedctl() = 0xFEC9E000
10289: lwp_sigmask(SIG_SETMASK, 0x00000000, 0x00000000) = 0xFFBFFEFF
[0x0000FFFF]
10289: _exit(0)
10003: accept(5, 0xFFBFF554, 0xFFBFF564, SOV_DEFAULT) (sleeping...)
10003: accept(5, 0xFFBFF554, 0xFFBFF564, SOV_DEFAULT) = 6
10003: fork1() = 10292
10003: lwp_sigmask(SIG_SETMASK, 0x00000000, 0x00000000) = 0xFFBFFEFF
[0x0000FFFF]
10292: fork1() (returning as child ...) = 10003
10003: close(6) = 0
10292: getpid() = 10292 [10003]
10292: lwp_self() = 1
10292: lwp_sigmask(SIG_SETMASK, 0x00000000, 0x00000000) = 0xFFBFFEFF
[0x0000FFFF]
10292: llseek(3, 0, SEEK_CUR) = 0
10292: close(3) = 0
10292: open("/usr/local/nagios/var/ndo2db.debug",
O_RDWR|O_APPEND|O_CREAT, 0666) = 3
10292: sigaction(SIGQUIT, 0xFFBFED80, 0xFFBFEE20) = 0
10292: sigaction(SIGTERM, 0xFFBFED80, 0xFFBFEE20) = 0
10292: sigaction(SIGINT, 0xFFBFED80, 0xFFBFEE20) = 0
10292: sigaction(SIGSEGV, 0xFFBFED80, 0xFFBFEE20) = 0
10292: sigaction(SIGFPE, 0xFFBFED80, 0xFFBFEE20) = 0
10292: open("/etc/netconfig", O_RDONLY|O_LARGEFILE) = 7
10292: fcntl(7, F_DUPFD, 0x00000100) Err#22 EINVAL
10292: read(7, " # p r a g m a i d e n".., 1024) = 1024
10292: read(7, " t s t p i _ c".., 1024) = 215
10292: read(7, 0x000400F8, 1024) = 0
10292: lseek(7, 0, SEEK_SET) = 0
10292: read(7, " # p r a g m a i d e n".., 1024) = 1024
10292: read(7, " t s t p i _ c".., 1024) = 215
10292: read(7, 0x000400F8, 1024) = 0
10292: close(7) = 0
10292: open("/dev/udp", O_RDONLY) = 7
10292: ioctl(7, SIOCGLIFNUM, 0xFFBFEBD4) = 0
10292: close(7) = 0
10292: getuid() = 100 [100]
10292: getuid() = 100 [100]
10292: door_info(4, 0xFFBFE8E0) = 0
10292: door_call(4, 0xFFBFE988) = 0
10292: sigaction(SIGPIPE, 0xFFBFEC40, 0xFFBFECE0) = 0
10292: so_socket(PF_INET, SOCK_STREAM, IPPROTO_IP, "", SOV_DEFAULT) = 7
10292: brk(0x00041B10) = 0
10292: brk(0x00045B10) = 0
10292: fcntl(7, F_SETFL, (no flags)) = 0
10292: fcntl(7, F_GETFL) = 2
10292: connect(7, 0xFFBFED20, 16, SOV_DEFAULT) = 0
10292: setsockopt(7, SOL_SOCKET, SO_RCVTIMEO, 0xFFBFE1B8, 8, SOV_DEFAULT)
Err#99 ENOPROTOOPT
10292: setsockopt(7, SOL_SOCKET, SO_SNDTIMEO, 0xFFBFE1B8, 8, SOV_DEFAULT)
Err#99 ENOPROTOOPT
10292: brk(0x00045B10) = 0
10292: brk(0x00047B10) = 0
10292: setsockopt(7, ip, 3, 0xFFBFE29C, 4, SOV_DEFAULT) = 0
10292: setsockopt(7, tcp, TCP_NODELAY, 0xFFBFE298, 4, SOV_DEFAULT) = 0
10292: setsockopt(7, SOL_SOCKET, SO_KEEPALIVE, 0xFFBFE30C, 4,
SOV_DEFAULT) = 0
10292: read(7, " 4\0\0\0\n 5 . 0 . 5 1\0".., 16384) = 56
10292: brk(0x00047B10) = 0
10292: brk(0x00049B10) = 0
10292: brk(0x00049B10) = 0
10292: brk(0x0004BB10) = 0
10292: stat64("/usr/local/mysql/share/mysql/charsets/Index.xml",
0xFFBFDB08) = 0
10292: brk(0x0004BB10) = 0
10292: brk(0x0004FB10) = 0
10292: open64("/usr/local/mysql/share/mysql/charsets/Index.xml",
O_RDONLY) = 8
10292: read(8, " < ? x m l v e r s i o".., 18173) = 18173
10292: close(8) = 0
10292: brk(0x0004FB10) = 0
10292: brk(0x00051B10) = 0
10292: brk(0x00051B10) = 0
10292: brk(0x00053B10) = 0
10292: write(7, " C\0\001\rA2\0\0\0\0\0 @".., 71) = 71
10292: read(7, " W\0\002FF1504 # 2 8 0 0".., 16384) = 91
10292: shutdown(7, SHUT_RDWR, SOV_DEFAULT) = 0
10292: close(7) = 0
10292: getpid() = 10292 [10003]
10292: open("/proc/10292/psinfo", O_RDONLY) = 7
10292: read(7, "02\0\0\0\0\0\001\0\0 ( 4".., 336) = 336
10292: close(7) = 0
10292: fstat(-1, 0xFFBFE140) Err#9 EBADF
10292: open("/dev/conslog", O_WRONLY) = 7
10292: fcntl(7, F_SETFD, 0x00000001) = 0
10292: fstat(7, 0xFFBFE140) = 0
10292: fstat(7, 0xFFBFEBA0) = 0
10292: time() = 1204124811
10292: open("/usr/share/lib/zoneinfo/Europe/Vienna", O_RDONLY) = 8
10292: fstat64(8, 0xFFBFDFD0) = 0
10292: read(8, " T Z i f\0\0\0\0\0\0\0\0".., 801) = 801
10292: close(8) = 0
10292: getpid() = 10292 [10003]
10292: putmsg(7, 0xFFBFE258, 0xFFBFE24C, 0) = 0
10292: open("/var/run/syslog_door", O_RDONLY) = 8
10292: door_info(8, 0xFFBFE190) = 0
10292: getpid() = 10292 [10003]
10292: door_call(8, 0xFFBFE178) = 0
10292: close(8) = 0
10292: read(6, "\n\n H E L L O\n P R O T".., 511) = 501
10292: Incurred fault #6, FLTBOUNDS %pc = 0xFF20738C
10292: siginfo: SIGSEGV SEGV_MAPERR addr=0x52544441
10292: Received signal #11, SIGSEGV [caught]
10292: siginfo: SIGSEGV SEGV_MAPERR addr=0x52544441
10292: schedctl() = 0xFECA8000
10292: lwp_sigmask(SIG_SETMASK, 0x00000000, 0x00000000) = 0xFFBFFEFF
[0x0000FFFF]
10292: _exit(0)
10003: accept(5, 0xFFBFF554, 0xFFBFF564, SOV_DEFAULT) (sleeping...)
10003: accept(5, 0xFFBFF554, 0xFFBFF564, SOV_DEFAULT) = 6
10003: fork1() = 10294
10003: lwp_sigmask(SIG_SETMASK, 0x00000000, 0x00000000) = 0xFFBFFEFF
[0x0000FFFF]
10294: fork1() (returning as child ...) = 10003
10294: getpid() = 10294 [10003]
10294: lwp_self() = 1
10003: close(6) = 0
10294: lwp_sigmask(SIG_SETMASK, 0x00000000, 0x00000000) = 0xFFBFFEFF
[0x0000FFFF]
10294: llseek(3, 0, SEEK_CUR) = 0
10294: close(3) = 0
10294: open("/usr/local/nagios/var/ndo2db.debug",
O_RDWR|O_APPEND|O_CREAT, 0666) = 3
10294: sigaction(SIGQUIT, 0xFFBFED80, 0xFFBFEE20) = 0
10294: sigaction(SIGTERM, 0xFFBFED80, 0xFFBFEE20) = 0
10294: sigaction(SIGINT, 0xFFBFED80, 0xFFBFEE20) = 0
10294: sigaction(SIGSEGV, 0xFFBFED80, 0xFFBFEE20) = 0
10294: sigaction(SIGFPE, 0xFFBFED80, 0xFFBFEE20) = 0
10294: open("/etc/netconfig", O_RDONLY|O_LARGEFILE) = 7
10294: fcntl(7, F_DUPFD, 0x00000100) Err#22 EINVAL
10294: read(7, " # p r a g m a i d e n".., 1024) = 1024
10294: read(7, " t s t p i _ c".., 1024) = 215
10294: read(7, 0x000400F8, 1024) = 0
10294: lseek(7, 0, SEEK_SET) = 0
10294: read(7, " # p r a g m a i d e n".., 1024) = 1024
10294: read(7, " t s t p i _ c".., 1024) = 215
10294: read(7, 0x000400F8, 1024) = 0
10294: close(7) = 0
10294: open("/dev/udp", O_RDONLY) = 7
10294: ioctl(7, SIOCGLIFNUM, 0xFFBFEBD4) = 0
10294: close(7) = 0
10294: getuid() = 100 [100]
10294: getuid() = 100 [100]
10294: door_info(4, 0xFFBFE8E0) = 0
10294: door_call(4, 0xFFBFE988) = 0
10294: sigaction(SIGPIPE, 0xFFBFEC40, 0xFFBFECE0) = 0
10294: so_socket(PF_INET, SOCK_STREAM, IPPROTO_IP, "", SOV_DEFAULT) = 7
10294: brk(0x00041B10) = 0
10294: brk(0x00045B10) = 0
10294: fcntl(7, F_SETFL, (no flags)) = 0
10294: fcntl(7, F_GETFL) = 2
10294: connect(7, 0xFFBFED20, 16, SOV_DEFAULT) = 0
10294: setsockopt(7, SOL_SOCKET, SO_RCVTIMEO, 0xFFBFE1B8, 8, SOV_DEFAULT)
Err#99 ENOPROTOOPT
10294: setsockopt(7, SOL_SOCKET, SO_SNDTIMEO, 0xFFBFE1B8, 8, SOV_DEFAULT)
Err#99 ENOPROTOOPT
10294: brk(0x00045B10) = 0
10294: brk(0x00047B10) = 0
10294: setsockopt(7, ip, 3, 0xFFBFE29C, 4, SOV_DEFAULT) = 0
10294: setsockopt(7, tcp, TCP_NODELAY, 0xFFBFE298, 4, SOV_DEFAULT) = 0
10294: setsockopt(7, SOL_SOCKET, SO_KEEPALIVE, 0xFFBFE30C, 4,
SOV_DEFAULT) = 0
10294: read(7, " 4\0\0\0\n 5 . 0 . 5 1\0".., 16384) = 56
10294: brk(0x00047B10) = 0
10294: brk(0x00049B10) = 0
10294: brk(0x00049B10) = 0
10294: brk(0x0004BB10) = 0
10294: stat64("/usr/local/mysql/share/mysql/charsets/Index.xml",
0xFFBFDB08) = 0
10294: brk(0x0004BB10) = 0
10294: brk(0x0004FB10) = 0
10294: open64("/usr/local/mysql/share/mysql/charsets/Index.xml",
O_RDONLY) = 8
10294: read(8, " < ? x m l v e r s i o".., 18173) = 18173
10294: close(8) = 0
10294: brk(0x0004FB10) = 0
10294: brk(0x00051B10) = 0
10294: brk(0x00051B10) = 0
10294: brk(0x00053B10) = 0
10294: write(7, " C\0\001\rA2\0\0\0\0\0 @".., 71) = 71
10294: read(7, " W\0\002FF1504 # 2 8 0 0".., 16384) = 91
10294: shutdown(7, SHUT_RDWR, SOV_DEFAULT) = 0
10294: close(7) = 0
10294: getpid() = 10294 [10003]
10294: open("/proc/10294/psinfo", O_RDONLY) = 7
10294: read(7, "02\0\0\0\0\0\001\0\0 ( 6".., 336) = 336
10294: close(7) = 0
10294: fstat(-1, 0xFFBFE140) Err#9 EBADF
10294: open("/dev/conslog", O_WRONLY) = 7
10294: fcntl(7, F_SETFD, 0x00000001) = 0
10294: fstat(7, 0xFFBFE140) = 0
10294: fstat(7, 0xFFBFEBA0) = 0
10294: time() = 1204124827
10294: open("/usr/share/lib/zoneinfo/Europe/Vienna", O_RDONLY) = 8
10294: fstat64(8, 0xFFBFDFD0) = 0
10294: read(8, " T Z i f\0\0\0\0\0\0\0\0".., 801) = 801
10294: close(8) = 0
10294: getpid() = 10294 [10003]
10294: putmsg(7, 0xFFBFE258, 0xFFBFE24C, 0) = 0
10294: open("/var/run/syslog_door", O_RDONLY) = 8
10294: door_info(8, 0xFFBFE190) = 0
10294: getpid() = 10294 [10003]
10294: door_call(8, 0xFFBFE178) = 0
10294: close(8) = 0
10294: read(6, "\n\n H E L L O\n P R O T".., 511) = 364
10294: Incurred fault #6, FLTBOUNDS %pc = 0xFF20738C
10294: siginfo: SIGSEGV SEGV_MAPERR addr=0x52544441
10294: Received signal #11, SIGSEGV [caught]
10294: siginfo: SIGSEGV SEGV_MAPERR addr=0x52544441
10294: schedctl() = 0xFECAA000
10294: lwp_sigmask(SIG_SETMASK, 0x00000000, 0x00000000) = 0xFFBFFEFF
[0x0000FFFF]
10294: _exit(0)
10003: accept(5, 0xFFBFF554, 0xFFBFF564, SOV_DEFAULT) (sleeping...)
any ideas?
best regards,
michael
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
More information about the Developers
mailing list