Need help detecting where signal exception originates from

Hello!

Having a problem with my ruby program.
Periodically I get SIGALARM exceptions in my application, which crash my application.
I managed to stop app from crashing using Kernel.trap
Now I want to find the root cause of this exception, so that I don’t need Kernel.trap.
Most likely it’s from some C library, but I need to find out which is it(to fix it, or at least file a bug)
Unfortunately I can’t reliably reproduce this error, so the only way was to use gdb/strace for this, but still can’t seem to find the problem…
Any tips for how I should approach this?

Ruby version - 2.6.5
Linux - Ubuntu 18.04.3

Strace backtrace

[pid 7154] 17:48:08.736960 write(5, “\1\0\0\0\0\0\0\0”, 8) = 8
[pid 7154] 17:48:08.737030 futex(0x7f84c9a38e88, FUTEX_WAIT_PRIVATE, 0, {tv_sec=2, tv_nsec=999999550} <unfinished …>
[pid 7121] 17:48:08.737087 <… futex resumed> ) = 1
[pid 7121] 17:48:08.737134 read(5, “\2\0\0\0\0\0\0\0”, 8) = 8
[pid 7121] 17:48:08.737194 read(5, 0x7ffd20b226d0, 8) = -1 EAGAIN (Resource temporarily unavailable)
[pid 7121] 17:48:08.737400 rt_sigaction(SIGINT, {sa_handler=SIG_IGN, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f84f0331f20}, {sa_handler=0x7f84c9196a50, sa_mask=[INT], sa_flags=SA_RESTORER|SA_RESTART|SA_SIGINFO, sa_restorer=0x7f84f0331f20}, 8) = 0
[pid 7121] 17:48:08.753136 close(10) = 0
[pid 7121] 17:48:08.753331 close(11) = 0
[pid 7121] 17:48:08.753453 close(22) = 0
[pid 7121] 17:48:08.753573 close(23) = 0
[pid 7121] 17:48:08.753800 close(19) = 0
[pid 7121] 17:48:08.753927 close(20) = 0
[pid 7121] 17:48:08.754049 close(21) = 0
[pid 7121] 17:48:08.754295 close(12) = 0
[pid 7121] 17:48:08.754661 close(28) = 0
[pid 7121] 17:48:08.754890 close(13) = 0
[pid 7121] 17:48:08.755021 close(17) = 0
[pid 7121] 17:48:08.755175 close(18) = 0
[pid 7121] 17:48:08.756596 close(14) = 0
[pid 7121] 17:48:08.756763 close(15) = 0
[pid 7121] 17:48:08.756894 close(16) = 0
[pid 7121] 17:48:08.757274 close(4) = 0
[pid 7121] 17:48:08.757413 close(8) = 0
[pid 7121] 17:48:08.757543 close(9) = 0
[pid 7121] 17:48:08.757824 close(3) = 0
[pid 7121] 17:48:08.758035 getpid() = 7121
[pid 7121] 17:48:08.758167 getpid() = 7121
[pid 7121] 17:48:08.758296 timer_delete(0) = 0
[pid 7121] 17:48:08.761338 munmap(0x7f84cb4c4000, 3149824) = 0
[pid 7121] 17:48:08.763335 rt_sigaction(SIGALRM, {sa_handler=SIG_DFL, sa_mask=[ALRM], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f84f0331f20}, {sa_handler=0x7f84c940edc0, sa_mask=[ALRM], sa_flags=SA_RESTORER|SA_RESTART|SA_SIGINFO, sa_restorer=0x7f84f0331f20}, 8) = 0
[pid 7121] 17:48:08.763511 rt_sigprocmask(SIG_BLOCK, ~[RTMIN RT_1], [], 8) = 0
[pid 7121] 17:48:08.763658 getpid() = 7121
[pid 7121] 17:48:08.763785 gettid() = 7121
[pid 7121] 17:48:08.763914 tgkill(7121, 7121, SIGALRM) = 0
[pid 7121] 17:48:08.764046 rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
[pid 7121] 17:48:08.764178 — SIGALRM {si_signo=SIGALRM, si_code=SI_TKILL, si_pid=7121, si_uid=1001} —
[pid 7154] 17:48:08.764262 <… futex resumed>) = ?
[pid 7154] 17:48:08.764349 +++ killed by SIGALRM +++
[pid 7153] 17:48:08.764376 <… futex resumed>) = ?
[pid 7153] 17:48:08.764432 +++ killed by SIGALRM +++
17:48:08.776268 +++ killed by SIGALRM +++

GDB backtrace

#0 esignal_init (argc=1, argv=0x7fff805e9ea8, self=93874583077120) at signal.c:338
#1 0x00007fc4df30a3e6 in vm_call0_cfunc_with_frame (ci=0x7fff805e9c20,
cc=0x7fff805e9c50, argv=0x7fff805e9ea8, calling=0x7fff805e9c30, ec=0x5560d911d4b8)
at vm_eval.c:86
#2 vm_call0_cfunc (argv=0x7fff805e9ea8, cc=0x7fff805e9c50, ci=0x7fff805e9c20,
calling=0x7fff805e9c30, ec=0x5560d911d4b8) at vm_eval.c:100
#3 vm_call0_body (argv=0x7fff805e9ea8, cc=0x7fff805e9c50, ci=0x7fff805e9c20,
calling=0x7fff805e9c30, ec=0x5560d911d4b8) at vm_eval.c:131
#4 rb_vm_call0 (ec=0x5560d911d4b8, recv=recv@entry=93874583077120, id=id@entry=3025,
argc=argc@entry=1767997196, argv=0x7fff805e9ea8, argv@entry=0xbd1,
me=) at vm_eval.c:60
#5 0x00007fc4df30c221 in rb_call0 (scope=CALL_FCALL, self=,
argv=0xbd1, argc=1767997196, mid=3025, recv=93874583077120, ec=)
at vm_eval.c:308
#6 rb_call (scope=CALL_FCALL, argv=0x1, argc=1767997196, mid=3025,
recv=93874583077120) at vm_eval.c:601
#7 rb_funcallv (recv=recv@entry=93874583077120, mid=mid@entry=3025, argc=1767997196,
argc@entry=1, argv=argv@entry=0x7fff805e9ea8) at vm_eval.c:823
#8 0x00007fc4df167946 in rb_obj_call_init (obj=obj@entry=93874583077120,
argc=argc@entry=1, argv=argv@entry=0x7fff805e9ea8) at eval.c:1628
#9 0x00007fc4df1f3e3f in rb_class_s_new (klass=93874447399520, argv=0x7fff805e9ea8,
argc=1) at object.c:2187
#10 rb_class_new_instance (argc=1, argv=0x7fff805e9ea8, klass=93874447399520)
at object.c:2208
#11 0x00007fc4df30a3e6 in vm_call0_cfunc_with_frame (ci=0x7fff805e9d70,
cc=0x7fff805e9da0, argv=0x7fff805e9ea8, calling=0x7fff805e9d80, ec=0x5560d911d4b8)
at vm_eval.c:86
#12 vm_call0_cfunc (argv=0x7fff805e9ea8, cc=0x7fff805e9da0, ci=0x7fff805e9d70,
calling=0x7fff805e9d80, ec=0x5560d911d4b8) at vm_eval.c:100
#13 vm_call0_body (argv=0x7fff805e9ea8, cc=0x7fff805e9da0, ci=0x7fff805e9d70,
calling=0x7fff805e9d80, ec=0x5560d911d4b8) at vm_eval.c:131
#14 rb_vm_call0 (ec=0x5560d911d4b8, recv=, id=id@entry=3345,
argc=argc@entry=1, argv=argv@entry=0x7fff805e9ea8, me=)
at vm_eval.c:60
#15 0x00007fc4df2ff01e in rb_check_funcall_default (recv=,
mid=mid@entry=3345, argc=argc@entry=1, argv=argv@entry=0x7fff805e9ea8,
def=def@entry=52) at vm_eval.c:426
#16 0x00007fc4df2ff05b in rb_check_funcall (recv=, mid=mid@entry=3345,
argc=argc@entry=1, argv=argv@entry=0x7fff805e9ea8) at vm_eval.c:404
#17 0x00007fc4df1638a0 in make_exception (isstr=1, argv=0x7fff805e9ea0,
argv@entry=0x7fff805e9e50, argc=argc@entry=2) at eval.c:787
#18 rb_make_exception (argc=argc@entry=2, argv=argv@entry=0x7fff805e9ea0)
at eval.c:829
#19 0x00007fc4df2bfe0d in rb_threadptr_raise (argv=0x7fff805e9ea0, argc=2,
target_th=0x5560d911d030) at thread.c:2271
#20 rb_threadptr_signal_raise (th=, sig=)
at thread.c:2293
#21 0x00007fc4df27ed85 in rb_signal_exec (th=th@entry=0x5560d911d030,
sig=) at signal.c:1146
#22 0x00007fc4df2c1aea in rb_threadptr_execute_interrupts (th=,
blocking_timing=) at thread.c:2194
#23 0x00007fc4da23ca85 in readline_getc (input=0x5560e00773d0) at readline.c:234
—Type to continue, or q to quit—
#24 0x00007fc4da01b86e in rl_read_key () from /lib/x86_64-linux-gnu/libreadline.so.7
#25 0x00007fc4da0029d4 in ?? () from /lib/x86_64-linux-gnu/libreadline.so.7
#26 0x00007fc4da003395 in _rl_dispatch_subseq ()
from /lib/x86_64-linux-gnu/libreadline.so.7
#27 0x00007fc4da003786 in readline_internal_char ()
from /lib/x86_64-linux-gnu/libreadline.so.7
#28 0x00007fc4da003f25 in readline () from /lib/x86_64-linux-gnu/libreadline.so.7
#29 0x00007fc4df1647a4 in rb_protect (proc=proc@entry=0x7fc4da23c9a0 <readline_get>,
data=data@entry=93874582811696, pstate=pstate@entry=0x7fff805ea32c) at eval.c:1034
#30 0x00007fc4da23cc92 in readline_readline (argc=,
argv=, self=) at readline.c:509
#31 0x00007fc4df2f57ea in vm_call_cfunc_with_frame (ci=0x5560d9ec0430,
cc=, calling=, reg_cfp=0x7fc4df834910,
ec=0x5560d911d4b8) at vm_insnhelper.c:1908
#32 vm_call_cfunc (ec=0x5560d911d4b8, reg_cfp=0x7fc4df834910,
calling=, ci=0x5560d9ec0430, cc=)
at vm_insnhelper.c:1924
#33 0x00007fc4df2ff242 in vm_exec_core (ec=ec@entry=0x5560d911d4b8,
initial=initial@entry=0) at insns.def:765
#34 0x00007fc4df305bff in rb_vm_exec (ec=ec@entry=0x5560d911d4b8,
mjit_enable_p=mjit_enable_p@entry=1) at vm.c:1885
#35 0x00007fc4df30c951 in invoke_block (captured=,
opt_pc=, type=, cref=0x0, self=,
iseq=, ec=0x5560d911d4b8) at vm.c:1021
#36 invoke_iseq_block_from_c (me=0x0, is_lambda=, cref=0x0,
passed_block_handler=0, argv=0x0, argc=0, self=,
captured=, ec=0x5560d911d4b8) at vm.c:1092
#37 invoke_block_from_c_bh (argc=, argv=,
passed_block_handler=, cref=,
is_lambda=, force_blockarg=,
block_handler=, ec=) at vm.c:1110
#38 vm_yield (argc=0, argv=0x0, ec=0x5560d911d4b8) at vm.c:1155
#39 rb_yield_0 (argv=0x0, argc=0) at vm_eval.c:978
#40 loop_i () at vm_eval.c:1057
#41 0x00007fc4df164402 in rb_rescue2 (b_proc=0x7fc4df30c680 <loop_i>, data1=0,
r_proc=0x7fc4df2f0c40 <loop_stop>, data2=0) at eval.c:935
#42 0x00007fc4df2f57ea in vm_call_cfunc_with_frame (ci=0x5560d9de6000,
cc=, calling=, reg_cfp=0x7fc4df834bb0,
ec=0x5560d911d4b8) at vm_insnhelper.c:1908
#43 vm_call_cfunc (ec=0x5560d911d4b8, reg_cfp=0x7fc4df834bb0,
calling=, ci=0x5560d9de6000, cc=)
at vm_insnhelper.c:1924
#44 0x00007fc4df308b63 in vm_call_method (ec=0x5560d911d4b8, cfp=0x7fc4df834bb0,
calling=, ci=, cc=)
at vm_insnhelper.c:2400
#45 0x00007fc4df2ff304 in vm_exec_core (ec=ec@entry=0x5560d911d4b8,
initial=initial@entry=0) at insns.def:750
#46 0x00007fc4df305bff in rb_vm_exec (ec=ec@entry=0x5560d911d4b8,
mjit_enable_p=mjit_enable_p@entry=1) at vm.c:1885
#47 0x00007fc4df30d267 in invoke_block (captured=,
opt_pc=, type=, cref=0x0, self=93874582743200,
iseq=, ec=0x5560d911d4b8) at vm.c:1021
—Type to continue, or q to quit—
#48 invoke_iseq_block_from_c (me=0x0, is_lambda=0, cref=0x0, passed_block_handler=0,
argv=0x7fff805eadc8, argc=1, self=93874582743200, captured=,
ec=0x5560d911d4b8) at vm.c:1092
#49 invoke_block_from_c_bh (argc=,
passed_block_handler=, cref=, is_lambda=0,
force_blockarg=, argv=,
block_handler=, ec=) at vm.c:1110
#50 vm_yield (argc=1, argv=0x7fff805eadc8, ec=0x5560d911d4b8) at vm.c:1155
#51 rb_yield_0 (argv=0x7fff805eadc8, argc=1) at vm_eval.c:978
#52 catch_i (tag=, data=) at vm_eval.c:1900
#53 0x00007fc4df2f97f1 in vm_catch_protect (tag=,
func=0x7fc4df30cfb0 <catch_i>, data=0, stateptr=stateptr@entry=0x7fff805eaf94,
ec=ec@entry=0x5560d911d4b8) at vm_eval.c:1982
#54 0x00007fc4df2f993c in rb_catch_obj (t=, func=,
data=) at vm_eval.c:2008
#55 0x00007fc4df2f57ea in vm_call_cfunc_with_frame (ci=0x5560d9e1b060,
cc=, calling=, reg_cfp=0x7fc4df834c20,
ec=0x5560d911d4b8) at vm_insnhelper.c:1908
#56 vm_call_cfunc (ec=0x5560d911d4b8, reg_cfp=0x7fc4df834c20,
calling=, ci=0x5560d9e1b060, cc=)
at vm_insnhelper.c:1924
#57 0x00007fc4df308b63 in vm_call_method (ec=0x5560d911d4b8, cfp=0x7fc4df834c20,
calling=, ci=, cc=)
at vm_insnhelper.c:2400
#58 0x00007fc4df2ff304 in vm_exec_core (ec=ec@entry=0x5560d911d4b8,
initial=initial@entry=0) at insns.def:750
#59 0x00007fc4df305bff in rb_vm_exec (ec=ec@entry=0x5560d911d4b8,
mjit_enable_p=mjit_enable_p@entry=1) at vm.c:1885
#60 0x00007fc4df30d267 in invoke_block (captured=,
opt_pc=, type=, cref=0x0, self=93874582692200,
iseq=, ec=0x5560d911d4b8) at vm.c:1021
#61 invoke_iseq_block_from_c (me=0x0, is_lambda=0, cref=0x0, passed_block_handler=0,
argv=0x7fff805eb3e8, argc=1, self=93874582692200, captured=,
ec=0x5560d911d4b8) at vm.c:1092
#62 invoke_block_from_c_bh (argc=,
passed_block_handler=, cref=, is_lambda=0,
force_blockarg=, argv=,
block_handler=, ec=) at vm.c:1110
#63 vm_yield (argc=1, argv=0x7fff805eb3e8, ec=0x5560d911d4b8) at vm.c:1155
#64 rb_yield_0 (argv=0x7fff805eb3e8, argc=1) at vm_eval.c:978
#65 catch_i (tag=, data=) at vm_eval.c:1900
#66 0x00007fc4df2f97f1 in vm_catch_protect (tag=,
func=0x7fc4df30cfb0 <catch_i>, data=0, stateptr=stateptr@entry=0x7fff805eb5b4,
ec=ec@entry=0x5560d911d4b8) at vm_eval.c:1982
#67 0x00007fc4df2f993c in rb_catch_obj (t=, func=,
data=) at vm_eval.c:2008
#68 0x00007fc4df2f57ea in vm_call_cfunc_with_frame (ci=0x5560d9d24530,
cc=, calling=, reg_cfp=0x7fc4df834d00,
ec=0x5560d911d4b8) at vm_insnhelper.c:1908
#69 vm_call_cfunc (ec=0x5560d911d4b8, reg_cfp=0x7fc4df834d00,
calling=, ci=0x5560d9d24530, cc=)
at vm_insnhelper.c:1924
—Type to continue, or q to quit—
#70 0x00007fc4df308b63 in vm_call_method (ec=0x5560d911d4b8, cfp=0x7fc4df834d00,
calling=, ci=, cc=)
at vm_insnhelper.c:2400
#71 0x00007fc4df2ff304 in vm_exec_core (ec=0x5560d911d4b8, initial=initial@entry=0)
at insns.def:750
#72 0x00007fc4df30621f in rb_vm_exec (ec=0x5560d911d4b8,
mjit_enable_p=mjit_enable_p@entry=1) at vm.c:1894
#73 0x00007fc4df309f59 in rb_iseq_eval (iseq=iseq@entry=0x5560d9afac28) at vm.c:2133
#74 0x00007fc4df1b51e7 in rb_load_internal0 (ec=ec@entry=0x5560d911d4b8,
fname=fname@entry=93874457393520, wrap=wrap@entry=0) at load.c:612
#75 0x00007fc4df1b8a51 in rb_require_internal (fname=,
fname@entry=93874457393680, safe=0) at load.c:1029
#76 0x00007fc4df1b8c99 in rb_require_safe (fname=93874457393680, safe=)
at load.c:1075
#77 0x00007fc4df2f57ea in vm_call_cfunc_with_frame (ci=0x5560d994ab40,
cc=, calling=, reg_cfp=0x7fc4df834fa0,
ec=0x5560d911d4b8) at vm_insnhelper.c:1908
#78 vm_call_cfunc (ec=0x5560d911d4b8, reg_cfp=0x7fc4df834fa0,
calling=, ci=0x5560d994ab40, cc=)
at vm_insnhelper.c:1924
#79 0x00007fc4df308b63 in vm_call_method (ec=0x5560d911d4b8, cfp=0x7fc4df834fa0,
calling=, ci=, cc=)
at vm_insnhelper.c:2400
#80 0x00007fc4df2ff242 in vm_exec_core (ec=ec@entry=0x5560d911d4b8,
initial=initial@entry=0) at insns.def:765
#81 0x00007fc4df305bff in rb_vm_exec (ec=0x5560d911d4b8,
mjit_enable_p=mjit_enable_p@entry=1) at vm.c:1885
#82 0x00007fc4df30a080 in rb_iseq_eval_main (iseq=iseq@entry=0x5560d954cbf0)
at vm.c:2144
#83 0x00007fc4df160ea6 in ruby_exec_internal (n=0x5560d954cbf0) at eval.c:262
#84 0x00007fc4df16331d in ruby_exec_node (n=, n@entry=0x5560d954cbf0)
at eval.c:326
#85 0x00007fc4df1672ae in ruby_run_node (n=0x5560d954cbf0) at eval.c:318
#86 0x00005560d747694b in main (argc=, argv=)
at ./main.c:42

After sometime found the problem.

One of the c libraries was using an thread unsafe version, after forcing it to use thread safe version, the problem disappeared.