unbound-1.19.0 alloc_reg_obtain() core dumps

Sami Kerola kerolasa at iki.fi
Fri Feb 9 12:18:02 UTC 2024


Hello again,

Crashes keep on happening.  Instead of reporting the same backtraces here
are some that I think I have not reported yet.  Different debugging attempts
are marked with with '== <date> ==' header so that it is easy to tell which
backtraces and value print outs are related to each other.

== 2024-02-09t03-11-37 ==

Program terminated with signal SIGSEGV, Segmentation fault.

(gdb) bt -full
#0  0x00005564b581c4f9 in infra_lookup_nottl (infra=0x5564b7646663,
infra at entry=0x5564b7bd9230, addr=0x0, addr at entry=0x5564b8042518,
addrlen=0, addrlen at entry=16, name=0x0, name at entry=0x5564b844b0d8 "rg",
namelen=namelen at entry=11, wr=-1207894016, wr at entry=1)
    at services/cache/infra.c:383
        k = {addr = {ss_family = 2,
            __ss_padding =
"\0005\307\004\2125\000\000\000\000\000\000\000\000\003\000\000\000\000\000\000\000P5/\273dU\000\000\200\262J\270dU\000\0000\262J\270dU\000\0000\262J\270dU\000\000&\310\203\265dU\000\000\240\210\241\226\374\177\000\000\360\323J\270dU\000\000\030\351g\347\353\177\000\000\300\210\241\226\374\177\000\000\020\000\000\0000\000\000\000\220\211\241\226\374\177\000\000\320\210\241\226\374\177\000",
__ss_align = 0}, addrlen = 16, zonename = 0x5564b844b0d8 "rg", namelen
= 11, entry = {lock = {__data = {__readers = 0, __writers = 0,
                __wrphase_futex = 16, __writers_futex = 48, __pad3 =
2527168976, __pad4 = 32764, __cur_writer = -1767798512, __shared =
32764, __rwelision = 87 'W', __pad1 = "\331\t\350\353\177\000", __pad2
= 93891074126344, __flags = 3045382490},
              __size =
"\000\000\000\000\000\000\000\000\020\000\000\0000\000\000\000\320\211\241\226\374\177\000\000\020\211\241\226\374\177\000\000W\331\t\350\353\177\000\000\b\362\036\270dU\000\000Z\331\204\265\001\000\000",
__align = 0}, overflow_next = 0xe3,
            lru_next = 0xe7f29b0d76027000, lru_prev = 0x3000000010,
hash = 2527169040, key = 0x7ffc96a18950, data = 0x0}}
#1  0x00005564b581ca2b in infra_rtt_update (infra=0x5564b7bd9230,
addr=0x5564b8042518, addrlen=16, nm=0x5564b844b0d8 "rg", nmlen=11,
qtype=1, roundtrip=-1, orig_rtt=12000, timenow=1707448279) at
services/cache/infra.c:579
        e = <optimized out>
        data = <optimized out>
        needtoinsert = <optimized out>
        expired = <optimized out>
        rto = <optimized out>
        oldprobedelay = <optimized out>
#2  0x00005564b58b3dea in serviced_udp_callback (c=0x5564bb2aa9e0,
arg=0x5564b80424c0, error=error at entry=-2, rep=rep at entry=0x0) at
services/outside_network.c:3270
        sq = 0x5564b80424c0
        outnet = 0x5564baa5abe0
        now = <optimized out>
        p = <optimized out>
#3  0x00005564b58afcf6 in pending_udp_timer_cb (arg=0x5564b81b36a0) at
services/outside_network.c:1592
        p = 0x5564b81b36a0
        outnet = 0x5564baa5abe0
        __func__ = "pending_udp_timer_cb"
#4  0x00007febe8094482 in ?? ()
No symbol table info available.
#5  0x00005564ba902cf0 in ?? ()
No symbol table info available.
...
#15 0x0000000000000000 in ?? ()
No symbol table info available.

(gdb) print *addr at entry
$1 = {ss_family = 2, __ss_padding = "\0005\307\004\2125", '\000'
<repeats 15 times>, "S", '\000' <repeats 95 times>, __ss_align = 0}
(gdb) print *addr
Cannot access memory at address 0x0
(gdb) print *infra
Cannot access memory at address 0x5564b7646663
(gdb) print k
$2 = {addr = {ss_family = 2,
    __ss_padding =
"\0005\307\004\2125\000\000\000\000\000\000\000\000\003\000\000\000\000\000\000\000P5/\273dU\000\000\200\262J\270dU\000\0000\262J\270dU\000\0000\262J\270dU\000\000&\310\203\265dU\000\000\240\210\241\226\374\177\000\000\360\323J\270dU\000\000\030\351g\347\353\177\000\000\300\210\241\226\374\177\000\000\020\000\000\0000\000\000\000\220\211\241\226\374\177\000\000\320\210\241\226\374\177\000",
__ss_align = 0}, addrlen = 16, zonename = 0x5564b844b0d8 "rg", namelen
= 11, entry = {lock = {__data = {__readers = 0, __writers = 0,
__wrphase_futex = 16,
        __writers_futex = 48, __pad3 = 2527168976, __pad4 = 32764,
__cur_writer = -1767798512, __shared = 32764, __rwelision = 87 'W',
__pad1 = "\331\t\350\353\177\000", __pad2 = 93891074126344, __flags =
3045382490},
      __size = "\000\000\000\000\000\000\000\000\020\000\000\0000\000\000\000\320\211\241\226\374\177\000\000\020\211\241\226\374\177\000\000W\331\t\350\353\177\000\000\b\362\036\270dU\000\000Z\331\204\265\001\000\000",
__align = 0}, overflow_next = 0xe3,
    lru_next = 0xe7f29b0d76027000, lru_prev = 0x3000000010, hash =
2527169040, key = 0x7ffc96a18950, data = 0x0}}
(gdb) print wr
$3 = -1207894016
(gdb) print *wr
Cannot access memory at address 0xffffffffb8010000

It looks like 'addr' became null while infra_lookup_nottl() was running.

== 2024-02-08t02-10-49 ==

Program terminated with signal SIGSEGV, Segmentation fault.

(gdb) bt -full
#0  rbtree_find_less_equal (rbtree=0x56182f18b550, key=0x7ffc99f47c90,
result=0x7ffc99f47c88) at util/rbtree.c:527
        r = <optimized out>
        node = 0x0
        __func__ = "rbtree_find_less_equal"
#1  0x000056182a24b3f1 in rbtree_search (key=0x7ffc99f47c90,
rbtree=<optimized out>) at util/rbtree.c:285
        node = 0x0
        node = <optimized out>
#2  mesh_area_find (mesh=<optimized out>, cinfo=<optimized out>,
qinfo=<optimized out>, qflags=<optimized out>, prime=<optimized out>,
valrec=<optimized out>) at services/mesh.c:1592
        key = {node = {parent = 0x56182bb0dca8, left = 0x0, right =
0x0, key = 0x7ffc99f47c90, color = 0 '\000'}, run_node = {parent =
0x0, left = 0x56182e79c42c, right = 0x23, key = 0x100000003, color = 0
'\000'}, s = {qinfo = {qname = 0x7ffc99f48050
"\001e\froot-servers\003net",
              qname_len = 20, qtype = 1, qclass = 1, local_alias =
0x0}, query_flags = 256, is_priming = 0, is_valrec = 0, reply = 0x0,
return_msg = 0x0, return_rcode = 0, reply_origin = 0x7ffc99f482f0,
blacklist = 0x56182bae22d0, region = 0x4, errinf = 0x0, curmod = 3,
ext_state = {
              module_state_initial, module_state_initial,
module_state_initial, 707042248, 22040, 790017248, 22040,
module_restart_next, module_state_initial, 2582937080, 32764,
1883532271, 32511, 4294967295, module_state_initial, 779732000}, minfo
= {0x0, 0x7ffc99f47ff0,
              0x7ffc99f481c0, 0x45, 0x43, 0x7ffc99f480f0,
0x7ffc99f480f0, 0x7eff70449afd, 0x56182e79c3f0, 0x56182f16b4e0,
0x60a03274372, 0x0, 0xffffffff00000000, 0xa0289dd641a06f00, 0x0,
0x7ffc99f47ff0}, env = 0x56182a35ccc8
<dnstap.message.field_descriptors.lto_priv+936>,
            mesh_info = 0x84, prefetch_leeway = 140722891423984,
serve_expired_data = 0x7ffc99f480f0, edns_opts_front_in =
0x7ffc99f480f0, edns_opts_back_out = 0x7eff70448ab3, edns_opts_back_in
= 0xe, edns_opts_front_out = 0x81, no_cache_lookup = 0, no_cache_store
= 0,
            need_refetch = -1712030080, was_ratelimited = 32764,
qstarttime = 0, is_cachedb_answer = 0, client_info = 0x0,
respip_action_info = 0x7eff70446e64, rpz_passthru = 0, tcp_required =
0, is_drop = 1883532271}, reply_list = 0x0, cb_list = 0x7ffc99f47ef0,
super_set = {
            root = 0x0, count = 139635549566676, cmp =
0x56002bd196d0}, sub_set = {root = 0xa0289dd641a06f00, count = 53, cmp
= 0x56182a25abb5 <ipstrtoaddr+213>}, num_activated = 140722891423872,
prev = 0x7eff704499bd, next = 0x0, list_select = (unknown:
0x99f47ef0), unique = 0x0,
          replies_sent = 92 '\\'}
        result = <optimized out>
#3  0x000056182a2df54f in mesh_new_callback.constprop.0
(mesh=0x56182f18b520, qinfo=qinfo at entry=0x7ffc99f47fb0,
edns=edns at entry=0x7ffc99f48250, buf=buf at entry=0x56182f19bdd0,
cb=cb at entry=0x56182a2881f0 <auth_xfer_probe_lookup_callback>,
cb_arg=cb_arg at entry=0x56182bd26350,
    rpz_passthru=0, qid=0, qflags=256) at services/mesh.c:559
        s = 0x0
        unique = 0
        timeout = 0
        was_detached = 0
        was_noreply = 0
        added = 0
#4  0x000056182a287683 in xfr_probe_lookup_host (env=0x56182c1b3518,
xfr=0x56182bd26350) at services/authzone.c:6630
        and so on

(gdb) print *node
Cannot access memory at address 0x0

Red-black tree node became null while it was examined.

== 2024-02-08t04-40-48 ==

Program terminated with signal SIGSEGV, Segmentation fault.

(gdb) bt
#0  0x000055da1457abeb in mesh_state_compare (ap=0x55da15570c00,
bp=0x0) at services/mesh.c:144
#1  0x000055da1459195a in rbtree_find_less_equal
(rbtree=rbtree at entry=0x55da183c5058, key=0x55da15570c00,
result=result at entry=0x7ffea92b1638) at util/rbtree.c:527
#2  0x000055da145919e7 in rbtree_search (key=<optimized out>,
rbtree=0x55da183c5058) at util/rbtree.c:285
#3  rbtree_delete (rbtree=0x55da183c5058, key=<optimized out>) at
util/rbtree.c:333
#4  0x000055da1462439c in mesh_state_delete.part.0.isra.0
(qstate=<optimized out>) at services/mesh.c:984
#5  0x000055da145808c2 in mesh_state_delete (qstate=0x55da15570c50) at
services/mesh.c:952
#6  mesh_continue (ev=<synthetic pointer>, s=<optimized out>,
mstate=0x55da15570c00, mesh=0x55da183c5040) at services/mesh.c:1843
#7  mesh_run (mesh=0x55da183c5040, mstate=0x55da15570c00,
ev=<optimized out>, e=0x0) at services/mesh.c:1874
#8  0x000055da14549f13 in mesh_report_reply (what=0,
reply=0x7ffea92b1a40, e=0x55da155749a0, mesh=<optimized out>) at
services/mesh.c:816
#9  worker_handle_service_reply (c=0x55da1837fa30, arg=0x55da155749a0,
error=0, reply_info=0x7ffea92b1a40) at daemon/worker.c:268
#10 0x000055da145f7069 in serviced_callbacks
(sq=sq at entry=0x55da183f6a60, error=error at entry=0,
c=c at entry=0x55da1837fa30, rep=rep at entry=0x7ffea92b1a40) at
services/outside_network.c:3044
#11 0x000055da145f7e30 in serviced_udp_callback (c=0x55da1837fa30,
arg=0x55da183f6a60, error=error at entry=0, rep=rep at entry=0x7ffea92b1a40)
at services/outside_network.c:3384
#12 0x000055da145f3b7c in outnet_udp_cb (c=<optimized out>,
arg=0x55da17b2c6d0, error=<optimized out>, reply_info=0x7ffea92b1a40)
at services/outside_network.c:1537
#13 0x000055da145eba58 in comm_point_udp_callback (fd=236,
event=<optimized out>, arg=<optimized out>) at util/netevent.c:1145
#14 0x00007f6cf021d585 in ?? ()
#15 0x000055da179d4790 in ?? ()
#16 0x000055da179d4920 in ?? ()
#17 0x000000000013ce8c in ?? ()
#18 0x0000000025b244d4 in ?? ()
#19 0x000000000013cef5 in ?? ()
#20 0x33eddbe1ed004d00 in ?? ()
#21 0x000000000013ce7d in ?? ()
#22 0x000055da179d44d0 in ?? ()
#23 0x000055da179d4628 in ?? ()
#24 0x33eddbe1ed004d00 in ?? ()
#25 0x0000000000000000 in ?? ()

(gdb) print *a
$1 = {node = {parent = 0x55da155b0d00, left = 0x55da14696680
<rbtree_null_node>, right = 0x55da14696680 <rbtree_null_node>, key =
0x55da15570c00, color = 1 '\001'}, run_node = {parent = 0x55da14696680
<rbtree_null_node>, left = 0x55da14696680 <rbtree_null_node>,
    right = 0x55da14696680 <rbtree_null_node>, key = 0x55da15570c00,
color = 0 '\000'}, s = {qinfo = {qname = 0x55da15570e70
"\froot-servers\003net", qname_len = 18, qtype = 43, qclass = 1,
local_alias = 0x0}, query_flags = 272, is_priming = 0, is_valrec = 1,
reply = 0x0,
    return_msg = 0x55da155749c0, return_rcode = 0, reply_origin =
0x55da155d52c8, blacklist = 0x0, region = 0x55da15570bc0, errinf =
0x0, curmod = 0, ext_state = {module_finished, module_finished,
module_state_initial <repeats 14 times>}, minfo = {0x0,
0x55da15570eb8,
      0x0 <repeats 14 times>}, env = 0x55da153ecf48, mesh_info =
0x55da15570c00, prefetch_leeway = 0, serve_expired_data = 0x0,
edns_opts_front_in = 0x0, edns_opts_back_out = 0x0, edns_opts_back_in
= 0x0, edns_opts_front_out = 0x0, no_cache_lookup = 0, no_cache_store
= 0,
    need_refetch = 0, was_ratelimited = 0, qstarttime = 1707367230,
is_cachedb_answer = 0, client_info = 0x0, respip_action_info = 0x0,
rpz_passthru = 0, tcp_required = 0, is_drop = 0}, reply_list = 0x0,
cb_list = 0x0, super_set = {root = 0x55da15570e88, count = 1,
    cmp = 0x55da1457adf0 <mesh_state_ref_compare>}, sub_set = {root =
0x55da14696680 <rbtree_null_node>, count = 0, cmp = 0x55da1457adf0
<mesh_state_ref_compare>}, num_activated = 4, prev = 0x0, next = 0x0,
list_select = mesh_no_list, unique = 0x0, replies_sent = 1 '\001'}
(gdb) print *b
Cannot access memory at address 0x0

(gdb) frame 1
#1  0x000055da1459195a in rbtree_find_less_equal
(rbtree=rbtree at entry=0x55da183c5058, key=0x55da15570c00,
result=result at entry=0x7ffea92b1638) at util/rbtree.c:527
527     util/rbtree.c: No such file or directory.
(gdb) print *node
$2 = {parent = 0x0, left = 0x0, right = 0x0, key = 0x0, color = 0 '\000'}

Got a bit further with red-black tree handling, but ultimately failed due
null key.

== 2024-02-09t00-07-58 ==

Program terminated with signal SIGSEGV, Segmentation fault.

(gdb) bt -full
#0  0x000055ec98cde3ef in new_entry (infra=infra at entry=0x55ec9a467230,
addr=addr at entry=0x55ec9a950958, addrlen=0, addrlen at entry=16, name=0x0,
name at entry=0x55ec9ad6ee88 "consul\005cfops\003net",
namelen=namelen at entry=11, tm=tm at entry=1707437259) at
services/cache/infra.c:440
        data = 0x550000000000
        key = 0x0
#1  0x000055ec98cdec79 in infra_rtt_update (infra=0x55ec9a467230,
addr=0x55ec9a950958, addrlen=16, nm=0x55ec9ad6ee88
"consul\005cfops\003net", nmlen=11, qtype=1, roundtrip=-1,
orig_rtt=12000, timenow=1707437259) at services/cache/infra.c:586
        e = <optimized out>
        data = <optimized out>
        needtoinsert = 0
        expired = 0
        rto = 1
        oldprobedelay = 0
#2  0x000055ec98d75dea in serviced_udp_callback (c=0x55ec9db37580,
arg=0x55ec9a950900, error=error at entry=-2, rep=rep at entry=0x0) at
services/outside_network.c:3270
        sq = 0x55ec9a950900
        outnet = 0x55ec9d2e6840
        now = <optimized out>
        p = <optimized out>
#3  0x000055ec98d71cf6 in pending_udp_timer_cb (arg=0x55ec9a7eb900) at
services/outside_network.c:1592
        p = 0x55ec9a7eb900
        outnet = 0x55ec9d2e6840
        __func__ = "pending_udp_timer_cb"
#4  0x00007f2462df7482 in ?? ()
No symbol table info available.
...
#15 0x0000000000000000 in ?? ()
No symbol table info available.

Trying to 'key->addrlen = addrlen' in to null key, which is really strange
point to fail because three earlier lines of code were key->value
assignments and they worked.

== 2024-02-04t08-56-00 ==

Program terminated with signal SIGABRT, Aborted.

(gdb) bt -full
#0  0x00007f01a06a9e2c in __pthread_mutex_cond_lock_full
(mutex=0x130fd1) at ../nptl/pthread_mutex_lock.c:210
        assume_other_futex_waiters = <optimized out>
        oldval = <optimized out>
        id = -1507605342
        __PRETTY_FUNCTION__ = "__pthread_mutex_cond_lock_full"
#1  0x0000555873f8f97e in iter_handle (qstate=<optimized out>,
iq=0x555874edec28, ie=0x55587489c710, id=1) at
iterator/iterator.c:4031
        cont = 1
#2  0x00007f01a07bb138 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#3  0x00007fff00000018 in ?? ()
No symbol table info available.
#4  0x00007fff288d2de0 in ?? ()
No symbol table info available.
#5  0x00007fff288d2d70 in ?? ()
No symbol table info available.
#6  0xa623c4a208356200 in ?? ()
No symbol table info available.
#7  0x0000003000000010 in ?? ()
No symbol table info available.
#8  0x00007fff288d2e50 in ?? ()
No symbol table info available.
#9  0x00007f01a07bb138 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#10 0x000007f01a07f242 in ?? ()
No symbol table info available.
#11 0x000055587400d0b0 in reuse_cmp (key1=0x7f01a069e24e
<__libc_message+366>, key2=0x7f01a07bb138) at
services/outside_network.c:175
        r = <optimized out>
#12 0x00007f01a07f2440 in main_arena () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#13 0x0000000000000000 in ?? ()
No symbol table info available.

Was the pthread destroyed before unlocking was attempted?

== 2024-02-03t16-54-26 ==

Program terminated with signal SIGSEGV, Segmentation fault.

(gdb) bt -full
#0  0x00007ff192d717db in __memcpy_avx_unaligned_erms () at
../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:557
No locals.
#1  0x0000564423d53032 in regional_alloc_init (size=5,
init=0x5644253bfe58, r=0x7ff192df23f0 <main_arena+1936>) at
util/regional.c:189
        s = <optimized out>
        s = <optimized out>
#2  serviced_create (region=0x7ff192df23f0 <main_arena+1936>,
alloc=0x564424d56ca0, pad_queries_block_size=0, opt_list=<optimized
out>, qtype=1, zonelen=5, zone=0x5644253bfe58 "\003net", addrlen=16,
addr=0x7ffe944f2db0, tls_auth_name=0x0, ssl_upstream=0,
tcp_upstream=0, nocaps=0, want_dnssec=1, dnssec=32784, buff=<optimized
out>, outnet=0x56442792f2f0)
    at services/outside_network.c:2628
        sq = 0x5644251363c0
        t = {tv_sec = 16, tv_usec = 94850680552456}
        sq = <optimized out>
        t = <optimized out>
#3  outnet_serviced_query (callback=<optimized out>,
was_ratelimited=0x7ffe944f2da8, env=0x5644251efc08, buff=<optimized
out>, callback_arg=0x5644253c3600, qstate=0x5644253bfa40, zonelen=5,
zone=0x5644253bfe58 "\003net", addrlen=16, addr=0x7ffe944f2db0,
tls_auth_name=0x0, ssl_upstream=0, tcp_upstream=0, check_ratelimit=1,
nocaps=0, want_dnssec=1, dnssec=32784,
    flags=<optimized out>, qinfo=<optimized out>, outnet=<optimized
out>) at services/outside_network.c:3466
        sq = <optimized out>
        region = 0x7ff192df23f0 <main_arena+1936>
        timenow = 1706979247
        cb = <optimized out>
        client_string_addr = <optimized out>
        backed_up_opt_list = <optimized out>
        per_upstream_opt_list = 0x0
        sq = <optimized out>
        cb = <optimized out>
        client_string_addr = <optimized out>
        region = <optimized out>
        backed_up_opt_list = <optimized out>
        per_upstream_opt_list = <optimized out>
        timenow = <optimized out>
#4  worker_send_query (qinfo=<optimized out>, flags=<optimized out>,
dnssec=32784, want_dnssec=1, nocaps=0, check_ratelimit=1,
addr=0x7ffe944f2db0, addrlen=16, zone=0x5644253bfe58 "\003net",
zonelen=5, tcp_upstream=0, ssl_upstream=0, tls_auth_name=0x0,
q=0x5644253bfa40, was_ratelimited=0x7ffe944f2da8) at
daemon/worker.c:2395
        etc etc...

How is it even possible to get memory alignment issue?

-- snip

As mentioned earlier these are coming from systems that have connectivity
problems with configuration to keep items in cache.  Unfortunately I have
not examined unbound code base to the point where I could say if failing
requests can cause entries to be removed from cache.

Please let me know if there is anything I can do to make debugging easier.

-- 
Sami Kerola
https://kerolasa.iki.fi/


More information about the Unbound-users mailing list