Nginx crashing w/ core dump under moderate load

I’m experiencing a problem with Nginx 0.8.53 crashing under moderate
load. It’s configured as a proxy with ssi enabled. Config file,
backtrace, et al are below (the only modifications are hostnames to
protect the innocent). I don’t have a debug log, but I have a lot of
other useful info.

phil.

##################################################################
2010/11/16 14:55:19 [alert] 7568#0: worker process 7569 exited on signal
11 (core dumped)

####################################################################
[phil@web01 nginx]$ /opt/local/nginx/sbin/nginx -V
nginx version: nginx/0.8.53
built by gcc 4.1.2 20080704 (Red Hat 4.1.2-48)
configure arguments: --prefix=/opt/local/nginx --with-http_realip_module

####################################################################
sudo gdb /opt/local/nginx/sbin/nginx /tmp/core.27113
[sudo] password for phil:
GNU gdb (GDB) Red Hat Enterprise Linux (7.0.1-23.el5_5.2)
Copyright (C) 2009 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later
http://gnu.org/licenses/gpl.html
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type “show
copying”
and “show warranty” for details.
This GDB was configured as “i386-redhat-linux-gnu”.
For bug reporting instructions, please see:
http://www.gnu.org/software/gdb/bugs/
Reading symbols from /opt/local/nginx-0.8.53/sbin/nginx…done.

warning: .dynamic section for “/lib/libc.so.6” is not at the expected
address

warning: difference appears to be caused by prelink, adjusting
expectations
Reading symbols from /lib/libcrypt.so.1…(no debugging symbols
found)…done.
Loaded symbols for /lib/libcrypt.so.1
Reading symbols from /lib/libpcre.so.0…(no debugging symbols
found)…done.
Loaded symbols for /lib/libpcre.so.0
Reading symbols from /lib/libcrypto.so.6…(no debugging symbols
found)…done.
Loaded symbols for /lib/libcrypto.so.6
Reading symbols from /usr/lib/libz.so.1…(no debugging symbols
found)…done.
Loaded symbols for /usr/lib/libz.so.1
Reading symbols from /lib/libc.so.6…(no debugging symbols
found)…done.
Loaded symbols for /lib/libc.so.6
Reading symbols from /lib/libdl.so.2…(no debugging symbols
found)…done.
Loaded symbols for /lib/libdl.so.2
Reading symbols from /lib/ld-linux.so.2…(no debugging symbols
found)…done.
Loaded symbols for /lib/ld-linux.so.2
Reading symbols from /lib/libnss_files.so.2…(no debugging symbols
found)…done.
Loaded symbols for /lib/libnss_files.so.2
Core was generated by `nginx: worker process
'.
Program terminated with signal 11, Segmentation fault.
#0 ngx_rbtree_min (tree=0xb778c040, node=0xb778d380) at
src/core/ngx_rbtree.h:75
75 while (node->left != sentinel) {
(gdb) bt
#0 ngx_rbtree_min (tree=0xb778c040, node=0xb778d380) at
src/core/ngx_rbtree.h:75
#1 ngx_rbtree_delete (tree=0xb778c040, node=0xb778d380) at
src/core/ngx_rbtree.c:178
#2 0x080856e0 in ngx_http_file_cache_free (c=0x85bbe9c, tf=0x0) at
src/http/ngx_http_file_cache.c:941
#3 0x0807be76 in ngx_http_upstream_finalize_request (r=0x85c7fa0,
u=0x85bbc0c, rc=499) at src/http/ngx_http_upstream.c:3002
#4 0x0807e005 in ngx_http_upstream_next (r=0x85c7fa0, u=0x85bbc0c,
ft_type=4) at src/http/ngx_http_upstream.c:2834
#5 0x0807e0fb in ngx_http_upstream_process_header (r=0x85c7fa0,
u=0x85bbc0c) at src/http/ngx_http_upstream.c:1459
#6 0x0807c965 in ngx_http_upstream_handler (ev=0x0) at
src/http/ngx_http_upstream.c:895
#7 0x0805df3f in ngx_event_expire_timers () at
src/event/ngx_event_timer.c:149
#8 0x0805de76 in ngx_process_events_and_timers (cycle=0x85b5e08) at
src/event/ngx_event.c:261
#9 0x08063913 in ngx_worker_process_cycle (cycle=0x85b5e08, data=0x0)
at src/os/unix/ngx_process_cycle.c:795
#10 0x08062201 in ngx_spawn_process (cycle=0x85b5e08, proc=0x806385b
<ngx_worker_process_cycle>, data=0x0,
name=0x80a3a81 “worker process”, respawn=-3) at
src/os/unix/ngx_process.c:196
#11 0x08062f22 in ngx_start_worker_processes (cycle=0x85b5e08, n=1,
type=-3) at src/os/unix/ngx_process_cycle.c:355
#12 0x08063e97 in ngx_master_process_cycle (cycle=0x85b5e08) at
src/os/unix/ngx_process_cycle.c:136
#13 0x0804b4ff in main (argc=3, argv=0xbfd5b294) at src/core/nginx.c:401

Forgot the config file.

phil.

user nobody;
worker_processes 1;

#error_log logs/error.log;
#error_log logs/error.log notice;
#error_log logs/error.log info;

#pid logs/nginx.pid;

worker_rlimit_nofile 65535;
working_directory /opt/local/nginx/cores/;
worker_rlimit_core 500M;

events {
worker_connections 10240;
}

http {

server_tokens off;

include mime.types;
default_type application/octet-stream;

log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';

access_log logs/access.log main;

sendfile on;
#tcp_nopush on;

#keepalive_timeout 0;
keepalive_timeout 5;

gzip on;

# Only for pre-production testing

include xxx-network.include;

proxy_cache_path /var/www/cache levels=1:2
keys_zone=myhostname-cache:8m max_size=1000m inactive=1m;
proxy_temp_path /var/www/cache/tmp;
proxy_cache_methods GET;

server {
listen 80 default;
server_name www.myhostname.com secure.myhostname.com
www.myhostname.com.xxx.net;

  proxy_cache_use_stale updating;
  proxy_cache_valid 200 1m;
   proxy_cache_key "$scheme://$proxy_host$request_uri $cookie_a 

$cookie_JSESSIONID $cookie_b $cookie_c";
proxy_buffer_size 64k;
proxy_buffers 32 64k;

   charset utf-8;

   access_log  logs/www.myhostname.com.access.log  main;

   # This turns out to be critically important.  Otherwise, it sends
   # Nginx in a loop on the homepage
   # and it SSIs the homepage into itself over 200 times. This
   # seems like a bug in Nginx, so be aware
   # of it during upgrades.
location /foo {
ssi off;
proxy_pass http://127.0.0.1:8080/foo;
}

   location / {
    ssi on;
    ssi_silent_errors off;
    log_subrequest on;

    ########################################################################################
  # NOTE / IMPORTANT / URGENT / README / ATTENTION / ACHTUNG / 

ATTENZIONE /
########################################################################################
#
# These settings MUST be changed together. If you turn caching
# off by commenting out the
# proxy cache directive, you MUST change X-Caching-Mode to
# "Off". Valid values for this
# header are On/Off. FooCode, foo.jsp, the /foo controller and
# the
# esi.tag all rely on this being handled properly.
#

    proxy_set_header X-Caching-Mode On;
    proxy_cache myhostname-cache;

  #
    ########################################################################################


    proxy_pass http://127.0.0.1:8080/;
       root   html;
       index  index.html index.htm;

    # For the ESI-like webhit-recording servlet to know what the 

original URI is.
proxy_set_header X-Original-Request-URI $request_uri;
proxy_set_header X-Original-Request-Method $request_method;

    # First added for spring security to redirect correctly for 

protected pages
proxy_set_header Host $http_host;

    # http://wiki.nginx.org/NginxLikeApache
    proxy_set_header X-Forwarded-Host $host;
    proxy_set_header X-Forwarded-Server $host;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;

  # Make things served up by this nginx instance totally 

non-cacheable to the outside world.
# This is complex and has implications if you change it. Discuss
laboriously with your teammates.
expires epoch;

    # Pre-launch stuff.
      # error_page 403 http://x.myhostname.com/;

    satisfy any;

    # For blocking access to everything except the xxx net.
    # include xxx-network.include; <-- this file format needs 

redone.

    # Open up to xxx
  include xxx-network.include;

    # Open up to xxx
  allow xxx;
  allow xxx;

    # Leave these in always - the local net and xxx.
  allow xxx;
  allow xxx;

  # xxx box for load testing
  allow xxx;

    # Otherwise block.
  deny all;

    # Put a password on stuff, per xxx.
  auth_basic "myhostname";
  auth_basic_user_file .htpasswd;

      # redirect server error pages to the static page /50x.html
      #
  error_page   500 502 503 504  /50x.html;
    location = /50x.html {
    root   html;
  }

}

}

# myhostname.com - perm redirect to www

server {
server_name myhostname.com;
rewrite ^/(.*) http://www.myhostname.com/$1 permanent;
}

# static assets

server {
server_name semi-1.prod.xxx.net xxx.prod.xxx.net;
root /var/www/domains/prod.xxx.net/semi-1/htdocs/;
expires max;

location ~ /.svn/* {
    deny  all;
}

}

}

It turns out that removing the “max_size=1000m inactive=1m” attributes
for the proxy_cache_path directive removes the constant core-dumping
crashiness that we have been experiencing.

Any thoughts as to why this may be the case would be interesting.

phil.

On Nov 18, 2010, at 3:52 PM, Philip Jacob wrote:

http {
access_log logs/access.log main;
include xxx-network.include;
proxy_cache_valid 200 1m;
# of it during upgrades.

########################################################################################

    proxy_cache myhostname-cache;
    proxy_set_header X-Original-Request-URI $request_uri;
  # Make things served up by this nginx instance totally non-cacheable to 

the outside world.

      # redirect server error pages to the static page /50x.html

myhostname.com - perm redirect to www

I’m experiencing a problem with Nginx 0.8.53 crashing under moderate load.
It’s configured as a proxy with ssi enabled. Config file, backtrace, et al are
below (the only modifications are hostnames to protect the innocent). I don’t
have a debug log, but I have a lot of other useful info.
configure arguments: --prefix=/opt/local/nginx --with-http_realip_module
This GDB was configured as “i386-redhat-linux-gnu”.
Loaded symbols for /lib/libpcre.so.0
Reading symbols from /lib/libnss_files.so.2…(no debugging symbols
found)…done.
#4 0x0807e005 in ngx_http_upstream_next (r=0x85c7fa0, u=0x85bbc0c, ft_type=4)
at src/http/ngx_http_upstream.c:2834


Philip Jacob
http://www.whirlycott.com/phil/

Hello!

On Sat, Nov 20, 2010 at 09:26:07AM -0500, Whirlycott wrote:

It turns out that removing the “max_size=1000m inactive=1m”
attributes for the proxy_cache_path directive removes the
constant core-dumping crashiness that we have been experiencing.

Any thoughts as to why this may be the case would be
interesting.

Looks like insufficient locking, and inactive=1m is low enough to
trigger problems between the cache manager removing entries from the
cache and nginx workers updating it.

You may want to check which atomic ops are used in your setup. In
particular,

built by gcc 4.1.2 20080704 (Red Hat 4.1.2-48)

The gcc you used to compile nginx looks somewhat old, though it
has builtin atomic ops. If there are bugs in them, you’re
likely to see similar segfaults (and AFAIR there were problems on
modern CPUs…).

Trivial test is to try recompiling nginx with something like

./configure --with-cc-opt="-DNGX_HAVE_GCC_ATOMIC=0"

and check if it helps.

Maxim D.