From 5a7dd001250da73ce2236f245b6bba9a1663477e Mon Sep 17 00:00:00 2001 From: Christian Grothoff Date: Thu, 2 Dec 2021 08:02:20 +0100 Subject: [PATCH] improve suicide logic --- ...exchange.taler-exchange-aggregator.service | 3 +- ...ler-exchange.taler-exchange-closer.service | 3 +- ...aler-exchange.taler-exchange-httpd.service | 6 +- ...ler-exchange.taler-exchange-httpd@.service | 26 +++ ...aler-exchange.taler-exchange-httpd@.socket | 14 ++ ...change.taler-exchange-secmod-eddsa.service | 3 +- ...exchange.taler-exchange-secmod-rsa.service | 3 +- ...r-exchange.taler-exchange-transfer.service | 3 +- ...-exchange.taler-exchange-wirewatch.service | 3 +- ...exchange.taler-exchange-wirewatch@.service | 16 ++ src/exchange/exchange.conf | 3 + src/exchange/taler-exchange-httpd.c | 148 ++++++++++++------ src/exchange/taler-exchange-httpd.h | 7 + src/exchange/taler-exchange-httpd_keys.c | 83 ++++++++-- 14 files changed, 258 insertions(+), 63 deletions(-) create mode 100644 debian/taler-exchange.taler-exchange-httpd@.service create mode 100644 debian/taler-exchange.taler-exchange-httpd@.socket create mode 100644 debian/taler-exchange.taler-exchange-wirewatch@.service diff --git a/debian/taler-exchange.taler-exchange-aggregator.service b/debian/taler-exchange.taler-exchange-aggregator.service index d0fcbf60c..683c1a81e 100644 --- a/debian/taler-exchange.taler-exchange-aggregator.service +++ b/debian/taler-exchange.taler-exchange-aggregator.service @@ -5,7 +5,8 @@ PartOf=taler-exchange.target [Service] User=taler-exchange-aggregator Type=simple -Restart=on-failure +Restart=always +RestartSec=100ms ExecStart=/usr/bin/taler-exchange-aggregator -c /etc/taler/taler.conf StandardOutput=journal StandardError=journal diff --git a/debian/taler-exchange.taler-exchange-closer.service b/debian/taler-exchange.taler-exchange-closer.service index b2a820d35..01c52b2d4 100644 --- a/debian/taler-exchange.taler-exchange-closer.service +++ b/debian/taler-exchange.taler-exchange-closer.service @@ -5,7 +5,8 @@ PartOf=taler-exchange.target [Service] User=taler-exchange-closer Type=simple -Restart=on-failure +Restart=always +RestartSec=100ms ExecStart=/usr/bin/taler-exchange-closer -c /etc/taler/taler.conf StandardOutput=journal StandardError=journal diff --git a/debian/taler-exchange.taler-exchange-httpd.service b/debian/taler-exchange.taler-exchange-httpd.service index de65e17b6..7db2b1194 100644 --- a/debian/taler-exchange.taler-exchange-httpd.service +++ b/debian/taler-exchange.taler-exchange-httpd.service @@ -8,7 +8,11 @@ PartOf=taler-exchange.target [Service] User=taler-exchange-httpd Type=simple -Restart=on-failure +# Depending on the configuration, the service suicides and then +# needs to be restarted. +Restart=always +# Do not dally on restarts. +RestartSec=1ms ExecStart=/usr/bin/taler-exchange-httpd -c /etc/taler/taler.conf StandardOutput=journal StandardError=journal diff --git a/debian/taler-exchange.taler-exchange-httpd@.service b/debian/taler-exchange.taler-exchange-httpd@.service new file mode 100644 index 000000000..4235f72e9 --- /dev/null +++ b/debian/taler-exchange.taler-exchange-httpd@.service @@ -0,0 +1,26 @@ +% This is a systemd service template. +[Unit] +Description=GNU Taler payment system exchange REST API at %I +AssertPathExists=/run/taler/exchange-httpd +Requires=taler-exchange-httpd@%i.socket taler-exchange-secmod-rsa.service taler-exchange-secmod-eddsa.service +After=postgres.service network.target taler-exchange-secmod-rsa.service taler-exchange-secmod-eddsa.service +PartOf=taler-exchange.target + +[Service] +User=taler-exchange-httpd +Type=simple +# Depending on the configuration, the service suicides and then +# needs to be restarted. +Restart=always +# Do not dally on restarts. +RestartSec=1ms +EnvironmentFile=/etc/environment +ExecStart=/usr/bin/taler-exchange-httpd -c /etc/taler/taler.conf +StandardOutput=journal +StandardError=journal +PrivateTmp=no +PrivateDevices=yes +ProtectSystem=full + +[Install] +WantedBy=multi-user.target diff --git a/debian/taler-exchange.taler-exchange-httpd@.socket b/debian/taler-exchange.taler-exchange-httpd@.socket new file mode 100644 index 000000000..e1d6b6bd4 --- /dev/null +++ b/debian/taler-exchange.taler-exchange-httpd@.socket @@ -0,0 +1,14 @@ +[Unit] +Description=Taler Exchange Socket at %I +PartOf=taler-exchange-httpd@%i.service + +[Socket] +ListenStream=80 +Accept=no +Service=taler-exchange-httpd@%i.service +SocketUser=taler-exchange-httpd +SocketGroup=www-data +SocketMode=0660 + +[Install] +WantedBy=sockets.target diff --git a/debian/taler-exchange.taler-exchange-secmod-eddsa.service b/debian/taler-exchange.taler-exchange-secmod-eddsa.service index 7406e3c76..e4898581c 100644 --- a/debian/taler-exchange.taler-exchange-secmod-eddsa.service +++ b/debian/taler-exchange.taler-exchange-secmod-eddsa.service @@ -6,7 +6,8 @@ PartOf=taler-exchange.target [Service] User=taler-exchange-secmod-eddsa Type=simple -Restart=on-failure +Restart=always +RestartSec=100ms ExecStart=/usr/bin/taler-exchange-secmod-eddsa -c /etc/taler/taler.conf StandardOutput=journal StandardError=journal diff --git a/debian/taler-exchange.taler-exchange-secmod-rsa.service b/debian/taler-exchange.taler-exchange-secmod-rsa.service index b4fedd1cc..6c5a3d613 100644 --- a/debian/taler-exchange.taler-exchange-secmod-rsa.service +++ b/debian/taler-exchange.taler-exchange-secmod-rsa.service @@ -6,7 +6,8 @@ PartOf=taler-exchange.target [Service] User=taler-exchange-secmod-rsa Type=simple -Restart=on-failure +Restart=always +RestartSec=100ms ExecStart=/usr/bin/taler-exchange-secmod-rsa -c /etc/taler/taler.conf StandardOutput=journal StandardError=journal diff --git a/debian/taler-exchange.taler-exchange-transfer.service b/debian/taler-exchange.taler-exchange-transfer.service index 274b64624..b2615b7c9 100644 --- a/debian/taler-exchange.taler-exchange-transfer.service +++ b/debian/taler-exchange.taler-exchange-transfer.service @@ -6,7 +6,8 @@ PartOf=taler-exchange.target [Service] User=taler-exchange-wire Type=simple -Restart=on-failure +Restart=always +RestartSec=100ms ExecStart=/usr/bin/taler-exchange-transfer -c /etc/taler/taler.conf StandardOutput=journal StandardError=journal diff --git a/debian/taler-exchange.taler-exchange-wirewatch.service b/debian/taler-exchange.taler-exchange-wirewatch.service index 2705ca2b5..54704cb80 100644 --- a/debian/taler-exchange.taler-exchange-wirewatch.service +++ b/debian/taler-exchange.taler-exchange-wirewatch.service @@ -6,7 +6,8 @@ PartOf=taler-exchange.target [Service] User=taler-exchange-wire Type=simple -Restart=on-failure +Restart=always +RestartSec=100ms ExecStart=/usr/bin/taler-exchange-wirewatch -c /etc/taler/taler.conf StandardOutput=journal StandardError=journal diff --git a/debian/taler-exchange.taler-exchange-wirewatch@.service b/debian/taler-exchange.taler-exchange-wirewatch@.service new file mode 100644 index 000000000..54704cb80 --- /dev/null +++ b/debian/taler-exchange.taler-exchange-wirewatch@.service @@ -0,0 +1,16 @@ +[Unit] +Description=GNU Taler payment system exchange wirewatch service +After=network.target +PartOf=taler-exchange.target + +[Service] +User=taler-exchange-wire +Type=simple +Restart=always +RestartSec=100ms +ExecStart=/usr/bin/taler-exchange-wirewatch -c /etc/taler/taler.conf +StandardOutput=journal +StandardError=journal +PrivateTmp=yes +PrivateDevices=yes +ProtectSystem=full diff --git a/src/exchange/exchange.conf b/src/exchange/exchange.conf index 4d353acec..92de5e31c 100644 --- a/src/exchange/exchange.conf +++ b/src/exchange/exchange.conf @@ -40,6 +40,9 @@ PORT = 8081 # transfers to enable tracking. BASE_URL = http://localhost:8081/ +# Maximum number of requests this process should handle before +# committing suicide. +# MAX_REQUESTS = # How long should the aggregator sleep if it has nothing to do? AGGREGATOR_IDLE_SLEEP_INTERVAL = 60 s diff --git a/src/exchange/taler-exchange-httpd.c b/src/exchange/taler-exchange-httpd.c index 64304e9c2..327bcd1e5 100644 --- a/src/exchange/taler-exchange-httpd.c +++ b/src/exchange/taler-exchange-httpd.c @@ -76,6 +76,11 @@ static int allow_address_reuse; */ const struct GNUNET_CONFIGURATION_Handle *TEH_cfg; +/** + * Handle to the HTTP server. + */ +static struct MHD_Daemon *mhd; + /** * Our KYC configuration. */ @@ -122,6 +127,12 @@ static unsigned int connection_timeout = 30; */ static int connection_close; +/** + * True if we should commit suicide once all active + * connections are finished. + */ +bool TEH_suicide; + /** * Value to return from main() */ @@ -137,6 +148,11 @@ static uint16_t serve_port; */ static unsigned long long req_count; +/** + * Counter for the number of open connections. + */ +static unsigned long long active_connections; + /** * Limit for the number of requests this HTTP may process before restarting. * (This was added as one way of dealing with unavoidable memory fragmentation @@ -262,6 +278,45 @@ handle_post_coins (struct TEH_RequestContext *rc, } +/** + * Increments our request counter and checks if this + * process should commit suicide. + */ +static void +check_suicide (void) +{ + int fd; + pid_t chld; + unsigned long long cnt; + + cnt = req_count++; + if (req_max != cnt) + return; + GNUNET_log (GNUNET_ERROR_TYPE_INFO, + "Restarting exchange service after %llu requests\n", + cnt); + /* Stop accepting new connections */ + fd = MHD_quiesce_daemon (mhd); + GNUNET_break (0 == close (fd)); + /* Continue handling existing connections in child, + so that this process can die and be replaced by + systemd with a fresh one */ + chld = fork (); + if (-1 == chld) + { + GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, + "fork"); + _exit (1); + } + if (0 != chld) + { + /* We are the parent, instant-suicide! */ + _exit (0); + } + TEH_suicide = true; +} + + /** * Function called whenever MHD is done with a request. If the * request was a POST, we may have stored a `struct Buffer *` in the @@ -290,6 +345,7 @@ handle_mhd_completion_callback (void *cls, return; GNUNET_async_scope_enter (&rc->async_scope_id, &old_scope); + check_suicide (); TEH_check_invariants (); if (NULL != rc->rh_cleaner) rc->rh_cleaner (rc); @@ -1642,8 +1698,19 @@ connection_done (void *cls, (void) cls; (void) connection; (void) socket_context; - unsigned long long cnt; + switch (toe) + { + case MHD_CONNECTION_NOTIFY_STARTED: + active_connections++; + break; + case MHD_CONNECTION_NOTIFY_CLOSED: + active_connections--; + if (TEH_suicide && + (0 == active_connections) ) + GNUNET_SCHEDULER_shutdown (); + break; + } #if HAVE_DEVELOPER /* We only act if the connection is closed. */ if (MHD_CONNECTION_NOTIFY_CLOSED != toe) @@ -1651,15 +1718,6 @@ connection_done (void *cls, if (NULL != input_filename) GNUNET_SCHEDULER_shutdown (); #endif - cnt = req_count++; - if (req_max == cnt) - { - GNUNET_log (GNUNET_ERROR_TYPE_INFO, - "Restarting exchange service after %llu requests\n", - cnt); - (void) kill (getpid (), - SIGTERM); - } } @@ -1780,46 +1838,42 @@ run (void *cls, GNUNET_SCHEDULER_shutdown (); return; } - { - struct MHD_Daemon *mhd; - - mhd = MHD_start_daemon (MHD_USE_SUSPEND_RESUME - | MHD_USE_PIPE_FOR_SHUTDOWN - | MHD_USE_DEBUG | MHD_USE_DUAL_STACK - | MHD_USE_TCP_FASTOPEN, - (-1 == fh) ? serve_port : 0, - NULL, NULL, - &handle_mhd_request, NULL, - MHD_OPTION_LISTEN_BACKLOG_SIZE, - (unsigned int) 1024, - MHD_OPTION_LISTEN_SOCKET, - fh, - MHD_OPTION_EXTERNAL_LOGGER, - &TALER_MHD_handle_logs, - NULL, - MHD_OPTION_NOTIFY_COMPLETED, - &handle_mhd_completion_callback, - NULL, - MHD_OPTION_NOTIFY_CONNECTION, - &connection_done, - NULL, - MHD_OPTION_CONNECTION_TIMEOUT, - connection_timeout, - (0 == allow_address_reuse) + mhd = MHD_start_daemon (MHD_USE_SUSPEND_RESUME + | MHD_USE_PIPE_FOR_SHUTDOWN + | MHD_USE_DEBUG | MHD_USE_DUAL_STACK + | MHD_USE_TCP_FASTOPEN, + (-1 == fh) ? serve_port : 0, + NULL, NULL, + &handle_mhd_request, NULL, + MHD_OPTION_LISTEN_BACKLOG_SIZE, + (unsigned int) 1024, + MHD_OPTION_LISTEN_SOCKET, + fh, + MHD_OPTION_EXTERNAL_LOGGER, + &TALER_MHD_handle_logs, + NULL, + MHD_OPTION_NOTIFY_COMPLETED, + &handle_mhd_completion_callback, + NULL, + MHD_OPTION_NOTIFY_CONNECTION, + &connection_done, + NULL, + MHD_OPTION_CONNECTION_TIMEOUT, + connection_timeout, + (0 == allow_address_reuse) ? MHD_OPTION_END : MHD_OPTION_LISTENING_ADDRESS_REUSE, - (unsigned int) allow_address_reuse, - MHD_OPTION_END); - if (NULL == mhd) - { - GNUNET_log (GNUNET_ERROR_TYPE_ERROR, - "Failed to launch HTTP service. Is the port in use?\n"); - GNUNET_SCHEDULER_shutdown (); - return; - } - global_ret = EXIT_SUCCESS; - TALER_MHD_daemon_start (mhd); + (unsigned int) allow_address_reuse, + MHD_OPTION_END); + if (NULL == mhd) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + "Failed to launch HTTP service. Is the port in use?\n"); + GNUNET_SCHEDULER_shutdown (); + return; } + global_ret = EXIT_SUCCESS; + TALER_MHD_daemon_start (mhd); atexit (&write_stats); #if HAVE_DEVELOPER diff --git a/src/exchange/taler-exchange-httpd.h b/src/exchange/taler-exchange-httpd.h index d52ce1a0f..4f59a5821 100644 --- a/src/exchange/taler-exchange-httpd.h +++ b/src/exchange/taler-exchange-httpd.h @@ -157,6 +157,13 @@ extern int TEH_allow_keys_timetravel; */ extern char *TEH_revocation_directory; +/** + * True if we should commit suicide once all active + * connections are finished. Also forces /keys requests + * to terminate if they are long-polling. + */ +extern bool TEH_suicide; + /** * Master public key (according to the * configuration in the exchange directory). diff --git a/src/exchange/taler-exchange-httpd_keys.c b/src/exchange/taler-exchange-httpd_keys.c index 6ac39aa80..bff057a50 100644 --- a/src/exchange/taler-exchange-httpd_keys.c +++ b/src/exchange/taler-exchange-httpd_keys.c @@ -36,6 +36,12 @@ #define SKR_LIMIT 32 +/** + * When do we forcefully timeout a /keys request? + */ +#define KEYS_TIMEOUT GNUNET_TIME_UNIT_MINUTES + + /** * Taler protocol version in the format CURRENT:REVISION:AGE * as used by GNU libtool. See @@ -355,6 +361,11 @@ struct SuspendedKeysRequests * The suspended connection. */ struct MHD_Connection *connection; + + /** + * When does this request timeout? + */ + struct GNUNET_TIME_Absolute timeout; }; @@ -398,6 +409,11 @@ static unsigned int skr_size; */ static struct MHD_Connection *skr_connection; +/** + * Task to force timeouts on /keys requests. + */ +static struct GNUNET_SCHEDULER_Task *keys_tt; + /** * For how long should a signing key be legally retained? * Configuration value. @@ -419,6 +435,40 @@ static struct TALER_SecurityModulePublicKeyP esign_sm_pub; */ static bool terminating; + +/** + * Function called to forcefully resume suspended keys requests. + * + * @param cls unused, NULL + */ +static void +keys_timeout_cb (void *cls) +{ + struct SuspendedKeysRequests *skr; + + (void) cls; + keys_tt = NULL; + while (NULL != (skr = skr_head)) + { + if (GNUNET_TIME_absolute_is_future (skr->timeout)) + break; + GNUNET_log (GNUNET_ERROR_TYPE_INFO, + "Resuming /keys request due to timeout\n"); + GNUNET_CONTAINER_DLL_remove (skr_head, + skr_tail, + skr); + MHD_resume_connection (skr->connection); + TALER_MHD_daemon_trigger (); + GNUNET_free (skr); + } + if (NULL == skr) + return; + keys_tt = GNUNET_SCHEDULER_add_at (skr->timeout, + &keys_timeout_cb, + NULL); +} + + /** * Suspend /keys request while we (hopefully) are waiting to be * provisioned with key material. @@ -445,6 +495,13 @@ suspend_request (struct MHD_Connection *connection) GNUNET_CONTAINER_DLL_insert (skr_head, skr_tail, skr); + skr->timeout = GNUNET_TIME_relative_to_absolute (KEYS_TIMEOUT); + if (NULL == keys_tt) + { + keys_tt = GNUNET_SCHEDULER_add_at (skr->timeout, + &keys_timeout_cb, + NULL); + } skr_size++; if (skr_size > SKR_LIMIT) { @@ -477,9 +534,8 @@ check_dk (void *cls, { struct TEH_DenominationKey *dk = value; - + (void) cls; (void) hc; - (void) value; GNUNET_assert (TALER_DENOMINATION_INVALID != dk->denom_pub.cipher); if (TALER_DENOMINATION_RSA == dk->denom_pub.cipher) GNUNET_assert (GNUNET_CRYPTO_rsa_public_key_check ( @@ -1073,6 +1129,11 @@ TEH_keys_init () void TEH_keys_finished () { + if (NULL != keys_tt) + { + GNUNET_SCHEDULER_cancel (keys_tt); + keys_tt = NULL; + } if (NULL != key_state) destroy_key_state (key_state, true); @@ -2282,13 +2343,17 @@ TEH_keys_get_handler (struct TEH_RequestContext *rc, ksh = TEH_keys_get_state (); if (NULL == ksh) { - if ( (SKR_LIMIT == skr_size) && - (rc->connection == skr_connection) ) + if ( ( (SKR_LIMIT == skr_size) && + (rc->connection == skr_connection) ) || + TEH_suicide) { - return TALER_MHD_reply_with_error (rc->connection, - MHD_HTTP_INTERNAL_SERVER_ERROR, - TALER_EC_EXCHANGE_GENERIC_KEYS_MISSING, - "too many connections suspended on /keys"); + return TALER_MHD_reply_with_error ( + rc->connection, + MHD_HTTP_SERVICE_UNAVAILABLE, + TALER_EC_EXCHANGE_GENERIC_KEYS_MISSING, + TEH_suicide + ? "server terminating" + : "too many connections suspended waiting on /keys"); } return suspend_request (rc->connection); } @@ -2688,7 +2753,7 @@ TEH_keys_management_get_keys_handler (const struct TEH_RequestHandler *rh, if (NULL == ksh) { return TALER_MHD_reply_with_error (connection, - MHD_HTTP_INTERNAL_SERVER_ERROR, + MHD_HTTP_SERVICE_UNAVAILABLE, TALER_EC_EXCHANGE_GENERIC_KEYS_MISSING, "no key state"); }