improve suicide logic

This commit is contained in:
Christian Grothoff 2021-12-02 08:02:20 +01:00
parent e8fc5d8d5c
commit 5a7dd00125
No known key found for this signature in database
GPG Key ID: 939E6BE1E29FC3CC
14 changed files with 258 additions and 63 deletions

View File

@ -5,7 +5,8 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-aggregator
Type=simple
Restart=on-failure
Restart=always
RestartSec=100ms
ExecStart=/usr/bin/taler-exchange-aggregator -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal

View File

@ -5,7 +5,8 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-closer
Type=simple
Restart=on-failure
Restart=always
RestartSec=100ms
ExecStart=/usr/bin/taler-exchange-closer -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal

View File

@ -8,7 +8,11 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-httpd
Type=simple
Restart=on-failure
# Depending on the configuration, the service suicides and then
# needs to be restarted.
Restart=always
# Do not dally on restarts.
RestartSec=1ms
ExecStart=/usr/bin/taler-exchange-httpd -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal

View File

@ -0,0 +1,26 @@
% This is a systemd service template.
[Unit]
Description=GNU Taler payment system exchange REST API at %I
AssertPathExists=/run/taler/exchange-httpd
Requires=taler-exchange-httpd@%i.socket taler-exchange-secmod-rsa.service taler-exchange-secmod-eddsa.service
After=postgres.service network.target taler-exchange-secmod-rsa.service taler-exchange-secmod-eddsa.service
PartOf=taler-exchange.target
[Service]
User=taler-exchange-httpd
Type=simple
# Depending on the configuration, the service suicides and then
# needs to be restarted.
Restart=always
# Do not dally on restarts.
RestartSec=1ms
EnvironmentFile=/etc/environment
ExecStart=/usr/bin/taler-exchange-httpd -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal
PrivateTmp=no
PrivateDevices=yes
ProtectSystem=full
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,14 @@
[Unit]
Description=Taler Exchange Socket at %I
PartOf=taler-exchange-httpd@%i.service
[Socket]
ListenStream=80
Accept=no
Service=taler-exchange-httpd@%i.service
SocketUser=taler-exchange-httpd
SocketGroup=www-data
SocketMode=0660
[Install]
WantedBy=sockets.target

View File

@ -6,7 +6,8 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-secmod-eddsa
Type=simple
Restart=on-failure
Restart=always
RestartSec=100ms
ExecStart=/usr/bin/taler-exchange-secmod-eddsa -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal

View File

@ -6,7 +6,8 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-secmod-rsa
Type=simple
Restart=on-failure
Restart=always
RestartSec=100ms
ExecStart=/usr/bin/taler-exchange-secmod-rsa -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal

View File

@ -6,7 +6,8 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-wire
Type=simple
Restart=on-failure
Restart=always
RestartSec=100ms
ExecStart=/usr/bin/taler-exchange-transfer -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal

View File

@ -6,7 +6,8 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-wire
Type=simple
Restart=on-failure
Restart=always
RestartSec=100ms
ExecStart=/usr/bin/taler-exchange-wirewatch -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal

View File

@ -0,0 +1,16 @@
[Unit]
Description=GNU Taler payment system exchange wirewatch service
After=network.target
PartOf=taler-exchange.target
[Service]
User=taler-exchange-wire
Type=simple
Restart=always
RestartSec=100ms
ExecStart=/usr/bin/taler-exchange-wirewatch -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal
PrivateTmp=yes
PrivateDevices=yes
ProtectSystem=full

View File

@ -40,6 +40,9 @@ PORT = 8081
# transfers to enable tracking.
BASE_URL = http://localhost:8081/
# Maximum number of requests this process should handle before
# committing suicide.
# MAX_REQUESTS =
# How long should the aggregator sleep if it has nothing to do?
AGGREGATOR_IDLE_SLEEP_INTERVAL = 60 s

View File

@ -76,6 +76,11 @@ static int allow_address_reuse;
*/
const struct GNUNET_CONFIGURATION_Handle *TEH_cfg;
/**
* Handle to the HTTP server.
*/
static struct MHD_Daemon *mhd;
/**
* Our KYC configuration.
*/
@ -122,6 +127,12 @@ static unsigned int connection_timeout = 30;
*/
static int connection_close;
/**
* True if we should commit suicide once all active
* connections are finished.
*/
bool TEH_suicide;
/**
* Value to return from main()
*/
@ -137,6 +148,11 @@ static uint16_t serve_port;
*/
static unsigned long long req_count;
/**
* Counter for the number of open connections.
*/
static unsigned long long active_connections;
/**
* Limit for the number of requests this HTTP may process before restarting.
* (This was added as one way of dealing with unavoidable memory fragmentation
@ -262,6 +278,45 @@ handle_post_coins (struct TEH_RequestContext *rc,
}
/**
* Increments our request counter and checks if this
* process should commit suicide.
*/
static void
check_suicide (void)
{
int fd;
pid_t chld;
unsigned long long cnt;
cnt = req_count++;
if (req_max != cnt)
return;
GNUNET_log (GNUNET_ERROR_TYPE_INFO,
"Restarting exchange service after %llu requests\n",
cnt);
/* Stop accepting new connections */
fd = MHD_quiesce_daemon (mhd);
GNUNET_break (0 == close (fd));
/* Continue handling existing connections in child,
so that this process can die and be replaced by
systemd with a fresh one */
chld = fork ();
if (-1 == chld)
{
GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR,
"fork");
_exit (1);
}
if (0 != chld)
{
/* We are the parent, instant-suicide! */
_exit (0);
}
TEH_suicide = true;
}
/**
* Function called whenever MHD is done with a request. If the
* request was a POST, we may have stored a `struct Buffer *` in the
@ -290,6 +345,7 @@ handle_mhd_completion_callback (void *cls,
return;
GNUNET_async_scope_enter (&rc->async_scope_id,
&old_scope);
check_suicide ();
TEH_check_invariants ();
if (NULL != rc->rh_cleaner)
rc->rh_cleaner (rc);
@ -1642,8 +1698,19 @@ connection_done (void *cls,
(void) cls;
(void) connection;
(void) socket_context;
unsigned long long cnt;
switch (toe)
{
case MHD_CONNECTION_NOTIFY_STARTED:
active_connections++;
break;
case MHD_CONNECTION_NOTIFY_CLOSED:
active_connections--;
if (TEH_suicide &&
(0 == active_connections) )
GNUNET_SCHEDULER_shutdown ();
break;
}
#if HAVE_DEVELOPER
/* We only act if the connection is closed. */
if (MHD_CONNECTION_NOTIFY_CLOSED != toe)
@ -1651,15 +1718,6 @@ connection_done (void *cls,
if (NULL != input_filename)
GNUNET_SCHEDULER_shutdown ();
#endif
cnt = req_count++;
if (req_max == cnt)
{
GNUNET_log (GNUNET_ERROR_TYPE_INFO,
"Restarting exchange service after %llu requests\n",
cnt);
(void) kill (getpid (),
SIGTERM);
}
}
@ -1780,46 +1838,42 @@ run (void *cls,
GNUNET_SCHEDULER_shutdown ();
return;
}
{
struct MHD_Daemon *mhd;
mhd = MHD_start_daemon (MHD_USE_SUSPEND_RESUME
| MHD_USE_PIPE_FOR_SHUTDOWN
| MHD_USE_DEBUG | MHD_USE_DUAL_STACK
| MHD_USE_TCP_FASTOPEN,
(-1 == fh) ? serve_port : 0,
NULL, NULL,
&handle_mhd_request, NULL,
MHD_OPTION_LISTEN_BACKLOG_SIZE,
(unsigned int) 1024,
MHD_OPTION_LISTEN_SOCKET,
fh,
MHD_OPTION_EXTERNAL_LOGGER,
&TALER_MHD_handle_logs,
NULL,
MHD_OPTION_NOTIFY_COMPLETED,
&handle_mhd_completion_callback,
NULL,
MHD_OPTION_NOTIFY_CONNECTION,
&connection_done,
NULL,
MHD_OPTION_CONNECTION_TIMEOUT,
connection_timeout,
(0 == allow_address_reuse)
mhd = MHD_start_daemon (MHD_USE_SUSPEND_RESUME
| MHD_USE_PIPE_FOR_SHUTDOWN
| MHD_USE_DEBUG | MHD_USE_DUAL_STACK
| MHD_USE_TCP_FASTOPEN,
(-1 == fh) ? serve_port : 0,
NULL, NULL,
&handle_mhd_request, NULL,
MHD_OPTION_LISTEN_BACKLOG_SIZE,
(unsigned int) 1024,
MHD_OPTION_LISTEN_SOCKET,
fh,
MHD_OPTION_EXTERNAL_LOGGER,
&TALER_MHD_handle_logs,
NULL,
MHD_OPTION_NOTIFY_COMPLETED,
&handle_mhd_completion_callback,
NULL,
MHD_OPTION_NOTIFY_CONNECTION,
&connection_done,
NULL,
MHD_OPTION_CONNECTION_TIMEOUT,
connection_timeout,
(0 == allow_address_reuse)
? MHD_OPTION_END
: MHD_OPTION_LISTENING_ADDRESS_REUSE,
(unsigned int) allow_address_reuse,
MHD_OPTION_END);
if (NULL == mhd)
{
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
"Failed to launch HTTP service. Is the port in use?\n");
GNUNET_SCHEDULER_shutdown ();
return;
}
global_ret = EXIT_SUCCESS;
TALER_MHD_daemon_start (mhd);
(unsigned int) allow_address_reuse,
MHD_OPTION_END);
if (NULL == mhd)
{
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
"Failed to launch HTTP service. Is the port in use?\n");
GNUNET_SCHEDULER_shutdown ();
return;
}
global_ret = EXIT_SUCCESS;
TALER_MHD_daemon_start (mhd);
atexit (&write_stats);
#if HAVE_DEVELOPER

View File

@ -157,6 +157,13 @@ extern int TEH_allow_keys_timetravel;
*/
extern char *TEH_revocation_directory;
/**
* True if we should commit suicide once all active
* connections are finished. Also forces /keys requests
* to terminate if they are long-polling.
*/
extern bool TEH_suicide;
/**
* Master public key (according to the
* configuration in the exchange directory).

View File

@ -36,6 +36,12 @@
#define SKR_LIMIT 32
/**
* When do we forcefully timeout a /keys request?
*/
#define KEYS_TIMEOUT GNUNET_TIME_UNIT_MINUTES
/**
* Taler protocol version in the format CURRENT:REVISION:AGE
* as used by GNU libtool. See
@ -355,6 +361,11 @@ struct SuspendedKeysRequests
* The suspended connection.
*/
struct MHD_Connection *connection;
/**
* When does this request timeout?
*/
struct GNUNET_TIME_Absolute timeout;
};
@ -398,6 +409,11 @@ static unsigned int skr_size;
*/
static struct MHD_Connection *skr_connection;
/**
* Task to force timeouts on /keys requests.
*/
static struct GNUNET_SCHEDULER_Task *keys_tt;
/**
* For how long should a signing key be legally retained?
* Configuration value.
@ -419,6 +435,40 @@ static struct TALER_SecurityModulePublicKeyP esign_sm_pub;
*/
static bool terminating;
/**
* Function called to forcefully resume suspended keys requests.
*
* @param cls unused, NULL
*/
static void
keys_timeout_cb (void *cls)
{
struct SuspendedKeysRequests *skr;
(void) cls;
keys_tt = NULL;
while (NULL != (skr = skr_head))
{
if (GNUNET_TIME_absolute_is_future (skr->timeout))
break;
GNUNET_log (GNUNET_ERROR_TYPE_INFO,
"Resuming /keys request due to timeout\n");
GNUNET_CONTAINER_DLL_remove (skr_head,
skr_tail,
skr);
MHD_resume_connection (skr->connection);
TALER_MHD_daemon_trigger ();
GNUNET_free (skr);
}
if (NULL == skr)
return;
keys_tt = GNUNET_SCHEDULER_add_at (skr->timeout,
&keys_timeout_cb,
NULL);
}
/**
* Suspend /keys request while we (hopefully) are waiting to be
* provisioned with key material.
@ -445,6 +495,13 @@ suspend_request (struct MHD_Connection *connection)
GNUNET_CONTAINER_DLL_insert (skr_head,
skr_tail,
skr);
skr->timeout = GNUNET_TIME_relative_to_absolute (KEYS_TIMEOUT);
if (NULL == keys_tt)
{
keys_tt = GNUNET_SCHEDULER_add_at (skr->timeout,
&keys_timeout_cb,
NULL);
}
skr_size++;
if (skr_size > SKR_LIMIT)
{
@ -477,9 +534,8 @@ check_dk (void *cls,
{
struct TEH_DenominationKey *dk = value;
(void) cls;
(void) hc;
(void) value;
GNUNET_assert (TALER_DENOMINATION_INVALID != dk->denom_pub.cipher);
if (TALER_DENOMINATION_RSA == dk->denom_pub.cipher)
GNUNET_assert (GNUNET_CRYPTO_rsa_public_key_check (
@ -1073,6 +1129,11 @@ TEH_keys_init ()
void
TEH_keys_finished ()
{
if (NULL != keys_tt)
{
GNUNET_SCHEDULER_cancel (keys_tt);
keys_tt = NULL;
}
if (NULL != key_state)
destroy_key_state (key_state,
true);
@ -2282,13 +2343,17 @@ TEH_keys_get_handler (struct TEH_RequestContext *rc,
ksh = TEH_keys_get_state ();
if (NULL == ksh)
{
if ( (SKR_LIMIT == skr_size) &&
(rc->connection == skr_connection) )
if ( ( (SKR_LIMIT == skr_size) &&
(rc->connection == skr_connection) ) ||
TEH_suicide)
{
return TALER_MHD_reply_with_error (rc->connection,
MHD_HTTP_INTERNAL_SERVER_ERROR,
TALER_EC_EXCHANGE_GENERIC_KEYS_MISSING,
"too many connections suspended on /keys");
return TALER_MHD_reply_with_error (
rc->connection,
MHD_HTTP_SERVICE_UNAVAILABLE,
TALER_EC_EXCHANGE_GENERIC_KEYS_MISSING,
TEH_suicide
? "server terminating"
: "too many connections suspended waiting on /keys");
}
return suspend_request (rc->connection);
}
@ -2688,7 +2753,7 @@ TEH_keys_management_get_keys_handler (const struct TEH_RequestHandler *rh,
if (NULL == ksh)
{
return TALER_MHD_reply_with_error (connection,
MHD_HTTP_INTERNAL_SERVER_ERROR,
MHD_HTTP_SERVICE_UNAVAILABLE,
TALER_EC_EXCHANGE_GENERIC_KEYS_MISSING,
"no key state");
}