wallet-core: improve crypto worker error handling

This commit is contained in:
Florian Dold 2022-09-26 14:40:06 +02:00
parent 360cb80610
commit 25eb7624b3
No known key found for this signature in database
GPG Key ID: D2E4F00F29D02A4B
5 changed files with 612 additions and 87 deletions

File diff suppressed because it is too large Load Diff

View File

@ -22,7 +22,9 @@
/** /**
* Imports. * Imports.
*/ */
import { Logger } from "@gnu-taler/taler-util"; import { Logger, TalerErrorCode } from "@gnu-taler/taler-util";
import { TalerError } from "../../errors.js";
import { openPromise } from "../../util/promiseUtils.js";
import { timer, performanceNow, TimerHandle } from "../../util/timer.js"; import { timer, performanceNow, TimerHandle } from "../../util/timer.js";
import { nullCrypto, TalerCryptoInterface } from "../cryptoImplementation.js"; import { nullCrypto, TalerCryptoInterface } from "../cryptoImplementation.js";
import { CryptoWorker } from "./cryptoWorkerInterface.js"; import { CryptoWorker } from "./cryptoWorkerInterface.js";
@ -32,7 +34,7 @@ const logger = new Logger("cryptoApi.ts");
/** /**
* State of a crypto worker. * State of a crypto worker.
*/ */
interface WorkerState { interface WorkerInfo {
/** /**
* The actual worker thread. * The actual worker thread.
*/ */
@ -64,6 +66,8 @@ interface WorkItem {
* Time when the work was submitted to a (non-busy) worker thread. * Time when the work was submitted to a (non-busy) worker thread.
*/ */
startTime: BigInt; startTime: BigInt;
state: WorkItemState;
} }
/** /**
@ -92,12 +96,18 @@ export class CryptoApiStoppedError extends Error {
} }
} }
export enum WorkItemState {
Pending = 1,
Running = 2,
Finished = 3,
}
/** /**
* Dispatcher for cryptographic operations to underlying crypto workers. * Dispatcher for cryptographic operations to underlying crypto workers.
*/ */
export class CryptoDispatcher { export class CryptoDispatcher {
private nextRpcId = 1; private nextRpcId = 1;
private workers: WorkerState[]; private workers: WorkerInfo[];
private workQueues: WorkItem[][]; private workQueues: WorkItem[][];
private workerFactory: CryptoWorkerFactory; private workerFactory: CryptoWorkerFactory;
@ -141,7 +151,7 @@ export class CryptoDispatcher {
/** /**
* Start a worker (if not started) and set as busy. * Start a worker (if not started) and set as busy.
*/ */
wake(ws: WorkerState, work: WorkItem): void { wake(ws: WorkerInfo, work: WorkItem): void {
if (this.stopped) { if (this.stopped) {
return; return;
} }
@ -167,10 +177,11 @@ export class CryptoDispatcher {
}; };
this.resetWorkerTimeout(ws); this.resetWorkerTimeout(ws);
work.startTime = performanceNow(); work.startTime = performanceNow();
work.state = WorkItemState.Running;
timer.after(0, () => worker.postMessage(msg)); timer.after(0, () => worker.postMessage(msg));
} }
resetWorkerTimeout(ws: WorkerState): void { resetWorkerTimeout(ws: WorkerInfo): void {
if (ws.idleTimeoutHandle !== null) { if (ws.idleTimeoutHandle !== null) {
ws.idleTimeoutHandle.clear(); ws.idleTimeoutHandle.clear();
ws.idleTimeoutHandle = null; ws.idleTimeoutHandle = null;
@ -187,7 +198,7 @@ export class CryptoDispatcher {
ws.idleTimeoutHandle.unref(); ws.idleTimeoutHandle.unref();
} }
handleWorkerError(ws: WorkerState, e: any): void { handleWorkerError(ws: WorkerInfo, e: any): void {
if (ws.currentWorkItem) { if (ws.currentWorkItem) {
logger.error(`error in worker during ${ws.currentWorkItem.operation}`, e); logger.error(`error in worker during ${ws.currentWorkItem.operation}`, e);
} else { } else {
@ -203,6 +214,7 @@ export class CryptoDispatcher {
logger.error(e as string); logger.error(e as string);
} }
if (ws.currentWorkItem !== null) { if (ws.currentWorkItem !== null) {
ws.currentWorkItem.state = WorkItemState.Finished;
ws.currentWorkItem.reject(e); ws.currentWorkItem.reject(e);
ws.currentWorkItem = null; ws.currentWorkItem = null;
this.numBusy--; this.numBusy--;
@ -210,7 +222,7 @@ export class CryptoDispatcher {
this.findWork(ws); this.findWork(ws);
} }
private findWork(ws: WorkerState): void { private findWork(ws: WorkerInfo): void {
// try to find more work for this worker // try to find more work for this worker
for (let i = 0; i < NUM_PRIO; i++) { for (let i = 0; i < NUM_PRIO; i++) {
const q = this.workQueues[NUM_PRIO - i - 1]; const q = this.workQueues[NUM_PRIO - i - 1];
@ -225,26 +237,38 @@ export class CryptoDispatcher {
} }
} }
handleWorkerMessage(ws: WorkerState, msg: any): void { handleWorkerMessage(ws: WorkerInfo, msg: any): void {
const id = msg.data.id; const id = msg.data.id;
if (typeof id !== "number") { if (typeof id !== "number") {
console.error("rpc id must be number"); logger.error("rpc id must be number");
return; return;
} }
const currentWorkItem = ws.currentWorkItem; const currentWorkItem = ws.currentWorkItem;
ws.currentWorkItem = null; ws.currentWorkItem = null;
this.numBusy--;
this.findWork(ws);
if (!currentWorkItem) { if (!currentWorkItem) {
console.error("unsolicited response from worker"); logger.error("unsolicited response from worker");
return; return;
} }
if (id !== currentWorkItem.rpcId) { if (id !== currentWorkItem.rpcId) {
console.error(`RPC with id ${id} has no registry entry`); logger.error(`RPC with id ${id} has no registry entry`);
return; return;
} }
if (currentWorkItem.state === WorkItemState.Running) {
this.numBusy--;
currentWorkItem.state = WorkItemState.Finished;
if (msg.data.type === "success") {
currentWorkItem.resolve(msg.data.result); currentWorkItem.resolve(msg.data.result);
} else if (msg.data.type === "error") {
currentWorkItem.reject(
TalerError.fromDetail(TalerErrorCode.WALLET_CRYPTO_WORKER_ERROR, {
innerError: msg.data.error,
}),
);
} else {
currentWorkItem.reject(new Error("bad message from crypto worker"));
}
}
this.findWork(ws);
} }
cryptoApi: TalerCryptoInterface; cryptoApi: TalerCryptoInterface;
@ -258,7 +282,7 @@ export class CryptoDispatcher {
this.cryptoApi = fns; this.cryptoApi = fns;
this.workerFactory = workerFactory; this.workerFactory = workerFactory;
this.workers = new Array<WorkerState>(workerFactory.getConcurrency()); this.workers = new Array<WorkerInfo>(workerFactory.getConcurrency());
for (let i = 0; i < this.workers.length; i++) { for (let i = 0; i < this.workers.length; i++) {
this.workers[i] = { this.workers[i] = {
@ -282,36 +306,42 @@ export class CryptoDispatcher {
if (this.stopped) { if (this.stopped) {
throw new CryptoApiStoppedError(); throw new CryptoApiStoppedError();
} }
const p: Promise<T> = new Promise<T>((resolve, reject) => {
const rpcId = this.nextRpcId++; const rpcId = this.nextRpcId++;
const myProm = openPromise<T>();
const workItem: WorkItem = { const workItem: WorkItem = {
operation, operation,
req, req,
resolve, resolve: myProm.resolve,
reject, reject: myProm.reject,
rpcId, rpcId,
startTime: BigInt(0), startTime: BigInt(0),
state: WorkItemState.Pending,
}; };
let scheduled = false;
if (this.numBusy === this.workers.length) { if (this.numBusy === this.workers.length) {
// All workers are busy, queue work item
const q = this.workQueues[priority]; const q = this.workQueues[priority];
if (!q) { if (!q) {
throw Error("assertion failed"); throw Error("assertion failed");
} }
this.workQueues[priority].push(workItem); this.workQueues[priority].push(workItem);
return; scheduled = true;
} }
if (!scheduled) {
for (const ws of this.workers) { for (const ws of this.workers) {
if (ws.currentWorkItem !== null) { if (ws.currentWorkItem !== null) {
continue; continue;
} }
this.wake(ws, workItem); this.wake(ws, workItem);
return; scheduled = true;
break;
}
} }
if (!scheduled) {
// Could not schedule work.
throw Error("assertion failed"); throw Error("assertion failed");
}); }
// Make sure that we wait for the result while a timer is active // Make sure that we wait for the result while a timer is active
// to prevent the event loop from dying, as just waiting for a promise // to prevent the event loop from dying, as just waiting for a promise
@ -324,14 +354,20 @@ export class CryptoDispatcher {
logger.warn(`crypto RPC call ('${operation}') timed out`); logger.warn(`crypto RPC call ('${operation}') timed out`);
timedOut = true; timedOut = true;
reject(new Error(`crypto RPC call ('${operation}') timed out`)); reject(new Error(`crypto RPC call ('${operation}') timed out`));
if (workItem.state === WorkItemState.Running) {
workItem.state = WorkItemState.Finished;
this.numBusy--;
}
}); });
p.then((x) => { myProm.promise
.then((x) => {
if (timedOut) { if (timedOut) {
return; return;
} }
timeout.clear(); timeout.clear();
resolve(x); resolve(x);
}).catch((x) => { })
.catch((x) => {
logger.info(`crypto RPC call ${operation} threw`); logger.info(`crypto RPC call ${operation} threw`);
if (timedOut) { if (timedOut) {
return; return;

View File

@ -22,6 +22,7 @@ import { CryptoWorker } from "./cryptoWorkerInterface.js";
import os from "os"; import os from "os";
import { Logger } from "@gnu-taler/taler-util"; import { Logger } from "@gnu-taler/taler-util";
import { nativeCryptoR } from "../cryptoImplementation.js"; import { nativeCryptoR } from "../cryptoImplementation.js";
import { getErrorDetailFromException } from "../../errors.js";
const logger = new Logger("nodeThreadWorker.ts"); const logger = new Logger("nodeThreadWorker.ts");
@ -69,58 +70,72 @@ const workerCode = `
* a message. * a message.
*/ */
export function handleWorkerMessage(msg: any): void { export function handleWorkerMessage(msg: any): void {
const handleRequest = async (): Promise<void> => {
const req = msg.req; const req = msg.req;
if (typeof req !== "object") { if (typeof req !== "object") {
console.error("request must be an object"); logger.error("request must be an object");
return; return;
} }
const id = msg.id; const id = msg.id;
if (typeof id !== "number") { if (typeof id !== "number") {
console.error("RPC id must be number"); logger.error("RPC id must be number");
return; return;
} }
const operation = msg.operation; const operation = msg.operation;
if (typeof operation !== "string") { if (typeof operation !== "string") {
console.error("RPC operation must be string"); logger.error("RPC operation must be string");
return; return;
} }
const handleRequest = async (): Promise<void> => {
const impl = nativeCryptoR; const impl = nativeCryptoR;
if (!(operation in impl)) { if (!(operation in impl)) {
console.error(`crypto operation '${operation}' not found`); logger.error(`crypto operation '${operation}' not found`);
return; return;
} }
let responseMsg: any;
try { try {
const result = await (impl as any)[operation](impl, req); const result = await (impl as any)[operation](impl, req);
responseMsg = { data: { type: "success", result, id } };
} catch (e: any) {
logger.error(`error during operation: ${e.stack ?? e.toString()}`);
responseMsg = {
data: {
type: "error",
error: getErrorDetailFromException(e),
id,
},
};
}
try {
// eslint-disable-next-line @typescript-eslint/no-var-requires // eslint-disable-next-line @typescript-eslint/no-var-requires
const _r = "require"; const _r = "require";
const worker_threads: typeof import("worker_threads") = const worker_threads: typeof import("worker_threads") =
module[_r]("worker_threads"); module[_r]("worker_threads");
// const worker_threads = require("worker_threads"); // const worker_threads = require("worker_threads");
const p = worker_threads.parentPort; const p = worker_threads.parentPort;
worker_threads.parentPort?.postMessage;
if (p) { if (p) {
p.postMessage({ data: { result, id } }); p.postMessage(responseMsg);
} else { } else {
console.error("parent port not available (not running in thread?"); logger.error("parent port not available (not running in thread?");
} }
} catch (e) { } catch (e: any) {
console.error("error during operation", e); logger.error(
`error sending back operation result: ${e.stack ?? e.toString()}`,
);
return; return;
} }
}; };
handleRequest().catch((e) => { handleRequest().catch((e) => {
console.error("error in node worker", e); logger.error("error in node worker", e);
}); });
} }
export function handleWorkerError(e: Error): void { export function handleWorkerError(e: Error): void {
console.log("got error from worker", e); logger.error(`got error from worker: ${e.stack ?? e.toString()}`);
} }
export class NodeThreadCryptoWorkerFactory implements CryptoWorkerFactory { export class NodeThreadCryptoWorkerFactory implements CryptoWorkerFactory {
@ -161,7 +176,7 @@ class NodeThreadCryptoWorker implements CryptoWorker {
this.nodeWorker = new worker_threads.Worker(workerCode, { eval: true }); this.nodeWorker = new worker_threads.Worker(workerCode, { eval: true });
this.nodeWorker.on("error", (err: Error) => { this.nodeWorker.on("error", (err: Error) => {
console.error("error in node worker:", err); logger.error("error in node worker:", err);
if (this.onerror) { if (this.onerror) {
this.onerror(err); this.onerror(err);
} }

View File

@ -15,6 +15,7 @@
*/ */
import { Logger } from "@gnu-taler/taler-util"; import { Logger } from "@gnu-taler/taler-util";
import { getErrorDetailFromException } from "../../errors.js";
import { import {
nativeCryptoR, nativeCryptoR,
TalerCryptoInterfaceR, TalerCryptoInterfaceR,
@ -139,7 +140,7 @@ export class SynchronousCryptoWorker {
private dispatchMessage(msg: any): void { private dispatchMessage(msg: any): void {
if (this.onmessage) { if (this.onmessage) {
this.onmessage({ data: msg }); this.onmessage(msg);
} }
} }
@ -151,20 +152,27 @@ export class SynchronousCryptoWorker {
const impl = this.cryptoImplR; const impl = this.cryptoImplR;
if (!(operation in impl)) { if (!(operation in impl)) {
console.error(`crypto operation '${operation}' not found`); logger.error(`crypto operation '${operation}' not found`);
return; return;
} }
let result: any; let responseMsg: any;
try { try {
result = await (impl as any)[operation](impl, req); const result = await (impl as any)[operation](impl, req);
responseMsg = { data: { type: "success", result, id } };
} catch (e: any) { } catch (e: any) {
logger.error(`error during operation '${operation}': ${e}`); logger.error(`error during operation: ${e.stack ?? e.toString()}`);
return; responseMsg = {
data: {
type: "error",
id,
error: getErrorDetailFromException(e),
},
};
} }
try { try {
setTimeout(() => this.dispatchMessage({ result, id }), 0); setTimeout(() => this.dispatchMessage(responseMsg), 0);
} catch (e) { } catch (e) {
logger.error("got error during dispatch", e); logger.error("got error during dispatch", e);
} }
@ -176,22 +184,22 @@ export class SynchronousCryptoWorker {
postMessage(msg: any): void { postMessage(msg: any): void {
const req = msg.req; const req = msg.req;
if (typeof req !== "object") { if (typeof req !== "object") {
console.error("request must be an object"); logger.error("request must be an object");
return; return;
} }
const id = msg.id; const id = msg.id;
if (typeof id !== "number") { if (typeof id !== "number") {
console.error("RPC id must be number"); logger.error("RPC id must be number");
return; return;
} }
const operation = msg.operation; const operation = msg.operation;
if (typeof operation !== "string") { if (typeof operation !== "string") {
console.error("RPC operation must be string"); logger.error("RPC operation must be string");
return; return;
} }
this.handleRequest(operation, id, req).catch((e) => { this.handleRequest(operation, id, req).catch((e) => {
console.error("Error while handling crypto request:", e); logger.error("Error while handling crypto request:", e);
}); });
} }

View File

@ -73,6 +73,9 @@ export interface DetailsMap {
[TalerErrorCode.WALLET_PAY_MERCHANT_SERVER_ERROR]: { [TalerErrorCode.WALLET_PAY_MERCHANT_SERVER_ERROR]: {
requestError: TalerErrorDetail; requestError: TalerErrorDetail;
}; };
[TalerErrorCode.WALLET_CRYPTO_WORKER_ERROR]: {
innerError: TalerErrorDetail;
};
} }
type ErrBody<Y> = Y extends keyof DetailsMap ? DetailsMap[Y] : never; type ErrBody<Y> = Y extends keyof DetailsMap ? DetailsMap[Y] : never;