diff --git a/CHANGELOG.md b/CHANGELOG.md index 7551a42..2c92520 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # elastic-apm-http-client changelog +## Unreleased + +- Fix an issue when running in a Lambda function, where a missing or erroring + APM Lambda extension could result in apmclient back-off such that (a) the + end-of-lambda-invocation signaling (`?flushed=true`) would not happen and + (b) a premature "beforeExit" event could result in the Lambda Runtime + responding `null` before the Lambda function could respond + (https://github.com/elastic/apm-agent-nodejs/issues/1831). + ## v11.0.0 - Add support for coordinating data flushing in an AWS Lambda environment. The diff --git a/index.js b/index.js index fbf6b06..4a4b203 100644 --- a/index.js +++ b/index.js @@ -781,9 +781,18 @@ Client.prototype._destroy = function (err, cb) { // Return the appropriate backoff delay (in milliseconds) before a next possible // request to APM server. // Spec: https://github.com/elastic/apm/blob/main/specs/agents/transport.md#transport-errors +// +// In a Lambda environment, a backoff delay can be harmful: The backoff +// setTimeout is unref'd, to not hold the process open. A subsequent Lambda +// function invocation during that timer will result in no active handles and +// a process "beforeExit" event. That event is interpreted by the Lambda Runtime +// as "the Lambda function callback was never called", and it terminates the +// function and responds with `null`. The solution is to never back off in a +// Lambda environment -- we expect and assume the Lambda extension is working, +// and pass responsibility for backoff to the extension. Client.prototype._getBackoffDelay = function (isErr) { let reconnectCount = this._backoffReconnectCount - if (isErr) { + if (isErr && !isLambdaExecutionEnvironment) { this._backoffReconnectCount++ } else { this._backoffReconnectCount = 0