diff --git a/common/client-side-metrics-attributes.ts b/common/client-side-metrics-attributes.ts new file mode 100644 index 000000000..d1f31ab63 --- /dev/null +++ b/common/client-side-metrics-attributes.ts @@ -0,0 +1,45 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * Attributes (labels) associated with a Bigtable metric. These + * attributes provide context for the metric values. + */ +export interface Attributes { + projectId: string; + instanceId: string; + table: string; + cluster?: string | null; + zone?: string | null; + appProfileId?: string; + methodName: string; + attemptStatus?: string; + finalOperationStatus?: string; + streamingOperation?: string; + clientName: string; +} + +/** + * Converts an Attributes object to a string representation. + * This string representation is suitable for use as labels or tags. + * The order of attributes in the output string is fixed: + * projectId;instanceId;table;cluster;zone;appProfileId;methodName;attemptStatus;finalOperationStatus;streamingOperation;clientName + * If an attribute is null or undefined, the empty string is used. + * @param {Attributes} a The Attributes object to convert. + * @returns A string representation of the attribute. + */ +export function attributesToString(a: Attributes) { + const p = (attribute?: string | null) => (attribute ? attribute : ''); + return `${p(a.projectId)};${p(a.instanceId)};${p(a.table)};${p(a.cluster)};${p(a.zone)};${p(a.appProfileId)};${p(a.methodName)};${p(a.attemptStatus)};${p(a.finalOperationStatus)};${p(a.streamingOperation)};nodejs-bigtable`; +} diff --git a/common/logger.ts b/common/logger.ts new file mode 100644 index 000000000..7b09b8737 --- /dev/null +++ b/common/logger.ts @@ -0,0 +1,52 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * A simple logger interface for logging messages. Implementations of this interface + * can provide various logging mechanisms (e.g., console logging, file logging, etc.). + */ +interface ILogger { + log(message: string): void; +} + +/** + * An abstract base class that provides a logger instance. Subclasses can use this logger + * for logging messages. + */ +export abstract class WithLogger { + protected logger: ILogger; + /** + * @param {ILogger} logger The logger instance to be used by this object. + */ + constructor(logger: ILogger) { + this.logger = logger; + } +} + +/** + * An abstract base class that provides a logger instance and a name. Subclasses + * can use the logger for logging messages, incorporating the name for context. + */ +export abstract class WithLoggerAndName { + protected logger: ILogger; + protected name: string; + /** + * @param {ILogger} logger The logger instance to be used by this object. + * @param {string} name The name associated with this object. + */ + constructor(logger: ILogger, name: string) { + this.logger = logger; + this.name = name; + } +} diff --git a/common/test-date-provider.ts b/common/test-date-provider.ts new file mode 100644 index 000000000..71ef66aee --- /dev/null +++ b/common/test-date-provider.ts @@ -0,0 +1,56 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import {WithLogger} from './logger'; + +/** + * A test implementation of a Date-like object. Used for testing purposes. It provides a + * getTime method that returns a pre-determined fake date value, allowing for + * deterministic testing of time-dependent functionality. + */ +class TestDateLike { + private fakeDate; + /** + * @param {number} fakeDate The fake date value to be returned by getTime(), in milliseconds. + */ + constructor(fakeDate: number) { + this.fakeDate = fakeDate; + } + /** + * Returns the fake date value that this object was created with. + * @returns {number} The fake date, in milliseconds. + */ + getTime() { + return this.fakeDate; + } +} + +/** + * A test implementation of a DateProvider. Used for testing purposes. Provides + * a deterministic series of fake dates, with each call to getDate() returning a date 1000ms later than the last. + * Logs each date value returned for verification purposes. + */ +export class TestDateProvider extends WithLogger { + private dateCounter = 0; + /** + * Returns a new fake date 1000ms later than the last. Logs the date for test verification. + * @returns {TestDateLike} A fake date object. + */ + getDate() { + // The test assumes exactly 1s passes between each getDate call. + this.dateCounter = this.dateCounter + 1000; + this.logger.log(`getDate call returns ${this.dateCounter.toString()} ms`); + return new TestDateLike(this.dateCounter); + } +} diff --git a/common/test-meter-provider.ts b/common/test-meter-provider.ts new file mode 100644 index 000000000..1590fe322 --- /dev/null +++ b/common/test-meter-provider.ts @@ -0,0 +1,88 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import {WithLogger, WithLoggerAndName} from './logger'; +import {Attributes, attributesToString} from './client-side-metrics-attributes'; + +/** + * A test implementation of a MeterProvider. This MeterProvider is used for testing purposes. + * It doesn't send metrics to a backend, but instead logs metric updates for verification. + */ +export class TestMeterProvider extends WithLogger { + /** + * Returns a TestMeter, that logs metric updates for verification. + * @param {string} name The name of the meter. + * @returns {TestMeter} + */ + getMeter(name: string) { + return new TestMeter(this.logger, name); + } +} + +/** + * A test implementation of a Meter. Used for testing purposes. It doesn't send metrics to a backend, + * but instead logs metric updates for verification. + */ +class TestMeter extends WithLoggerAndName { + /** + * Creates a test histogram. The TestHistogram logs when values are recorded. + * @param {string} instrument The name of the instrument. + * @returns {TestHistogram} + */ + createHistogram(instrument: string) { + return new TestHistogram(this.logger, `${this.name}:${instrument}`); + } + /** + * Creates a test counter. The TestCounter logs when values are added. + * @param {string} instrument The name of the instrument. + * @returns {TestCounter} + */ + createCounter(instrument: string) { + return new TestCounter(this.logger, `${this.name}:${instrument}`); + } +} + +/** + * A test implementation of a Counter. Used for testing purposes. It doesn't send metrics to a backend, + * but instead logs value additions for verification. + */ +class TestCounter extends WithLoggerAndName { + /** + * Simulates adding a value to the counter. Logs the value and the counter name. + * @param {number} value The value to be added to the counter. + * @param {Attributes} attributes The attributes associated with the value. + */ + add(value: number, attributes: Attributes) { + this.logger.log( + `Value added to counter ${this.name} = ${value.toString()} with attributes ${attributesToString(attributes)}` + ); + } +} + +/** + * A test implementation of a Histogram. Used for testing purposes. It doesn't send metrics to a backend, + * but instead logs recorded values for verification. + */ +class TestHistogram extends WithLoggerAndName { + /** + * Simulates recording a value in the histogram. Logs the value and the histogram name. + * @param {number} value The value to be recorded in the histogram. + * @param {Attributes} attributes The attributes associated with the value. + */ + record(value: number, attributes: Attributes) { + this.logger.log( + `Value added to histogram ${this.name} = ${value.toString()} with attributes ${attributesToString(attributes)}` + ); + } +} diff --git a/package.json b/package.json index c0b1a07e9..2dc24800e 100644 --- a/package.json +++ b/package.json @@ -47,9 +47,14 @@ "precompile": "gts clean" }, "dependencies": { + "@google-cloud/opentelemetry-cloud-monitoring-exporter": "^0.20.0", + "@google-cloud/opentelemetry-resource-util": "^2.4.0", "@google-cloud/precise-date": "^4.0.0", "@google-cloud/projectify": "^4.0.0", "@google-cloud/promisify": "^4.0.0", + "@opentelemetry/api": "^1.9.0", + "@opentelemetry/resources": "^1.30.0", + "@opentelemetry/sdk-metrics": "^1.30.0", "arrify": "^2.0.0", "concat-stream": "^2.0.0", "dot-prop": "^6.0.0", diff --git a/src/client-side-metrics/metrics-tracer-factory.ts b/src/client-side-metrics/metrics-tracer-factory.ts new file mode 100644 index 000000000..a4155c241 --- /dev/null +++ b/src/client-side-metrics/metrics-tracer-factory.ts @@ -0,0 +1,600 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import {Attributes} from '../../common/client-side-metrics-attributes'; + +const { + MeterProvider, + Histogram, + Counter, + PeriodicExportingMetricReader, +} = require('@opentelemetry/sdk-metrics'); +import * as Resources from '@opentelemetry/resources'; +import {MetricExporter} from '@google-cloud/opentelemetry-cloud-monitoring-exporter'; +import * as ResourceUtil from '@google-cloud/opentelemetry-resource-util'; +import {ObservabilityOptions} from './observability-options'; +import * as fs from 'fs'; + +/** + * Information about a Bigtable operation. + */ +interface OperationInfo { + /** + * The number of retries attempted for the operation. + */ + retries?: number; + /** + * The final status of the operation (e.g., 'OK', 'ERROR'). + */ + finalOperationStatus: string; + /** + * Number of times a connectivity error occurred during the operation. + */ + connectivityErrorCount?: number; + streamingOperation: string; +} + +/** + * Information about a single attempt of a Bigtable operation. + */ +interface AttemptInfo { + /** + * The final status of the attempt (e.g., 'OK', 'ERROR'). + */ + finalOperationStatus: string; + /** + * Whether the operation is a streaming operation or not + */ + streamingOperation: string; +} + +/** + * A collection of OpenTelemetry metric instruments used to record + * Bigtable client-side metrics. + */ +interface Metrics { + operationLatencies: typeof Histogram; + attemptLatencies: typeof Histogram; + retryCount: typeof Counter; + applicationBlockingLatencies: typeof Histogram; + firstResponseLatencies: typeof Histogram; + serverLatencies: typeof Histogram; + connectivityErrorCount: typeof Histogram; + clientBlockingLatencies: typeof Histogram; +} + +/** + * An interface representing a Date-like object. Provides a `getTime` method + * for retrieving the time value in milliseconds. Used for abstracting time + * in tests. + */ +interface DateLike { + /** + * Returns the time value in milliseconds. + * @returns The time value in milliseconds. + */ + getTime(): number; +} + +/** + * Interface for a provider that returns DateLike objects. Used for mocking dates in tests. + */ +interface DateProvider { + /** + * Returns a DateLike object. + * @returns A DateLike object representing the current time or a fake time value. + */ + getDate(): DateLike; +} + +/** + * The default DateProvider implementation. Returns the current date and time. + */ +class DefaultDateProvider { + /** + * Returns a new Date object representing the current time. + * @returns {Date} The current date and time. + */ + getDate() { + return new Date(); + } +} + +/** + * An interface representing a tabular API surface, such as a Bigtable table. + */ +export interface ITabularApiSurface { + instance: { + id: string; + }; + id: string; + bigtable: { + appProfileId?: string; + }; +} + +const packageJSON = fs.readFileSync('package.json'); +const version = JSON.parse(packageJSON.toString()).version; + +/** + * A class for tracing and recording client-side metrics related to Bigtable operations. + */ +class MetricsTracer { + private operationStartTime: DateLike | null; + private attemptStartTime: DateLike | null; + private metrics: Metrics; + private zone: string | null | undefined; + private cluster: string | null | undefined; + private tabularApiSurface: ITabularApiSurface; + private methodName: string; + private projectId?: string; + private receivedFirstResponse: boolean; + private serverTimeRead: boolean; + private lastReadTime: DateLike | null; + private dateProvider: DateProvider; + + /** + * @param metrics The metrics instruments to record data with. + * @param tabularApiSurface Information about the Bigtable table being accessed. + * @param methodName The name of the method being traced. + * @param dateProvider A provider for date/time information (for testing). + */ + constructor( + metrics: Metrics, + tabularApiSurface: ITabularApiSurface, + methodName: string, + projectId?: string, + dateProvider?: DateProvider + ) { + this.metrics = metrics; + this.zone = null; + this.cluster = null; + this.tabularApiSurface = tabularApiSurface; + this.methodName = methodName; + this.operationStartTime = null; + this.attemptStartTime = null; + this.receivedFirstResponse = false; + this.lastReadTime = null; + this.serverTimeRead = false; + this.projectId = projectId; + if (dateProvider) { + this.dateProvider = dateProvider; + } else { + this.dateProvider = new DefaultDateProvider(); + } + } + + /** + * Assembles the basic attributes for metrics. These attributes provide + * context about the Bigtable environment and the operation being performed. + * @param {string} projectId The Google Cloud project ID. + * @returns {Attributes} An object containing the basic attributes. + */ + private getBasicAttributes(projectId: string) { + return { + projectId, + instanceId: this.tabularApiSurface.instance.id, + table: this.tabularApiSurface.id, + cluster: this.cluster, + zone: this.zone, + appProfileId: this.tabularApiSurface.bigtable.appProfileId, + methodName: this.methodName, + clientName: `nodejs-bigtable/${version}`, + }; + } + + /** + * Assembles the attributes for operation latency metrics. These attributes + * provide context about the Bigtable environment, the operation being performed, and the final status of the operation. + * Includes whether the operation was a streaming operation or not. + * @param {string} projectId The Google Cloud project ID. + * @param {string} finalOperationStatus The final status of the operation. + * @param {string} streamOperation Whether the operation was a streaming operation or not. + * @returns {Attributes} An object containing the attributes for operation latency metrics. + */ + private getOperationLatencyAttributes( + projectId: string, + finalOperationStatus: string, + streamOperation?: string + ): Attributes { + return Object.assign( + { + finalOperationStatus: finalOperationStatus, + streamingOperation: streamOperation, + }, + this.getBasicAttributes(projectId) + ); + } + + /** + * Assembles the attributes for final operation metrics. These attributes provide + * context about the Bigtable environment and the operation being performed. + * @param {string} projectId The Google Cloud project ID. + * @param {string} finalOperationStatus The final status of the operation. + * @returns {Attributes} An object containing the attributes for final operation metrics. + */ + private getFinalOpAttributes( + projectId: string, + finalOperationStatus: string + ): Attributes { + return Object.assign( + { + finalOperationStatus: finalOperationStatus, + }, + this.getBasicAttributes(projectId) + ); + } + + /** + * Assembles the attributes for attempt metrics. These attributes provide context + * about the Bigtable environment, the operation being performed, and the status of the attempt. + * Includes whether the operation was a streaming operation or not. + * @param {string} projectId The Google Cloud project ID. + * @param {string} attemptStatus The status of the attempt. + * @param {string} streamingOperation Whether the operation was a streaming operation or not. + * @returns {Attributes} An object containing the attributes for attempt metrics. + */ + private getAttemptAttributes( + projectId: string, + attemptStatus: string, + streamingOperation: string + ) { + return Object.assign( + { + attemptStatus: attemptStatus, + streamingOperation: streamingOperation, + }, + this.getBasicAttributes(projectId) + ); + } + + /** + * Assembles the attributes for attempt status metrics. These attributes provide context + * about the Bigtable environment and the operation being performed. + * @param {string} projectId The Google Cloud project ID. + * @param {string} attemptStatus The status of the attempt. + * @returns {Attributes} An object containing the attributes for attempt status metrics. + */ + private getAttemptStatusAttributes(projectId: string, attemptStatus: string) { + return Object.assign( + { + attemptStatus: attemptStatus, + }, + this.getBasicAttributes(projectId) + ); + } + + /** + * Called when the operation starts. Records the start time. + */ + onOperationStart() { + this.operationStartTime = this.dateProvider.getDate(); + } + + /** + * Called after the client reads a row. Records application blocking latencies. + */ + onRead() { + const currentTime = this.dateProvider.getDate(); + const projectId = this.projectId; + if (this.lastReadTime) { + if (projectId && this.lastReadTime) { + const attributes = this.getBasicAttributes(projectId); + const difference = currentTime.getTime() - this.lastReadTime.getTime(); + this.metrics.applicationBlockingLatencies.record( + difference, + attributes + ); + this.lastReadTime = currentTime; + } + } else { + this.lastReadTime = currentTime; + } + } + + /** + * Called when an attempt (e.g., an RPC attempt) completes. Records attempt latencies. + * @param {AttemptInfo} info Information about the completed attempt. + */ + onAttemptComplete(info: AttemptInfo) { + const endTime = this.dateProvider.getDate(); + const projectId = this.projectId; + if (projectId && this.attemptStartTime) { + const attributes = this.getAttemptAttributes( + projectId, + info.finalOperationStatus, + info.streamingOperation + ); + const totalTime = endTime.getTime() - this.attemptStartTime.getTime(); + this.metrics.attemptLatencies.record(totalTime, attributes); + } + } + + /** + * Called when a new attempt starts. Records the start time of the attempt. + */ + onAttemptStart() { + this.attemptStartTime = this.dateProvider.getDate(); + } + + /** + * Called when the first response is received. Records first response latencies. + * @param {string} finalOperationStatus The final status of the operation. + */ + onResponse(finalOperationStatus: string) { + const endTime = this.dateProvider.getDate(); + const projectId = this.projectId; + if (projectId && this.operationStartTime) { + const attributes = this.getFinalOpAttributes( + projectId, + finalOperationStatus + ); + const totalTime = endTime.getTime() - this.operationStartTime.getTime(); + if (!this.receivedFirstResponse) { + this.receivedFirstResponse = true; + this.metrics.firstResponseLatencies.record(totalTime, attributes); + } + } + } + + /** + * Called when an operation completes (successfully or unsuccessfully). + * Records operation latencies, retry counts, and connectivity error counts. + * @param {OperationInfo} info Information about the completed operation. + */ + onOperationComplete(info: OperationInfo) { + const endTime = this.dateProvider.getDate(); + const projectId = this.projectId; + this.onAttemptComplete(info); + if (projectId && this.operationStartTime) { + const totalTime = endTime.getTime() - this.operationStartTime.getTime(); + { + // This block records operation latency metrics. + const operationLatencyAttributes = this.getOperationLatencyAttributes( + projectId, + info.finalOperationStatus, + info.streamingOperation + ); + this.metrics.operationLatencies.record( + totalTime, + operationLatencyAttributes + ); + } + if (info.retries) { + // This block records the retry count metrics + const retryCountAttributes = this.getFinalOpAttributes( + projectId, + info.finalOperationStatus + ); + this.metrics.retryCount.add(info.retries, retryCountAttributes); + } + if (info.connectivityErrorCount) { + // This block records the connectivity error count metrics + const connectivityCountAttributes = this.getAttemptStatusAttributes( + projectId, + info.finalOperationStatus + ); + this.metrics.connectivityErrorCount.record( + info.connectivityErrorCount, + connectivityCountAttributes + ); + } + } + } + + /** + * Called when metadata is received. Extracts server timing information if available. + * @param {AttemptInfo} info Information about the completed attempt. + * @param {object} metadata The received metadata. + */ + onMetadataReceived( + info: AttemptInfo, + metadata: { + internalRepr: Map; + options: {}; + } + ) { + const mappedEntries = new Map( + Array.from(metadata.internalRepr.entries(), ([key, value]) => [ + key, + value.toString(), + ]) + ); + const durationValues = mappedEntries.get('server-timing')?.split('dur='); + if (durationValues && durationValues[1]) { + if (!this.serverTimeRead) { + this.serverTimeRead = true; + const serverTime = parseInt(durationValues[1]); + const projectId = this.projectId; + if (projectId) { + const attributes = this.getAttemptAttributes( + projectId, + info.finalOperationStatus, + info.streamingOperation + ); + this.metrics.serverLatencies.record(serverTime, attributes); + } + } + } + } + + /** + * Called when status information is received. Extracts zone and cluster information. + * @param {object} status The received status information. + */ + onStatusReceived(status: { + metadata: {internalRepr: Map; options: {}}; + }) { + const mappedEntries = new Map( + Array.from(status.metadata.internalRepr.entries(), ([key, value]) => [ + key, + value.toString(), + ]) + ); + const instanceInformation = mappedEntries + .get('x-goog-ext-425905942-bin') + ?.replace(new RegExp('\\n', 'g'), '') + .split('\r'); + if (instanceInformation && instanceInformation[0]) { + this.zone = instanceInformation[0]; + } + if (instanceInformation && instanceInformation[1]) { + this.cluster = instanceInformation[1]; + } + } +} + +/** + * A factory class for creating MetricsTracer instances. Initializes + * OpenTelemetry metrics instruments. + */ +export class MetricsTracerFactory { + private metrics?: Metrics; + private observabilityOptions?: ObservabilityOptions; + private dateProvider: DateProvider; + + /** + * @param {DateProvider} dateProvider An object that provides dates for latency measurement. + * @param {ObservabilityOptions} observabilityOptions Options for configuring client-side metrics observability. + */ + constructor( + dateProvider: DateProvider, + observabilityOptions?: ObservabilityOptions + ) { + this.observabilityOptions = observabilityOptions; + this.dateProvider = dateProvider; + } + + /** + * Initializes the OpenTelemetry metrics instruments if they haven't been already. + * If metrics already exist, this method returns early. Otherwise, it creates and registers + * metric instruments (histograms and counters) for various Bigtable client metrics. + * It handles the creation of a MeterProvider, either using a user-provided one or creating a default one, and + * configures a PeriodicExportingMetricReader for exporting metrics. + * @param {string} [projectId] The Google Cloud project ID. Used for metric export. + * @param {ObservabilityOptions} [observabilityOptions] Options for configuring client-side metrics observability, including a custom MeterProvider. + * @returns {Metrics} An object containing the initialized OpenTelemetry metric instruments. + */ + private initialize( + projectId?: string, + observabilityOptions?: ObservabilityOptions + ) { + if (this.metrics) { + return this.metrics; + } else { + // Use MeterProvider provided by user + // If MeterProvider was not provided then use the default meter provider. + const meterProvider = + observabilityOptions && observabilityOptions.meterProvider + ? observabilityOptions.meterProvider + : new MeterProvider({ + // This is the default meter provider + // Create a resource. Fill the `service.*` attributes in with real values for your service. + // GcpDetectorSync will add in resource information about the current environment if you are + // running on GCP. These resource attributes will be translated to a specific GCP monitored + // resource if running on GCP. Otherwise, metrics will be sent with monitored resource + // `generic_task`. + resource: new Resources.Resource({ + 'service.name': 'bigtable-metrics', + }).merge(new ResourceUtil.GcpDetectorSync().detect()), + readers: [ + // Register the exporter + new PeriodicExportingMetricReader({ + // Export metrics every 10 seconds. 5 seconds is the smallest sample period allowed by + // Cloud Monitoring. + exportIntervalMillis: 10_000, + exporter: new MetricExporter({ + projectId, + }), + }), + ], + }); + const meter = meterProvider.getMeter('bigtable.googleapis.com'); + this.metrics = { + operationLatencies: meter.createHistogram('operation_latencies', { + description: + "The total end-to-end latency across all RPC attempts associated with a Bigtable operation. This metric measures an operation's round trip from the client to Bigtable and back to the client and includes all retries.", + }), + attemptLatencies: meter.createHistogram('attempt_latencies', { + description: + 'The latencies of a client RPC attempt. Under normal circumstances, this value is identical to operation_latencies. If the client receives transient errors, however, then operation_latencies is the sum of all attempt_latencies and the exponential delays.', + unit: 'ms', + }), + retryCount: meter.createCounter('retry_count', { + description: + 'A counter that records the number of attempts that an operation required to complete. Under normal circumstances, this value is empty.', + }), + applicationBlockingLatencies: meter.createHistogram( + 'application_blocking_latencies', + { + description: + 'The time from when the client receives the response to a request until the application reads the response. This metric is most relevant for ReadRows requests. The start and stop times for this metric depend on the way that you send the read request; see Application blocking latencies timer examples for details.', + unit: 'ms', + } + ), + firstResponseLatencies: meter.createHistogram( + 'first_response_latencies', + { + description: + 'Latencies from when a client sends a request and receives the first row of the response.', + unit: 'ms', + } + ), + serverLatencies: meter.createHistogram('server_latencies', { + description: + 'Latencies between the time when the Google frontend receives an RPC and when it sends the first byte of the response.', + }), + connectivityErrorCount: meter.createHistogram( + 'connectivity_error_count', + { + description: + "The number of requests that failed to reach Google's network. In normal cases, this number is 0. When the number is not 0, it can indicate connectivity issues between the application and the Google network.", + } + ), + clientBlockingLatencies: meter.createHistogram( + 'client_blocking_latencies', + { + description: + 'Latencies introduced when the client blocks the sending of more requests to the server because of too many pending requests in a bulk operation.', + unit: 'ms', + } + ), + }; + return this.metrics; + } + } + + /** + * Creates a new MetricsTracer instance. + * @param tabularApiSurface The Bigtable table being accessed. + * @param methodName The name of the method being traced. + * @param dateProvider An optional DateProvider for testing purposes. + * @param {string} projectId The project id + * @returns A new MetricsTracer instance. + */ + getMetricsTracer( + tabularApiSurface: ITabularApiSurface, + methodName: string, + projectId?: string + ) { + const metrics = this.initialize(projectId, this.observabilityOptions); + return new MetricsTracer( + metrics, + tabularApiSurface, + methodName, + projectId, + this.dateProvider + ); + } +} diff --git a/src/client-side-metrics/observability-options.ts b/src/client-side-metrics/observability-options.ts new file mode 100644 index 000000000..031c169da --- /dev/null +++ b/src/client-side-metrics/observability-options.ts @@ -0,0 +1,81 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import {Attributes} from '../../common/client-side-metrics-attributes'; + +/** + * The Counter interface for recording increments of a metric. + */ +interface ICounter { + /** + * Adds a value to the counter. + * @param {number} retries The value to be added to the counter. + * @param {Attributes} attributes The attributes associated with this value. + */ + add(retries: number, attributes: Attributes): void; +} + +/** + * The Histogram interface for recording distributions of values of a metric. + */ +interface IHistogram { + /** + * Records a value in the histogram. + * @param {number} value The value to be recorded in the histogram. + * @param {Attributes} attributes The attributes associated with this value. + */ + record(value: number, attributes: Attributes): void; +} + +/** + * The Meter interface. Meters are responsible for creating and managing instruments (Counters, Histograms, etc.). + */ +interface IMeter { + /** + * Creates a Counter instrument, which counts increments of a given metric. + * @param {string} instrument The name of the counter instrument. + * @param {Attributes} attributes The attributes associated with this counter. + * @returns {ICounter} A Counter instance. + */ + createCounter(instrument: string, attributes: Attributes): ICounter; + /** + * Creates a Histogram instrument, which records distributions of values for a given metric. + * @param {string} instrument The name of the histogram instrument. + * @param {Attributes} attributes The attributes associated with this histogram. + * @returns {IHistogram} A Histogram instance. + */ + createHistogram(instrument: string, attributes: Attributes): IHistogram; +} + +/** + * The MeterProvider interface. A MeterProvider creates and manages Meters. + */ +interface IMeterProvider { + /** + * Returns a Meter, which can be used to create instruments for recording measurements. + * @param {string} name The name of the Meter. + * @returns {IMeter} A Meter instance. + */ + getMeter(name: string): IMeter; +} + +/** + * Options for configuring client-side metrics observability. Allows users to provide their own MeterProvider. + */ +export interface ObservabilityOptions { + /** + * The MeterProvider to use for recording metrics. If not provided, a default MeterProvider will be used. + */ + meterProvider: IMeterProvider; +} diff --git a/test/metrics-tracer/metrics-tracer.ts b/test/metrics-tracer/metrics-tracer.ts new file mode 100644 index 000000000..180ad1bf7 --- /dev/null +++ b/test/metrics-tracer/metrics-tracer.ts @@ -0,0 +1,213 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import {describe} from 'mocha'; +import {MetricsTracerFactory} from '../../src/client-side-metrics/metrics-tracer-factory'; +import {TestMeterProvider} from '../../common/test-meter-provider'; +import {TestDateProvider} from '../../common/test-date-provider'; +import * as assert from 'assert'; +import * as fs from 'fs'; +import {ObservabilityOptions} from '../../src/client-side-metrics/observability-options'; + +/** + * A basic logger class that stores log messages in an array. Useful for testing. + */ +class Logger { + private messages: string[] = []; + + /** + * Logs a message by adding it to the internal message array. + * @param {string} message The message to be logged. + */ + log(message: string) { + this.messages.push(message); + } + + /** + * Retrieves all logged messages. + * @returns {string[]} An array of logged messages. + */ + getMessages() { + return this.messages; + } +} + +/** + * A fake implementation of the Bigtable client for testing purposes. Provides a + * metricsTracerFactory and a stubbed getProjectId_ method. + */ +class FakeBigtable { + appProfileId?: string; + metricsTracerFactory: MetricsTracerFactory; + /** + * @param {ObservabilityOptions} observabilityOptions Options for configuring client-side metrics + * observability, including a TestMeterProvider. + */ + constructor( + observabilityOptions: ObservabilityOptions, + dateProvider: TestDateProvider + ) { + this.metricsTracerFactory = new MetricsTracerFactory(dateProvider, { + meterProvider: observabilityOptions.meterProvider, + }); + } + + /** + * A stubbed method that simulates retrieving the project ID. Always returns + * 'my-project'. + * @param {function} callback A callback function that receives the project ID (or an error). + */ + getProjectId_( + callback: (err: Error | null, projectId?: string) => void + ): void { + callback(null, 'my-project'); + } +} + +/** + * A fake implementation of a Bigtable instance for testing purposes. Provides only an ID. + */ +class FakeInstance { + /** + * The ID of the fake instance. + */ + id = 'fakeInstanceId'; +} + +describe('Bigtable/MetricsTracer', () => { + it('should record the right metrics with a typical method call', async () => { + const logger = new Logger(); + class FakeTable { + id = 'fakeTableId'; + instance = new FakeInstance(); + bigtable = new FakeBigtable( + { + meterProvider: new TestMeterProvider(logger), + }, + new TestDateProvider(logger) + ); + + async fakeMethod(): Promise { + return new Promise((resolve, reject) => { + this.bigtable.getProjectId_((err, projectId) => { + const standardAttemptInfo = { + finalOperationStatus: 'PENDING', + streamingOperation: 'YES', + }; + + function createMetadata(duration: string) { + return { + internalRepr: new Map([ + ['server-timing', Buffer.from(`gfet4t7; dur=${duration}`)], + ]), + options: {}, + }; + } + + const status = { + metadata: { + internalRepr: new Map([ + [ + 'x-goog-ext-425905942-bin', + Buffer.from('\n\nus-west1-c \rfake-cluster3'), + ], + ]), + options: {}, + }, + }; + const metricsTracer = + this.bigtable.metricsTracerFactory.getMetricsTracer( + this, + 'fakeMethod', + projectId + ); + // In this method we simulate a series of events that might happen + // when a user calls one of the Table methods. + // Here is an example of what might happen in a method call: + logger.log('1. The operation starts'); + metricsTracer.onOperationStart(); + logger.log('2. The attempt starts.'); + metricsTracer.onAttemptStart(); + logger.log('3. Client receives status information.'); + metricsTracer.onStatusReceived(status); + logger.log('4. Client receives metadata.'); + metricsTracer.onMetadataReceived( + standardAttemptInfo, + createMetadata('101') + ); + logger.log('5. Client receives first row.'); + metricsTracer.onResponse('PENDING'); + logger.log('6. Client receives metadata.'); + metricsTracer.onMetadataReceived( + standardAttemptInfo, + createMetadata('102') + ); + logger.log('7. Client receives second row.'); + metricsTracer.onResponse('PENDING'); + logger.log('8. A transient error occurs.'); + metricsTracer.onAttemptComplete({ + finalOperationStatus: 'ERROR', + streamingOperation: 'YES', + }); + logger.log('9. After a timeout, the second attempt is made.'); + metricsTracer.onAttemptStart(); + logger.log('10. Client receives status information.'); + metricsTracer.onStatusReceived(status); + logger.log('11. Client receives metadata.'); + metricsTracer.onMetadataReceived( + standardAttemptInfo, + createMetadata('103') + ); + logger.log('12. Client receives third row.'); + metricsTracer.onResponse('PENDING'); + logger.log('13. Client receives metadata.'); + metricsTracer.onMetadataReceived( + {finalOperationStatus: 'PENDING', streamingOperation: 'YES'}, + createMetadata('104') + ); + logger.log('14. Client receives fourth row.'); + metricsTracer.onResponse('PENDING'); + logger.log('15. User reads row 1'); + metricsTracer.onRead(); + logger.log('16. User reads row 2'); + metricsTracer.onRead(); + logger.log('17. User reads row 3'); + metricsTracer.onRead(); + logger.log('18. User reads row 4'); + metricsTracer.onRead(); + logger.log('19. Stream ends, operation completes'); + metricsTracer.onOperationComplete({ + retries: 1, + finalOperationStatus: 'SUCCESS', + connectivityErrorCount: 1, + streamingOperation: 'YES', + }); + resolve(); + }); + }); + } + } + const table = new FakeTable(); + await table.fakeMethod(); + const expectedOutput = fs.readFileSync( + './test/metrics-tracer/typical-method-call.txt', + 'utf8' + ); + // Ensure events occurred in the right order here: + assert.strictEqual( + logger.getMessages().join('\n') + '\n', + expectedOutput.replace(/\r/g, '') + ); + }); +}); diff --git a/test/metrics-tracer/typical-method-call.txt b/test/metrics-tracer/typical-method-call.txt new file mode 100644 index 000000000..4fa4fb6f4 --- /dev/null +++ b/test/metrics-tracer/typical-method-call.txt @@ -0,0 +1,43 @@ +1. The operation starts +getDate call returns 1000 ms +2. The attempt starts. +getDate call returns 2000 ms +3. Client receives status information. +4. Client receives metadata. +Value added to histogram bigtable.googleapis.com:server_latencies = 101 with attributes my-project;fakeInstanceId;fakeTableId;fake-cluster3;us-west1-c ;;fakeMethod;PENDING;;YES;nodejs-bigtable +5. Client receives first row. +getDate call returns 3000 ms +Value added to histogram bigtable.googleapis.com:first_response_latencies = 2000 with attributes my-project;fakeInstanceId;fakeTableId;fake-cluster3;us-west1-c ;;fakeMethod;;PENDING;;nodejs-bigtable +6. Client receives metadata. +7. Client receives second row. +getDate call returns 4000 ms +8. A transient error occurs. +getDate call returns 5000 ms +Value added to histogram bigtable.googleapis.com:attempt_latencies = 3000 with attributes my-project;fakeInstanceId;fakeTableId;fake-cluster3;us-west1-c ;;fakeMethod;ERROR;;YES;nodejs-bigtable +9. After a timeout, the second attempt is made. +getDate call returns 6000 ms +10. Client receives status information. +11. Client receives metadata. +12. Client receives third row. +getDate call returns 7000 ms +13. Client receives metadata. +14. Client receives fourth row. +getDate call returns 8000 ms +15. User reads row 1 +getDate call returns 9000 ms +16. User reads row 2 +getDate call returns 10000 ms +Value added to histogram bigtable.googleapis.com:application_blocking_latencies = 1000 with attributes my-project;fakeInstanceId;fakeTableId;fake-cluster3;us-west1-c ;;fakeMethod;;;;nodejs-bigtable +17. User reads row 3 +getDate call returns 11000 ms +Value added to histogram bigtable.googleapis.com:application_blocking_latencies = 1000 with attributes my-project;fakeInstanceId;fakeTableId;fake-cluster3;us-west1-c ;;fakeMethod;;;;nodejs-bigtable +18. User reads row 4 +getDate call returns 12000 ms +Value added to histogram bigtable.googleapis.com:application_blocking_latencies = 1000 with attributes my-project;fakeInstanceId;fakeTableId;fake-cluster3;us-west1-c ;;fakeMethod;;;;nodejs-bigtable +19. Stream ends, operation completes +getDate call returns 13000 ms +getDate call returns 14000 ms +Value added to histogram bigtable.googleapis.com:attempt_latencies = 8000 with attributes my-project;fakeInstanceId;fakeTableId;fake-cluster3;us-west1-c ;;fakeMethod;SUCCESS;;YES;nodejs-bigtable +Value added to histogram bigtable.googleapis.com:operation_latencies = 12000 with attributes my-project;fakeInstanceId;fakeTableId;fake-cluster3;us-west1-c ;;fakeMethod;;SUCCESS;YES;nodejs-bigtable +Value added to counter bigtable.googleapis.com:retry_count = 1 with attributes my-project;fakeInstanceId;fakeTableId;fake-cluster3;us-west1-c ;;fakeMethod;;SUCCESS;;nodejs-bigtable +Value added to histogram bigtable.googleapis.com:connectivity_error_count = 1 with attributes my-project;fakeInstanceId;fakeTableId;fake-cluster3;us-west1-c ;;fakeMethod;SUCCESS;;;nodejs-bigtable diff --git a/testproxy/known_failures.txt b/testproxy/known_failures.txt index 979a31f98..81fade6bd 100644 --- a/testproxy/known_failures.txt +++ b/testproxy/known_failures.txt @@ -12,4 +12,9 @@ TestReadRows_Retry_WithRoutingCookie_MultipleErrorResponses\| TestReadRows_Retry_WithRetryInfo\| TestReadRows_Retry_WithRetryInfo_MultipleErrorResponse\| TestSampleRowKeys_Retry_WithRoutingCookie\| -TestSampleRowKeys_Generic_CloseClient +TestSampleRowKeys_Generic_CloseClient\| +TestSampleRowKeys_Generic_Headers\| +TestSampleRowKeys_Retry_WithRetryInfo\| +TestSampleRowKeys_NoRetry_NoEmptyKey\| +TestExecuteQuery_EmptyResponse\| +TestExecuteQuery_SingleSimpleRow