From e039e3348bd473bb6fb79ceca9c869608d4d499c Mon Sep 17 00:00:00 2001 From: bstrzele Date: Mon, 23 Dec 2024 10:30:52 +0000 Subject: [PATCH 1/7] process time --- .../mediapipegraphexecutor.hpp | 19 ++++++++ src/metric_config.cpp | 1 + src/metric_config.hpp | 2 + src/model_metric_reporter.cpp | 47 +++++++++++++++++++ src/model_metric_reporter.hpp | 30 ++++++++++++ src/test/metrics_flow_test.cpp | 12 +++++ 6 files changed, 111 insertions(+) diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp index b52bc07037..def898599e 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.hpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -27,6 +27,7 @@ #include "../model_metric_reporter.hpp" #include "../profiler.hpp" #include "../status.hpp" +#include "../timer.hpp" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #include "mediapipe/framework/calculator_graph.h" @@ -104,6 +105,12 @@ class MediapipeGraphExecutor { MetricGaugeGuard currentGraphsGuard(this->mediapipeServableMetricReporter->currentGraphs.get()); ::mediapipe::CalculatorGraph graph; MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); + enum : unsigned int { + PROCESS, + TIMER_END2 + }; + Timer timer; + timer.start(PROCESS); std::unordered_map outputPollers; for (auto& name : this->outputNames) { if (name.empty()) { @@ -196,6 +203,9 @@ class MediapipeGraphExecutor { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); + timer.stop(PROCESS); + double processTime = timer.template elapsed(PROCESS); + OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime); if (outputPollers.size() != 
outputPollersWithReceivedPacket.size()) { SPDLOG_DEBUG("Mediapipe failed to execute. Failed to receive all output packets"); return Status(StatusCode::MEDIAPIPE_EXECUTION_ERROR, "Unknown error during mediapipe execution"); @@ -218,6 +228,12 @@ class MediapipeGraphExecutor { // Init MP_RETURN_ON_FAIL(graph.Initialize(this->config), "graph initialization", StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); } + enum : unsigned int { + PROCESS, + TIMER_END2 + }; + Timer timer; + timer.start(PROCESS); { OVMS_PROFILE_SCOPE("Mediapipe graph installing packet observers"); // Installing observers @@ -334,6 +350,9 @@ class MediapipeGraphExecutor { MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); SPDLOG_DEBUG("Graph {}: Done execution", this->name); } + timer.stop(PROCESS); + double processTime = timer.template elapsed(PROCESS); + OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime); return StatusCode::OK; } catch (...) 
{ SPDLOG_DEBUG("Graph {}: Exception while processing MediaPipe graph", this->name); diff --git a/src/metric_config.cpp b/src/metric_config.cpp index b40f2db5ab..7fe8830e48 100644 --- a/src/metric_config.cpp +++ b/src/metric_config.cpp @@ -59,6 +59,7 @@ const std::string METRIC_NAME_REQUESTS_ACCEPTED = "ovms_requests_accepted"; const std::string METRIC_NAME_REQUESTS_REJECTED = "ovms_requests_rejected"; const std::string METRIC_NAME_GRAPH_ERROR = "ovms_graph_error"; +const std::string METRIC_NAME_PROCESSING_TIME = "ovms_processing_time"; bool MetricConfig::validateEndpointPath(const std::string& endpoint) { std::regex valid_endpoint_regex("^/[a-zA-Z0-9]*$"); diff --git a/src/metric_config.hpp b/src/metric_config.hpp index b31c369c85..9c589300ec 100644 --- a/src/metric_config.hpp +++ b/src/metric_config.hpp @@ -45,6 +45,7 @@ extern const std::string METRIC_NAME_REQUESTS_ACCEPTED; extern const std::string METRIC_NAME_REQUESTS_REJECTED; extern const std::string METRIC_NAME_GRAPH_ERROR; +extern const std::string METRIC_NAME_PROCESSING_TIME; class Status; /** @@ -98,6 +99,7 @@ class MetricConfig { {METRIC_NAME_REQUESTS_ACCEPTED}, {METRIC_NAME_REQUESTS_REJECTED}, {METRIC_NAME_GRAPH_ERROR}, + {METRIC_NAME_PROCESSING_TIME}, {METRIC_NAME_RESPONSES}}; }; } // namespace ovms diff --git a/src/model_metric_reporter.cpp b/src/model_metric_reporter.cpp index 8792c0ec3c..34bf93f63d 100644 --- a/src/model_metric_reporter.cpp +++ b/src/model_metric_reporter.cpp @@ -333,6 +333,10 @@ MediapipeServableMetricReporter::MediapipeServableMetricReporter(const MetricCon return; } + for (int i = 0; i < NUMBER_OF_BUCKETS; i++) { + this->buckets.emplace_back(floor(BUCKET_MULTIPLIER * pow(BUCKET_POWER_BASE, i))); + } + auto familyName = METRIC_NAME_CURRENT_GRAPHS; if (metricConfig->isFamilyEnabled(familyName)) { auto family = registry->createFamily(familyName, @@ -552,6 +556,49 @@ MediapipeServableMetricReporter::MediapipeServableMetricReporter(const MetricCon {"interface", "REST"}}); 
THROW_IF_NULL(this->requestSuccessRestModelReady, "cannot create metric"); } + familyName = METRIC_NAME_PROCESSING_TIME; + if (metricConfig->isFamilyEnabled(familyName)) { + auto family = registry->createFamily(familyName, + "Time packet spent in the MediaPipe graph."); + THROW_IF_NULL(family, "cannot create family"); + + // KFS + this->processingTimeGrpcModelInfer = family->addMetric({{"name", graphName}, + {"api", "KServe"}, + {"method", "ModelInfer"}, + {"interface", "gRPC"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeGrpcModelInfer, "cannot create metric"); + + this->processingTimeGrpcModelInferStream = family->addMetric({{"name", graphName}, + {"api", "KServe"}, + {"method", "ModelInferStream"}, + {"interface", "gRPC"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeGrpcModelInferStream, "cannot create metric"); + + this->processingTimeRestModelInfer = family->addMetric({{"name", graphName}, + {"api", "KServe"}, + {"method", "ModelInfer"}, + {"interface", "REST"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeRestModelInfer, "cannot create metric"); + + // V3 + this->processingTimeRestV3Unary = family->addMetric({{"name", graphName}, + {"api", "V3"}, + {"method", "Unary"}, + {"interface", "REST"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeRestV3Unary, "cannot create metric"); + + this->processingTimeRestV3Stream = family->addMetric({{"name", graphName}, + {"api", "V3"}, + {"method", "Stream"}, + {"interface", "REST"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeRestV3Stream, "cannot create metric"); + } } } // namespace ovms diff --git a/src/model_metric_reporter.hpp b/src/model_metric_reporter.hpp index 1a2ffb3461..d820af7078 100644 --- a/src/model_metric_reporter.hpp +++ b/src/model_metric_reporter.hpp @@ -156,6 +156,9 @@ class ModelMetricReporter : public ServableMetricReporter { class MediapipeServableMetricReporter { MetricRegistry* registry; +protected: + std::vector buckets; + public: std::unique_ptr
currentGraphs; @@ -205,6 +208,33 @@ class MediapipeServableMetricReporter { std::unique_ptr requestErrorRestV3Unary; std::unique_ptr requestErrorRestV3Stream; + std::unique_ptr processingTimeGrpcModelInfer; + std::unique_ptr processingTimeGrpcModelInferStream; + std::unique_ptr processingTimeRestModelInfer; + std::unique_ptr processingTimeRestV3Unary; + std::unique_ptr processingTimeRestV3Stream; + + inline MetricHistogram* getProcessingTimeMetric(const ExecutionContext& context) { + if (context.interface == ExecutionContext::Interface::GRPC) { + if (context.method == ExecutionContext::Method::ModelInfer) + return this->processingTimeGrpcModelInfer.get(); + if (context.method == ExecutionContext::Method::ModelInferStream) + return this->processingTimeGrpcModelInferStream.get(); + return nullptr; + } else if (context.interface == ExecutionContext::Interface::REST) { + if (context.method == ExecutionContext::Method::ModelInfer) + return this->processingTimeRestModelInfer.get(); + if (context.method == ExecutionContext::Method::V3Unary) + return this->processingTimeRestV3Unary.get(); + if (context.method == ExecutionContext::Method::V3Stream) + return this->processingTimeRestV3Stream.get(); + return nullptr; + } else { + return nullptr; + } + return nullptr; + } + inline MetricCounter* getRequestsMetric(const ExecutionContext& context, bool success = true) { if (context.interface == ExecutionContext::Interface::GRPC) { if (context.method == ExecutionContext::Method::ModelInfer) diff --git a/src/test/metrics_flow_test.cpp b/src/test/metrics_flow_test.cpp index 82f0e6000b..64447e8e2f 100644 --- a/src/test/metrics_flow_test.cpp +++ b/src/test/metrics_flow_test.cpp @@ -398,6 +398,9 @@ TEST_F(MetricFlowTest, GrpcModelInfer) { checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, mpName, "gRPC", "ModelInfer", "KServe", numberOfRejectedRequests); checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, mpName, "gRPC", "ModelInfer", 
"KServe", numberOfAcceptedRequests); + + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"gRPC\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"REST\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(0))); #endif EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_REQUEST_TIME + std::string{"_count{interface=\"gRPC\",name=\""} + modelName + std::string{"\",version=\"1\"} "} + std::to_string(numberOfSuccessRequests))); @@ -717,6 +720,9 @@ TEST_F(MetricFlowTest, RestModelInfer) { checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, mpName, "REST", "ModelInfer", "KServe", numberOfRejectedRequests); checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, mpName, "REST", "ModelInfer", "KServe", numberOfAcceptedRequests); + + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"gRPC\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(0))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"REST\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); #endif EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_REQUEST_TIME + std::string{"_count{interface=\"gRPC\",name=\""} + modelName + std::string{"\",version=\"1\"} "} + std::to_string(0))); @@ -838,6 +844,8 @@ TEST_F(MetricFlowTest, RestV3Unary) { checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_ACCEPTED, "dummy_gpt", "REST", "Unary", "V3", numberOfAcceptedRequests * 2); // checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, "dummy_gpt", "REST", 
"Unary", "V3", numberOfRejectedRequests); checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, "dummy_gpt", "REST", "Unary", "V3", numberOfAcceptedRequests * 2); + + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"V3\",interface=\"REST\",method=\"Unary\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); } #endif @@ -890,6 +898,9 @@ TEST_F(MetricFlowTest, RestV3Stream) { // checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, "dummy_gpt", "REST", "Stream", "V3", numberOfRejectedRequests); const int numberOfMockedChunksPerRequest = 9; // Defined in openai_chat_completions_mock_calculator.cpp checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, "dummy_gpt", "REST", "Stream", "V3", numberOfAcceptedRequests * numberOfMockedChunksPerRequest * 2); + + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"V3\",interface=\"REST\",method=\"Stream\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); + SPDLOG_ERROR(server.collect()); } #endif @@ -996,6 +1007,7 @@ std::string MetricFlowTest::prepareConfigContent() { R"(",")" + METRIC_NAME_REQUESTS_REJECTED + R"(",")" + METRIC_NAME_RESPONSES + R"(",")" + METRIC_NAME_GRAPH_ERROR + + R"(",")" + METRIC_NAME_PROCESSING_TIME + R"("] } }, From 1672215e22a4d61870612d7ccf05e8b1744fd8d4 Mon Sep 17 00:00:00 2001 From: bstrzele Date: Mon, 23 Dec 2024 11:20:14 +0000 Subject: [PATCH 2/7] review fix --- src/metric_config.cpp | 2 +- src/model_metric_reporter.cpp | 23 ++++------------------- src/model_metric_reporter.hpp | 25 ++++++++----------------- src/test/metrics_flow_test.cpp | 10 ++++------ 4 files changed, 17 insertions(+), 43 deletions(-) diff --git a/src/metric_config.cpp b/src/metric_config.cpp index 7fe8830e48..200fded329 100644 --- a/src/metric_config.cpp +++ b/src/metric_config.cpp @@ -59,7 
+59,7 @@ const std::string METRIC_NAME_REQUESTS_ACCEPTED = "ovms_requests_accepted"; const std::string METRIC_NAME_REQUESTS_REJECTED = "ovms_requests_rejected"; const std::string METRIC_NAME_GRAPH_ERROR = "ovms_graph_error"; -const std::string METRIC_NAME_PROCESSING_TIME = "ovms_processing_time"; +const std::string METRIC_NAME_PROCESSING_TIME = "ovms_graph_processing_time_us"; bool MetricConfig::validateEndpointPath(const std::string& endpoint) { std::regex valid_endpoint_regex("^/[a-zA-Z0-9]*$"); diff --git a/src/model_metric_reporter.cpp b/src/model_metric_reporter.cpp index 34bf93f63d..2b6c34c55a 100644 --- a/src/model_metric_reporter.cpp +++ b/src/model_metric_reporter.cpp @@ -564,38 +564,23 @@ MediapipeServableMetricReporter::MediapipeServableMetricReporter(const MetricCon // KFS this->processingTimeGrpcModelInfer = family->addMetric({{"name", graphName}, - {"api", "KServe"}, - {"method", "ModelInfer"}, - {"interface", "gRPC"}}, + {"method", "ModelInfer"}}, this->buckets); THROW_IF_NULL(this->processingTimeGrpcModelInfer, "cannot create metric"); this->processingTimeGrpcModelInferStream = family->addMetric({{"name", graphName}, - {"api", "KServe"}, - {"method", "ModelInferStream"}, - {"interface", "gRPC"}}, + {"method", "ModelInferStream"}}, this->buckets); THROW_IF_NULL(this->processingTimeGrpcModelInferStream, "cannot create metric"); - this->processingTimeRestModelInfer = family->addMetric({{"name", graphName}, - {"api", "KServe"}, - {"method", "ModelInfer"}, - {"interface", "REST"}}, - this->buckets); - THROW_IF_NULL(this->processingTimeRestModelInfer, "cannot create metric"); - // V3 this->processingTimeRestV3Unary = family->addMetric({{"name", graphName}, - {"api", "V3"}, - {"method", "Unary"}, - {"interface", "REST"}}, + {"method", "Unary"}}, this->buckets); THROW_IF_NULL(this->processingTimeRestV3Unary, "cannot create metric"); this->processingTimeRestV3Stream = family->addMetric({{"name", graphName}, - {"api", "V3"}, - {"method", "Stream"}, - {"interface", 
"REST"}}, + {"method", "Stream"}}, this->buckets); THROW_IF_NULL(this->processingTimeRestV3Stream, "cannot create metric"); } diff --git a/src/model_metric_reporter.hpp b/src/model_metric_reporter.hpp index d820af7078..4146431f32 100644 --- a/src/model_metric_reporter.hpp +++ b/src/model_metric_reporter.hpp @@ -215,23 +215,14 @@ class MediapipeServableMetricReporter { std::unique_ptr processingTimeRestV3Stream; inline MetricHistogram* getProcessingTimeMetric(const ExecutionContext& context) { - if (context.interface == ExecutionContext::Interface::GRPC) { - if (context.method == ExecutionContext::Method::ModelInfer) - return this->processingTimeGrpcModelInfer.get(); - if (context.method == ExecutionContext::Method::ModelInferStream) - return this->processingTimeGrpcModelInferStream.get(); - return nullptr; - } else if (context.interface == ExecutionContext::Interface::REST) { - if (context.method == ExecutionContext::Method::ModelInfer) - return this->processingTimeRestModelInfer.get(); - if (context.method == ExecutionContext::Method::V3Unary) - return this->processingTimeRestV3Unary.get(); - if (context.method == ExecutionContext::Method::V3Stream) - return this->processingTimeRestV3Stream.get(); - return nullptr; - } else { - return nullptr; - } + if (context.method == ExecutionContext::Method::ModelInfer) + return this->processingTimeGrpcModelInfer.get(); + if (context.method == ExecutionContext::Method::ModelInferStream) + return this->processingTimeGrpcModelInferStream.get(); + if (context.method == ExecutionContext::Method::V3Unary) + return this->processingTimeRestV3Unary.get(); + if (context.method == ExecutionContext::Method::V3Stream) + return this->processingTimeRestV3Stream.get(); return nullptr; } diff --git a/src/test/metrics_flow_test.cpp b/src/test/metrics_flow_test.cpp index 64447e8e2f..8440c2c725 100644 --- a/src/test/metrics_flow_test.cpp +++ b/src/test/metrics_flow_test.cpp @@ -399,8 +399,7 @@ TEST_F(MetricFlowTest, GrpcModelInfer) { 
checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, mpName, "gRPC", "ModelInfer", "KServe", numberOfAcceptedRequests); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"gRPC\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"REST\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(0))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); #endif EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_REQUEST_TIME + std::string{"_count{interface=\"gRPC\",name=\""} + modelName + std::string{"\",version=\"1\"} "} + std::to_string(numberOfSuccessRequests))); @@ -721,8 +720,7 @@ TEST_F(MetricFlowTest, RestModelInfer) { checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, mpName, "REST", "ModelInfer", "KServe", numberOfAcceptedRequests); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"gRPC\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(0))); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"REST\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); #endif EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_REQUEST_TIME + std::string{"_count{interface=\"gRPC\",name=\""} + modelName + 
std::string{"\",version=\"1\"} "} + std::to_string(0))); @@ -845,7 +843,7 @@ TEST_F(MetricFlowTest, RestV3Unary) { // checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, "dummy_gpt", "REST", "Unary", "V3", numberOfRejectedRequests); checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, "dummy_gpt", "REST", "Unary", "V3", numberOfAcceptedRequests * 2); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"V3\",interface=\"REST\",method=\"Unary\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{method=\"Unary\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); } #endif @@ -899,7 +897,7 @@ TEST_F(MetricFlowTest, RestV3Stream) { const int numberOfMockedChunksPerRequest = 9; // Defined in openai_chat_completions_mock_calculator.cpp checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, "dummy_gpt", "REST", "Stream", "V3", numberOfAcceptedRequests * numberOfMockedChunksPerRequest * 2); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"V3\",interface=\"REST\",method=\"Stream\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{method=\"Stream\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); SPDLOG_ERROR(server.collect()); } From fecc7d2da894656a24c36a483c97cd5060155f63 Mon Sep 17 00:00:00 2001 From: bstrzele Date: Thu, 2 Jan 2025 13:38:55 +0000 Subject: [PATCH 3/7] documentation --- docs/metrics.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/metrics.md b/docs/metrics.md index 99bcc10c5c..8a6e2b3b04 100644 --- a/docs/metrics.md +++ b/docs/metrics.md 
@@ -241,6 +241,7 @@ For [MediaPipe Graphs](./mediapipe.md) execution there are 4 generic metrics whi | counter | ovms_responses | Useful to track number of packets generated by MediaPipe graph. Keep in mind that single request may trigger production of multiple (or zero) packets, therefore tracking number of responses is complementary to tracking accepted requests. For example tracking streaming partial responses of LLM text generation graphs. | | gauge | ovms_current_graphs | Number of graphs currently in-process. For unary communication it is equal to number of currently processing requests (each request initializes separate MediaPipe graph). For streaming communication it is equal to number of active client connections. Each connection is able to reuse the graph and decide when to delete it when the connection is closed. | | counter | ovms_graph_error | Counts errors in MediaPipe graph execution phase. For example V3 LLM text generation fails in LLMCalculator due to missing prompt - calculator returns an error and graph cancels. | +| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened. | Exposing custom metrics in calculator implementations (MediaPipe graph nodes) is not supported yet. From d95e3117bfe15ce61992add2e383e29f24e580b9 Mon Sep 17 00:00:00 2001 From: bstrzele Date: Fri, 10 Jan 2025 10:27:47 +0000 Subject: [PATCH 4/7] fix --- docs/metrics.md | 2 +- src/mediapipe_internal/mediapipegraphexecutor.hpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/metrics.md b/docs/metrics.md index 8a6e2b3b04..162501be8d 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -241,7 +241,7 @@ For [MediaPipe Graphs](./mediapipe.md) execution there are 4 generic metrics whi | counter | ovms_responses | Useful to track number of packets generated by MediaPipe graph. 
Keep in mind that single request may trigger production of multiple (or zero) packets, therefore tracking number of responses is complementary to tracking accepted requests. For example tracking streaming partial responses of LLM text generation graphs. | | gauge | ovms_current_graphs | Number of graphs currently in-process. For unary communication it is equal to number of currently processing requests (each request initializes separate MediaPipe graph). For streaming communication it is equal to number of active client connections. Each connection is able to reuse the graph and decide when to delete it when the connection is closed. | | counter | ovms_graph_error | Counts errors in MediaPipe graph execution phase. For example V3 LLM text generation fails in LLMCalculator due to missing prompt - calculator returns an error and graph cancels. | -| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened. | +| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened and been successfully closed. | Exposing custom metrics in calculator implementations (MediaPipe graph nodes) is not supported yet. diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp index def898599e..e3466dc380 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.hpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -203,13 +203,13 @@ class MediapipeGraphExecutor { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); - timer.stop(PROCESS); - double processTime = timer.template elapsed(PROCESS); - OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime); if (outputPollers.size() != outputPollersWithReceivedPacket.size()) { SPDLOG_DEBUG("Mediapipe failed to execute. 
Failed to receive all output packets"); return Status(StatusCode::MEDIAPIPE_EXECUTION_ERROR, "Unknown error during mediapipe execution"); } + timer.stop(PROCESS); + double processTime = timer.template elapsed(PROCESS); + OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime); INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getResponsesMetric(executionContext)); SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name); return StatusCode::OK; From 8db5784f949f11879f59a4a9a2e5e7d6449b8be5 Mon Sep 17 00:00:00 2001 From: bstrzele Date: Fri, 10 Jan 2025 10:33:17 +0000 Subject: [PATCH 5/7] fix wording --- docs/metrics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/metrics.md b/docs/metrics.md index 162501be8d..143604c6b8 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -241,7 +241,7 @@ For [MediaPipe Graphs](./mediapipe.md) execution there are 4 generic metrics whi | counter | ovms_responses | Useful to track number of packets generated by MediaPipe graph. Keep in mind that single request may trigger production of multiple (or zero) packets, therefore tracking number of responses is complementary to tracking accepted requests. For example tracking streaming partial responses of LLM text generation graphs. | | gauge | ovms_current_graphs | Number of graphs currently in-process. For unary communication it is equal to number of currently processing requests (each request initializes separate MediaPipe graph). For streaming communication it is equal to number of active client connections. Each connection is able to reuse the graph and decide when to delete it when the connection is closed. | | counter | ovms_graph_error | Counts errors in MediaPipe graph execution phase. For example V3 LLM text generation fails in LLMCalculator due to missing prompt - calculator returns an error and graph cancels. 
| -| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened and been successfully closed. | +| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened and has been successfully closed. | Exposing custom metrics in calculator implementations (MediaPipe graph nodes) is not supported yet. From 17602827a566c1b8735134fb9c0262ab55c1a663 Mon Sep 17 00:00:00 2001 From: Bartosz Strzelecki Date: Tue, 28 Jan 2025 14:34:29 +0100 Subject: [PATCH 6/7] Update docs/metrics.md Co-authored-by: Trawinski, Dariusz --- docs/metrics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/metrics.md b/docs/metrics.md index 143604c6b8..29eec56bdb 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -241,7 +241,7 @@ For [MediaPipe Graphs](./mediapipe.md) execution there are 4 generic metrics whi | counter | ovms_responses | Useful to track number of packets generated by MediaPipe graph. Keep in mind that single request may trigger production of multiple (or zero) packets, therefore tracking number of responses is complementary to tracking accepted requests. For example tracking streaming partial responses of LLM text generation graphs. | | gauge | ovms_current_graphs | Number of graphs currently in-process. For unary communication it is equal to number of currently processing requests (each request initializes separate MediaPipe graph). For streaming communication it is equal to number of active client connections. Each connection is able to reuse the graph and decide when to delete it when the connection is closed. | | counter | ovms_graph_error | Counts errors in MediaPipe graph execution phase. For example V3 LLM text generation fails in LLMCalculator due to missing prompt - calculator returns an error and graph cancels. | -| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened and has been successfully closed. 
| -| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened and has been successfully closed. | +| histogram | ovms_graph_processing_time_us | Tracks duration of successfully started mediapipe graphs in us. It can represent pipeline processing time for unary calls or the session length for streamed requests. | Exposing custom metrics in calculator implementations (MediaPipe graph nodes) is not supported yet. From f8385499c969d98f0af79eb2b247e855a0354541 Mon Sep 17 00:00:00 2001 From: Bartosz Strzelecki Date: Tue, 28 Jan 2025 14:37:34 +0100 Subject: [PATCH 7/7] adjust count --- docs/metrics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/metrics.md b/docs/metrics.md index 29eec56bdb..2c974fb07a 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -232,7 +232,7 @@ It means that each request to the DAG pipeline will update also the metrics for ## Metrics implementation for MediaPipe Graphs -For [MediaPipe Graphs](./mediapipe.md) execution there are 4 generic metrics which apply to all graphs: +For [MediaPipe Graphs](./mediapipe.md) execution there are 6 generic metrics which apply to all graphs: | Type | Name | Description |