diff --git a/src/BUILD b/src/BUILD
index d783319fca..ae21bb673a 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -1833,6 +1833,7 @@ cc_test(
         # "test/python/ovms_py_tensor_test.cpp",
         # LLM logic uses Python for processing Jinja templates
         "test/llmnode_test.cpp",
+        "test/max_model_length_test.cpp",
         "test/llmtemplate_test.cpp",
         "test/pythonnode_test.cpp",
         "test/text_streamer_test.cpp",
diff --git a/src/llm/apis/openai_completions.cpp b/src/llm/apis/openai_completions.cpp
index 176c1a4588..4828534e1b 100644
--- a/src/llm/apis/openai_completions.cpp
+++ b/src/llm/apis/openai_completions.cpp
@@ -388,11 +388,11 @@ void OpenAIChatCompletionsHandler::incrementProcessedTokens(int numTokens) {
     usage.completionTokens += numTokens;
 }
 
-ov::genai::GenerationConfig OpenAIChatCompletionsHandler::createGenerationConfig() const {
+ov::genai::GenerationConfig OpenAIChatCompletionsHandler::createGenerationConfig(std::optional<uint32_t> maxModelLength) const {
     ov::genai::GenerationConfig config = request.createGenerationConfig();
     if (maxModelLength.has_value()){
         config.max_length = maxModelLength.value();
-        SPDLOG_LOGGER_ERROR(llm_calculator_logger, "Max model length {}", maxModelLength.value());
+        SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed max model length {}", maxModelLength.value());
     }
     return config;
 }
@@ -411,33 +411,6 @@ absl::Status OpenAIChatCompletionsHandler::parseRequest(uint32_t maxTokensLimit, uint32_t bestOfLimit) {
     return status;
 }
 
-void OpenAIChatCompletionsHandler::parseMaxModelLength(std::string& modelsPath){
-    std::string configPath = modelsPath + "/config.json";
-    if (std::filesystem::exists(configPath.c_str())) {
-        std::ifstream ifs(configPath);
-        if (!ifs.is_open()) {
-            return;
-        }
-        rapidjson::Document modelConfig;
-        rapidjson::IStreamWrapper isw(ifs);
-        rapidjson::ParseResult parseResult = modelConfig.ParseStream(isw);
-        if(parseResult.Code()){
-            return;
-        }
-        std::vector<std::string> maxLengthFields = {"max_position_embeddings", "n_positions", "seq_len", "seq_length", "n_ctx", "sliding_window"};
-        for(auto field : maxLengthFields){
-            if(modelConfig.HasMember(field.c_str()) && modelConfig[field.c_str()].IsUint()){
-                maxModelLength = modelConfig[field.c_str()].GetUint();
-            }
-        }
-    }
-    return;
-}
-
-std::optional<uint32_t> OpenAIChatCompletionsHandler::getMaxModelLength(){
-    return maxModelLength;
-}
-
 std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const std::vector<ov::genai::GenerationOutput>& generationOutputs) {
     OVMS_PROFILE_FUNCTION();
     StringBuffer buffer;
diff --git a/src/llm/apis/openai_completions.hpp b/src/llm/apis/openai_completions.hpp
index f64ccd68e7..3e03431f30 100644
--- a/src/llm/apis/openai_completions.hpp
+++ b/src/llm/apis/openai_completions.hpp
@@ -22,15 +22,12 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
 #include
 #include
-#include
-#include
 
 #include "absl/status/status.h"
@@ -184,13 +181,10 @@
 
     void incrementProcessedTokens(int numTokens = 1);
 
-    ov::genai::GenerationConfig createGenerationConfig() const;
+    ov::genai::GenerationConfig createGenerationConfig(std::optional<uint32_t> maxModelLength) const;
 
     absl::Status parseRequest(uint32_t maxTokensLimit, uint32_t bestOfLimit);
 
-    void parseMaxModelLength(std::string& modelsPath);
-    std::optional<uint32_t> getMaxModelLength();
-
     std::string serializeUnaryResponse(const std::vector<ov::genai::GenerationOutput>& generationOutputs);
     std::string serializeStreamingChunk(const std::string& chunkResponse, ov::genai::GenerationFinishReason finishReason);
     std::string serializeStreamingUsageChunk();
diff --git a/src/llm/http_llm_calculator.cc b/src/llm/http_llm_calculator.cc
index 6cdf87a15e..1b469193c9 100644
--- a/src/llm/http_llm_calculator.cc
+++ b/src/llm/http_llm_calculator.cc
@@ -161,11 +161,10 @@
         ov::Tensor finalPromptIds = nodeResources->cbPipe->get_tokenizer().encode(finalPrompt, ov::genai::add_special_tokens(encodeAddSpecialTokens)).input_ids;
         this->apiHandler->setPromptTokensUsage(finalPromptIds.get_size());
         SPDLOG_LOGGER_TRACE(llm_calculator_logger, "{}", getPromptTokensString(finalPromptIds));
-        this->apiHandler->parseMaxModelLength(this->nodeResources->modelsPath);
         this->generationHandle = nodeResources->cbPipe->add_request(
             currentRequestId++, /*to be removed from API?*/
             finalPromptIds,
-            this->apiHandler->createGenerationConfig());
+            this->apiHandler->createGenerationConfig(this->nodeResources->maxModelLength));
 
         this->client->registerDisconnectionCallback([genHandle = this->generationHandle]() {
             genHandle->drop();
diff --git a/src/llm/llmnoderesources.cpp b/src/llm/llmnoderesources.cpp
index dbfab92b28..822742495f 100644
--- a/src/llm/llmnoderesources.cpp
+++ b/src/llm/llmnoderesources.cpp
@@ -38,6 +38,9 @@
 #include "src/llm/llm_calculator.pb.h"
 #include "src/llm/llm_executor.hpp"
 #include "src/llm/text_processor.hpp"
+#include
+#include
+#include
 
 namespace ovms {
 
@@ -119,6 +122,30 @@ void LLMNodeResources::loadTextProcessor(LLMNodeResources& nodeResources, const std::string& chatTemplateDirectory) {
     }
 }
 
+std::optional<uint32_t> LLMNodeResources::parseMaxModelLength(std::string& modelsPath) {
+    std::string configPath = modelsPath + "/config.json";
+    std::optional<uint32_t> maxModelLength;
+    if (std::filesystem::exists(configPath.c_str())) {
+        std::ifstream ifs(configPath);
+        if (!ifs.is_open()) {
+            return maxModelLength;
+        }
+        rapidjson::Document modelConfig;
+        rapidjson::IStreamWrapper isw(ifs);
+        rapidjson::ParseResult parseResult = modelConfig.ParseStream(isw);
+        if(parseResult.Code()){
+            return maxModelLength;
+        }
+        std::vector<std::string> maxLengthFields = {"max_position_embeddings", "n_positions", "seq_len", "seq_length", "n_ctx", "sliding_window"};
+        for(auto field : maxLengthFields){
+            if(modelConfig.HasMember(field.c_str()) && modelConfig[field.c_str()].IsUint()){
+                maxModelLength = modelConfig[field.c_str()].GetUint();
+            }
+        }
+    }
+    return maxModelLength;
+}
+
 Status LLMNodeResources::initializeLLMNodeResources(LLMNodeResources& nodeResources, const ::mediapipe::CalculatorGraphConfig::Node& graphNodeConfig, std::string graphPath) {
     mediapipe::LLMCalculatorOptions nodeOptions;
     graphNodeConfig.node_options(0).UnpackTo(&nodeOptions);
@@ -144,6 +171,7 @@ Status LLMNodeResources::initializeLLMNodeResources(LLMNodeResources& nodeResources, const ::mediapipe::CalculatorGraphConfig::Node& graphNodeConfig, std::string graphPath) {
         SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node models_path: {} is not a directory. ", basePath);
         return StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST;
     }
+    nodeResources.maxModelLength = parseMaxModelLength(basePath);
 
     nodeResources.schedulerConfig = {
         .max_num_batched_tokens = nodeOptions.max_num_batched_tokens(),
diff --git a/src/llm/llmnoderesources.hpp b/src/llm/llmnoderesources.hpp
index debe5942e1..134a1356b7 100644
--- a/src/llm/llmnoderesources.hpp
+++ b/src/llm/llmnoderesources.hpp
@@ -112,9 +112,11 @@ struct LLMNodeResources {
     TextProcessor textProcessor;
     int maxTokensLimit;
     int bestOfLimit;
+    std::optional<uint32_t> maxModelLength;
 
     static Status initializeLLMNodeResources(LLMNodeResources& nodeResources, const ::mediapipe::CalculatorGraphConfig::Node& graphNode, std::string graphPath);
     static void loadTextProcessor(LLMNodeResources& nodeResources, const std::string& chatTemplateDirectory);
+    static std::optional<uint32_t> parseMaxModelLength(std::string& modelsPath);
 
     LLMNodeResources(const LLMNodeResources&) = delete;
     LLMNodeResources& operator=(LLMNodeResources&) = delete;
diff --git a/src/test/max_model_length_test.cpp b/src/test/max_model_length_test.cpp
new file mode 100644
index 0000000000..ef3bd3ce04
--- /dev/null
+++ b/src/test/max_model_length_test.cpp
@@ -0,0 +1,142 @@
+//*****************************************************************************
+// Copyright 2024 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#include
+#include
+
+#include
+
+#include "../llm/llmnoderesources.hpp"
+
+#include "test_utils.hpp"
+
+using namespace ovms;
+
+class MaxModelLengthTest : public TestWithTempDir {
+protected:
+    std::string configFilePath;
+    rapidjson::Document doc;
+    ov::genai::Tokenizer dummyTokenizer;
+
+    void SetUp() {
+        TestWithTempDir::SetUp();
+        configFilePath = directoryPath + "/config.json";
+    }
+};
+
+TEST_F(MaxModelLengthTest, maxModelLength_MaxPositionEmbeddings_VALID) {
+    std::string modelConfigContent = R"({"max_position_embeddings" : 5})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    ASSERT_TRUE(maxModelLength.has_value());
+    EXPECT_EQ(maxModelLength.value(), 5);
+}
+
+TEST_F(MaxModelLengthTest, maxModelLength_MaxPositionEmbeddings_INVALID) {
+    std::string modelConfigContent = R"({"max_position_embeddings" : "INVALID"})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    EXPECT_FALSE(maxModelLength.has_value());
+}
+
+TEST_F(MaxModelLengthTest, maxModelLength_nPositions_VALID) {
+    std::string modelConfigContent = R"({"n_positions" : 5})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    ASSERT_TRUE(maxModelLength.has_value());
+    EXPECT_EQ(maxModelLength.value(), 5);
+}
+
+TEST_F(MaxModelLengthTest, maxModelLength_nPositions_INVALID) {
+    std::string modelConfigContent = R"({"n_positions" : "INVALID"})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    EXPECT_FALSE(maxModelLength.has_value());
+}
+
+TEST_F(MaxModelLengthTest, maxModelLength_seqLen_VALID) {
+    std::string modelConfigContent = R"({"seq_len" : 5})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    ASSERT_TRUE(maxModelLength.has_value());
+    EXPECT_EQ(maxModelLength.value(), 5);
+}
+
+TEST_F(MaxModelLengthTest, maxModelLength_seqLen_INVALID) {
+    std::string modelConfigContent = R"({"seq_len" : "INVALID"})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    EXPECT_FALSE(maxModelLength.has_value());
+}
+
+TEST_F(MaxModelLengthTest, maxModelLength_seqLength_VALID) {
+    std::string modelConfigContent = R"({"seq_length" : 5})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    ASSERT_TRUE(maxModelLength.has_value());
+    EXPECT_EQ(maxModelLength.value(), 5);
+}
+
+TEST_F(MaxModelLengthTest, maxModelLength_seqLength_INVALID) {
+    std::string modelConfigContent = R"({"seq_length" : "INVALID"})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    EXPECT_FALSE(maxModelLength.has_value());
+}
+
+TEST_F(MaxModelLengthTest, maxModelLength_nCtx_VALID) {
+    std::string modelConfigContent = R"({"n_ctx" : 5})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    ASSERT_TRUE(maxModelLength.has_value());
+    EXPECT_EQ(maxModelLength.value(), 5);
+}
+
+TEST_F(MaxModelLengthTest, maxModelLength_nCtx_INVALID) {
+    std::string modelConfigContent = R"({"n_ctx" : "INVALID"})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    EXPECT_FALSE(maxModelLength.has_value());
+}
+
+TEST_F(MaxModelLengthTest, maxModelLength_slidingWindow_VALID) {
+    std::string modelConfigContent = R"({"sliding_window" : 5})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    ASSERT_TRUE(maxModelLength.has_value());
+    EXPECT_EQ(maxModelLength.value(), 5);
+}
+
+TEST_F(MaxModelLengthTest, maxModelLength_slidingWindow_INVALID) {
+    std::string modelConfigContent = R"({"sliding_window" : "INVALID"})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    EXPECT_FALSE(maxModelLength.has_value());
+}
+
+TEST_F(MaxModelLengthTest, maxModelLength_emptyConfig) {
+    std::string modelConfigContent = R"({})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    EXPECT_FALSE(maxModelLength.has_value());
+}
+
+TEST_F(MaxModelLengthTest, maxModelLength_parsingOrder) {
+    std::string modelConfigContent = R"({"max_position_embeddings" : 5, "seq_length" : 6, "n_positions" : 7, "sliding_window" : 8, "seq_len" : 9, "n_ctx" : 10})";
+    createConfigFileWithContent(modelConfigContent, configFilePath);
+    auto maxModelLength = LLMNodeResources::parseMaxModelLength(directoryPath);
+    ASSERT_TRUE(maxModelLength.has_value());
+    EXPECT_EQ(maxModelLength.value(), 8);
+}
\ No newline at end of file