From b7b57e72e0f4af1f4d0a07e240485c0fe7342daa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mi=C5=82osz=20=C5=BBeglarski?=
Date: Thu, 21 Nov 2024 12:08:17 +0100
Subject: [PATCH] Updates for 2025.0 (#2807)

---
 Dockerfile.redhat                           | 14 ++---
 Dockerfile.ubuntu                           | 10 ++--
 Makefile                                    | 14 ++---
 create_package.sh                           |  4 +-
 demos/common/export_models/requirements.txt |  4 +-
 demos/embeddings/README.md                  | 12 -----
 .../download_model_requirements.txt         |  2 +-
 demos/rerank/README.md                      | 12 -----
 docs/developer_guide.md                     |  8 +--
 external/cb.patch                           | 28 +++++++---
 .../SampleCpuExtension/Dockerfile.redhat    |  2 +-
 .../SampleCpuExtension/Dockerfile.ubuntu    |  2 +-
 src/example/SampleCpuExtension/Makefile     |  2 +-
 src/llm/apis/openai_completions.cpp         |  8 +--
 src/llm/http_llm_calculator.cc              |  4 --
 src/llm/llm_calculator.proto                | 17 +++---
 src/llm/llmnoderesources.cpp                |  5 --
 src/test/llmnode_test.cpp                   |  4 --
 tests/file_lists/lib_files.txt              | 52 +++++++++---------
 tests/file_lists/lib_files_python.txt       | 54 +++++++++----------
 third_party/llm_engine/llm_engine.bzl       |  6 +--
 21 files changed, 116 insertions(+), 148 deletions(-)

diff --git a/Dockerfile.redhat b/Dockerfile.redhat
index ac190607d6..487738adba 100644
--- a/Dockerfile.redhat
+++ b/Dockerfile.redhat
@@ -199,7 +199,7 @@ RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \
     ln -s /openvino/inference-engine/temp/opencv_*/opencv /opt/intel/openvino/extras && \
     ln -s /usr/local/runtime /opt/intel/openvino && \
     ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \
-    ln -s /opt/intel/openvino /opt/intel/openvino_2024
+    ln -s /opt/intel/openvino /opt/intel/openvino_2025
 RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; mkdir -p /opt/intel/openvino && cp -r /openvino/bin/intel64/Release/python /opt/intel/openvino/
 RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; cp -r /openvino/tools/ovc/* /opt/intel/openvino/python
 ################## END OF OPENVINO SOURCE BUILD ######################
@@ -212,7 +212,7 @@ RUN if [ "$ov_use_binary" = "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ;
     mkdir /opt/intel && \
     tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
     ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \
-    ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2024
+    ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2025

 # update oneTBB
 RUN wget -nv https://github.com/oneapi-src/oneTBB/releases/download/v2021.13.0/oneapi-tbb-2021.13.0-lin.tgz && \
@@ -300,13 +300,13 @@ WORKDIR /ovms/src/example/SampleCpuExtension/
 RUN make

 RUN if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; \
-    mkdir -p /opt/intel/openvino/python/openvino-2024.5.dist-info && \
-    echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2024.5' > /opt/intel/openvino/python/openvino-2024.5.dist-info/METADATA
+    mkdir -p /opt/intel/openvino/python/openvino-2025.0.dist-info && \
+    echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2025.0' > /opt/intel/openvino/python/openvino-2025.0.dist-info/METADATA
 ENV PYTHONPATH=/opt/intel/openvino/python:/ovms/bazel-bin/src/python/binding

 WORKDIR /ovms

-ARG PROJECT_VERSION="2024.5"
+ARG PROJECT_VERSION="2025.0"
 ARG PROJECT_NAME="OpenVINO Model Server"
 LABEL description=${PROJECT_NAME}
 ARG minitrace_flags
@@ -398,8 +398,8 @@ RUN if [ -f /ovms_release/lib/libovms_shared.so ] ; then mv /ovms_release/lib/li
 FROM $RELEASE_BASE_IMAGE as release
 LABEL "name"="OVMS"
 LABEL "vendor"="Intel Corporation"
-LABEL "version"="2024.5"
-LABEL "release"="2024"
+LABEL "version"="2025.0"
+LABEL "release"="2025"
 LABEL "summary"="OpenVINO(TM) Model Server"
 LABEL "description"="OpenVINO(TM) Model Server is a solution for serving AI models"
 ARG INSTALL_RPMS_FROM_URL=
diff --git a/Dockerfile.ubuntu b/Dockerfile.ubuntu
index b9069ea10d..f37599f40b 100644
--- a/Dockerfile.ubuntu
+++ b/Dockerfile.ubuntu
@@ -167,7 +167,7 @@ RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \
     mkdir -p /opt/intel/openvino && \
     ln -s /usr/local/runtime /opt/intel/openvino && \
     ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \
-    ln -s /opt/intel/openvino /opt/intel/openvino_2024
+    ln -s /opt/intel/openvino /opt/intel/openvino_2025

 RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; mkdir -p /opt/intel/openvino && cp -r /openvino/bin/intel64/${CMAKE_BUILD_TYPE}/python /opt/intel/openvino/
 RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; cp -r /openvino/tools/ovc/* /opt/intel/openvino/python
@@ -184,7 +184,7 @@ RUN if [ "$ov_use_binary" == "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ;
     mkdir /opt/intel && \
     tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
     ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \
-    ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2024
+    ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2025

 # update oneTBB
 RUN wget -nv https://github.com/oneapi-src/oneTBB/releases/download/v2021.13.0/oneapi-tbb-2021.13.0-lin.tgz && \
@@ -305,12 +305,12 @@ WORKDIR /ovms/src/example/SampleCpuExtension/
 RUN make && cp libcustom_relu_cpu_extension.so /opt

 RUN if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; \
-    mkdir -p /opt/intel/openvino/python/openvino-2024.5.dist-info && \
-    echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2024.5' > /opt/intel/openvino/python/openvino-2024.5.dist-info/METADATA
+    mkdir -p /opt/intel/openvino/python/openvino-2025.0.dist-info && \
+    echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2025.0' > /opt/intel/openvino/python/openvino-2025.0.dist-info/METADATA

 WORKDIR /ovms

-ARG PROJECT_VERSION="2024.5"
+ARG PROJECT_VERSION="2025.0"
 ARG PROJECT_NAME="OpenVINO Model Server"
 LABEL description=${PROJECT_NAME}
 ARG minitrace_flags
diff --git a/Makefile b/Makefile
index d5bbf6553e..b14acdd158 100644
--- a/Makefile
+++ b/Makefile
@@ -74,9 +74,9 @@ FUZZER_BUILD ?= 0
 # NOTE: when changing any value below, you'll need to adjust WORKSPACE file by hand:
 # - uncomment source build section, comment binary section
 # - adjust binary version path - version variable is not passed to WORKSPACE file!
-OV_SOURCE_BRANCH ?= db64e5c66a9fdede7ecb8473b399ac94210f5136 # releases/2024/5 2024-11-09
-OV_CONTRIB_BRANCH ?= c39462ca8d7c550266dc70cdbfbe4fc8c5be0677 # releases/2024/5 2024-10-31
-OV_TOKENIZERS_BRANCH ?= 92bec551bdb1eed94aa375415c82b4068520c7b9 # releases/2024/5 2024-11-04
+OV_SOURCE_BRANCH ?= 6733cc320915ca6bfad9036940bf5ca244b41a8b # master / 2024-11-18
+OV_CONTRIB_BRANCH ?= c39462ca8d7c550266dc70cdbfbe4fc8c5be0677 # master / 2024-10-31
+OV_TOKENIZERS_BRANCH ?= 5ccd56de5a7da65ad7ea7c5c42cec55be97df312 # master / 2024-11-14

 OV_SOURCE_ORG ?= openvinotoolkit
 OV_CONTRIB_ORG ?= openvinotoolkit
@@ -166,11 +166,11 @@ ifeq ($(findstring ubuntu,$(BASE_OS)),ubuntu)
 ifeq ($(BASE_OS_TAG),20.04)
   OS=ubuntu20
   INSTALL_DRIVER_VERSION ?= "22.43.24595"
-  DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu20_2024.5.0.17288.7975fa5da0c_x86_64.tgz
+  DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu20_2025.0.0.17377.6733cc32091_x86_64.tgz
 else ifeq ($(BASE_OS_TAG),22.04)
   OS=ubuntu22
   INSTALL_DRIVER_VERSION ?= "24.26.30049"
-  DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu22_2024.5.0.17288.7975fa5da0c_x86_64.tgz
+  DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu22_2025.0.0.17377.6733cc32091_x86_64.tgz
 endif
 endif
 ifeq ($(BASE_OS),redhat)
@@ -185,7 +185,7 @@ ifeq ($(BASE_OS),redhat)
   endif
   DIST_OS=redhat
   INSTALL_DRIVER_VERSION ?= "23.22.26516"
-  DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_rhel8_2024.5.0.17288.7975fa5da0c_x86_64.tgz
+  DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_rhel8_2025.0.0.17377.6733cc32091_x86_64.tgz
 endif

 OVMS_CPP_DOCKER_IMAGE ?= openvino/model_server
@@ -201,7 +201,7 @@ endif
 OVMS_PYTHON_IMAGE_TAG ?= py

 PRODUCT_NAME = "OpenVINO Model Server"
-PRODUCT_VERSION ?= "2024.5"
+PRODUCT_VERSION ?= "2025.0"

 PROJECT_VER_PATCH = $(eval PROJECT_VER_PATCH:=`git rev-parse --short HEAD`)

diff --git a/create_package.sh b/create_package.sh
index b957e6f973..206e6d9459 100755
--- a/create_package.sh
+++ b/create_package.sh
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
-# This script should be used inside the build image to create a binary package based on the compiled artefacts
+# This script should be used inside the build image to create a binary package based on the compiled artifacts

 env
 mkdir -vp /ovms_release/bin
@@ -28,7 +28,7 @@ find /ovms/bazel-out/k8-*/bin -iname '*.so*' ! -type d ! -name "libgtest.so" ! -
 mv /ovms_release/lib/libcustom_node* /ovms_release/lib/custom_nodes/
 cd /ovms_release/lib/ ; rm -f libazurestorage.so.* ; ln -s libazurestorage.so libazurestorage.so.7 ;ln -s libazurestorage.so libazurestorage.so.7.5
 cd /ovms_release/lib/ ; rm -f libcpprest.so.2.10 ; ln -s libcpprest.so libcpprest.so.2.10
-if [ -f /ovms_release/lib/libopenvino_genai.so ]; then cd /ovms_release/lib/ ; rm -f libopenvino_genai.so.* ; ln -s libopenvino_genai.so libopenvino_genai.so.2450 ; ln -s libopenvino_genai.so.2024.5.0.0 libopenvino_genai.so.2450 ; fi
+if [ -f /ovms_release/lib/libopenvino_genai.so ]; then cd /ovms_release/lib/ ; rm -f libopenvino_genai.so.* ; ln -s libopenvino_genai.so libopenvino_genai.so.2500 ; ln -s libopenvino_genai.so.2025.0.0.0 libopenvino_genai.so.2500 ; fi
 rm -f /ovms_release/lib/libssl.so
 rm -f /ovms_release/lib/libsampleloader*
diff --git a/demos/common/export_models/requirements.txt b/demos/common/export_models/requirements.txt
index ce9e5221ec..9c704e9a89 100644
--- a/demos/common/export_models/requirements.txt
+++ b/demos/common/export_models/requirements.txt
@@ -2,8 +2,8 @@
 --extra-index-url "https://storage.openvinotoolkit.org/simple/wheels/nightly"
 --pre
 optimum-intel@git+https://github.com/huggingface/optimum-intel.git
-openvino-tokenizers[transformers]==2024.5.*
-openvino==2024.5.*
+openvino-tokenizers[transformers]==2025.0.*
+openvino==2025.0.*
 nncf>=2.11.0
 sentence_transformers==3.1.1
 openai
diff --git a/demos/embeddings/README.md b/demos/embeddings/README.md
index 16341ead6f..9479e1f697 100644
--- a/demos/embeddings/README.md
+++ b/demos/embeddings/README.md
@@ -2,18 +2,6 @@

 This demo shows how to deploy embeddings models in the OpenVINO Model Server for text feature extractions. Text generation use case is exposed via OpenAI API `embeddings` endpoint.

-## Get the docker image
-
-Build the image from source to try this new feature. It will be included in the public image in the coming version 2024.5.
-```bash
-git clone https://github.com/openvinotoolkit/model_server.git
-cd model_server
-make release_image GPU=1
-```
-It will create an image called `openvino/model_server:latest`.
-> **Note:** This operation might take 40min or more depending on your build host.
-> **Note:** `GPU` parameter in image build command is needed to include dependencies for GPU device.
-
 ## Model preparation
 > **Note** Python 3.9 or higher is needed for that step
 >
diff --git a/demos/python_demos/clip_image_classification/download_model_requirements.txt b/demos/python_demos/clip_image_classification/download_model_requirements.txt
index 4b647a6501..60d5608cb1 100644
--- a/demos/python_demos/clip_image_classification/download_model_requirements.txt
+++ b/demos/python_demos/clip_image_classification/download_model_requirements.txt
@@ -1,7 +1,7 @@
 --extra-index-url "https://download.pytorch.org/whl/cpu"
 --extra-index-url "https://storage.openvinotoolkit.org/simple/wheels/nightly"
 --pre
-openvino==2024.5.*
+openvino==2025.0.*
 numpy<2.0
 transformers==4.40.2
 pillow==10.3.0
diff --git a/demos/rerank/README.md b/demos/rerank/README.md
index 1d76a35980..a28e836ac1 100644
--- a/demos/rerank/README.md
+++ b/demos/rerank/README.md
@@ -1,17 +1,5 @@
 # How to serve Rerank models via Cohere API {#ovms_demos_rerank}

-## Get the docker image
-
-Build the image from source to try this new feature. It will be included in the public image in the coming version 2024.5.
-```bash
-git clone https://github.com/openvinotoolkit/model_server.git
-cd model_server
-make release_image GPU=1
-```
-It will create an image called `openvino/model_server:latest`.
-> **Note:** This operation might take 40min or more depending on your build host.
-> **Note:** `GPU` parameter in image build command is needed to include dependencies for GPU device.
-
 ## Model preparation
 > **Note** Python 3.9 or higher is needed for that step
 Here, the original Pytorch LLM model and the tokenizer will be converted to IR format and optionally quantized.
diff --git a/docs/developer_guide.md b/docs/developer_guide.md
index fdd6576db3..8a8bdbf2ec 100644
--- a/docs/developer_guide.md
+++ b/docs/developer_guide.md
@@ -61,15 +61,15 @@ In-case of problems, see [Debugging](#debugging).
    docker run -it -v ${PWD}:/ovms --entrypoint bash -p 9178:9178 openvino/model_server-build:latest
    ```

-4. In the docker container context compile the source code via :
+4. In the docker container context compile the source code via (choose distro `ubuntu` or `redhat` depending on the image type):
    ```bash
-   bazel build --config=mp_on_py_on //src:ovms
+   bazel build --//:distro=ubuntu --config=mp_on_py_on //src:ovms

    > **NOTE**: There are several options that would disable specific parts of OVMS. For details check ovms bazel build files.
    ```
-5. From the container, run a single unit test :
+5. From the container, run a single unit test (choose distro `ubuntu` or `redhat` depending on the image type):
    ```bash
-   bazel test --config=mp_on_py_on --test_summary=detailed --test_output=all --test_filter='ModelVersionStatus.*' //src:ovms_test
+   bazel test --//:distro=ubuntu --config=mp_on_py_on --test_summary=detailed --test_output=all --test_filter='ModelVersionStatus.*' //src:ovms_test
    ```

 | Argument | Description |
diff --git a/external/cb.patch b/external/cb.patch
index 110765cc09..f33322a49c 100644
--- a/external/cb.patch
+++ b/external/cb.patch
@@ -1,17 +1,29 @@
 diff --git a/CMakeLists.txt b/CMakeLists.txt
-index b08debb..4171092 100644
+index da39b5c..d3fa72b 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
-@@ -62,9 +62,9 @@ endif()
+@@ -81,15 +81,15 @@ endif()

  add_subdirectory(thirdparty)
  add_subdirectory(src)
--add_subdirectory(samples)
--add_subdirectory(tools/continuous_batching)
--add_subdirectory(tests/cpp)
-+#add_subdirectory(samples)
-+#add_subdirectory(tools/continuous_batching)
-+#add_subdirectory(tests/cpp)
+-if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/samples")
+-    add_subdirectory(samples)
+-endif()
+-if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tools/continuous_batching")
+-    add_subdirectory(tools/continuous_batching)
+-endif()
+-if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tests/cpp")
+-    add_subdirectory(tests/cpp)
+-endif()
++#if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/samples")
++#    add_subdirectory(samples)
++#endif()
++#if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tools/continuous_batching")
++#    add_subdirectory(tools/continuous_batching)
++#endif()
++#if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tests/cpp")
++#    add_subdirectory(tests/cpp)
++#endif()
  install(FILES LICENSE DESTINATION docs/licensing COMPONENT licensing_genai RENAME LICENSE-GENAI)
  install(FILES third-party-programs.txt DESTINATION docs/licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt)
diff --git a/src/example/SampleCpuExtension/Dockerfile.redhat b/src/example/SampleCpuExtension/Dockerfile.redhat
index f14a16a97d..5eecb5c4d7 100644
--- a/src/example/SampleCpuExtension/Dockerfile.redhat
+++ b/src/example/SampleCpuExtension/Dockerfile.redhat
@@ -31,7 +31,7 @@ RUN mkdir -p $TEMP_DIR && cd $TEMP_DIR/ && \
     mkdir /opt/intel && \
     tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
     ln -s /opt/intel/l_openvino_* /opt/intel/openvino && \
-    ln -s /opt/intel/l_openvino_* /opt/intel/openvino_2024
+    ln -s /opt/intel/l_openvino_* /opt/intel/openvino_2025

 WORKDIR /workspace
 COPY Makefile ov_extension.cpp CustomReluOp.cpp CustomReluOp.hpp ./
diff --git a/src/example/SampleCpuExtension/Dockerfile.ubuntu b/src/example/SampleCpuExtension/Dockerfile.ubuntu
index 6de6822438..b6e480e85e 100644
--- a/src/example/SampleCpuExtension/Dockerfile.ubuntu
+++ b/src/example/SampleCpuExtension/Dockerfile.ubuntu
@@ -32,7 +32,7 @@ RUN mkdir -p $TEMP_DIR && cd $TEMP_DIR/ && \
     mkdir /opt/intel && \
     tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
     ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \
-    ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2024
+    ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2025

 WORKDIR /workspace
 COPY Makefile ov_extension.cpp CustomReluOp.cpp CustomReluOp.hpp ./
diff --git a/src/example/SampleCpuExtension/Makefile b/src/example/SampleCpuExtension/Makefile
index bb09ff7218..93cd64f109 100644
--- a/src/example/SampleCpuExtension/Makefile
+++ b/src/example/SampleCpuExtension/Makefile
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
-OPENVINO_PATH ?= /opt/intel/openvino_2024
+OPENVINO_PATH ?= /opt/intel/openvino_2025

 all:
 	$(eval SHELL:=/bin/bash)
 	/usr/bin/g++ --version
diff --git a/src/llm/apis/openai_completions.cpp b/src/llm/apis/openai_completions.cpp
index 446fb4dc7b..5b92d5ed87 100644
--- a/src/llm/apis/openai_completions.cpp
+++ b/src/llm/apis/openai_completions.cpp
@@ -416,13 +416,9 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const std::vect
     // choices: array of size N, where N is related to n request parameter
     writer.String("choices");
     writer.StartArray();  // [
-    int i = 0;
-    int n = request.numReturnSequences.value_or(1);
+    int index = 0;
     usage.completionTokens = 0;
     for (const ov::genai::GenerationOutput& generationOutput : generationOutputs) {
-        if (i >= n)
-            break;
-
         SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Generated tokens: {}", generationOutput.generated_ids);
         usage.completionTokens += generationOutput.generated_ids.size();
         if (request.echo)
@@ -445,7 +441,7 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const std::vect
         }
         // index: integer; Choice index, only n=1 supported anyway
         writer.String("index");
-        writer.Int(i++);
+        writer.Int(index++);
         // logprobs: object/null; Log probability information for the choice. TODO
         writer.String("logprobs");
         if (this->request.logprobschat || this->request.logprobs > 0) {
diff --git a/src/llm/http_llm_calculator.cc b/src/llm/http_llm_calculator.cc
index 0e144385ff..ece7359090 100644
--- a/src/llm/http_llm_calculator.cc
+++ b/src/llm/http_llm_calculator.cc
@@ -189,10 +189,6 @@ class HttpLLMCalculator : public CalculatorBase {
                     return absl::CancelledError();
                 }
                 RET_CHECK(generationOutputs.size() >= 1);
-                std::sort(generationOutputs.begin(), generationOutputs.end(), [](ov::genai::GenerationOutput& r1, ov::genai::GenerationOutput& r2) {
-                    return r1.score > r2.score;
-                });
-
                 std::string response = this->apiHandler->serializeUnaryResponse(generationOutputs);
                 SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Complete unary response: {}", response);
                 cc->Outputs().Tag(OUTPUT_TAG_NAME).Add(new OutputDataType{std::move(response)}, timestamp);
diff --git a/src/llm/llm_calculator.proto b/src/llm/llm_calculator.proto
index e6d3b8c01c..43c7c031fb 100644
--- a/src/llm/llm_calculator.proto
+++ b/src/llm/llm_calculator.proto
@@ -32,20 +32,17 @@ message LLMCalculatorOptions {
     // model memory cache to allocate in GB
     optional uint64 cache_size = 3 [default = 8];

-    // DEPRECATED - this option is not effective starting with 2024.5 and will be removed in 2025.0 release
-    optional uint64 block_size = 4 [default = 32];
+    optional uint64 max_num_seqs = 4 [default = 256];

-    optional uint64 max_num_seqs = 5 [default = 256];
+    optional bool dynamic_split_fuse = 5 [default = true];

-    optional bool dynamic_split_fuse = 7 [default = true];
+    optional string device = 6 [default = "CPU"];

-    optional string device = 8 [default = "CPU"];
+    optional string plugin_config = 7 [default = ""];

-    optional string plugin_config = 9 [default = ""];
+    optional uint32 best_of_limit = 8 [default = 20];

-    optional uint32 best_of_limit = 10 [default = 20];
+    optional uint32 max_tokens_limit = 9 [default = 4096];

-    optional uint32 max_tokens_limit = 11 [default = 4096];
-
-    optional bool enable_prefix_caching = 12 [default = false];
+    optional bool enable_prefix_caching = 10 [default = false];
 }
\ No newline at end of file
diff --git a/src/llm/llmnoderesources.cpp b/src/llm/llmnoderesources.cpp
index 2e892b58a2..dbfab92b28 100644
--- a/src/llm/llmnoderesources.cpp
+++ b/src/llm/llmnoderesources.cpp
@@ -145,11 +145,6 @@ Status LLMNodeResources::initializeLLMNodeResources(LLMNodeResources& nodeResour
         return StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST;
     }

-    // TODO: Remove along with block_size option in the proto in 2025.x release
-    if (nodeOptions.has_block_size()) {
-        SPDLOG_LOGGER_WARN(modelmanager_logger, "Since 2024.5, block_size is selected automatically and setting it explicitly is ineffective. "
-                                                "Please remove it from the configuration as in 2025.0 it will cause error.");
-    }
     nodeResources.schedulerConfig = {
         .max_num_batched_tokens = nodeOptions.max_num_batched_tokens(),
         .cache_size = nodeOptions.cache_size(),
diff --git a/src/test/llmnode_test.cpp b/src/test/llmnode_test.cpp
index ad7d14b4c7..9c73308966 100644
--- a/src/test/llmnode_test.cpp
+++ b/src/test/llmnode_test.cpp
@@ -2734,7 +2734,6 @@ TEST_F(LLMOptionsHttpTest, LLMNodeOptionsCheckNonDefault) {
             models_path: "/ovms/src/test/llm_testing/facebook/opt-125m"
             max_num_batched_tokens: 1024
             cache_size: 1
-            block_size: 8
             max_num_seqs: 95
             dynamic_split_fuse: false
             enable_prefix_caching: true
@@ -2762,9 +2761,6 @@ TEST_F(LLMOptionsHttpTest, LLMNodeOptionsCheckNonDefault) {

     ASSERT_EQ(nodeResources.schedulerConfig.max_num_batched_tokens, 1024);
     ASSERT_EQ(nodeResources.schedulerConfig.cache_size, 1);
-    // We create graph with block_size set in graph config to make sure setting it does not result in error
-    // TODO: Remove below commented assertion as well as block_size from the testPbtxt when block_size is removed from options proto.
-    // ASSERT_EQ(nodeResources.schedulerConfig.block_size, 8);
     ASSERT_EQ(nodeResources.schedulerConfig.dynamic_split_fuse, false);
     ASSERT_EQ(nodeResources.schedulerConfig.max_num_seqs, 95);
     ASSERT_EQ(nodeResources.schedulerConfig.enable_prefix_caching, true);
diff --git a/tests/file_lists/lib_files.txt b/tests/file_lists/lib_files.txt
index ebf7c3fafa..8492f5c369 100644
--- a/tests/file_lists/lib_files.txt
+++ b/tests/file_lists/lib_files.txt
@@ -43,38 +43,38 @@ libopencv_videoio.so.410->libopencv_videoio.so.4.10.0
 libopencv_ximgproc.so->libopencv_ximgproc.so.410
 libopencv_ximgproc.so.4.10.0
 libopencv_ximgproc.so.410->libopencv_ximgproc.so.4.10.0
-libopenvino.so->libopenvino.so.2450
-libopenvino.so.2024.5.0
-libopenvino.so.2450->libopenvino.so.2024.5.0
+libopenvino.so->libopenvino.so.2500
+libopenvino.so.2025.0.0
+libopenvino.so.2500->libopenvino.so.2025.0.0
 libopenvino_auto_batch_plugin.so
 libopenvino_auto_plugin.so
-libopenvino_c.so->libopenvino_c.so.2450
-libopenvino_c.so.2024.5.0
-libopenvino_c.so.2450->libopenvino_c.so.2024.5.0
+libopenvino_c.so->libopenvino_c.so.2500
+libopenvino_c.so.2025.0.0
+libopenvino_c.so.2500->libopenvino_c.so.2025.0.0
 libopenvino_hetero_plugin.so
 libopenvino_intel_cpu_plugin.so
 libopenvino_intel_gpu_plugin.so
 libopenvino_intel_npu_plugin.so
-libopenvino_ir_frontend.so.2024.5.0
-libopenvino_ir_frontend.so.2450->libopenvino_ir_frontend.so.2024.5.0
-libopenvino_jax_frontend.so->libopenvino_jax_frontend.so.2450
-libopenvino_jax_frontend.so.2024.5.0
-libopenvino_jax_frontend.so.2450->libopenvino_jax_frontend.so.2024.5.0
-libopenvino_onnx_frontend.so->libopenvino_onnx_frontend.so.2450
-libopenvino_onnx_frontend.so.2024.5.0
-libopenvino_onnx_frontend.so.2450->libopenvino_onnx_frontend.so.2024.5.0
-libopenvino_paddle_frontend.so->libopenvino_paddle_frontend.so.2450
-libopenvino_paddle_frontend.so.2024.5.0
-libopenvino_paddle_frontend.so.2450->libopenvino_paddle_frontend.so.2024.5.0
-libopenvino_pytorch_frontend.so->libopenvino_pytorch_frontend.so.2450
-libopenvino_pytorch_frontend.so.2024.5.0
-libopenvino_pytorch_frontend.so.2450->libopenvino_pytorch_frontend.so.2024.5.0
-libopenvino_tensorflow_frontend.so->libopenvino_tensorflow_frontend.so.2450
-libopenvino_tensorflow_frontend.so.2024.5.0
-libopenvino_tensorflow_frontend.so.2450->libopenvino_tensorflow_frontend.so.2024.5.0
-libopenvino_tensorflow_lite_frontend.so->libopenvino_tensorflow_lite_frontend.so.2450
-libopenvino_tensorflow_lite_frontend.so.2024.5.0
-libopenvino_tensorflow_lite_frontend.so.2450->libopenvino_tensorflow_lite_frontend.so.2024.5.0
+libopenvino_ir_frontend.so.2025.0.0
+libopenvino_ir_frontend.so.2500->libopenvino_ir_frontend.so.2025.0.0
+libopenvino_jax_frontend.so->libopenvino_jax_frontend.so.2500
+libopenvino_jax_frontend.so.2025.0.0
+libopenvino_jax_frontend.so.2500->libopenvino_jax_frontend.so.2025.0.0
+libopenvino_onnx_frontend.so->libopenvino_onnx_frontend.so.2500
+libopenvino_onnx_frontend.so.2025.0.0
+libopenvino_onnx_frontend.so.2500->libopenvino_onnx_frontend.so.2025.0.0
+libopenvino_paddle_frontend.so->libopenvino_paddle_frontend.so.2500
+libopenvino_paddle_frontend.so.2025.0.0
+libopenvino_paddle_frontend.so.2500->libopenvino_paddle_frontend.so.2025.0.0
+libopenvino_pytorch_frontend.so->libopenvino_pytorch_frontend.so.2500
+libopenvino_pytorch_frontend.so.2025.0.0
+libopenvino_pytorch_frontend.so.2500->libopenvino_pytorch_frontend.so.2025.0.0
+libopenvino_tensorflow_frontend.so->libopenvino_tensorflow_frontend.so.2500
+libopenvino_tensorflow_frontend.so.2025.0.0
+libopenvino_tensorflow_frontend.so.2500->libopenvino_tensorflow_frontend.so.2025.0.0
+libopenvino_tensorflow_lite_frontend.so->libopenvino_tensorflow_lite_frontend.so.2500
+libopenvino_tensorflow_lite_frontend.so.2025.0.0
+libopenvino_tensorflow_lite_frontend.so.2500->libopenvino_tensorflow_lite_frontend.so.2025.0.0
 libopenvino_tokenizers.so
 libtbb.so->libtbb.so.12
 libtbb.so.12->libtbb.so.12.13
diff --git a/tests/file_lists/lib_files_python.txt b/tests/file_lists/lib_files_python.txt
index b40a7de327..b4a34286f7 100644
--- a/tests/file_lists/lib_files_python.txt
+++ b/tests/file_lists/lib_files_python.txt
@@ -43,40 +43,40 @@ libopencv_videoio.so.410->libopencv_videoio.so.4.10.0
 libopencv_ximgproc.so->libopencv_ximgproc.so.410
 libopencv_ximgproc.so.4.10.0
 libopencv_ximgproc.so.410->libopencv_ximgproc.so.4.10.0
-libopenvino.so->libopenvino.so.2450
-libopenvino.so.2024.5.0
-libopenvino.so.2450->libopenvino.so.2024.5.0
+libopenvino.so->libopenvino.so.2500
+libopenvino.so.2025.0.0
+libopenvino.so.2500->libopenvino.so.2025.0.0
 libopenvino_auto_batch_plugin.so
 libopenvino_auto_plugin.so
-libopenvino_c.so->libopenvino_c.so.2450
-libopenvino_c.so.2024.5.0
-libopenvino_c.so.2450->libopenvino_c.so.2024.5.0
+libopenvino_c.so->libopenvino_c.so.2500
+libopenvino_c.so.2025.0.0
+libopenvino_c.so.2500->libopenvino_c.so.2025.0.0
 libopenvino_genai.so
-libopenvino_genai.so.2450->libopenvino_genai.so
+libopenvino_genai.so.2500->libopenvino_genai.so
 libopenvino_hetero_plugin.so
 libopenvino_intel_cpu_plugin.so
 libopenvino_intel_gpu_plugin.so
 libopenvino_intel_npu_plugin.so
-libopenvino_ir_frontend.so.2024.5.0
-libopenvino_ir_frontend.so.2450->libopenvino_ir_frontend.so.2024.5.0
-libopenvino_jax_frontend.so->libopenvino_jax_frontend.so.2450
-libopenvino_jax_frontend.so.2024.5.0
-libopenvino_jax_frontend.so.2450->libopenvino_jax_frontend.so.2024.5.0
-libopenvino_onnx_frontend.so->libopenvino_onnx_frontend.so.2450
-libopenvino_onnx_frontend.so.2024.5.0
-libopenvino_onnx_frontend.so.2450->libopenvino_onnx_frontend.so.2024.5.0
-libopenvino_paddle_frontend.so->libopenvino_paddle_frontend.so.2450
-libopenvino_paddle_frontend.so.2024.5.0
-libopenvino_paddle_frontend.so.2450->libopenvino_paddle_frontend.so.2024.5.0
-libopenvino_pytorch_frontend.so->libopenvino_pytorch_frontend.so.2450
-libopenvino_pytorch_frontend.so.2024.5.0
-libopenvino_pytorch_frontend.so.2450->libopenvino_pytorch_frontend.so.2024.5.0
-libopenvino_tensorflow_frontend.so->libopenvino_tensorflow_frontend.so.2450
-libopenvino_tensorflow_frontend.so.2024.5.0
-libopenvino_tensorflow_frontend.so.2450->libopenvino_tensorflow_frontend.so.2024.5.0
-libopenvino_tensorflow_lite_frontend.so->libopenvino_tensorflow_lite_frontend.so.2450
-libopenvino_tensorflow_lite_frontend.so.2024.5.0
-libopenvino_tensorflow_lite_frontend.so.2450->libopenvino_tensorflow_lite_frontend.so.2024.5.0
+libopenvino_ir_frontend.so.2025.0.0
+libopenvino_ir_frontend.so.2500->libopenvino_ir_frontend.so.2025.0.0
+libopenvino_jax_frontend.so->libopenvino_jax_frontend.so.2500
+libopenvino_jax_frontend.so.2025.0.0
+libopenvino_jax_frontend.so.2500->libopenvino_jax_frontend.so.2025.0.0
+libopenvino_onnx_frontend.so->libopenvino_onnx_frontend.so.2500
+libopenvino_onnx_frontend.so.2025.0.0
+libopenvino_onnx_frontend.so.2500->libopenvino_onnx_frontend.so.2025.0.0
+libopenvino_paddle_frontend.so->libopenvino_paddle_frontend.so.2500
+libopenvino_paddle_frontend.so.2025.0.0
+libopenvino_paddle_frontend.so.2500->libopenvino_paddle_frontend.so.2025.0.0
+libopenvino_pytorch_frontend.so->libopenvino_pytorch_frontend.so.2500
+libopenvino_pytorch_frontend.so.2025.0.0
+libopenvino_pytorch_frontend.so.2500->libopenvino_pytorch_frontend.so.2025.0.0
+libopenvino_tensorflow_frontend.so->libopenvino_tensorflow_frontend.so.2500
+libopenvino_tensorflow_frontend.so.2025.0.0
+libopenvino_tensorflow_frontend.so.2500->libopenvino_tensorflow_frontend.so.2025.0.0
+libopenvino_tensorflow_lite_frontend.so->libopenvino_tensorflow_lite_frontend.so.2500
+libopenvino_tensorflow_lite_frontend.so.2025.0.0
+libopenvino_tensorflow_lite_frontend.so.2500->libopenvino_tensorflow_lite_frontend.so.2025.0.0
 libopenvino_tokenizers.so
 libtbb.so->libtbb.so.12
 libtbb.so.12->libtbb.so.12.13
diff --git a/third_party/llm_engine/llm_engine.bzl b/third_party/llm_engine/llm_engine.bzl
index a337f325e9..8c66288280 100644
--- a/third_party/llm_engine/llm_engine.bzl
+++ b/third_party/llm_engine/llm_engine.bzl
@@ -20,7 +20,7 @@ def llm_engine():
     new_git_repository(
         name = "llm_engine",
         remote = "https://github.com/openvinotoolkit/openvino.genai",
-        commit = "366662bc19f7f52db0975549ffc4d403021e89d5", # releases/2024/5
+        commit = "17536724b9f798bea871c8775fb1a97f69714d35", # master / Nov 20
         build_file = "@_llm_engine//:BUILD",
         init_submodules = True,
         recursive_init_submodules = True,
@@ -98,11 +98,11 @@ cmake(
     out_include_dir = "runtime/include",
     # linking order
     out_shared_libs = [
-        "libopenvino_genai.so.2450",
+        "libopenvino_genai.so.2500",
     ],
     tags = ["requires-network"],
     visibility = ["//visibility:public"],
-    lib_name = "libopenvino_genai.so.2450",
+    lib_name = "libopenvino_genai.so.2500",
 )

 cc_library(