diff --git a/common_settings.bzl b/common_settings.bzl
index 2a995d59c5..c5bc6ddcc6 100644
--- a/common_settings.bzl
+++ b/common_settings.bzl
@@ -209,8 +209,6 @@ COMMON_STATIC_TEST_COPTS = select({
     "-Wall",
     "-Wno-unknown-pragmas",
     "-Werror",
-    # ov::Tensor::data method call results in deprecated warning and we use it in multiple places
-    "-Wno-deprecated-declarations",
    "-Isrc",
    "-fconcepts", # for gmock related utils
    "-fvisibility=hidden",# Needed for pybind targets
diff --git a/demos/benchmark/v3/benchmark.py b/demos/benchmark/v3/benchmark.py
index eec806da09..88c9aa8f18 100644
--- a/demos/benchmark/v3/benchmark.py
+++ b/demos/benchmark/v3/benchmark.py
@@ -438,4 +438,8 @@ async def limited_request_func(request_func_input, pbar):
 print(f"Throughput - Tokens per second: {num_tokens / benchmark_results['duration']:^,.1f}")
 print(f"Mean latency: {np.mean(benchmark_results['latencies'])*1000:.2f} ms")
 print(f"Median latency: {np.median(benchmark_results['latencies'])*1000:.2f} ms")
+# Print latency percentiles to better understand the latency distribution
+percentiles = [10, 25, 50, 75, 90, 95, 99]
+for p in percentiles:
+    print(f"{p}th percentile latency: {np.percentile(benchmark_results['latencies'], p)*1000:.2f} ms")
 print(f"Average document length: {num_tokens / len(docs)} tokens")
diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py
index 5aa81b0c81..91ef8b2edb 100644
--- a/demos/common/export_models/export_model.py
+++ b/demos/common/export_models/export_model.py
@@ -101,6 +101,17 @@ def add_common_arguments(parser):
 parser_speech2text.add_argument('--enable_word_timestamps', default=False, action='store_true', help='Load model with word timestamps support.', dest='enable_word_timestamps')
 args = vars(parser.parse_args())
+
+def _default_graph_queue_size(task_name):
+    if task_name == 'image_generation':
+        return 1
+    return 'AUTO'
+
+
+def _prepend_graph_queue_directive(graph_content, task_name):
+    queue_size = _default_graph_queue_size(task_name)
+    return f"# OVMS_GRAPH_QUEUE_SIZE: {queue_size}\n{graph_content}"
+
 t2s_graph_template = """
 input_stream: "HTTP_REQUEST_PAYLOAD:input"
 output_stream: "HTTP_RESPONSE_PAYLOAD:output"
@@ -488,6 +499,7 @@ def export_text_generation_model(model_repository_path, source_model, model_name
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(text_generation_graph_template)
     print("task_parameters", task_parameters)
     graph_content = gtemplate.render(model_path=model_path, draft_model_dir_name=draft_model_dir_name, **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'text_generation')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
@@ -495,7 +507,19 @@
 def export_embeddings_model_ov(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path, truncate=True):
     set_max_context_length = ""
     destination_path = os.path.join(model_repository_path, model_name)
    print("Exporting embeddings model to ",destination_path)
    if not os.path.isdir(destination_path) or args['overwrite_models']:
        optimum_command = "optimum-cli export openvino --model {} --disable-convert-tokenizer --task feature-extraction --weight-format {} {} --trust-remote-code {}".format(source_model, precision, task_parameters['extra_quantization_params'], destination_path)
@@ -509,6 +533,7 @@ def export_embeddings_model_ov(model_repository_path, source_model, model_name,
         raise ValueError("Failed to export tokenizer model", source_model)
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(embedding_graph_ov_template)
     graph_content = gtemplate.render(model_path="./", **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'embeddings_ov')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
@@ -523,6 +548,7 @@ def export_text2speech_model(model_repository_path, source_model, model_name, pr
         raise ValueError("Failed to export text2speech model", source_model)
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(t2s_graph_template)
     graph_content = gtemplate.render(model_path="./", **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'text2speech')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
@@ -537,6 +563,7 @@ def export_speech2text_model(model_repository_path, source_model, model_name, pr
         raise ValueError("Failed to export speech2text model", source_model)
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(s2t_graph_template)
     graph_content = gtemplate.render(model_path="./", **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'speech2text')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
@@ -553,6 +580,7 @@ def export_rerank_model_ov(model_repository_path, source_model, model_name, prec
     export_rerank_tokenizer(source_model, destination_path, max_doc_length)
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(rerank_graph_ov_template)
     graph_content = gtemplate.render(model_path="./", **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'rerank_ov')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
@@ -589,6 +617,7 @@ def export_rerank_model(model_repository_path, source_model, model_name, precisi
         shutil.move(os.path.join(tmpdirname, 'openvino_tokenizer.bin'), os.path.join(tokenizer_path, 'model.bin'))
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(rerank_graph_template)
     graph_content = gtemplate.render(model_name=model_name, **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'rerank')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
@@ -635,6 +664,7 @@ def export_image_generation_model(model_repository_path, source_model, model_nam
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(image_generation_graph_template)
     graph_content = gtemplate.render(model_path=model_path, **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'image_generation')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
diff --git a/docs/mediapipe.md b/docs/mediapipe.md
index 73f0eb1f15..e19cc44418 100644
--- a/docs/mediapipe.md
+++ b/docs/mediapipe.md
@@ -215,6 +215,48 @@ Nodes in the MediaPipe graphs can reference both the models configured in model_
 
 Subconfig file may only contain *model_config_list* section - in the same format as in [models config file](starting_server.md).
 
+### Graph Pool (Pre-initialized Graph Queue)
+
+OpenVINO Model Server can pre-initialize a pool of MediaPipe `CalculatorGraph` instances for a graph definition. Graphs in the pool are started once during server initialization and reused across inference requests, eliminating per-request graph initialization and teardown overhead. This is especially beneficial for graphs whose calculators perform expensive setup in their `Open()` methods.
+
+#### How it works
+
+Without the graph pool (legacy behavior), each incoming request creates a new `CalculatorGraph`, calls `StartRun()` with side packets, processes the request, then tears down the graph via `CloseAllPacketSources()` and `WaitUntilDone()`.
+
+With the graph pool enabled, a fixed number of graphs are pre-initialized and kept in a queue. When a request arrives, an idle graph is acquired from the queue. After processing, the graph is returned to the queue for the next request. The graph is never torn down; instead, `WaitUntilIdle()` is called between requests and the internal timestamp is incremented.
+
+#### Configuration
+
+The graph pool size is controlled via a comment directive in the graph `.pbtxt` file:
+
+```
+# OVMS_GRAPH_QUEUE_SIZE: AUTO
+```
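+
+For example, to pin the pool to four pre-initialized graphs instead of deriving the size from the host (the value `4` here is purely illustrative):
+
+```
+# OVMS_GRAPH_QUEUE_SIZE: 4
+```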
+
+| Value | Behavior |
+|:------|:---------|
+| `AUTO` | Pool size is set to the number of hardware threads (`std::thread::hardware_concurrency()`), or 16 if detection fails |
+| Positive integer (e.g. `4`) | Pool size is set to the given value; a value above the hardware thread count is clamped to it with a warning |
+| `-1` | Graph pool disabled; the server falls back to per-request graph creation |
+| `0` | Rejected as invalid at graph load time |
+| *(directive absent)* | Default: graph pool is disabled |
+
+**Default behavior:** the graph pool stays disabled unless `OVMS_GRAPH_QUEUE_SIZE` is explicitly present in `graph.pbtxt`.
+
+**Generated graphs from exporters:**
+- `demos/common/export_models/export_model.py` and OVMS `--pull --task ...` graph export emit `OVMS_GRAPH_QUEUE_SIZE` automatically.
+- In `export_model.py`: image generation graphs use `1`, and all other graph types use `AUTO`.
+- In OVMS `--task ...` graph export: image generation graphs use `1`, and other graph types use `min(physical_cores, rest_workers)` (with the OVMS default REST worker calculation when `rest_workers` is not provided explicitly).
+
+#### Important considerations for graph developers
+
+**Stateful calculators:**
+Since graphs in the pool are reused across requests, any state held by a calculator between `Process()` calls will persist across requests. If your calculator accumulates state (e.g. counters, buffers, history), that state will carry over to the next request that reuses the same graph instance. Design your calculators to either:
+- be stateless (reset any per-request state at the beginning of each `Process()` call), or
+- explicitly handle the fact that the graph may have already processed prior requests.
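+
+Below is a minimal sketch of the first approach. The calculator name, tag names, and packet type are illustrative only (they are not part of OVMS); the point is that per-request state is cleared at the top of `Process()`, so a pooled, reused graph instance behaves like a freshly created one:
+
+```cpp
+#include <string>
+
+#include "mediapipe/framework/calculator_framework.h"
+
+namespace mediapipe {
+
+class ExampleResettingCalculator : public CalculatorBase {
+public:
+    static absl::Status GetContract(CalculatorContract* cc) {
+        cc->Inputs().Tag("INPUT").Set<std::string>();
+        cc->Outputs().Tag("OUTPUT").Set<std::string>();
+        return absl::OkStatus();
+    }
+
+    absl::Status Process(CalculatorContext* cc) final {
+        // The pool reuses this calculator instance across requests, so clear
+        // anything accumulated while serving the previous request first.
+        requestBuffer.clear();
+        if (cc->Inputs().Tag("INPUT").IsEmpty()) {
+            return absl::OkStatus();
+        }
+        requestBuffer = cc->Inputs().Tag("INPUT").Get<std::string>();
+        // ... per-request processing on requestBuffer ...
+        cc->Outputs().Tag("OUTPUT").AddPacket(
+            MakePacket<std::string>(requestBuffer).At(cc->InputTimestamp()));
+        return absl::OkStatus();
+    }
+
+private:
+    std::string requestBuffer;  // per-request state; must not leak across requests
+};
+REGISTER_CALCULATOR(ExampleResettingCalculator);
+
+}  // namespace mediapipe
+```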
+
+**Input side packets from requests are not supported:**
+When the graph pool is enabled, side packets are set once at pool construction time and cannot be overridden per request. If a client sends request parameters that would normally become input side packets (e.g. KServe request parameters other than `OVMS_MP_TIMESTAMP`), the request will be rejected with an error. If your graph relies on per-request side packets to configure calculator behavior, either disable the graph pool (`# OVMS_GRAPH_QUEUE_SIZE: -1`) or redesign the graph to accept such parameters as regular input stream packets instead of side packets.
+
 ## Deployment testing
 
 ### Debug logs
diff --git a/src/BUILD b/src/BUILD
index d3e5af3861..41477c7822 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -150,6 +150,39 @@ ovms_cc_library(
     hdrs = ["queue.hpp"],
     visibility = ["//visibility:public",],
 )
+ovms_cc_library(
+    name = "mediapipe_internal_graph_side_packets",
+    hdrs = ["mediapipe_internal/graph_side_packets.hpp"],
+    visibility = ["//visibility:public",],
+)
+ovms_cc_library(
+    name = "mediapipe_internal_graph_executor_constants",
+    hdrs = ["mediapipe_internal/graph_executor_constants.hpp"],
+    visibility = ["//visibility:public"],
+)
+ovms_cc_library(
+    name = "mediapipe_internal_graphqueue",
+    hdrs = [
+        "mediapipe_internal/graphqueue.hpp",
+        "mediapipe_internal/outputstreamobserver.hpp",
+    ], # TODO FIXME
+    srcs = ["mediapipe_internal/graphqueue.cpp"],
+    deps = [
+        "libovms_queue",
+        "libovmslogging",
+        "libovms_execution_context",
+        "libovmstimer",
+        "libovmsmetrics",
+        "model_metric_reporter",
+        "mediapipe_internal_graph_executor_constants",
+        "mediapipe_internal_graph_side_packets",
+        "//third_party:openvino",
+        "@mediapipe//mediapipe/framework:calculator_graph",
+        "//src/python:libovmspythonmodule", # TODO not split
+        "//src/llm:genai_servables", # TODO split!
+    ],
+    visibility = ["//visibility:public",],
+)
 ovms_cc_library(
     name = "libovms_ovinferrequestsqueue",
     hdrs = ["ovinferrequestsqueue.hpp"],
     visibility = ["//visibility:public",],
 )
@@ -542,6 +575,7 @@ ovms_cc_library(
         "mediapipe_internal/mediapipegraphconfig.cpp",
         "mediapipe_internal/mediapipegraphdefinition.cpp",
         "mediapipe_internal/mediapipegraphdefinition.hpp",
+        "mediapipe_internal/outputstreamobserver.hpp",
         "mediapipe_internal/mediapipegraphexecutor.cpp",
         "mediapipe_internal/mediapipegraphexecutor.hpp",
         "mediapipe_internal/packettypes.hpp",
@@ -682,6 +716,8 @@ ovms_cc_library(
     }) +
     select({
         "//conditions:default": [
+            "mediapipe_internal_graph_executor_constants",
+            "mediapipe_internal_graphqueue",
             "@mediapipe_calculators//:mediapipe_calculators", # Need this dependencies here because we use ovms/src - cannot add in ovms_dependencies because we copy src directory later in Dockerfile
             "@mediapipe//mediapipe/graphs/holistic_tracking:holistic_tracking_to_render_data",
             "@mediapipe//mediapipe/graphs/iris_tracking:iris_tracking_cpu_deps",
@@ -3016,6 +3052,7 @@ cc_library(
         ":test_test_with_temp_dir",
         "//src/graph_export:graph_export",
         "//src:libovms_server_settings",
+        "//src:libovms_systeminfo",
         "@com_google_googletest//:gtest",
     ],
     local_defines = COMMON_LOCAL_DEFINES,
diff --git a/src/capi_frontend/server_settings.hpp b/src/capi_frontend/server_settings.hpp
index 5b8a3dce54..77645dda7a 100644
--- a/src/capi_frontend/server_settings.hpp
+++ b/src/capi_frontend/server_settings.hpp
@@ -158,6 +158,7 @@ struct ExportSettings {
     std::string modelName = "";
     std::string modelPath = "./";
     std::string targetDevice = "CPU";
+    std::optional<uint32_t> restWorkers;
     std::optional<std::string> extraQuantizationParams;
     std::optional<std::string> vocoder;
     std::string precision = "int8";
diff --git a/src/cli_parser.cpp b/src/cli_parser.cpp
index dd7141b1ce..4122bc3e14 100644
--- a/src/cli_parser.cpp
+++ b/src/cli_parser.cpp
@@ -728,6 +728,7 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl&
         hfSettings.exportSettings.extraQuantizationParams = result->operator[]("extra_quantization_params").as<std::string>();
     if (result->count("vocoder"))
         hfSettings.exportSettings.vocoder = result->operator[]("vocoder").as<std::string>();
+    hfSettings.exportSettings.restWorkers = serverSettings.restWorkers;
     hfSettings.downloadPath = result->operator[]("model_repository_path").as<std::string>();
     if (result->count("task")) {
         hfSettings.task = stringToEnum(result->operator[]("task").as<std::string>());
diff --git a/src/graph_export/BUILD b/src/graph_export/BUILD
index 47dc407a1e..57179bced7 100644
--- a/src/graph_export/BUILD
+++ b/src/graph_export/BUILD
@@ -29,6 +29,7 @@ ovms_cc_library(
         "@ovms//src:libovms_module",
         "@ovms//src:libovmsfilesystem",
         "@ovms//src:libovmslocalfilesystem",
+        "@ovms//src:libovms_systeminfo",
         "@com_github_tencent_rapidjson//:rapidjson",
         "@ovms//src:libovmsschema",
         "@ovms//src:libovms_version",
diff --git a/src/graph_export/graph_export.cpp b/src/graph_export/graph_export.cpp
index dadbd57777..0ca05875e1 100644
--- a/src/graph_export/graph_export.cpp
+++ b/src/graph_export/graph_export.cpp
@@ -53,6 +53,22 @@ namespace ovms {
 
 static const std::string OVMS_VERSION_GRAPH_LINE = std::string("# File created with: ") + PROJECT_NAME + std::string(" ") + PROJECT_VERSION + std::string("\n");
+static const std::string OVMS_GRAPH_QUEUE_SIZE_LINE_PREFIX = "# OVMS_GRAPH_QUEUE_SIZE: ";
+static const std::string OVMS_GRAPH_QUEUE_SIZE_AUTO = "AUTO";
+
+static std::string getDefaultGraphQueueSizeDirective(const HFSettingsImpl& hfSettings) {
+    if (hfSettings.task == IMAGE_GENERATION_GRAPH) {
+        return "1";
+    }
return OVMS_GRAPH_QUEUE_SIZE_AUTO; +} + +static std::string buildGraphHeader(const HFSettingsImpl& hfSettings) { + std::ostringstream oss; + oss << OVMS_VERSION_GRAPH_LINE; + oss << OVMS_GRAPH_QUEUE_SIZE_LINE_PREFIX << getDefaultGraphQueueSizeDirective(hfSettings) << "\n"; + return oss.str(); +} static std::string constructModelsPath(const std::string& modelPath, const std::optional& ggufFilenameOpt) { std::string modelsPath; @@ -116,7 +132,7 @@ static Status createTextGenerationGraphTemplate(const std::string& directoryPath auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(hfSettings); std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); @@ -211,7 +227,7 @@ static Status createRerankGraphTemplate(const std::string& directoryPath, const auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(hfSettings); // Windows path creation - graph parser needs forward slashes in paths std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); @@ -255,7 +271,7 @@ static Status createEmbeddingsGraphTemplate(const std::string& directoryPath, co auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(hfSettings); std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); @@ -301,7 +317,7 @@ static Status createTextToSpeechGraphTemplate(const std::string& directoryPath, auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(hfSettings); std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); @@ -352,7 +368,7 @@ static Status createSpeechToTextGraphTemplate(const std::string& directoryPath, auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(hfSettings); std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); @@ -406,7 +422,7 @@ static Status createImageGenerationGraphTemplate(const std::string& directoryPat GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(hfSettings); // clang-format off oss << R"( input_stream: "HTTP_REQUEST_PAYLOAD:input" diff --git a/src/http_frontend/http_graph_executor_impl.cpp b/src/http_frontend/http_graph_executor_impl.cpp index b970f62594..4848f3760a 100644 --- 
a/src/http_frontend/http_graph_executor_impl.cpp +++ b/src/http_frontend/http_graph_executor_impl.cpp @@ -38,6 +38,10 @@ namespace ovms { static const std::string UNUSED_REQUEST_ID = ""; +bool requestHasInputSidePackets(const HttpPayload& request) { + return false; +} + Status deserializeInputSidePacketsFromFirstRequestImpl( std::map& inputSidePackets, // out const HttpPayload& request) { // in diff --git a/src/http_frontend/http_graph_executor_impl.hpp b/src/http_frontend/http_graph_executor_impl.hpp index 9846b10158..205d428a1b 100644 --- a/src/http_frontend/http_graph_executor_impl.hpp +++ b/src/http_frontend/http_graph_executor_impl.hpp @@ -48,6 +48,9 @@ class PythonBackend; using HttpReaderWriter = HttpAsyncWriter; +// Checks whether the request contains user-provided input side packets. +bool requestHasInputSidePackets(const HttpPayload& request); + // Deserialization of parameters inside KServe gRPC request // into mediapipe Packets. // To be used by both - infer & inferStream. diff --git a/src/kfs_frontend/kfs_graph_executor_impl.cpp b/src/kfs_frontend/kfs_graph_executor_impl.cpp index 034f6f0907..b5033501d9 100644 --- a/src/kfs_frontend/kfs_graph_executor_impl.cpp +++ b/src/kfs_frontend/kfs_graph_executor_impl.cpp @@ -24,6 +24,7 @@ #include "../kfs_frontend/kfs_utils.hpp" #include "../logging.hpp" +#include "../mediapipe_internal/graph_executor_constants.hpp" #include "../mediapipe_internal/mediapipe_utils.hpp" #include "../mediapipe_internal/mediapipegraphdefinition.hpp" #include "../predict_request_validation_utils.hpp" @@ -925,6 +926,7 @@ static Status createPacketAndPushIntoGraph(const std::string& name, std::shared_ } std::unique_ptr inputTensor; OVMS_RETURN_ON_FAIL(deserializeTensor(name, *request, inputTensor, pythonBackend)); + SPDLOG_TRACE("Current Timestamp before actual pushing:{}", timestamp.Value()); MP_RETURN_ON_FAIL(graph.AddPacketToInputStream( name, ::mediapipe::packet_internal::Create( @@ -1152,10 +1154,19 @@ Status createAndPushPacketsImpl( return StatusCode::OK; } +bool requestHasInputSidePackets(const KFSRequest& request) { + static const std::string TIMESTAMP_PARAM{"OVMS_MP_TIMESTAMP"}; + for (const auto& [name, valueChoice] : request.parameters()) { + if (name != TIMESTAMP_PARAM) { + return true; + } + } + return false; +} + Status deserializeInputSidePacketsFromFirstRequestImpl( std::map& inputSidePackets, const KFSRequest& request) { - static const std::string PYTHON_SESSION_SIDE_PACKET_TAG{"py"}; for (const auto& [name, valueChoice] : request.parameters()) { SPDLOG_DEBUG("Found: {}; parameter in request for: {};", name, request.model_name()); if (name == TIMESTAMP_PARAMETER_NAME) { diff --git a/src/kfs_frontend/kfs_graph_executor_impl.hpp b/src/kfs_frontend/kfs_graph_executor_impl.hpp index cfa65b6a57..1c6e697455 100644 --- a/src/kfs_frontend/kfs_graph_executor_impl.hpp +++ b/src/kfs_frontend/kfs_graph_executor_impl.hpp @@ -36,6 +36,10 @@ namespace ovms { class PythonBackend; class Status; +// Checks whether the request contains user-provided input side packets +// (parameters other than the reserved OVMS_MP_TIMESTAMP). +bool requestHasInputSidePackets(const KFSRequest& request); + // Deserialization of parameters inside KServe gRPC request // into mediapipe Packets. // To be used by both - infer & inferStream. 
diff --git a/src/llm/BUILD b/src/llm/BUILD index ae37d936ca..5f64ad197f 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -24,6 +24,7 @@ ovms_cc_library( "//third_party:openvino", "@mediapipe//mediapipe/framework:calculator_framework", "@com_github_tencent_rapidjson//:rapidjson", + "//src:mediapipe_internal_graph_side_packets", "//src/kfserving_api:kfserving_api_cpp", "//src:libovmsprofiler", ":genai_servables", diff --git a/src/llm/http_llm_calculator.cc b/src/llm/http_llm_calculator.cc index ae6461c61a..2415ae08da 100644 --- a/src/llm/http_llm_calculator.cc +++ b/src/llm/http_llm_calculator.cc @@ -14,6 +14,7 @@ // limitations under the License. //***************************************************************************** #include +#include #include #pragma warning(push) @@ -27,6 +28,7 @@ #include "../http_payload.hpp" #include "../logging.hpp" +#include "../mediapipe_internal/graph_side_packets.hpp" #include "../profiler.hpp" #include "apis/openai_completions.hpp" #include "servable.hpp" @@ -36,9 +38,11 @@ using namespace ovms; namespace mediapipe { const std::string LLM_SESSION_SIDE_PACKET_TAG = "LLM_NODE_RESOURCES"; +const std::string LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG = "LLM_NODE_EXECUTION_CONTEXTS"; class HttpLLMCalculator : public CalculatorBase { std::shared_ptr servable; + std::shared_ptr executionContextHolder; std::shared_ptr executionContext; static const std::string INPUT_TAG_NAME; @@ -54,6 +58,9 @@ class HttpLLMCalculator : public CalculatorBase { cc->Inputs().Tag(INPUT_TAG_NAME).Set(); cc->Inputs().Tag(LOOPBACK_TAG_NAME).Set(); cc->InputSidePackets().Tag(LLM_SESSION_SIDE_PACKET_TAG).Set(); + if (cc->InputSidePackets().HasTag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG)) { + cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).Set(); + } cc->Outputs().Tag(OUTPUT_TAG_NAME).Set(); cc->Outputs().Tag(LOOPBACK_TAG_NAME).Set(); return absl::OkStatus(); @@ -72,7 +79,17 @@ class HttpLLMCalculator : public CalculatorBase { auto it = servableMap.find(cc->NodeName()); RET_CHECK(it != servableMap.end()) << "Could not find initialized LLM node named: " << cc->NodeName(); this->servable = it->second; - this->executionContext = servable->createExecutionContext(); + + if (cc->InputSidePackets().HasTag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG) && !cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).IsEmpty()) { + ovms::GenAiExecutionContextMap executionContextMap = cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).Get(); + auto contextIt = executionContextMap.find(cc->NodeName()); + RET_CHECK(contextIt != executionContextMap.end()) << "Could not find LLM execution context holder for node named: " << cc->NodeName(); + this->executionContextHolder = contextIt->second; + } + + if (!this->executionContextHolder) { + this->executionContext = servable->createExecutionContext(); + } SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "LLMCalculator [Node: {}] Open end", cc->NodeName()); return absl::OkStatus(); } @@ -81,6 +98,12 @@ class HttpLLMCalculator : public CalculatorBase { OVMS_PROFILE_FUNCTION(); RET_CHECK(this->servable != nullptr); + if (this->executionContextHolder) { + std::lock_guard lock(this->executionContextHolder->mutex); + this->executionContext = this->executionContextHolder->executionContext; + } + RET_CHECK(this->executionContext != nullptr) << "LLM execution context not initialized for node: " << cc->NodeName(); + // For cases where MediaPipe decides to trigger Process() when there are no inputs if (cc->Inputs().Tag(INPUT_TAG_NAME).IsEmpty() && 
cc->Inputs().Tag(LOOPBACK_TAG_NAME).IsEmpty()) {
             return absl::OkStatus();
diff --git a/src/logging.cpp b/src/logging.cpp
index e89fce9a07..aee9e4bc2e 100644
--- a/src/logging.cpp
+++ b/src/logging.cpp
@@ -41,7 +41,7 @@ std::shared_ptr<spdlog::logger> rerank_calculator_logger = std::make_shared<spd
 std::shared_ptr<spdlog::logger> ov_logger = std::make_shared<spdlog::logger>("openvino");
 #endif
-const std::string default_pattern = "[%Y-%m-%d %T.%e][%t][%n][%l][%s:%#] %v";
+const std::string default_pattern = "[%Y-%m-%d %T.%f][%t][%n][%l][%s:%#] %v";
 
 static void set_log_level(const std::string log_level, std::shared_ptr<spdlog::logger> logger) {
     logger->set_level(spdlog::level::info);
diff --git a/src/mediapipe_internal/graph_executor_constants.hpp b/src/mediapipe_internal/graph_executor_constants.hpp
new file mode 100644
index 0000000000..55e3af7f59
--- /dev/null
+++ b/src/mediapipe_internal/graph_executor_constants.hpp
@@ -0,0 +1,35 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+namespace ovms {
+
+inline const std::string PYTHON_SESSION_SIDE_PACKET_TAG = "py";
+inline const std::string LLM_SESSION_SIDE_PACKET_TAG = "llm";
+inline const std::string LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG = "llm_ctx";
+inline const std::string IMAGE_GEN_SESSION_SIDE_PACKET_TAG = "pipes";
+inline const std::string EMBEDDINGS_SESSION_SIDE_PACKET_TAG = "embeddings_servable";
+inline const std::string RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable";
+inline const std::string STT_SESSION_SIDE_PACKET_TAG = "s2t_servable";
+inline const std::string TTS_SESSION_SIDE_PACKET_TAG = "t2s_servable";
+inline const std::string PYTHON_SIDE_PACKET_NAME = "py";
+inline const std::string LLM_SESSION_PACKET_NAME = "llm";
+inline constexpr int64_t STARTING_TIMESTAMP_VALUE = 0;
+
+}  // namespace ovms
diff --git a/src/mediapipe_internal/graph_side_packets.hpp b/src/mediapipe_internal/graph_side_packets.hpp
new file mode 100644
index 0000000000..8b67bd3bc0
--- /dev/null
+++ b/src/mediapipe_internal/graph_side_packets.hpp
@@ -0,0 +1,80 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <string>
+#include <unordered_map>
+
+namespace ovms {
+
+// Forward declarations - only shared_ptrs are stored so full definitions are not needed
+class PythonNodeResources;
+class GenAiServable;
+struct GenAiServableExecutionContext;
+struct ImageGenerationPipelines;
+struct EmbeddingsServable;
+struct RerankServable;
+struct SttServable;
+class TtsServable;
+
+using PythonNodeResourcesMap = std::unordered_map<std::string, std::shared_ptr<PythonNodeResources>>;
+using GenAiServableMap = std::unordered_map<std::string, std::shared_ptr<GenAiServable>>;
+using RerankServableMap = std::unordered_map<std::string, std::shared_ptr<RerankServable>>;
+using SttServableMap = std::unordered_map<std::string, std::shared_ptr<SttServable>>;
+using TtsServableMap = std::unordered_map<std::string, std::shared_ptr<TtsServable>>;
+using EmbeddingsServableMap = std::unordered_map<std::string, std::shared_ptr<EmbeddingsServable>>;
+using ImageGenerationPipelinesMap = std::unordered_map<std::string, std::shared_ptr<ImageGenerationPipelines>>;
+
+struct GenAiExecutionContextHolder {
+    std::mutex mutex;
+    std::shared_ptr<GenAiServableExecutionContext> executionContext;
+};
+using GenAiExecutionContextMap = std::unordered_map<std::string, std::shared_ptr<GenAiExecutionContextHolder>>;
+
+struct GraphSidePackets {
+    PythonNodeResourcesMap pythonNodeResourcesMap;
+    GenAiServableMap genAiServableMap;
+    GenAiExecutionContextMap genAiExecutionContextMap;
+    ImageGenerationPipelinesMap imageGenPipelinesMap;
+    EmbeddingsServableMap embeddingsServableMap;
+    RerankServableMap rerankServableMap;
+    SttServableMap sttServableMap;
+    TtsServableMap ttsServableMap;
+    void clear() {
+        pythonNodeResourcesMap.clear();
+        genAiServableMap.clear();
+        genAiExecutionContextMap.clear();
+        imageGenPipelinesMap.clear();
+        embeddingsServableMap.clear();
+        rerankServableMap.clear();
+        sttServableMap.clear();
+        ttsServableMap.clear();
+    }
+    bool empty() {
+        return (pythonNodeResourcesMap.empty() &&
+                genAiServableMap.empty() &&
+                genAiExecutionContextMap.empty() &&
+                imageGenPipelinesMap.empty() &&
+                embeddingsServableMap.empty() &&
+                rerankServableMap.empty() &&
+                sttServableMap.empty() &&
+                ttsServableMap.empty());
+    }
+};
+
+}  // namespace ovms
diff --git a/src/mediapipe_internal/graphqueue.cpp b/src/mediapipe_internal/graphqueue.cpp
new file mode 100644
index 0000000000..01d04425c6
--- /dev/null
+++ b/src/mediapipe_internal/graphqueue.cpp
@@ -0,0 +1,114 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#include "graphqueue.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../queue.hpp"
+#include "src/python/pythonnoderesources.hpp"
+#include "src/llm/servable.hpp"
+
+#pragma warning(push)
+#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246)
+#include "mediapipe/framework/calculator_graph.h"
+#include "mediapipe/framework/port/status.h"
+#pragma warning(pop)
+
+#include "graph_executor_constants.hpp"
+#include "outputstreamobserver.hpp"
+namespace ovms {
+GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr<GraphSidePackets> sidePacketMaps, int streamsLength) :
+    Queue(streamsLength),
+    sidePacketMaps(sidePacketMaps) {
+    inferRequests.reserve(streamsLength);
+    for (auto i = 0; i < streamsLength; ++i) {
+        // Build observer map locally before constructing GraphHelper (const map)
+        std::unordered_map<std::string, std::shared_ptr<ObserverHolder>> observers;
+        for (auto& name : config.output_stream()) {
+            std::string streamName = getStreamName(name);
+            auto holder = std::make_shared<ObserverHolder>();
+            holder->current = std::make_shared<NullOutputStreamObserver>();
+            observers[streamName] = holder;
+        }
+
+        auto gh = std::make_shared<GraphHelper>(std::move(observers));
+        gh->graph = std::make_unique<::mediapipe::CalculatorGraph>();
+        gh->currentTimestamp = ::mediapipe::Timestamp(0);
+
+        auto absStatus = gh->graph->Initialize(config);
+        if (!absStatus.ok()) {
+            SPDLOG_ERROR("Graph queue initialization failed: {}", absStatus.ToString());
+            throw std::runtime_error(absStatus.ToString());
+        }
+        for (const auto& [streamName, holder] : gh->outStreamObservers) {
+            // Lambda captures holder (shared_ptr) by value — safe regardless of map layout
+            absStatus = gh->graph->ObserveOutputStream(streamName, [holder](const ::mediapipe::Packet& packet) -> absl::Status { return holder->current->handlePacket(packet); });
+            if (!absStatus.ok()) {
+                SPDLOG_ERROR("Graph queue ObserveOutputStream failed: {}", absStatus.ToString());
+                throw std::runtime_error(absStatus.ToString());
+            }
+        }
+        for (const auto& [nodeName, _] : sidePacketMaps->genAiServableMap) {
+            gh->genAiExecutionContextMap[nodeName] = std::make_shared<GenAiExecutionContextHolder>();
+        }
+        std::map<std::string, ::mediapipe::Packet> inputSidePackets;
+#if (PYTHON_DISABLE == 0)
+        inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<PythonNodeResourcesMap>(sidePacketMaps->pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+#endif
+        inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiServableMap>(sidePacketMaps->genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiExecutionContextMap>(gh->genAiExecutionContextMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<ImageGenerationPipelinesMap>(sidePacketMaps->imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<EmbeddingsServableMap>(sidePacketMaps->embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<RerankServableMap>(sidePacketMaps->rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<SttServableMap>(sidePacketMaps->sttServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<TtsServableMap>(sidePacketMaps->ttsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        absStatus = gh->graph->StartRun(inputSidePackets);
+        if (!absStatus.ok()) {
+            SPDLOG_ERROR("Graph queue StartRun failed: {}", absStatus.ToString());
+            throw std::runtime_error(absStatus.ToString());
+        }
+        inferRequests.emplace_back(std::move(gh));
+    }
+}
+GraphQueue::~GraphQueue() {
+    for (auto& graphHelper : inferRequests) {
+        auto absStatus = graphHelper->graph->WaitUntilIdle();
+        if (!absStatus.ok()) {
+            SPDLOG_DEBUG("Graph queue WaitUntilIdle error: {}", absStatus.ToString());
+        }
+        absStatus = graphHelper->graph->CloseAllPacketSources();
+        if (!absStatus.ok()) {
+            SPDLOG_DEBUG("Graph queue CloseAllPacketSources error: {}", absStatus.ToString());
+        }
+        absStatus = graphHelper->graph->WaitUntilDone();
+        if (!absStatus.ok()) {
+            SPDLOG_DEBUG("Graph queue WaitUntilDone error: {}", absStatus.ToString());
+        }
+        graphHelper->graph->Cancel();
+        graphHelper->graph.reset();
+    }
+}
+}  // namespace ovms
diff --git a/src/mediapipe_internal/graphqueue.hpp b/src/mediapipe_internal/graphqueue.hpp
new file mode 100644
index 0000000000..ab80e6d095
--- /dev/null
+++ b/src/mediapipe_internal/graphqueue.hpp
@@ -0,0 +1,102 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../queue.hpp"
+
+#pragma warning(push)
+#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#include "mediapipe/framework/calculator_graph.h"
+#include "mediapipe/framework/port/status.h"
+#pragma GCC diagnostic pop
+#pragma warning(pop)
+
+#include "graph_executor_constants.hpp"
+#include "graph_side_packets.hpp"
+#include "outputstreamobserver.hpp"
+namespace ovms {
+class OutputStreamObserverI;
+class NullOutputStreamObserver;
+struct ObserverHolder;
+struct GraphHelper {
+    std::unique_ptr<::mediapipe::CalculatorGraph> graph;
+    // const after construction: keys are fixed, but observer implementations
+    // can be swapped via the mutable ObserverHolder inside each shared_ptr.
+    const std::unordered_map<std::string, std::shared_ptr<ObserverHolder>> outStreamObservers;
+    GenAiExecutionContextMap genAiExecutionContextMap;
+    ::mediapipe::Timestamp currentTimestamp;
+    GraphHelper() = default;
+    // Constructor that takes the pre-built observer map
+    GraphHelper(std::unordered_map<std::string, std::shared_ptr<ObserverHolder>>&& observers) :
+        outStreamObservers(std::move(observers)) {}
+    GraphHelper(const GraphHelper&) = delete;
+    GraphHelper& operator=(const GraphHelper&) = delete;
+    GraphHelper(GraphHelper&& gh) :
+        graph(std::move(gh.graph)),
+        outStreamObservers(std::move(const_cast<std::unordered_map<std::string, std::shared_ptr<ObserverHolder>>&>(gh.outStreamObservers))),
+        genAiExecutionContextMap(std::move(gh.genAiExecutionContextMap)),
+        currentTimestamp(gh.currentTimestamp) {}
+    GraphHelper& operator=(GraphHelper&&) = delete;
+};
+// we need to keep Graph alive during MP reload hence shared_ptr
+class GraphQueue : public Queue<std::shared_ptr<GraphHelper>> {
+public:  // XXX TODO make private? we need to access in mediapipegraphdefinition to set side packets though
+    std::shared_ptr<GraphSidePackets> sidePacketMaps;
+
+public:
+    GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr<GraphSidePackets> sidePacketMaps, int streamsLength);
+    ~GraphQueue();
+};
+
+struct GraphIdGuard {
+    std::weak_ptr<GraphQueue> weakQueue;
+    const int id;
+    // shared_ptr because GraphIdGuard (and the executor holding it) must keep
+    // the GraphHelper alive even after the GraphQueue is destroyed during
+    // mediapipe graph reload/retire — the in-flight request continues using
+    // the old graph until completion.
+    std::shared_ptr<GraphHelper> gh;
+    ::mediapipe::CalculatorGraph& graph;
+    GraphIdGuard(std::shared_ptr<GraphQueue>& queue) :
+        weakQueue(queue),
+        id(queue->getIdleStream().get()),
+        gh((queue->getInferRequest(id))),
+        graph(*gh->graph) {
+    }
+    GraphIdGuard(GraphIdGuard&&) = default;
+    GraphIdGuard(const GraphIdGuard&) = delete;
+    ~GraphIdGuard() {
+        auto existingQueue = weakQueue.lock();
+        if (existingQueue)
+            existingQueue->returnStream(this->id);
+    }
+};
+}  // namespace ovms
diff --git a/src/mediapipe_internal/mediapipegraphconfig.hpp b/src/mediapipe_internal/mediapipegraphconfig.hpp
index 2e4f3d428e..193576b416 100644
--- a/src/mediapipe_internal/mediapipegraphconfig.hpp
+++ b/src/mediapipe_internal/mediapipegraphconfig.hpp
@@ -15,7 +15,12 @@
 //*****************************************************************************
 #pragma once
 
+#include <optional>
 #include <string>
+#include <thread>
+#include <variant>
+
+#include <spdlog/spdlog.h>
 
 #pragma warning(push)
 #pragma warning(disable : 6313)
 #include
@@ -27,6 +32,22 @@
 extern const std::string DEFAULT_GRAPH_FILENAME;
 extern const std::string DEFAULT_SUBCONFIG_FILENAME;
 extern const std::string DEFAULT_MODELMESH_SUBCONFIG_FILENAME;
 
+/**
+ * @brief Tag type representing AUTO graph queue size (determined at runtime).
+ */
+struct GraphQueueAutoTag {
+    bool operator==(const GraphQueueAutoTag&) const { return true; }
+};
+
+/**
+ * @brief Represents the user's graph_queue_size setting.
+ *
+ * - std::nullopt => user did not set this field
+ * - int => user explicitly set a numeric value
+ * - GraphQueueAutoTag => user explicitly set "AUTO"
+ */
+using GraphQueueSizeValue = std::optional<std::variant<int, GraphQueueAutoTag>>;
+
 class Status;
 
 /**
@@ -69,6 +90,15 @@ class MediapipeGraphConfig {
      */
     std::string currentGraphPbTxtMD5;
 
+    /**
+     * @brief Graph queue size configuration.
+     *
+     * - std::nullopt => user did not set this field
+     * - int => user explicitly set a numeric size
+     * - GraphQueueAutoTag => user explicitly set "AUTO"
+     */
+    GraphQueueSizeValue graphQueueSize;
+
 public:
     /**
      * @brief Construct a new Mediapie Graph configuration object
@@ -206,6 +236,55 @@ class MediapipeGraphConfig {
         this->currentGraphPbTxtMD5 = currentGraphPbTxtMD5;
     }
 
+    /**
+     * @brief Get the graph queue size setting.
+     *
+     * @return const GraphQueueSizeValue& - nullopt if not set, int or GraphQueueAutoTag
+     */
+    const GraphQueueSizeValue& getGraphQueueSize() const {
+        return this->graphQueueSize;
+    }
+
+    /**
+     * @brief Set the graph queue size to an explicit numeric value.
+     */
+    void setGraphQueueSize(int size) {
+        this->graphQueueSize = size;
+    }
+
+    /**
+     * @brief Set the graph queue size to AUTO.
+     */
+    void setGraphQueueSizeAuto() {
+        this->graphQueueSize = GraphQueueAutoTag{};
+    }
+
+    /**
+     * @brief Resolve the graph queue size setting to a concrete integer.
+     *
+     * Returns:
+     *   -1 => queue creation disabled (user set -1 or not set)
+     *   >0 => explicit size or resolved AUTO
+     *
+     * Value 0 is rejected at parse time (resolveGraphQueueSize).
+     * When not set (nullopt): returns -1 (queue disabled).
+     * When AUTO: returns hardware_concurrency() or 16 as fallback.
+     */
+    int getInitialQueueSize() const {
+        if (!this->graphQueueSize.has_value()) {
+            return -1;  // not set - queue disabled by default
+        }
+        if (std::holds_alternative<GraphQueueAutoTag>(*this->graphQueueSize)) {
+            unsigned int hwThreads = std::thread::hardware_concurrency();
+            if (hwThreads == 0) {
+                SPDLOG_WARN("std::thread::hardware_concurrency() returned 0 (unknown). Falling back to graph queue size 16.");
+                return 16;
+            }
+            return static_cast<int>(hwThreads);
+        }
+        return std::get<int>(*this->graphQueueSize);
+    }
+
     bool isReloadRequired(const MediapipeGraphConfig& rhs) const;
 
     /**
diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp
index 9047765e75..7057dc5898 100644
--- a/src/mediapipe_internal/mediapipegraphdefinition.cpp
+++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp
@@ -18,31 +18,33 @@
 #include
 #include
 #include
+#include <regex>
 #include
 #include
+#include <thread>
 #include
 #include
 #include
-#include "../execution_context.hpp"
-#include "../filesystem.hpp"
-#include "../kfs_frontend/kfs_utils.hpp"
-#include "../kfs_frontend/kfs_request_utils.hpp"
-#include "../deserialization_main.hpp"
-#include "../metric.hpp"
-#include "../model_metric_reporter.hpp"
-#include "../modelmanager.hpp"
-#include "../ov_utils.hpp"
-#include "../llm/servable.hpp"
-#include "../llm/servable_initializer.hpp"
+#include "src/execution_context.hpp"
+#include "src/filesystem.hpp"
+#include "src/kfs_frontend/kfs_utils.hpp"
+#include "src/kfs_frontend/kfs_request_utils.hpp"
+#include "src/deserialization_main.hpp"
+#include "src/metric.hpp"
+#include "src/model_metric_reporter.hpp"
+#include "src/modelmanager.hpp"
+#include "src/ov_utils.hpp"
+#include "src/llm/servable.hpp"
+#include "src/llm/servable_initializer.hpp"
 #if (PYTHON_DISABLE == 0)
-#include "../python/pythonnoderesources.hpp"
+#include "src/python/pythonnoderesources.hpp"
 #endif
-#include "../status.hpp"
-#include "../stringutils.hpp"
-#include "../tensorinfo.hpp"
-#include "../timer.hpp"
-#include "../version.hpp"
+#include "src/status.hpp"
+#include "src/stringutils.hpp"
+#include "src/tensorinfo.hpp"
+#include "src/timer.hpp"
+#include "src/version.hpp"
 #include "mediapipe/framework/port/parse_text_proto.h"
 #include
"mediapipe/framework/port/status.h" #include "mediapipe_utils.hpp" @@ -54,6 +56,12 @@ #include "src/image_gen/imagegen_init.hpp" #include "src/image_gen/image_gen_calculator.pb.h" +#include "src/sidepacket_servable.hpp" +#include "src/embeddings/embeddings_servable.hpp" +#include "src/rerank/rerank_servable.hpp" +#include "src/audio/speech_to_text/s2t_servable.hpp" +#include "src/audio/text_to_speech/t2s_servable.hpp" + namespace ovms { MediapipeGraphConfig MediapipeGraphDefinition::MGC; @@ -95,6 +103,48 @@ Status MediapipeGraphDefinition::validateForConfigFileExistence() { return StatusCode::OK; } +Status MediapipeGraphDefinition::resolveGraphQueueSize() { + // 1. Explicit pbtxt directive: # OVMS_GRAPH_QUEUE_SIZE: + // Always honored regardless of env var or calculator checks. + // Value -1 disables the queue, AUTO or positive integer enables it. + // Value 0 is rejected as invalid. + static const std::regex directiveRegex( + R"((?:^|\n)\s*#\s*OVMS_GRAPH_QUEUE_SIZE\s*:\s*(\S+)\s*(?:\r?\n|$))"); + std::smatch match; + if (std::regex_search(this->chosenConfig, match, directiveRegex)) { + std::string value = match[1].str(); + if (value == "AUTO") { + this->mgconfig.setGraphQueueSizeAuto(); + return StatusCode::OK; + } + auto parsed = stoi32(value); + if (!parsed.has_value()) { + SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_SIZE value: '{}'. Expected integer or 'AUTO'.", value); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + int queueSize = parsed.value(); + if (queueSize < -1 || queueSize == 0) { + SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_SIZE value: {}. Must be -1 (disabled) or a positive integer.", queueSize); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + if (queueSize == -1) { + SPDLOG_DEBUG("Graph queue explicitly disabled (OVMS_GRAPH_QUEUE_SIZE=-1) for mediapipe: {}", getName()); + return StatusCode::OK; + } + unsigned int maxThreads = std::thread::hardware_concurrency(); + if (maxThreads > 0 && queueSize > static_cast(maxThreads)) { + SPDLOG_WARN("OVMS_GRAPH_QUEUE_SIZE value: {} exceeds available hardware threads: {}. Clamping to {}.", queueSize, maxThreads, maxThreads); + queueSize = static_cast(maxThreads); + } + this->mgconfig.setGraphQueueSize(queueSize); + return StatusCode::OK; + } + + // 2. Default: queue disabled unless graph explicitly provides directive. + SPDLOG_DEBUG("Graph queue disabled by default for mediapipe: {}. 
Add '# OVMS_GRAPH_QUEUE_SIZE: ' directive in graph.pbtxt to enable.", getName()); + return StatusCode::OK; +} + Status MediapipeGraphDefinition::validateForConfigLoadableness() { if (chosenConfig.empty()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Trying to parse empty mediapipe graph definition: {} failed", this->getName(), this->chosenConfig); @@ -129,7 +179,7 @@ Status MediapipeGraphDefinition::dryInitializeTest() { } Status MediapipeGraphDefinition::validate(ModelManager& manager) { SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Started validation of mediapipe: {}", getName()); - if (!this->sidePacketMaps.empty()) { + if (!this->sidePacketMaps->empty()) { SPDLOG_ERROR("Internal Error: MediaPipe definition is in unexpected state."); return StatusCode::INTERNAL_ERROR; } @@ -177,6 +227,14 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) { if (!status.ok()) { return status; } + status = this->resolveGraphQueueSize(); + if (!status.ok()) { + return status; + } + status = this->initializeQueueIfRequired(); + if (!status.ok()) { + return status; + } lock.unlock(); notifier.passed = true; @@ -187,11 +245,31 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) { return StatusCode::OK; } +Status MediapipeGraphDefinition::initializeQueueIfRequired() { + int initialQueueSize = this->mgconfig.getInitialQueueSize(); + if (initialQueueSize < 0) { + SPDLOG_DEBUG("Graph queue creation disabled for mediapipe: {} (graph_queue_size={})", getName(), initialQueueSize); + return StatusCode::OK; + } + try { + this->queue = std::make_shared(this->config, this->sidePacketMaps, initialQueueSize); + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create graph queue for mediapipe: {} error: {}", getName(), e.what()); + return StatusCode::INTERNAL_ERROR; + } catch (...) 
{ + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create graph queue for mediapipe: {} unknown error", getName()); + return StatusCode::INTERNAL_ERROR; + } + SPDLOG_DEBUG("Created graph queue with size {} for mediapipe: {}", initialQueueSize, getName()); + return StatusCode::OK; +} + MediapipeGraphDefinition::MediapipeGraphDefinition(const std::string name, const MediapipeGraphConfig& config, MetricRegistry* registry, const MetricConfig* metricConfig, PythonBackend* pythonBackend) : + sidePacketMaps(std::make_shared()), name(name), status(SCHEDULER_CLASS_NAME, this->name), pythonBackend(pythonBackend), @@ -261,11 +339,19 @@ Status MediapipeGraphDefinition::create(std::unique_ptr& return status; } SPDLOG_DEBUG("Creating Mediapipe graph executor: {}", getName()); - - pipeline = std::make_unique(getName(), std::to_string(getVersion()), - this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames, - this->sidePacketMaps, - this->pythonBackend, this->reporter.get()); + if (this->queue) { + GraphIdGuard graphIdGuard(this->queue); + pipeline = std::make_unique(getName(), std::to_string(getVersion()), + this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames, + *this->sidePacketMaps, + this->pythonBackend, this->reporter.get(), std::move(graphIdGuard)); + } else { + pipeline = std::make_unique(getName(), std::to_string(getVersion()), + this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames, + *this->sidePacketMaps, + this->pythonBackend, this->reporter.get()); + } + SPDLOG_DEBUG("Created Mediapipe graph executor: {}", getName()); return status; } @@ -339,12 +425,15 @@ Status MediapipeGraphDefinition::reload(ModelManager& manager, const MediapipeGr std::this_thread::sleep_for(std::chrono::microseconds(1)); } this->mgconfig = config; - this->sidePacketMaps.clear(); + this->queue.reset(); + this->sidePacketMaps = std::make_shared(); return validate(manager); } void MediapipeGraphDefinition::retire(ModelManager& manager) { - this->sidePacketMaps.clear(); + this->queue.reset(); + // now we reset shared ptr maps so ongoing executions can continue + this->sidePacketMaps.reset(); this->status.handle(RetireEvent()); } @@ -411,7 +500,7 @@ class ResourcesCleaningGuard { resources(resources) {} ~ResourcesCleaningGuard() { if (shouldCleanup) { - resources.clear(); + resources.clear(); // TODO FIXME @atobisze check } } void disableCleaning() { @@ -423,7 +512,7 @@ Status MediapipeGraphDefinition::initializeNodes() { SPDLOG_INFO("MediapipeGraphDefinition initializing graph nodes"); for (int i = 0; i < config.node().size(); i++) { #if (PYTHON_DISABLE == 0) - auto& pythonNodeResourcesMap = this->sidePacketMaps.pythonNodeResourcesMap; + auto& pythonNodeResourcesMap = this->sidePacketMaps->pythonNodeResourcesMap; if (config.node(i).calculator() == PYTHON_NODE_CALCULATOR_NAME) { ResourcesCleaningGuard pythonResourcesCleaningGuard(pythonNodeResourcesMap); if (!config.node(i).node_options().size()) { @@ -453,7 +542,8 @@ Status MediapipeGraphDefinition::initializeNodes() { #endif // Passed to both calculators that require LLM Engine (gRPC KServe & HTTP OpenAI) if (endsWith(config.node(i).calculator(), LLM_NODE_CALCULATOR_NAME)) { - auto& genAiServableMap = this->sidePacketMaps.genAiServableMap; + auto& genAiServableMap = this->sidePacketMaps->genAiServableMap; + auto& genAiExecutionContextMap = this->sidePacketMaps->genAiExecutionContextMap; ResourcesCleaningGuard genAiServablesCleaningGuard(genAiServableMap); if 
(!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node missing options in graph: {}. ", this->name); @@ -468,6 +558,10 @@ Status MediapipeGraphDefinition::initializeNodes() { SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node name: {} already used in graph: {}. ", nodeName, this->name); return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; } + if (genAiExecutionContextMap.find(nodeName) != genAiExecutionContextMap.end()) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM execution context holder for node name: {} already exists in graph: {}. ", nodeName, this->name); + return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; + } std::shared_ptr servable; Status status = initializeGenAiServable(servable, config.node(i), mgconfig.getBasePath()); if (!status.ok()) { @@ -475,11 +569,12 @@ Status MediapipeGraphDefinition::initializeNodes() { return status; } genAiServableMap.insert(std::pair>(nodeName, std::move(servable))); + genAiExecutionContextMap.insert(std::pair>(nodeName, std::make_shared())); genAiServablesCleaningGuard.disableCleaning(); } // Passed to both calculators that require Image Generation pipelines if (endsWith(config.node(i).calculator(), IMAGE_GEN_CALCULATOR_NAME)) { - auto& imageGenPipelinesMap = this->sidePacketMaps.imageGenPipelinesMap; + auto& imageGenPipelinesMap = this->sidePacketMaps->imageGenPipelinesMap; ResourcesCleaningGuard guard(imageGenPipelinesMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Image Gen node missing options in graph: {}. ", this->name); @@ -513,7 +608,7 @@ Status MediapipeGraphDefinition::initializeNodes() { guard.disableCleaning(); } if (endsWith(config.node(i).calculator(), EMBEDDINGS_NODE_CALCULATOR_NAME)) { - auto& embeddingsServableMap = this->sidePacketMaps.embeddingsServableMap; + auto& embeddingsServableMap = this->sidePacketMaps->embeddingsServableMap; ResourcesCleaningGuard embeddingsServablesCleaningGuard(embeddingsServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Embeddings node missing options in graph: {}. ", this->name); @@ -546,7 +641,7 @@ Status MediapipeGraphDefinition::initializeNodes() { embeddingsServablesCleaningGuard.disableCleaning(); } if (endsWith(config.node(i).calculator(), RERANK_NODE_CALCULATOR_NAME)) { - auto& rerankServableMap = this->sidePacketMaps.rerankServableMap; + auto& rerankServableMap = this->sidePacketMaps->rerankServableMap; ResourcesCleaningGuard rerankServablesCleaningGuard(rerankServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Rerank node missing options in graph: {}. ", this->name); @@ -569,7 +664,7 @@ Status MediapipeGraphDefinition::initializeNodes() { rerankServablesCleaningGuard.disableCleaning(); } if (endsWith(config.node(i).calculator(), STT_NODE_CALCULATOR_NAME)) { - auto& sttServableMap = this->sidePacketMaps.sttServableMap; + auto& sttServableMap = this->sidePacketMaps->sttServableMap; ResourcesCleaningGuard sttServablesCleaningGuard(sttServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "SpeechToText node missing options in graph: {}. 
", this->name); @@ -595,7 +690,7 @@ Status MediapipeGraphDefinition::initializeNodes() { sttServablesCleaningGuard.disableCleaning(); } if (endsWith(config.node(i).calculator(), TTS_NODE_CALCULATOR_NAME)) { - auto& ttsServableMap = this->sidePacketMaps.ttsServableMap; + auto& ttsServableMap = this->sidePacketMaps->ttsServableMap; ResourcesCleaningGuard ttsServablesCleaningGuard(ttsServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "TextToSpeech node missing options in graph: {}. ", this->name); diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp index 14c9e0679f..e94e89c802 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.hpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp @@ -40,14 +40,10 @@ #pragma GCC diagnostic pop #pragma warning(pop) +#include "graph_side_packets.hpp" #include "mediapipegraphconfig.hpp" #include "packettypes.hpp" - -#include "../sidepacket_servable.hpp" -#include "../embeddings/embeddings_servable.hpp" -#include "../rerank/rerank_servable.hpp" -#include "../audio/speech_to_text/s2t_servable.hpp" -#include "../audio/text_to_speech/t2s_servable.hpp" +#include "graphqueue.hpp" namespace ovms { class MediapipeGraphDefinitionUnloadGuard; @@ -58,44 +54,6 @@ class ModelManager; class MediapipeGraphExecutor; class Status; class PythonBackend; -class PythonNodeResources; -class GenAiServable; -struct ImageGenerationPipelines; -using PythonNodeResourcesMap = std::unordered_map>; -using GenAiServableMap = std::unordered_map>; -using RerankServableMap = std::unordered_map>; -using SttServableMap = std::unordered_map>; -using TtsServableMap = std::unordered_map>; -using EmbeddingsServableMap = std::unordered_map>; -using ImageGenerationPipelinesMap = std::unordered_map>; - -struct GraphSidePackets { - PythonNodeResourcesMap pythonNodeResourcesMap; - GenAiServableMap genAiServableMap; - ImageGenerationPipelinesMap imageGenPipelinesMap; - EmbeddingsServableMap embeddingsServableMap; - RerankServableMap rerankServableMap; - SttServableMap sttServableMap; - TtsServableMap ttsServableMap; - void clear() { - pythonNodeResourcesMap.clear(); - genAiServableMap.clear(); - imageGenPipelinesMap.clear(); - embeddingsServableMap.clear(); - rerankServableMap.clear(); - sttServableMap.clear(); - ttsServableMap.clear(); - } - bool empty() { - return (pythonNodeResourcesMap.empty() && - genAiServableMap.empty() && - imageGenPipelinesMap.empty() && - embeddingsServableMap.empty() && - rerankServableMap.empty() && - sttServableMap.empty() && - ttsServableMap.empty()); - } -}; class MediapipeGraphDefinition { friend MediapipeGraphDefinitionUnloadGuard; @@ -142,7 +100,7 @@ class MediapipeGraphDefinition { static constexpr model_version_t VERSION = 1; protected: - GraphSidePackets sidePacketMaps; + std::shared_ptr sidePacketMaps; struct ValidationResultNotifier { ValidationResultNotifier(PipelineDefinitionStatus& status, std::condition_variable& loadedNotify) : @@ -165,10 +123,13 @@ class MediapipeGraphDefinition { }; virtual Status validateForConfigFileExistence(); + Status resolveGraphQueueSize(); Status validateForConfigLoadableness(); Status setStreamTypes(); Status dryInitializeTest(); + Status initializeQueueIfRequired(); + std::string chosenConfig; static MediapipeGraphConfig MGC; const std::string name; @@ -179,7 +140,7 @@ class MediapipeGraphDefinition { PipelineDefinitionStatus status; MediapipeGraphConfig mgconfig; - ::mediapipe::CalculatorGraphConfig 
config; + ::mediapipe::CalculatorGraphConfig config; // TODO rename configs Status createInputsInfo(); Status createOutputsInfo(); @@ -209,6 +170,7 @@ class MediapipeGraphDefinition { PythonBackend* pythonBackend; std::unique_ptr reporter; + std::shared_ptr queue; }; class MediapipeGraphDefinitionUnloadGuard { diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp index 93b53fdf8e..b821d1fef1 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.cpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp @@ -19,6 +19,8 @@ #include #include +#include "graph_executor_constants.hpp" + #pragma warning(push) #pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) #pragma GCC diagnostic push @@ -28,10 +30,11 @@ #pragma warning(pop) #if (PYTHON_DISABLE == 0) -#include "../python/python_backend.hpp" +#include "src/python/python_backend.hpp" #endif -#include "../image_gen/pipelines.hpp" +#include "src/image_gen/pipelines.hpp" +#include "src/llm/servable.hpp" namespace ovms { @@ -43,14 +46,10 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, - const GenAiServableMap& llmNodeResourcesMap, - const EmbeddingsServableMap& embeddingsServableMap, - const RerankServableMap& rerankServableMap, - const SttServableMap& sttServableMap, - const TtsServableMap& ttsServableMap, + const GraphSidePackets& sidePacketMaps, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter) : + MediapipeServableMetricReporter* mediapipeServableMetricReporter, + GraphIdGuard&& guard) : name(name), version(version), config(config), @@ -58,10 +57,11 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( outputTypes(std::move(outputTypes)), inputNames(std::move(inputNames)), outputNames(std::move(outputNames)), - sidePacketMaps({pythonNodeResourcesMap, llmNodeResourcesMap, {}, embeddingsServableMap, rerankServableMap, sttServableMap, ttsServableMap}), + sidePacketMaps(sidePacketMaps), pythonBackend(pythonBackend), - currentStreamTimestamp(STARTING_TIMESTAMP), - mediapipeServableMetricReporter(mediapipeServableMetricReporter) {} + currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)), + mediapipeServableMetricReporter(mediapipeServableMetricReporter), + guard(std::move(guard)) {} MediapipeGraphExecutor::MediapipeGraphExecutor( const std::string& name, const std::string& version, @@ -82,16 +82,35 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( outputNames(std::move(outputNames)), sidePacketMaps(sidePacketMaps), pythonBackend(pythonBackend), - currentStreamTimestamp(STARTING_TIMESTAMP), + currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)), mediapipeServableMetricReporter(mediapipeServableMetricReporter) {} -const std::string MediapipeGraphExecutor::PYTHON_SESSION_SIDE_PACKET_TAG = "py"; -const std::string MediapipeGraphExecutor::LLM_SESSION_SIDE_PACKET_TAG = "llm"; -const std::string MediapipeGraphExecutor::IMAGE_GEN_SESSION_SIDE_PACKET_TAG = "pipes"; -const std::string MediapipeGraphExecutor::EMBEDDINGS_SESSION_SIDE_PACKET_TAG = "embeddings_servable"; -const std::string MediapipeGraphExecutor::RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable"; -const std::string MediapipeGraphExecutor::STT_SESSION_SIDE_PACKET_TAG = "s2t_servable"; -const std::string MediapipeGraphExecutor::TTS_SESSION_SIDE_PACKET_TAG = "t2s_servable"; -const 
::mediapipe::Timestamp MediapipeGraphExecutor::STARTING_TIMESTAMP = ::mediapipe::Timestamp(0); +Status MediapipeGraphExecutor::initializeLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap) { + for (const auto& [nodeName, servable] : this->sidePacketMaps.genAiServableMap) { + auto it = executionContextMap.find(nodeName); + if (it == executionContextMap.end() || !it->second) { + SPDLOG_DEBUG("Missing LLM execution context holder for node: {}", nodeName); + return StatusCode::INTERNAL_ERROR; + } + auto& holder = it->second; + std::lock_guard lock(holder->mutex); + holder->executionContext = servable->createExecutionContext(); + if (!holder->executionContext) { + SPDLOG_DEBUG("Failed to create LLM execution context for node: {}", nodeName); + return StatusCode::INTERNAL_ERROR; + } + } + return StatusCode::OK; +} + +void MediapipeGraphExecutor::resetLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap) { + for (auto& [_, holder] : executionContextMap) { + if (!holder) { + continue; + } + std::lock_guard lock(holder->mutex); + holder->executionContext.reset(); + } +} } // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp index c165469395..57f8b659b9 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.hpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -16,6 +16,7 @@ #pragma once #include #include +#include #include #include #include @@ -36,9 +37,11 @@ #include "mediapipe/framework/port/status.h" #pragma GCC diagnostic pop #pragma warning(pop) +#include "graph_executor_constants.hpp" #include "mediapipe_utils.hpp" #include "mediapipegraphdefinition.hpp" // for version in response and PythonNodeResourceMap #include "packettypes.hpp" +#include "graphqueue.hpp" namespace ovms { class PythonBackend; @@ -71,9 +74,56 @@ inline StatusCode mediapipeAbslToOvmsStatus(absl::StatusCode code) { } \ _Pragma("warning(pop)") +template +struct MyFunctor : public OutputStreamObserverI { + const std::string& requestId; + MediapipeGraphExecutor& exec; + const std::string outputStreamName; + mediapipe_packet_type_enum packetType; + ResponseType& response; + MyFunctor(const std::string& outputStreamName, mediapipe_packet_type_enum packetType, MediapipeGraphExecutor& exec, const RequestType& request, ResponseType& response) : + requestId(getRequestId(request)), + exec(exec), + outputStreamName(outputStreamName), + packetType(packetType), + response(response) { + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override; + ~MyFunctor() = default; +}; + +template +struct StreamingFunctor : public OutputStreamObserverI { + ReaderWriterType& serverReaderWriter; + std::mutex& sendMutex; + const std::string& executorName; + const std::string& executorVersion; + const std::string outputStreamName; + mediapipe_packet_type_enum packetType; + ExecutionContext executionContext; + MediapipeServableMetricReporter* metricReporter; + StreamingFunctor(const std::string& outputStreamName, mediapipe_packet_type_enum packetType, + const std::string& executorName, const std::string& executorVersion, + ReaderWriterType& serverReaderWriter, std::mutex& sendMutex, + ExecutionContext executionContext, MediapipeServableMetricReporter* metricReporter) : + serverReaderWriter(serverReaderWriter), + sendMutex(sendMutex), + executorName(executorName), + executorVersion(executorVersion), + outputStreamName(outputStreamName), + packetType(packetType), + executionContext(executionContext), + 
metricReporter(metricReporter) { + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override; + ~StreamingFunctor() = default; +}; class MediapipeGraphExecutor { +public: const std::string name; const std::string version; + +private: const ::mediapipe::CalculatorGraphConfig config; stream_types_mapping_t inputTypes; stream_types_mapping_t outputTypes; @@ -86,30 +136,22 @@ class MediapipeGraphExecutor { ::mediapipe::Timestamp currentStreamTimestamp; MediapipeServableMetricReporter* mediapipeServableMetricReporter; + std::optional guard; public: - static const std::string PYTHON_SESSION_SIDE_PACKET_TAG; - static const std::string LLM_SESSION_SIDE_PACKET_TAG; - static const std::string IMAGE_GEN_SESSION_SIDE_PACKET_TAG; - static const std::string EMBEDDINGS_SESSION_SIDE_PACKET_TAG; - static const std::string RERANK_SESSION_SIDE_PACKET_TAG; - static const std::string STT_SESSION_SIDE_PACKET_TAG; - static const std::string TTS_SESSION_SIDE_PACKET_TAG; - static const ::mediapipe::Timestamp STARTING_TIMESTAMP; - - MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config, + MediapipeGraphExecutor(const std::string& name, + const std::string& version, + const ::mediapipe::CalculatorGraphConfig& config, stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, - const GenAiServableMap& llmNodeResourcesMap, - const EmbeddingsServableMap& embeddingsServableMap, - const RerankServableMap& rerankServableMap, - const SttServableMap& sttServableMap, - const TtsServableMap& ttsServableMap, + const GraphSidePackets& sidePacketMaps, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter); - MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config, + MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard); + // Constructor without graph queue (old path - graph created per-request) + MediapipeGraphExecutor(const std::string& name, + const std::string& version, + const ::mediapipe::CalculatorGraphConfig& config, stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, @@ -117,18 +159,82 @@ class MediapipeGraphExecutor { PythonBackend* pythonBackend, MediapipeServableMetricReporter* mediapipeServableMetricReporter); + Status initializeLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap); + + void resetLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap); + template Status infer(const RequestType* request, ResponseType* response, ExecutionContext executionContext) { OVMS_PROFILE_FUNCTION(); SPDLOG_DEBUG("Start unary KServe request mediapipe graph: {} execution", this->name); MetricCounterGuard failedRequestsGuard(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, false)); MetricGaugeGuard currentGraphsGuard(this->mediapipeServableMetricReporter->currentGraphs.get()); + if (this->guard.has_value()) { + return inferWithQueue(request, response, executionContext, failedRequestsGuard); + } else { + return inferWithoutQueue(request, response, executionContext, failedRequestsGuard); + } + } + + template + Status inferWithQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) { + 
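+        // Reviewer sketch (hedged, not part of the patch): the queue path assumes
+        // GraphIdGuard checks a pre-initialized ::mediapipe::CalculatorGraph out of a
+        // fixed-size pool and returns it on destruction, roughly:
+        //
+        //   struct GraphIdGuard {
+        //       std::shared_ptr<GraphQueue> queue;
+        //       int id;                               // pool slot checked out in ctor
+        //       ::mediapipe::CalculatorGraph& graph;  // graph owned by that slot
+        //       explicit GraphIdGuard(std::shared_ptr<GraphQueue> q) :
+        //           queue(std::move(q)), id(queue->reserveId()), graph(queue->getGraph(id)) {}
+        //       ~GraphIdGuard() { queue->returnId(id); }  // slot becomes reusable
+        //   };
+        //
+        // reserveId()/returnId()/getGraph() are assumed names; the real interface
+        // lives in graphqueue.hpp, which this diff does not show.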
::mediapipe::CalculatorGraph& graph = this->guard->graph; + auto llmContextStatus = initializeLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } + for (auto& name : this->outputNames) { + if (name.empty()) { + SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", name); + return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR; + } + guard->gh->outStreamObservers.at(name)->current = std::make_shared>(name, this->outputTypes.at(name), *this, *request, *response); + } + + size_t numberOfPacketsCreated = 0; + auto ovms_status = createAndPushPacketsImpl( + std::shared_ptr(request, [](const RequestType*) {}), + this->inputTypes, + this->pythonBackend, + graph, + this->guard->gh->currentTimestamp, + numberOfPacketsCreated); + if (!ovms_status.ok()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + return ovms_status; + } + + if (this->inputNames.size() > numberOfPacketsCreated) { + SPDLOG_DEBUG("Not all input packets created. Expected: {}, Actual: {}. Aborting execution of mediapipe graph: {}", + this->inputNames.size(), numberOfPacketsCreated, this->name); + return Status(StatusCode::INVALID_NO_OF_INPUTS, "Not all input packets created"); + } + + failedRequestsGuard.disable(); + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, true)); + + auto status = graph.WaitUntilIdle(); + if (!status.ok()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + } + resetLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap); + MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); + // Increment timestamp for next request reusing this graph from the queue + this->guard->gh->currentTimestamp = ::mediapipe::Timestamp(this->guard->gh->currentTimestamp.Value() + 1); + SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name); + return StatusCode::OK; + } + + template + Status inferWithoutQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) { ::mediapipe::CalculatorGraph graph; MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); - enum : unsigned int { - PROCESS, - TIMER_END2 - }; + auto llmContextStatus = initializeLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } + enum : unsigned int { PROCESS, + TIMER_END2 }; Timer timer; timer.start(PROCESS); std::unordered_map outputPollers; @@ -148,15 +254,15 @@ class MediapipeGraphExecutor { std::map inputSidePackets; OVMS_RETURN_ON_FAIL(deserializeInputSidePacketsFromFirstRequestImpl(inputSidePackets, *request)); #if (PYTHON_DISABLE == 0) - inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.pythonNodeResourcesMap).At(STARTING_TIMESTAMP); + inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); #endif - inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = 
mediapipe::MakePacket(this->sidePacketMaps.imageGenPipelinesMap).At(STARTING_TIMESTAMP); - inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(STARTING_TIMESTAMP); - - inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.rerankServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.sttServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.ttsServableMap).At(STARTING_TIMESTAMP); + inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiExecutionContextMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.sttServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.ttsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); MP_RETURN_ON_FAIL(graph.StartRun(inputSidePackets), std::string("start MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_START_ERROR); @@ -165,11 +271,7 @@ class MediapipeGraphExecutor { size_t numberOfPacketsCreated = 0; auto ovms_status = createAndPushPacketsImpl( - std::shared_ptr(request, - // Custom deleter to avoid deallocation by custom holder - // Conversion to shared_ptr is required for unified deserialization method - // for first and subsequent requests - [](const RequestType*) {}), + std::shared_ptr(request, [](const RequestType*) {}), this->inputTypes, this->pythonBackend, graph, @@ -180,25 +282,20 @@ class MediapipeGraphExecutor { return ovms_status; } - // This differs from inferStream - we require user to feed all streams if (this->inputNames.size() > numberOfPacketsCreated) { SPDLOG_DEBUG("Not all input packets created. Expected: {}, Actual: {}. Aborting execution of mediapipe graph: {}", - this->inputNames.size(), - numberOfPacketsCreated, - this->name); + this->inputNames.size(), numberOfPacketsCreated, this->name); return Status(StatusCode::INVALID_NO_OF_INPUTS, "Not all input packets created"); } failedRequestsGuard.disable(); INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, true)); - // we wait idle since some calculators could hold ownership on packet content while nodes further down the graph - // can be still processing those. 
Closing packet sources triggers Calculator::Close() on nodes that do not expect - // new packets auto status = graph.WaitUntilIdle(); - if (!status.ok()) { // Collect error metric after Open() + if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } + resetLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); MP_RETURN_ON_FAIL(graph.CloseAllPacketSources(), "graph close all packet sources", StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR); @@ -226,7 +323,7 @@ class MediapipeGraphExecutor { SPDLOG_TRACE("Received all: {} packets for: {}", receivedOutputs, outputStreamName); } status = graph.WaitUntilDone(); - if (!status.ok()) { // Collect error metric after Process() + if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); @@ -245,6 +342,131 @@ class MediapipeGraphExecutor { template Status inferStream(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) { OVMS_PROFILE_FUNCTION(); + if (this->guard.has_value()) { + return inferStreamWithQueue(req, serverReaderWriter, executionContext); + } else { + return inferStreamWithoutQueue(req, serverReaderWriter, executionContext); + } + } + + template + Status inferStreamWithQueue(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) { + SPDLOG_DEBUG("Start streaming mediapipe graph: {} execution (queue path)", this->name); + std::mutex sendMutex; + try { + // Graph queue does not support user-provided input side packets. + // Side packets are set at queue construction time. + if (requestHasInputSidePackets(req)) { + SPDLOG_DEBUG("Graph queue does not support user-provided input side packets. " + "Side packets are set at graph queue construction time. Graph: {}", + this->name); + return Status(StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR, + "Input side packets are not supported for graphs with queue enabled"); + } + MetricGaugeGuard currentGraphs(this->mediapipeServableMetricReporter->currentGraphs.get()); + ::mediapipe::CalculatorGraph& graph = this->guard->graph; + auto llmContextStatus = initializeLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } + + enum : unsigned int { + PROCESS, + TIMER_END2 + }; + Timer timer; + timer.start(PROCESS); + + // Swap output stream observers to streaming functors. + // Observers are already installed on the graph at queue construction time; + // we only replace the functor implementation to serialize+send to the client. + // Lifetime: sendMutex and serverReaderWriter are stack-local in this method + // and outlive all callbacks because we WaitUntilIdle() before returning. 
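+            // Illustrative sketch (assumed wiring, hedged): at queue construction an
+            // observer is installed once per output stream, delegating to a holder
+            // whose functor we swap per request, roughly:
+            //
+            //   auto holder = std::make_shared<ObserverHolder>();
+            //   holder->current = std::make_shared<NullOutputStreamObserver>();
+            //   graph.ObserveOutputStream(outputName, [holder](const ::mediapipe::Packet& p) {
+            //       return holder->current->handlePacket(p);  // dispatch to swappable observer
+            //   });
+            //
+            // ObserveOutputStream is the real MediaPipe API; the holder setup shown
+            // here is an assumption about graphqueue.hpp, which this diff omits.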
+ for (const auto& outputName : this->outputNames) { + if (outputName.empty()) { + SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", outputName); + return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR; + } + guard->gh->outStreamObservers.at(outputName)->current = std::make_shared>( + outputName, this->outputTypes.at(outputName), + this->name, this->version, + serverReaderWriter, sendMutex, + executionContext, this->mediapipeServableMetricReporter); + } + + size_t numberOfPacketsCreated = 0; + { + OVMS_PROFILE_SCOPE("Mediapipe graph deserializing first request"); + bool isSuccess = true; + OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE( + createAndPushPacketsImpl( + std::shared_ptr(&req, + [](const RequestType*) {}), + this->inputTypes, + this->pythonBackend, + graph, + this->guard->gh->currentTimestamp, + numberOfPacketsCreated), + "partial deserialization of first request", isSuccess); + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, isSuccess)); + } + + // Read loop + auto newReq = std::make_shared(); + while (waitForNewRequest(serverReaderWriter, *newReq)) { + auto pstatus = validateSubsequentRequestImpl( + *newReq, + this->name, + this->version, + this->inputTypes); + bool isSuccess = true; + if (pstatus.ok()) { + OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE( + createAndPushPacketsImpl( + newReq, + this->inputTypes, + this->pythonBackend, + graph, + this->guard->gh->currentTimestamp, + numberOfPacketsCreated), + "partial deserialization of subsequent requests", isSuccess); + } else { + OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE(std::move(pstatus), "validate subsequent requests", isSuccess); + } + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, isSuccess)); + + if (graph.HasError()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + SPDLOG_DEBUG("Graph {}: encountered an error, stopping the execution", this->name); + break; + } + + newReq = std::make_shared(); + } + + // Do NOT CloseAllPacketSources or WaitUntilDone - graph stays alive for reuse + auto status = graph.WaitUntilIdle(); + if (!status.ok()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + } + resetLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap); + MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); + // Increment timestamp for next request reusing this graph from the queue + this->guard->gh->currentTimestamp = ::mediapipe::Timestamp(this->guard->gh->currentTimestamp.Value() + 1); + SPDLOG_DEBUG("Graph {}: Done streaming execution (queue path)", this->name); + + timer.stop(PROCESS); + double processTime = timer.template elapsed(PROCESS); + OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime); + return StatusCode::OK; + } catch (...) 
{ + SPDLOG_DEBUG("Graph {}: Exception while processing MediaPipe graph (queue path)", this->name); + return Status(StatusCode::UNKNOWN_ERROR, "Exception while processing MediaPipe graph"); + } + } + + template + Status inferStreamWithoutQueue(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) { SPDLOG_DEBUG("Start MediapipeGraphExecutor::inferEx mediapipe graph: {} execution", this->name); std::mutex sendMutex; try { @@ -255,6 +477,10 @@ class MediapipeGraphExecutor { // Init MP_RETURN_ON_FAIL(graph.Initialize(this->config), "graph initialization", StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); } + auto llmContextStatus = initializeLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } enum : unsigned int { PROCESS, TIMER_END2 @@ -299,10 +525,11 @@ class MediapipeGraphExecutor { OVMS_RETURN_ON_FAIL(deserializeInputSidePacketsFromFirstRequestImpl(inputSidePackets, req)); #if (PYTHON_DISABLE == 0) inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.pythonNodeResourcesMap) - .At(STARTING_TIMESTAMP); + .At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); #endif - inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(STARTING_TIMESTAMP); + inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiExecutionContextMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); // Add image generation side packet in case image generation allow for streaming } @@ -380,6 +607,7 @@ class MediapipeGraphExecutor { if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } + resetLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); SPDLOG_DEBUG("Graph {}: Done execution", this->name); } @@ -394,4 +622,44 @@ class MediapipeGraphExecutor { } }; +template +absl::Status MyFunctor::handlePacket(const ::mediapipe::Packet& packet) { + auto status = onPacketReadySerializeImpl( + this->requestId, + this->exec.name, + this->exec.version, + this->outputStreamName, + this->packetType, + packet, + response); + return status.ok() ? 
absl::OkStatus() : absl::Status(absl::StatusCode::kInternal, "Failed to serialize output packet"); +} + +template <typename ReaderWriterType> +absl::Status StreamingFunctor<ReaderWriterType>::handlePacket(const ::mediapipe::Packet& packet) { + OVMS_PROFILE_SCOPE("Mediapipe Packet Ready Callback"); + try { + std::lock_guard<std::mutex> lock(sendMutex); + auto status = onPacketReadySerializeAndSendImpl( + "" /*no ids for streaming*/, + executorName, + executorVersion, + outputStreamName, + packetType, + packet, + serverReaderWriter); + if (!status.ok()) { + SPDLOG_DEBUG("Error in send packet routine: {}", status.string()); + return absl::Status(absl::StatusCode::kInternal, "Error in send packet routine"); + } + auto now = std::chrono::system_clock::now(); + auto currentTimestamp = ::mediapipe::Timestamp(std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count()); + OBSERVE_IF_ENABLED(metricReporter->getRequestLatencyMetric(executionContext), (currentTimestamp - packet.Timestamp()).Microseconds()); + INCREMENT_IF_ENABLED(metricReporter->getResponsesMetric(executionContext)); + return absl::OkStatus(); + } catch (...) { + SPDLOG_DEBUG("Error occurred during packet serialization in mediapipe graph: {}", executorName); + return absl::Status(absl::StatusCode::kCancelled, "Error during packet serialization"); + } +} } // namespace ovms diff --git a/src/mediapipe_internal/outputstreamobserver.hpp b/src/mediapipe_internal/outputstreamobserver.hpp new file mode 100644 index 0000000000..5c267e4187 --- /dev/null +++ b/src/mediapipe_internal/outputstreamobserver.hpp @@ -0,0 +1,64 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../execution_context.hpp" +#include "../model_metric_reporter.hpp" +#include "../profiler.hpp" +#include "../status.hpp" +#include "../timer.hpp" +#pragma warning(push) +#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/port/status.h" +#pragma GCC diagnostic pop +#pragma warning(pop) +#include "mediapipe_utils.hpp" +#include "packettypes.hpp" +#include "graphqueue.hpp" + +namespace ovms { +class PythonBackend; +class ServableMetricReporter; +class OutputStreamObserverI { +public: + virtual absl::Status handlePacket(const ::mediapipe::Packet& packet) = 0; + virtual ~OutputStreamObserverI() = default; +}; +class NullOutputStreamObserver : public OutputStreamObserverI { +public: + NullOutputStreamObserver() = default; + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + SPDLOG_ERROR("NullOutputStreamObserver::handlePacket called - graph observer was not replaced before execution"); + throw std::runtime_error("NullOutputStreamObserver should have been replaced before graph execution"); + } +}; +// Mutable holder for an observer, allowing the observer implementation to be +// swapped while the map that owns this holder remains const. +struct ObserverHolder { + std::shared_ptr<OutputStreamObserverI> current; +}; +} // namespace ovms diff --git a/src/python/BUILD b/src/python/BUILD index f4fd4c571e..539abaf355 100644 --- a/src/python/BUILD +++ b/src/python/BUILD @@ -75,7 +75,7 @@ ovms_cc_library( "pythonexecutorcalculator_cc_proto", "utils", ], - visibility = ["//visibility:private"], + visibility = ["//visibility:public"], # TODO: revisit whether public visibility is required here alwayslink = 1, data = ["//src/python/binding:pyovms.so"], ) diff --git a/src/systeminfo.cpp b/src/systeminfo.cpp index 16531df9b6..ff0a6461fe 100644 --- a/src/systeminfo.cpp +++ b/src/systeminfo.cpp @@ -15,16 +15,12 @@ //***************************************************************************** #include "systeminfo.hpp" -#include -#include -#include +#include #include -#include "logging.hpp" -#include "status.hpp" - namespace ovms { uint16_t getCoreCount() { - return std::thread::hardware_concurrency(); + auto cores = std::thread::hardware_concurrency(); + return cores == 0 ? 
1 : static_cast<uint16_t>(cores); } } // namespace ovms diff --git a/src/systeminfo.hpp b/src/systeminfo.hpp index 2dc66fffe9..87d91d018f 100644 --- a/src/systeminfo.hpp +++ b/src/systeminfo.hpp @@ -22,4 +22,5 @@ namespace ovms { * @return uint16_t Available number of cores in the system */ uint16_t getCoreCount(); + } // namespace ovms diff --git a/src/test/ensemble_config_change_stress.cpp b/src/test/ensemble_config_change_stress.cpp index 7fa5a70d31..6ebaeb0e18 100644 --- a/src/test/ensemble_config_change_stress.cpp +++ b/src/test/ensemble_config_change_stress.cpp @@ -813,7 +813,8 @@ TEST_F(StressMediapipeChanges, ReloadMediapipeGraphDuringMetadataLoad) { SetUpConfig(basicMediapipeConfig); bool performWholeConfigReload = true; std::set<StatusCode> requiredLoadResults = {StatusCode::OK}; // we expect full continuity of operation - std::set<StatusCode> allowedLoadResults = {}; + // Graph path change triggers real reload, briefly entering NOT_LOADED_YET state + std::set<StatusCode> allowedLoadResults = {StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET}; performStressTest( &ConfigChangeStressTest::triggerKFSGetPipelineMetadataInALoop, &ConfigChangeStressTest::reloadMediapipeGraph, @@ -821,4 +822,90 @@ requiredLoadResults, allowedLoadResults); } + +class StressMediapipeQueueChanges : public StressPipelineConfigChanges { + const std::string modelName = PIPELINE_1_DUMMY_NAME; + const std::string modelInputName = "b"; + const std::string modelOutputName = "a"; + +public: + std::string getServableName() override { + return modelName; + } + void SetUp() override { + SetUpCAPIServerInstance(createStressTestPipelineOneDummyConfig()); + } +}; +TEST_F(StressMediapipeQueueChanges, AddGraphDuringPredictLoad) { + // we add another graph definition during load (queue-enabled graph) + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set<StatusCode> requiredLoadResults = {StatusCode::OK}; // we expect full continuity of operation + std::set<StatusCode> allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::addNewMediapipeQueueGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, RemoveGraphDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set<StatusCode> requiredLoadResults = {StatusCode::OK, + StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE}; + std::set<StatusCode> allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::removeMediapipeQueueGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, RemoveModelDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + // With queue path, pre-initialized graphs may keep working with cached sessions + // even after model removal, so MEDIAPIPE_PRECONDITION_FAILED may not occur + std::set<StatusCode> requiredLoadResults = { + StatusCode::OK, + }; + std::set<StatusCode> allowedLoadResults = { + StatusCode::MEDIAPIPE_EXECUTION_ERROR, + StatusCode::MEDIAPIPE_GRAPH_ADD_PACKET_INPUT_STREAM, + StatusCode::MEDIAPIPE_PRECONDITION_FAILED, + }; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::removeMediapipeQueueGraphUsedModel, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, 
ReloadModelDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK}; + std::set allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::reloadMediapipeQueueGraphUsedModel, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, ReloadMediapipeGraphDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK}; + std::set allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::reloadMediapipeQueueGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +// Status and metadata tests are not duplicated for queue fixture because +// neither status nor metadata operations exercise the graph queue path. #endif diff --git a/src/test/graph_export_test.cpp b/src/test/graph_export_test.cpp index 777792e7d3..26a1684596 100644 --- a/src/test/graph_export_test.cpp +++ b/src/test/graph_export_test.cpp @@ -14,6 +14,7 @@ // limitations under the License. //***************************************************************************** #include +#include #include #include @@ -504,10 +505,65 @@ class GraphCreationTest : public TestWithTempDir { TestWithTempDir::TearDown(); } - // Removes # OpenVINO Model Server REPLACE_PROJECT_VERSION comment added for debug purpose in graph export at the begging of graph.pbtxt - // This string differs per build and setup - std::string removeVersionString(std::string input) { - return input.erase(0, input.find("\n") + 1); + std::string getExpectedGraphQueueSizeDirective(const ovms::HFSettingsImpl& hfSettings) const { + if (hfSettings.task == ovms::IMAGE_GENERATION_GRAPH) { + return "1"; + } + return "AUTO"; + } + + std::string createGraphAndReadContents(const ovms::HFSettingsImpl& hfSettings) { + std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; + std::unique_ptr graphExporter = std::make_unique(); + auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); + if (status != ovms::StatusCode::OK) { + ADD_FAILURE() << status.string(); + return ""; + } + return GetFileContents(graphPath); + } + + void assertGraphQueueHeader(const std::string& graphContents, const ovms::HFSettingsImpl& hfSettings) { + const std::string queueLinePrefix = "# OVMS_GRAPH_QUEUE_SIZE: "; + auto firstLineEnd = graphContents.find("\n"); + ASSERT_NE(firstLineEnd, std::string::npos) << graphContents; + auto queueLineStart = firstLineEnd + 1; + auto queueLineEnd = graphContents.find("\n", queueLineStart); + ASSERT_NE(queueLineEnd, std::string::npos) << graphContents; + + std::string actualQueueLine = graphContents.substr(queueLineStart, queueLineEnd - queueLineStart); + ASSERT_EQ(0, actualQueueLine.rfind(queueLinePrefix, 0)) << graphContents; + std::string expectedQueueLine = queueLinePrefix + getExpectedGraphQueueSizeDirective(hfSettings); + ASSERT_EQ(expectedQueueLine, actualQueueLine) << graphContents; + } + + void assertCreatedGraphEquals(const ovms::HFSettingsImpl& hfSettings, const std::string& expectedGraphContents, bool assertVersion = false) { + std::string graphContents = createGraphAndReadContents(hfSettings); + if (assertVersion) { + ASSERT_EQ(0, graphContents.find(getVersionString())) << graphContents; + } + 
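+        // For reference (hedged reconstruction from the assertions in this fixture),
+        // a freshly exported graph.pbtxt is expected to start with two generated
+        // header lines before the template body:
+        //
+        //   # OpenVINO Model Server <build-specific version string>
+        //   # OVMS_GRAPH_QUEUE_SIZE: AUTO     <- "1" for IMAGE_GENERATION_GRAPH
+        //   input_stream: ...                 <- template body starts here
+        //
+        // Only the two header lines vary per build/task; the body is compared verbatim.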
assertGraphQueueHeader(graphContents, hfSettings); + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; + } + + // Removes generated graph header lines (version and optional queue size directive) + // which differ across build/runtime setup. + std::string removeGeneratedGraphHeaders(std::string input) { + auto firstLineEnd = input.find("\n"); + if (firstLineEnd == std::string::npos) { + return ""; + } + input.erase(0, firstLineEnd + 1); + + const std::string queueLinePrefix = "# OVMS_GRAPH_QUEUE_SIZE:"; + if (input.rfind(queueLinePrefix, 0) == 0) { + auto secondLineEnd = input.find("\n"); + if (secondLineEnd == std::string::npos) { + return ""; + } + input.erase(0, secondLineEnd + 1); + } + return input; } std::string getVersionString() { @@ -519,14 +575,7 @@ class GraphCreationTest : public TestWithTempDir { TEST_F(GraphCreationTest, positiveDefaultWithVersionString) { ovms::HFSettingsImpl hfSettings; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedDefaultGraphContents; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedDefaultGraphContents, true); } TEST_F(GraphCreationTest, positiveRerankWithVersionString) { @@ -535,14 +584,7 @@ TEST_F(GraphCreationTest, positiveRerankWithVersionString) { hfSettings.task = ovms::RERANK_GRAPH; ovms::RerankGraphSettingsImpl rerankGraphSettings; hfSettings.graphSettings = std::move(rerankGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedRerankGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedRerankGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveEmbeddingsWithVersionString) { @@ -551,14 +593,7 @@ TEST_F(GraphCreationTest, positiveEmbeddingsWithVersionString) { hfSettings.task = ovms::EMBEDDINGS_GRAPH; ovms::EmbeddingsGraphSettingsImpl embeddingsGraphSettings; hfSettings.graphSettings = std::move(embeddingsGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedEmbeddingsGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedEmbeddingsGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveTextToSpeechWithVersionString) { @@ -566,14 +601,7 @@ TEST_F(GraphCreationTest, positiveTextToSpeechWithVersionString) { hfSettings.task = ovms::TEXT_TO_SPEECH_GRAPH; ovms::TextToSpeechGraphSettingsImpl textToSpeechGraphSettings; hfSettings.graphSettings = 
std::move(textToSpeechGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedTextToSpeechGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedTextToSpeechGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveSTTWithVersionString) { @@ -581,14 +609,7 @@ TEST_F(GraphCreationTest, positiveSTTWithVersionString) { hfSettings.task = ovms::SPEECH_TO_TEXT_GRAPH; ovms::SpeechToTextGraphSettingsImpl speechToTextGraphSettings; hfSettings.graphSettings = std::move(speechToTextGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedSpeechToTextGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedSpeechToTextGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveImageGenWithVersionString) { @@ -596,25 +617,12 @@ TEST_F(GraphCreationTest, positiveImageGenWithVersionString) { hfSettings.task = ovms::IMAGE_GENERATION_GRAPH; ovms::ImageGenerationGraphSettingsImpl imageGenerationGraphSettings; hfSettings.graphSettings = std::move(imageGenerationGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedImageGenerationGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedImageGenerationGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveDefault) { ovms::HFSettingsImpl hfSettings; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedDefaultGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedDefaultGraphContents); } TEST_F(GraphCreationTest, positiveDraftAndFuse) { @@ -624,45 +632,24 @@ TEST_F(GraphCreationTest, positiveDraftAndFuse) { graphSettings.dynamicSplitFuse = "false"; hfSettings.graphSettings = std::move(graphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - 
ASSERT_EQ(expectedDraftAndFuseGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedDraftAndFuseGraphContents); } TEST_F(GraphCreationTest, positiveGGUF) { this->filesToPrintInCaseOfFailure.emplace_back("graph.pbtxt"); ovms::HFSettingsImpl hfSettings; hfSettings.ggufFilename = "PRETTY_GOOD_GGUF_MODEL.gguf"; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGGUFGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedGGUFGraphContents); } TEST_F(GraphCreationTest, WillOverwriteExistingGraphPbtxtGGUF) { this->filesToPrintInCaseOfFailure.emplace_back("graph.pbtxt"); ovms::HFSettingsImpl hfSettings; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - hfSettings.ggufFilename = "PRETTY_GOOD_GGUF_MODEL.gguf"; - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGGUFGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedGGUFGraphContents); hfSettings.ggufFilename = "PRETTY_GOOD_GGUF_MODEL_Q8-00001-of-20000.gguf"; - status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGGUFGraphContents2, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedGGUFGraphContents2); } TEST_F(GraphCreationTest, rerankPositiveNonDefault) { @@ -677,13 +664,7 @@ TEST_F(GraphCreationTest, rerankPositiveNonDefault) { rerankGraphSettings.maxAllowedChunks = 18; hfSettings.graphSettings = std::move(rerankGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedRerankGraphContentsNonDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedRerankGraphContentsNonDefault); } TEST_F(GraphCreationTest, rerankPositiveDefault) { @@ -693,13 +674,7 @@ TEST_F(GraphCreationTest, rerankPositiveDefault) { ovms::RerankGraphSettingsImpl rerankGraphSettings; hfSettings.graphSettings = std::move(rerankGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedRerankGraphContentsDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedRerankGraphContentsDefault); } TEST_F(GraphCreationTest, rerankCreatedPbtxtInvalid) { 
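Aside: a minimal standalone sketch of the header-stripping rule the refactored tests above rely on. Names here are illustrative, not the patch's API; the real helper is removeGeneratedGraphHeaders shown earlier in this fixture.

    #include <string>

    // Drop the build-specific version line and, when present, the optional
    // "# OVMS_GRAPH_QUEUE_SIZE:" directive that immediately follows it.
    std::string stripGeneratedHeaders(std::string graph) {
        auto dropFirstLine = [](std::string& s) {
            auto eol = s.find('\n');
            s.erase(0, eol == std::string::npos ? s.size() : eol + 1);
        };
        dropFirstLine(graph);  // version line is always emitted first
        if (graph.rfind("# OVMS_GRAPH_QUEUE_SIZE:", 0) == 0) {
            dropFirstLine(graph);  // queue directive only on queue-aware exports
        }
        return graph;
    }

This mirrors the two-line header contract asserted by assertGraphQueueHeader: everything after the generated lines must match the expected template verbatim.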
@@ -733,13 +708,7 @@ TEST_F(GraphCreationTest, embeddingsPositiveNonDefault) { embeddingsGraphSettings.truncate = "true"; embeddingsGraphSettings.pooling = "LAST"; hfSettings.graphSettings = std::move(embeddingsGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedEmbeddingsGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedEmbeddingsGraphContents); } TEST_F(GraphCreationTest, embeddingsPositiveDefault) { @@ -748,13 +717,7 @@ TEST_F(GraphCreationTest, embeddingsPositiveDefault) { ovms::EmbeddingsGraphSettingsImpl embeddingsGraphSettings; hfSettings.graphSettings = std::move(embeddingsGraphSettings); hfSettings.exportSettings.pluginConfig.numStreams = 1; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedEmbeddingsGraphContentsDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedEmbeddingsGraphContentsDefault); } TEST_F(GraphCreationTest, embeddingsCreatedPbtxtInvalid) { @@ -808,13 +771,7 @@ TEST_F(GraphCreationTest, textToSpeechPositiveNonDefault) { hfSettings.exportSettings.modelPath = "/model1/path"; hfSettings.exportSettings.pluginConfig.numStreams = 2; hfSettings.graphSettings = std::move(textToSpeechGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedTextToSpeechGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedTextToSpeechGraphContents); } TEST_F(GraphCreationTest, textToSpeechPositiveDefault) { @@ -822,13 +779,7 @@ TEST_F(GraphCreationTest, textToSpeechPositiveDefault) { hfSettings.task = ovms::TEXT_TO_SPEECH_GRAPH; ovms::TextToSpeechGraphSettingsImpl textToSpeechGraphSettings; hfSettings.graphSettings = std::move(textToSpeechGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedTextToSpeechGraphContentsDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedTextToSpeechGraphContentsDefault); } TEST_F(GraphCreationTest, textToSpeechCreatedPbtxtInvalid) { @@ -857,13 +808,7 @@ TEST_F(GraphCreationTest, speechToTextPositiveNonDefault) { hfSettings.exportSettings.modelPath = "/model1/path"; hfSettings.exportSettings.pluginConfig.numStreams = 2; hfSettings.graphSettings = std::move(speechToTextGraphSettings); 
- std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedSpeechToTextGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedSpeechToTextGraphContents); } TEST_F(GraphCreationTest, speechToTextPositiveDefault) { @@ -871,13 +816,7 @@ TEST_F(GraphCreationTest, speechToTextPositiveDefault) { hfSettings.task = ovms::SPEECH_TO_TEXT_GRAPH; ovms::SpeechToTextGraphSettingsImpl speechToTextGraphSettings; hfSettings.graphSettings = std::move(speechToTextGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedSpeechToTextGraphContentsDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedSpeechToTextGraphContentsDefault); } TEST_F(GraphCreationTest, speechToTextCreatedPbtxtInvalid) { @@ -906,13 +845,7 @@ TEST_F(GraphCreationTest, positivePluginConfigAll) { hfSettings.graphSettings = std::move(graphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedFullPluginGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedFullPluginGraphContents); } TEST_F(GraphCreationTest, positiveWithParsersAndToolGuidedGeneration) { @@ -924,13 +857,7 @@ TEST_F(GraphCreationTest, positiveWithParsersAndToolGuidedGeneration) { hfSettings.graphSettings = std::move(graphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContentsWithResponseParser, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedGraphContentsWithResponseParser); } TEST_F(GraphCreationTest, positivePluginConfigOne) { @@ -939,13 +866,7 @@ TEST_F(GraphCreationTest, positivePluginConfigOne) { hfSettings.exportSettings.pluginConfig.kvCachePrecision = "u8"; hfSettings.graphSettings = std::move(graphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedOneSettingPluginGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, 
 
 TEST_F(GraphCreationTest, negativeCreateFileWrongDirectoryPaths) {
@@ -1016,11 +937,8 @@ TEST_F(GraphCreationTest, positiveTextGeneration) {
     hfSettings.graphSettings = std::move(graphSettings);
     hfSettings.exportSettings.targetDevice = "NPU";
     hfSettings.exportSettings.pluginConfig.useNpuPrefixCaching = true;
-    std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt";
-    std::string subconfigPath = ovms::FileSystem::appendSlash(this->directoryPath) + "subconfig.json";
-    std::unique_ptr<ovms::GraphExport> graphExporter = std::make_unique<ovms::GraphExport>();
-    auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings);
-    ASSERT_EQ(status, ovms::StatusCode::OK);
+    std::string graphContents = createGraphAndReadContents(hfSettings);
+    assertGraphQueueHeader(graphContents, hfSettings);
 }
 
 TEST_F(GraphCreationTest, imageGenerationPositiveDefault) {
@@ -1028,13 +946,7 @@ TEST_F(GraphCreationTest, imageGenerationPositiveDefault) {
     hfSettings.task = ovms::IMAGE_GENERATION_GRAPH;
     ovms::ImageGenerationGraphSettingsImpl imageGenerationGraphSettings;
     hfSettings.graphSettings = std::move(imageGenerationGraphSettings);
-    std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt";
-    std::unique_ptr<ovms::GraphExport> graphExporter = std::make_unique<ovms::GraphExport>();
-    auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings);
-    ASSERT_EQ(status, ovms::StatusCode::OK);
-
-    std::string graphContents = GetFileContents(graphPath);
-    ASSERT_EQ(expectedImageGenerationGraphContentsDefault, removeVersionString(graphContents)) << graphContents;
+    assertCreatedGraphEquals(hfSettings, expectedImageGenerationGraphContentsDefault);
 }
 
 TEST_F(GraphCreationTest, imageGenerationPositiveFull) {
@@ -1050,13 +962,7 @@ TEST_F(GraphCreationTest, imageGenerationPositiveFull) {
     imageGenerationGraphSettings.defaultNumInferenceSteps = 2;
     imageGenerationGraphSettings.maxNumInferenceSteps = 3;
     hfSettings.graphSettings = std::move(imageGenerationGraphSettings);
-    std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt";
-    std::unique_ptr<ovms::GraphExport> graphExporter = std::make_unique<ovms::GraphExport>();
-    auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings);
-    ASSERT_EQ(status, ovms::StatusCode::OK);
-
-    std::string graphContents = GetFileContents(graphPath);
-    ASSERT_EQ(expectedImageGenerationGraphContents, removeVersionString(graphContents)) << graphContents;
+    assertCreatedGraphEquals(hfSettings, expectedImageGenerationGraphContents);
 }
 
 TEST_F(GraphCreationTest, pluginConfigAsString) {
     ovms::ExportSettings exportSettings;
diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp
index 94648d0e68..5f03176a25 100644
--- a/src/test/http_openai_handler_test.cpp
+++ b/src/test/http_openai_handler_test.cpp
@@ -212,7 +212,7 @@ Key: content-type; Value: application/json
 }
 
 JSON Parser:
-{"model":"gpt","stream":false,"messages":[]}0)";
+{"model":"gpt","stream":false,"messages":[]}0)";  // non-queue path: fresh graph, poller gets first packet only
     ASSERT_EQ(response, expectedResponse);
 }
 
@@ -244,7 +244,7 @@ Key: test2; Value: header
 }
 
 JSON Parser:
-{"model":"gpt","stream":false,"messages":[]}0)";
+{"model":"gpt","stream":false,"messages":[]}0)";  // non-queue path: fresh graph, poller gets first packet only
     ASSERT_EQ(response, expectedResponse);
 }
 
@@ -1777,6 +1777,98 @@ TEST_F(HttpOpenAIHandlerParsingTest, responseFormatNullValue) {
     EXPECT_FALSE(apiHandler->getResponseFormat().has_value());
 }
 
+// ==================== HttpOpenAIHandlerWithQueueTest ====================
+// Same as HttpOpenAIHandlerTest but uses config with graph_queue_size=1
+// to verify the graph pool (GraphQueue) path works correctly.
+class HttpOpenAIHandlerWithQueueTest : public ::testing::Test {
+protected:
+    ovms::Server& server = ovms::Server::instance();
+    std::unique_ptr<ovms::HttpRestApiHandler> handler;
+
+    std::unique_ptr<std::thread> t;
+    std::string port = "9173";
+
+    std::unordered_map<std::string, std::string> headers{{"content-type", "application/json"}};
+    ovms::HttpRequestComponents comp;
+    std::string endpoint = "/v3/chat/completions";
+    std::shared_ptr<MockedServerRequestInterface> writer;
+    std::shared_ptr<MockedMultiPartParser> multiPartParser;
+    std::string response;
+    ovms::HttpResponseComponents responseComponents;
+
+    void SetUpServer(const char* configPath) {
+        ::SetUpServer(this->t, this->server, this->port, configPath);
+        EnsureServerStartedWithTimeout(this->server, 5);
+        handler = std::make_unique<ovms::HttpRestApiHandler>(server, 5);
+    }
+
+    void SetUp() {
+        writer = std::make_shared<MockedServerRequestInterface>();
+        multiPartParser = std::make_shared<MockedMultiPartParser>();
+        SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json").c_str());
+        ASSERT_EQ(handler->parseRequestComponents(comp, "POST", endpoint, headers), ovms::StatusCode::OK);
+    }
+
+    void TearDown() {
+        handler.reset();
+        server.setShutdownRequest(1);
+        t->join();
+        server.setShutdownRequest(0);
+    }
+};
+
+TEST_F(HttpOpenAIHandlerWithQueueTest, UnaryWithQueue) {
+    std::string requestBody = R"(
+        {
+            "model": "gpt",
+            "stream": false,
+            "messages": []
+        }
+    )";
+
+    const std::string URI = "/v3/something";
+    ASSERT_EQ(
+        handler->dispatchToProcessor(URI, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::OK);
+
+    std::string expectedResponse = R"(URI: /v3/something
+Key: content-type; Value: application/json
+Body:
+
+        {
+            "model": "gpt",
+            "stream": false,
+            "messages": []
+        }
+    
+JSON Parser:
+{"model":"gpt","stream":false,"messages":[]}012345678)";
+    ASSERT_EQ(response, expectedResponse);
+}
+
+TEST_F(HttpOpenAIHandlerWithQueueTest, StreamWithQueue) {
+    std::string requestBody = R"(
+        {
+            "model": "gpt",
+            "stream": true,
+            "messages": []
+        }
+    )";
+
+    EXPECT_CALL(*writer, PartialReplyBegin(::testing::_)).WillOnce(testing::Invoke([](std::function<void()> fn) { fn(); }));
+    EXPECT_CALL(*writer, PartialReplyEnd()).Times(1);
+    // The calculator produces 9 packets (timestamps 0-8) via loopback,
+    // each containing the accumulated body + timestamp. The '8' in the body stops the loop.
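+    // (note, hedged) IsDisconnected() appears to be polled once per produced
+    // packet on the streaming path, hence the matching Times(9) expectations below.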
+ EXPECT_CALL(*writer, PartialReply(::testing::_)).Times(9); + EXPECT_CALL(*writer, IsDisconnected()).Times(9); + + ASSERT_EQ( + handler->dispatchToProcessor("/v3/completions", requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::PARTIAL_END); + + // For streaming, the response body stays empty (content goes through PartialReply callbacks) + ASSERT_EQ(response, ""); +} TEST_F(HttpOpenAIHandlerParsingTest, parseChatTemplateKwargsWithBooleanValue) { std::string json = R"({ "model": "llama", diff --git a/src/test/llm/config_queue.json b/src/test/llm/config_queue.json new file mode 100644 index 0000000000..1e16802ed9 --- /dev/null +++ b/src/test/llm/config_queue.json @@ -0,0 +1,9 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name":"lm_cb_regular_queue", + "graph_path":"/ovms/src/test/llm/lm_cb_regular_queue.pbtxt" + } + ] +} diff --git a/src/test/llm/llmnode_test.cpp b/src/test/llm/llmnode_test.cpp index 2e52e4fa59..a3def1a82b 100644 --- a/src/test/llm/llmnode_test.cpp +++ b/src/test/llm/llmnode_test.cpp @@ -174,6 +174,51 @@ std::shared_ptr LLMFlowHttpTest::cbPipe; std::shared_ptr LLMFlowHttpTest::llmExecutorWrapper; std::unique_ptr LLMFlowHttpTest::t; +class LLMFlowHttpQueueGraphTest : public ::testing::Test { +protected: + static std::unique_ptr t; + +public: + std::unique_ptr handler; + std::unordered_map headers{{"content-type", "application/json"}}; + ovms::HttpRequestComponents comp; + const std::string endpointChatCompletions = "/v3/chat/completions"; + const std::string endpointCompletions = "/v3/completions"; + std::shared_ptr writer; + std::shared_ptr multiPartParser; + std::string response; + rapidjson::Document parsedResponse; + ovms::HttpResponseComponents responseComponents; + + static void SetUpTestSuite() { + std::string port = "9173"; + ovms::Server& server = ovms::Server::instance(); + ::SetUpServer(t, server, port, getGenericFullPathForSrcTest("/ovms/src/test/llm/config_queue.json").c_str(), 60); + } + + static void TearDownTestSuite() { + ovms::Server& server = ovms::Server::instance(); + server.setShutdownRequest(1); + t->join(); + server.setShutdownRequest(0); + } + + void SetUp() { + writer = std::make_shared(); + multiPartParser = std::make_shared(); + ON_CALL(*writer, PartialReplyBegin(::testing::_)).WillByDefault(testing::Invoke([](std::function fn) { fn(); })); + ovms::Server& server = ovms::Server::instance(); + handler = std::make_unique(server, 5); + ASSERT_EQ(handler->parseRequestComponents(comp, "POST", endpointCompletions, headers), ovms::StatusCode::OK); + } + + void TearDown() { + handler.reset(); + } +}; + +std::unique_ptr LLMFlowHttpQueueGraphTest::t; + // --------------------------------------- OVMS LLM nodes tests /* @@ -249,6 +294,157 @@ TEST_P(LLMFlowHttpTestParameterized, unaryCompletionsJson) { } } +TEST_F(LLMFlowHttpQueueGraphTest, unaryCompletionsJsonQueueGraph) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": false, + "seed" : 1, + "best_of": 16, + "max_tokens": 5, + "prompt": "What is OpenVINO?" 
+ } + )"; + + ASSERT_EQ( + handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + for (auto& choice : parsedResponse["choices"].GetArray()) { + ASSERT_TRUE(choice["finish_reason"].IsString()); + ASSERT_FALSE(choice["logprobs"].IsObject()); + ASSERT_TRUE(choice["text"].IsString()); + } + + ASSERT_TRUE(parsedResponse["usage"].IsObject()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["prompt_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["completion_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["total_tokens"].IsInt()); + ASSERT_EQ(parsedResponse["usage"].GetObject()["completion_tokens"].GetInt(), 5); + EXPECT_STREQ(parsedResponse["model"].GetString(), "lm_cb_regular_queue"); + EXPECT_STREQ(parsedResponse["object"].GetString(), "text_completion"); +} + +TEST_F(LLMFlowHttpQueueGraphTest, unaryChatCompletionsJsonQueueGraph) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": false, + "seed" : 1, + "max_tokens": 5, + "messages": [ + { + "role": "user", + "content": "What is OpenVINO?" + } + ] + } + )"; + + ASSERT_EQ( + handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + for (auto& choice : parsedResponse["choices"].GetArray()) { + ASSERT_TRUE(choice["finish_reason"].IsString()); + ASSERT_TRUE(choice["message"].IsObject()); + ASSERT_TRUE(choice["message"]["content"].IsString()); + EXPECT_STREQ(choice["message"]["role"].GetString(), "assistant"); + } + + ASSERT_TRUE(parsedResponse["usage"].IsObject()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["prompt_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["completion_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["total_tokens"].IsInt()); + ASSERT_EQ(parsedResponse["usage"].GetObject()["completion_tokens"].GetInt(), 5); + EXPECT_STREQ(parsedResponse["model"].GetString(), "lm_cb_regular_queue"); + EXPECT_STREQ(parsedResponse["object"].GetString(), "chat.completion"); +} + +TEST_F(LLMFlowHttpQueueGraphTest, streamChatCompletionsQueueGraph) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": true, + "seed" : 1, + "max_tokens": 5, + "ignore_eos": true, + "messages": [ + { + "role": "user", + "content": "What is OpenVINO?" 
+ } + ] + } + )"; + ON_CALL(*writer, PartialReply).WillByDefault([this](std::string response) { + rapidjson::Document d; + std::string dataPrefix = "data:"; + ASSERT_STREQ(response.substr(0, dataPrefix.size()).c_str(), dataPrefix.c_str()); + size_t pos = response.find("\n"); + ASSERT_NE(pos, response.npos); + rapidjson::ParseResult parsingSucceeded = d.Parse(response.substr(dataPrefix.size(), (pos - dataPrefix.size())).c_str()); + ASSERT_EQ(parsingSucceeded.Code(), 0); + ASSERT_TRUE(d["choices"].IsArray()); + ASSERT_EQ(d["choices"].Capacity(), 1); + int i = 0; + for (auto& choice : d["choices"].GetArray()) { + if (choice["finish_reason"].IsString()) { + EXPECT_STREQ(choice["finish_reason"].GetString(), "length"); + } else { + ASSERT_TRUE(choice["finish_reason"].IsNull()); + } + ASSERT_EQ(choice["index"], i++); + ASSERT_TRUE(choice["delta"].IsObject()); + ASSERT_TRUE(choice["delta"]["content"].IsString()); + } + EXPECT_STREQ(d["model"].GetString(), "lm_cb_regular_queue"); + EXPECT_STREQ(d["object"].GetString(), "chat.completion.chunk"); + }); + ASSERT_EQ( + handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::PARTIAL_END); +} + +// Test that verifies graph reuse works correctly with queue size 1 +// Sends 2 sequential requests to ensure the same graph instance is reused +TEST_F(LLMFlowHttpQueueGraphTest, queueGraphReuseTwoRequests) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": false, + "seed" : 1, + "max_tokens": 5, + "prompt": "What is OpenVINO?" + } + )"; + + // First request + ASSERT_EQ( + handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + ASSERT_TRUE(parsedResponse["choices"].GetArray()[0]["text"].IsString()); + + // Second request - reuses the same graph from the queue + // This validates that timestamp increment works for graph reuse + response.clear(); + ASSERT_EQ( + handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + ASSERT_TRUE(parsedResponse["choices"].GetArray()[0]["text"].IsString()); + // Note: Responses may differ due to KV cache state despite same seed +} + TEST_P(LLMFlowHttpTestParameterized, unaryCompletionsJsonEchoWithCompletion) { auto params = GetParam(); // TODO: In the next step we should break this suite into smaller ones, use proper configuration instead of skipping diff --git a/src/test/llm/lm_cb_regular_queue.pbtxt b/src/test/llm/lm_cb_regular_queue.pbtxt new file mode 100644 index 0000000000..60ef13f6b7 --- /dev/null +++ b/src/test/llm/lm_cb_regular_queue.pbtxt @@ -0,0 +1,47 @@ +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# OVMS_GRAPH_QUEUE_SIZE: 1 +input_stream: "HTTP_REQUEST_PAYLOAD:input" +output_stream: "HTTP_RESPONSE_PAYLOAD:output" +node { + name: "llmNode1" + calculator: "HttpLLMCalculator" + input_side_packet: "LLM_NODE_RESOURCES:llm" + input_side_packet: "LLM_NODE_EXECUTION_CONTEXTS:llm_ctx" + input_stream: "LOOPBACK:loopback" + input_stream: "HTTP_REQUEST_PAYLOAD:input" + output_stream: "LOOPBACK:loopback" + output_stream: "HTTP_RESPONSE_PAYLOAD:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } + node_options: { + [type.googleapis.com/mediapipe.LLMCalculatorOptions]: { + models_path: "/ovms/src/test/llm_testing/HuggingFaceTB/SmolLM2-360M-Instruct" + cache_size: 1 + } + } + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler", + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "LOOPBACK:0" + } + } + } + } +} diff --git a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json index 5137dbea92..d2803b795f 100644 --- a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json +++ b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json @@ -6,4 +6,4 @@ "graph_path": "/ovms/src/test/mediapipe/graph_gpt.pbtxt" } ] -} \ No newline at end of file +} diff --git a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json new file mode 100644 index 0000000000..ea25079556 --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json @@ -0,0 +1,9 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name": "gpt", + "graph_path": "/ovms/src/test/mediapipe/graph_gpt_with_queue.pbtxt" + } + ] +} diff --git a/src/test/mediapipe/graph_gpt_with_queue.pbtxt b/src/test/mediapipe/graph_gpt_with_queue.pbtxt new file mode 100644 index 0000000000..43c2ef68c1 --- /dev/null +++ b/src/test/mediapipe/graph_gpt_with_queue.pbtxt @@ -0,0 +1,40 @@ +# +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
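+# (reviewer note, hedged) OVMS appears to read this directive from the leading
+# comment block of graph.pbtxt: a positive integer pre-creates that many graph
+# instances in the GraphQueue pool, "AUTO" derives the size from the hardware
+# thread count (oversized values are clamped), and -1 or a missing directive
+# keeps the default non-queue path; 0 and values below -1 are rejected. See the
+# MediapipeGraphQueueSizeDirective tests later in this diff.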
+# +# OVMS_GRAPH_QUEUE_SIZE: 1 +input_stream: "HTTP_REQUEST_PAYLOAD:input" +output_stream: "HTTP_RESPONSE_PAYLOAD:output" + +node: { + calculator: "OpenAIChatCompletionsMockCalculator" + input_stream: "LOOPBACK:loopback" + input_stream: "HTTP_REQUEST_PAYLOAD:input" + output_stream: "LOOPBACK:loopback" + output_stream: "HTTP_RESPONSE_PAYLOAD:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler", + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "LOOPBACK:0" + } + } + } + } +} diff --git a/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt new file mode 100644 index 0000000000..2a5016a7fb --- /dev/null +++ b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt @@ -0,0 +1,46 @@ +# +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# OVMS_GRAPH_QUEUE_SIZE: 16 +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt new file mode 100644 index 0000000000..2a5016a7fb --- /dev/null +++ b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt @@ -0,0 +1,46 @@ +# +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
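+# (note) Queue size 16 exercises a multi-instance pool. This file is a
+# byte-identical copy of graph_queue_dummyadapterfull_dummyinputnames.pbtxt
+# (same blob hash 2a5016a7fb), presumably backing a config-reload "newpath"
+# scenario.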
+# +# OVMS_GRAPH_QUEUE_SIZE: 16 +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt new file mode 100644 index 0000000000..01521b1c08 --- /dev/null +++ b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt @@ -0,0 +1,45 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipe_framework_test.cpp b/src/test/mediapipe_framework_test.cpp index 85abfbd519..a0bd350e83 100644 --- a/src/test/mediapipe_framework_test.cpp +++ b/src/test/mediapipe_framework_test.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -29,8 +30,11 @@ #include "../grpcservermodule.hpp" #include "../http_rest_api_handler.hpp" #include "../kfs_frontend/kfs_grpc_inference_service.hpp" +#include "../mediapipe_internal/outputstreamobserver.hpp" #include "../mediapipe_internal/mediapipefactory.hpp" #include "../mediapipe_internal/mediapipegraphdefinition.hpp" +#include "../mediapipe_internal/mediapipe_utils.hpp" +#include "mediapipe/framework/thread_pool_executor.h" #include "../metric_config.hpp" #include "../metric_module.hpp" #include "../model_service.hpp" @@ -79,9 +83,385 @@ class MediapipeFrameworkTest : public TestWithTempDir { class MediapipeNegativeFrameworkTest : public MediapipeFrameworkTest { }; -// purpose of this test is to ensure there is no hang in case of one of the graph nodes -// not producing output packet +using mediapipe::Adopt; +using mediapipe::CalculatorGraphConfig; 
+using mediapipe::Packet; +using mediapipe::ParseTextProtoOrDie; +using mediapipe::Timestamp; + +#define MP_ERROR_STOP(A) \ + { \ + absStatus = A; \ + if (!absStatus.ok()) { \ + const std::string absMessage = absStatus.ToString(); \ + SPDLOG_DEBUG("{}", absMessage); \ + ASSERT_TRUE(false); \ + } \ + } +TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerCheckNoInputPackets) { + // we need it only so that dummy is available via C-API + ServerGuard servGuard(getGenericFullPathForSrcTest("/ovms/src/test/configs/config_benchmark.json")); + std::string graph_proto = R"( + input_stream: "IN:input" + output_stream: "OUT:output" + node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + } + } + } + node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:input" + output_stream: "OVTENSOR:output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "b" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "a" + } + } + } + } + )"; + CalculatorGraphConfig graphConfig = + ParseTextProtoOrDie(graph_proto); + const std::string inputStreamName = "input"; + const std::string outputStreamName = "output"; + // avoid creating pollers, retrieving packets etc. + ////////////////// + // model mgmt thread + ////////////////// + // std::shared_ptr queue; + // queue = std::make_shared(graphConfig, 1); + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + // Install NullObserver + // its not per graph but per output + std::shared_ptr perGraphObserverFunctor = std::make_shared(); + const std::string outputName{"output"}; + absl::Status absStatus; + MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); + // Here ends model management + // Here starts mp graph executor + // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? 
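+    // (note) The ObserveOutputStream lambda above captures perGraphObserverFunctor
+    // by reference, so reassigning the shared_ptr later swaps packet handling on a
+    // reused graph without re-registering the observer; that is the core idea of
+    // this hot-reload POC.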
+ // get graphIdGuard from queue + // create FrontendAppropriateObserver + float expVal = 13.5; + struct MyFunctor : public OutputStreamObserverI { + float expVal; + MyFunctor(float expVal) : + expVal(expVal) { + SPDLOG_TRACE("MyFunctor observer constructed:{}", (void*)this); + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + SPDLOG_TRACE("my functor:{}", (void*)this); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + return absl::OkStatus(); + } + }; + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + // now start execution + absStatus = graph.StartRun({}); + auto datatype = ov::element::Type_t::f32; + ov::Shape shape{1, 10}; + int timestamp{0}; + std::vector data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); + SPDLOG_TRACE("Now swap Functor, we don't have to call ObserverOutputStream"); + expVal = 42; + data[0] = expVal - 1; + perGraphObserverFunctor = std::make_shared(expVal); + // now add second packet + auto inputTensor2 = std::make_unique(datatype, shape, data.data()); + // MP_ERROR_STOP(graph.AddPacketToInputStream( + // inputStreamName, Adopt(inputTensor2.release()).At(Timestamp(timestamp++)))); + // MP_ERROR_STOP(graph.WaitUntilIdle()); + MP_ERROR_STOP(graph.CloseAllPacketSources()); + MP_ERROR_STOP(graph.WaitUntilDone()); +} +TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOC) { + // we need it only so that dummy is available via C-API + ServerGuard servGuard(getGenericFullPathForSrcTest("/ovms/src/test/configs/config_benchmark.json")); + std::string graph_proto = R"( + input_stream: "IN:input" + output_stream: "OUT:output" + node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + } + } + } + node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:input" + output_stream: "OVTENSOR:output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "b" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "a" + } + } + } + } + )"; + CalculatorGraphConfig graphConfig = + ParseTextProtoOrDie(graph_proto); + const std::string inputStreamName = "input"; + const std::string outputStreamName = "output"; + // avoid creating pollers, retrieving packets etc. 
+ ////////////////// + // model mgmt thread + ////////////////// + // std::shared_ptr queue; + // queue = std::make_shared(graphConfig, 1); + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + // Install NullObserver + // its not per graph but per output + std::shared_ptr perGraphObserverFunctor = std::make_shared(); + const std::string outputName{"output"}; + absl::Status absStatus; + MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); + // Here ends model management + // Here starts mp graph executor + // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? + // get graphIdGuard from queue + // create FrontendAppropriateObserver + float expVal = 13.5; + struct MyFunctor : public OutputStreamObserverI { + float expVal; + MyFunctor(float expVal) : + expVal(expVal) { + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + return absl::OkStatus(); + } + }; + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + // now start execution + absStatus = graph.StartRun({}); + auto datatype = ov::element::Type_t::f32; + ov::Shape shape{1, 10}; + int timestamp{0}; + std::vector data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); + expVal = 42; + data[0] = expVal - 1; + perGraphObserverFunctor = std::make_shared(expVal); + // now add second packet + auto inputTensor2 = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor2.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); +} +TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOCCompare) { + // we need it only so that dummy is available via C-API + ServerGuard servGuard(getGenericFullPathForSrcTest("/ovms/src/test/configs/config_standard_dummy.json")); + std::string graph_proto = R"( + input_stream: "IN:input" + output_stream: "OUT:output" + node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + } + } + } + node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:input" + output_stream: "OVTENSOR:output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "b" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "a" + } + } + } + } + )"; + CalculatorGraphConfig graphConfig = + ParseTextProtoOrDie(graph_proto); + const std::string inputStreamName = "input"; + const std::string outputStreamName = "output"; + // avoid creating pollers, retrieving packets etc. 
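+    // (note) This benchmark times three execution models over N = 1000 requests:
+    // timer 0: one initialized graph reused, with the observer swapped per request (new flow);
+    // timer 1: a freshly built graph plus output poller per request (current flow);
+    // timer 2: a fresh graph per request scheduled on a shared ThreadPoolExecutor.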
+ ////////////////// + // model mgmt thread + ////////////////// + // std::shared_ptr queue; + // queue = std::make_shared(graphConfig, 1); + auto datatype = ov::element::Type_t::f32; + ov::Shape shape{1, 10}; + int timestamp{0}; + float expVal = 13.5; + std::vector data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + ovms::Timer<3> timer; + const std::string outputName{"output"}; + int N = 1000; + + absl::Status absStatus; + // here starts new case of ovms + { // new case of ovms + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + auto inputTensor = std::make_unique(datatype, shape, data.data()); + // Install NullObserver + // its not per graph but per output + std::shared_ptr perGraphObserverFunctor = std::make_shared(); + MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); + // Here ends model management + // Here starts mp graph executor + // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? + // get graphIdGuard from queue + // create FrontendAppropriateObserver + struct MyFunctor : public OutputStreamObserverI { + float expVal; + MyFunctor(float expVal) : + expVal(expVal) { + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + return absl::OkStatus(); + } + }; + absStatus = graph.StartRun({}); + { + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + } + std::this_thread::sleep_for(std::chrono::seconds(1)); + timer.start(0); + for (auto i = 0; i < N; ++i) { // iter begin + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); + } // iter end + timer.stop(0); + } // end of new case ovms + { // current ovms case + timer.start(1); + for (auto i = 0; i < N; ++i) { // iter begin + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + auto absStatusOrPoller = graph.AddOutputStreamPoller(outputName); + MP_ERROR_STOP(graph.StartRun({})); + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + ::mediapipe::Packet packet; + absStatusOrPoller.value().Next(&packet); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + MP_ERROR_STOP(graph.WaitUntilIdle()); + 
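+            // Full graph teardown on every iteration; this per-request
+            // setup/teardown is the cost the reused-graph flow above avoids.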
MP_ERROR_STOP(graph.CloseAllPacketSources()); + MP_ERROR_STOP(graph.WaitUntilDone()); + } // iter end + timer.stop(1); + } + { // thread pool case + // auto sharedThreadPool = std::make_shared(std::thread::hardware_concurrency()); + auto sharedThreadPool = std::make_shared(24); + timer.start(2); + for (auto i = 0; i < N; ++i) { // iter begin + ::mediapipe::CalculatorGraph graph; + MP_ERROR_STOP(graph.SetExecutor("", sharedThreadPool)); + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + auto absStatusOrPoller = graph.AddOutputStreamPoller(outputName); + MP_ERROR_STOP(graph.StartRun({})); + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + ::mediapipe::Packet packet; + absStatusOrPoller.value().Next(&packet); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + MP_ERROR_STOP(graph.WaitUntilIdle()); + MP_ERROR_STOP(graph.CloseAllPacketSources()); + MP_ERROR_STOP(graph.WaitUntilDone()); + } // iter end + timer.stop(2); + } // end of thread pool case + double ms = timer.elapsed(0) / 1000; + SPDLOG_DEBUG("{} iterations of new flow took:{} ms. FPS:{}", N, ms, N / ms * 1000); + ms = timer.elapsed(1) / 1000; + SPDLOG_DEBUG("{} iterations of old flow took:{} ms. FPS:{}", N, ms, N / ms * 1000); + ms = timer.elapsed(2) / 1000; + SPDLOG_DEBUG("{} iterations of thread pool flow took:{} ms. FPS:{}", N, ms, N / ms * 1000); + SPDLOG_DEBUG("Threads: {}", std::thread::hardware_concurrency()); +} + TEST_F(MediapipeNegativeFrameworkTest, NoOutputPacketProduced) { + // purpose of this test is to ensure there is no hang in case of one of the graph nodes + // not producing output packet SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/negative/config_no_calc_output_stream.json").c_str()); const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); @@ -99,7 +479,7 @@ TEST_F(MediapipeNegativeFrameworkTest, NoOutputPacketProduced) { } TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringProcess) { - GTEST_SKIP() << "Terminate called otherwise"; + GTEST_SKIP() << "Terminate called otherwise"; // TODO FIXME check SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/negative/config_exception_during_process.json").c_str()); const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); @@ -116,12 +496,12 @@ TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringProcess) { auto status = impl.ModelInfer(nullptr, &request, &response); ASSERT_EQ(status.error_code(), grpc::StatusCode::INVALID_ARGUMENT) << status.error_message(); } catch (std::exception& e) { - SPDLOG_ERROR("ERs"); + SPDLOG_ERROR("ER: {}", e.what()); } catch (...) { - SPDLOG_ERROR("ER"); + SPDLOG_ERROR("ER: unknown exception"); } } -TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetContract) { +TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetContract) { // TODO FIXME add checks to exception handling? 
SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/negative/config_exception_during_getcontract.json").c_str()); const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); @@ -138,9 +518,9 @@ TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetContract) { auto status = impl.ModelInfer(nullptr, &request, &response); ASSERT_EQ(status.error_code(), grpc::StatusCode::UNAVAILABLE) << status.error_message(); } catch (std::exception& e) { - SPDLOG_ERROR("ERs"); + SPDLOG_ERROR("ER: {}", e.what()); } catch (...) { - SPDLOG_ERROR("ER"); + SPDLOG_ERROR("ER: unknown exception"); } } TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetOpen) { @@ -161,9 +541,9 @@ TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetOpen) { auto status = impl.ModelInfer(nullptr, &request, &response); ASSERT_EQ(status.error_code(), grpc::StatusCode::INVALID_ARGUMENT) << status.error_message(); } catch (std::exception& e) { - SPDLOG_ERROR("ERs"); + SPDLOG_ERROR("ER: {}", e.what()); } catch (...) { - SPDLOG_ERROR("ER"); + SPDLOG_ERROR("ER: unknown exception"); } } TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringClose) { @@ -184,8 +564,8 @@ TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringClose) { auto status = impl.ModelInfer(nullptr, &request, &response); ASSERT_EQ(status.error_code(), grpc::StatusCode::INVALID_ARGUMENT) << status.error_message(); } catch (std::exception& e) { - SPDLOG_ERROR("ERs"); + SPDLOG_ERROR("ER: {}", e.what()); } catch (...) { - SPDLOG_ERROR("ER"); + SPDLOG_ERROR("ER: unknown exception"); } } diff --git a/src/test/mediapipeflow_test.cpp b/src/test/mediapipeflow_test.cpp index 55b6ab96ed..19875915f7 100644 --- a/src/test/mediapipeflow_test.cpp +++ b/src/test/mediapipeflow_test.cpp @@ -232,9 +232,11 @@ class MediapipeFlowTest : public ::testing::TestWithParam { void SetUp() override { } void TearDown() { - server.setShutdownRequest(1); - t->join(); - server.setShutdownRequest(0); + if (t) { + server.setShutdownRequest(1); + t->join(); + server.setShutdownRequest(0); + } } }; @@ -1724,7 +1726,7 @@ TEST_F(MediapipeFlowTest, InferWithParams) { ASSERT_EQ(it->shape_size(), 1); ASSERT_EQ(it->shape(0), stringParamValue.size()); const std::string& content = response.raw_output_contents(outputId); - SPDLOG_ERROR("Received output size:{} content:{}", content.size(), content); + SPDLOG_DEBUG("Received output size:{} content:{}", content.size(), content); EXPECT_EQ(content, stringParamValue); break; } @@ -1743,7 +1745,7 @@ TEST_F(MediapipeFlowTest, InferWithParams) { const std::string& content = response.raw_output_contents(outputId); ASSERT_EQ(content.size(), sizeof(bool)); const bool castContent = *((bool*)content.data()); - SPDLOG_ERROR("Received output size:{} content:{}; castContent:{}", content.size(), content, castContent); + SPDLOG_DEBUG("Received output size:{} content:{}; castContent:{}", content.size(), content, castContent); EXPECT_EQ(castContent, boolParamValue); break; } @@ -1762,7 +1764,7 @@ TEST_F(MediapipeFlowTest, InferWithParams) { const std::string& content = response.raw_output_contents(outputId); ASSERT_EQ(content.size(), sizeof(int64_t)); const int64_t castContent = *((int64_t*)content.data()); - SPDLOG_ERROR("Received output size:{} content:{}; castContent:{}", content.size(), content, castContent); + SPDLOG_DEBUG("Received output size:{} content:{}; castContent:{}", content.size(), content, castContent); EXPECT_EQ(castContent, int64ParamValue); break; 
} @@ -1991,6 +1993,28 @@ TEST(Mediapipe, MetadataDummyInputTypes) { } } } + node { + calculator: "OVMSOVCalculator" + input_stream: "B:in2" + output_stream: "A:out2" + node_options: { + [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: { + servable_name: "dummyUpper" + servable_version: "1" + } + } + } + node { + calculator: "OVMSOVCalculator" + input_stream: "B:in2" + output_stream: "A:out3" + node_options: { + [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: { + servable_name: "dummyUpper" + servable_version: "1" + } + } + } )"; ovms::MediapipeGraphConfig mgc{"mediaDummy", "", ""}; @@ -2681,13 +2705,17 @@ class MediapipeSerialization : public ::testing::Test { stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, - MediapipeServableMetricReporter* mediapipeServableMetricReporter) : - MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, pythonNodeResourcesMap, {}, {}, {}, {}, {}, nullptr, mediapipeServableMetricReporter) {} + const GraphSidePackets& sidePackets, + MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) : + MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, + sidePackets, + nullptr, mediapipeServableMetricReporter, std::move(guard)) {} }; protected: std::unique_ptr reporter; + std::shared_ptr sidePackets; + std::shared_ptr queue; std::unique_ptr executor; ::inference::ModelInferResponse mp_response; void SetUp() { @@ -2700,9 +2728,11 @@ class MediapipeSerialization : public ::testing::Test { const std::vector inputNames; const std::vector outputNames; const ::mediapipe::CalculatorGraphConfig config; - PythonNodeResourcesMap pythonNodeResourcesMap; this->reporter = std::make_unique(nullptr, nullptr, ""); // disabled reporter - executor = std::make_unique("", "", config, mapping, mapping, inputNames, outputNames, pythonNodeResourcesMap, this->reporter.get()); + sidePackets = std::make_shared(); + queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); + executor = std::make_unique("", "", config, mapping, mapping, inputNames, outputNames, *sidePackets, this->reporter.get(), std::move(guard)); } }; @@ -3099,7 +3129,7 @@ class MediapipeFlowStartTest : public TestWithTempDir { auto start = std::chrono::high_resolution_clock::now(); while (!isMpReady(waitForServable) && (std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start).count() < SERVER_START_FROM_CONFIG_TIMEOUT_SECONDS)) { - std::this_thread::sleep_for(std::chrono::microseconds(100)); + std::this_thread::sleep_for(std::chrono::microseconds(1000)); } const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); if (!grpcModule) { @@ -4036,3 +4066,119 @@ TEST(WhitelistRegistered, MediapipeSubgraphList) { ASSERT_THAT(mediapipe::SubgraphRegistry::GetRegisteredNames(), UnorderedElementsAreArray(expected)) << readableSetError(mediapipe::SubgraphRegistry::GetRegisteredNames(), expected); } + +// --- OVMS_GRAPH_QUEUE_SIZE pbtxt directive tests --- + +// Minimal valid pbtxt that MediaPipe can parse (uses a registered test calculator) +static const char* MINIMAL_PBTXT_TEMPLATE = R"( +input_stream: "HTTP_REQUEST_PAYLOAD:input" +output_stream: "HTTP_RESPONSE_PAYLOAD:output" +node: { + calculator: "OpenAIChatCompletionsMockCalculator" + input_stream: "LOOPBACK:loopback" + input_stream: "HTTP_REQUEST_PAYLOAD:input" + 
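+  # note: deliberately no OVMS_GRAPH_QUEUE_SIZE directive inside this template;
+  # makePbtxtWithDirective() prepends one per test case below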
output_stream: "LOOPBACK:loopback" + output_stream: "HTTP_RESPONSE_PAYLOAD:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler", + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "LOOPBACK:0" + } + } + } + } +} +)"; + +static std::string makePbtxtWithDirective(const std::string& directive) { + return directive + "\n" + MINIMAL_PBTXT_TEMPLATE; +} + +TEST(MediapipeGraphQueueSizeDirective, NoDirectiveMeansDisabled) { + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, MINIMAL_PBTXT_TEMPLATE); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_FALSE(mgc.getGraphQueueSize().has_value()); + // getInitialQueueSize on default mgc returns -1 + EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), -1); +} + +TEST(MediapipeGraphQueueSizeDirective, ExplicitPositiveValue) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: 4"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), 4); +} + +TEST(MediapipeGraphQueueSizeDirective, DisabledExplicitly) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: -1"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), -1); +} + +TEST(MediapipeGraphQueueSizeDirective, AutoValue) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: AUTO"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_GT(def.getMediapipeGraphConfig().getInitialQueueSize(), 0); +} + +TEST(MediapipeGraphQueueSizeDirective, ZeroRejected) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: 0"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} + +TEST(MediapipeGraphQueueSizeDirective, NegativeBelowMinusOneRejected) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: -2"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} + +TEST(MediapipeGraphQueueSizeDirective, ExceedsHardwareThreads) { + unsigned int maxThreads = std::thread::hardware_concurrency(); + if (maxThreads == 0) { + GTEST_SKIP() << "hardware_concurrency() returned 0, cannot test thread limit"; + } + int oversized = static_cast(maxThreads) + 1; + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: " + std::to_string(oversized)); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + // Queue size is clamped to 
hardware_concurrency with a warning, not rejected + EXPECT_EQ(status, ovms::StatusCode::OK); +} + +TEST(MediapipeGraphQueueSizeDirective, InvalidStringRejected) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: INVALID"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index b29bbee326..a4047680b2 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -66,10 +66,24 @@ class HfDownloaderPullHfModel : public TestWithTempDir { TestWithTempDir::TearDown(); } - // Removes # OpenVINO Model Server REPLACE_PROJECT_VERSION comment added for debug purpose in graph export at the begging of graph.pbtxt - // This string differs per build and setup - std::string removeVersionString(std::string input) { - return input.erase(0, input.find("\n") + 1); + // Removes generated graph header lines (version and optional queue size directive) + // which differ across build/runtime setup. + std::string removeGeneratedGraphHeaders(std::string input) { + auto firstLineEnd = input.find("\n"); + if (firstLineEnd == std::string::npos) { + return ""; + } + input.erase(0, firstLineEnd + 1); + + const std::string queueLinePrefix = "# OVMS_GRAPH_QUEUE_SIZE:"; + if (input.rfind(queueLinePrefix, 0) == 0) { + auto secondLineEnd = input.find("\n"); + if (secondLineEnd == std::string::npos) { + return ""; + } + input.erase(0, secondLineEnd + 1); + } + return input; } }; @@ -165,7 +179,7 @@ TEST_F(HfDownloaderPullHfModel, PositiveDownload) { ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; } TEST_F(HfDownloaderPullHfModel, PositiveDownloadAndStart) { @@ -189,7 +203,7 @@ TEST_F(HfDownloaderPullHfModel, PositiveDownloadAndStart) { ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; } TEST_F(HfDownloaderPullHfModel, ModelOutOfOvOrg) { @@ -217,7 +231,7 @@ TEST_F(HfDownloaderPullHfModel, ModelOutOfOvOrg) { ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; std::string changePath = ovms::FileSystem::joinPath({this->directoryPath, "OpenVINO"}); std::string newPath = ovms::FileSystem::joinPath({this->directoryPath, "META"}); @@ -253,7 +267,7 @@ TEST_F(HfDownloaderPullHfModel, PositiveDownloadAndStartModelOutsideOvOrg) { ASSERT_EQ(std::filesystem::exists(graphPath), true) << graphPath; std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; } TEST_F(HfDownloaderPullHfModel, DownloadDraftModel) { @@ 
-276,7 +290,7 @@ TEST_F(HfDownloaderPullHfModel, DownloadDraftModel) { ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContentsDraft, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContentsDraft, removeGeneratedGraphHeaders(graphContents)) << graphContents; std::string basePath2 = ovms::FileSystem::joinPath({basePath, "OpenVINO-distil-small.en-int4-ov"}); std::string modelPath2 = ovms::FileSystem::appendSlash(basePath2) + "openvino_tokenizer.bin"; diff --git a/src/test/pythonnode_test.cpp b/src/test/pythonnode_test.cpp index 54c9acbfa1..6f9dc6bfa8 100644 --- a/src/test/pythonnode_test.cpp +++ b/src/test/pythonnode_test.cpp @@ -1002,10 +1002,12 @@ class MockedMediapipeGraphExecutorPy : public ovms::MediapipeGraphExecutor { stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, + const GraphSidePackets& sidePackets, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter) : - MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, pythonNodeResourcesMap, {}, {}, {}, {}, {}, pythonBackend, mediapipeServableMetricReporter) {} + MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) : + MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, + sidePackets, + pythonBackend, mediapipeServableMetricReporter, std::move(guard)) {} }; TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) { @@ -1014,8 +1016,10 @@ TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) { const std::vector inputNames; const std::vector outputNames; const ::mediapipe::CalculatorGraphConfig config; - PythonNodeResourcesMap pythonNodeResourcesMap; - auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, pythonNodeResourcesMap, getPythonBackend(), this->reporter.get()); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); + auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, *sidePackets, getPythonBackend(), this->reporter.get(), std::move(guard)); std::string datatype = "FP32"; std::string name = "python_result"; diff --git a/src/test/streaming_test.cpp b/src/test/streaming_test.cpp index 02e7c4178a..b61d8a48ef 100644 --- a/src/test/streaming_test.cpp +++ b/src/test/streaming_test.cpp @@ -70,6 +70,35 @@ class StreamingTest : public Test { } }; +class StreamingQueueTest : public StreamingTest { +protected: + std::shared_ptr queue; + + MediapipeGraphExecutor createQueueExecutor( + const ::mediapipe::CalculatorGraphConfig& config, + stream_types_mapping_t inputTypes, + stream_types_mapping_t outputTypes, + std::vector inputNames, + std::vector outputNames, + int queueSize = 1) { + auto sidePackets = std::make_shared(); + queue = std::make_shared(config, sidePackets, queueSize); + GraphIdGuard graphIdGuard(queue); + return MediapipeGraphExecutor{ + this->name, + this->version, + config, + std::move(inputTypes), + std::move(outputTypes), + std::move(inputNames), + std::move(outputNames), + *sidePackets, + nullptr, + this->reporter.get(), + std::move(graphIdGuard)}; + } +}; + #if (PYTHON_DISABLE == 0) class PythonStreamingTest : 
 #if (PYTHON_DISABLE == 0)
 class PythonStreamingTest : public StreamingTest {
 protected:
@@ -359,7 +388,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::KFS_REQUEST}},
         {{"out", mediapipe_packet_type_enum::KFS_RESPONSE}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
@@ -416,7 +445,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}});  // no timestamp specified, server will assign one
@@ -559,7 +588,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests with manually (client) assigned ascending order of timestamp and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, 3);  // first request with timestamp 3
@@ -604,7 +633,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock only 1 request and disconnect immediately
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
@@ -621,6 +650,184 @@ node {
     ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
 }
 
+TEST_F(StreamingQueueTest, SingleStreamSend3Receive3AutomaticTimestamp) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOneSingleStreamTestCalculator"
+  input_stream: "in"
+  output_stream: "out"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        1);
+
+    prepareRequest(this->firstRequest, {{"in", 3.5f}});
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(Receive({{"in", 7.2f}}))
+        .WillOnce(Receive({{"in", 102.4f}}))
+        .WillOnce(Disconnect());
+
+    auto timestamp = std::make_shared<int64_t>(-1);
+    EXPECT_CALL(this->stream, Write(_, _))
+        .WillOnce(SendWithAutomaticTimestamp({{"out", 4.5f}}, timestamp))
+        .WillOnce(SendWithAutomaticTimestamp({{"out", 8.2f}}, timestamp))
+        .WillOnce(SendWithAutomaticTimestamp({{"out", 103.4f}}, timestamp));
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
+}
+
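All StreamingQueueTest cases pass queueSize = 1 to createQueueExecutor; a hedged reading of that last constructor argument (inferred from the fixture above, not confirmed by this diff):

    ::mediapipe::CalculatorGraphConfig config;  // parsed elsewhere
    auto sidePackets = std::make_shared<GraphSidePackets>();
    // Assumed: the third GraphQueue argument is the number of preinitialized
    // CalculatorGraph instances, i.e. how many streams may run concurrently.
    auto serialized = std::make_shared<GraphQueue>(config, sidePackets, 1);  // one at a time
    auto pooled = std::make_shared<GraphQueue>(config, sidePackets, 4);      // up to four in flight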
+TEST_F(StreamingQueueTest, SingleStreamSend1Receive3) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOne3CycleIterationsTestCalculator"
+  input_stream: "in"
+  input_stream: "signal"
+  input_stream_info: {
+    tag_index: ':1',
+    back_edge: true
+  }
+  input_stream_handler {
+    input_stream_handler: 'ImmediateInputStreamHandler'
+  }
+  output_stream: "out"
+  output_stream: "signal"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        1);
+
+    prepareRequest(this->firstRequest, {{"in", 3.5f}});
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(Disconnect());
+
+    EXPECT_CALL(this->stream, Write(_, _))
+        .WillOnce(SendWithTimestamp({{"out", 4.5f}}, 1))
+        .WillOnce(SendWithTimestamp({{"out", 5.5f}}, 2))
+        .WillOnce(SendWithTimestamp({{"out", 6.5f}}, 3));
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
+}
+
+TEST_F(StreamingQueueTest, ExitOnDisconnectionDuringRead) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOneSingleStreamTestCalculator"
+  input_stream: "in"
+  output_stream: "out"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        1);
+
+    prepareRequest(this->firstRequest, {});
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(Disconnect());
+
+    EXPECT_CALL(this->stream, Write(_, _)).Times(0);
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
+}
+
+TEST_F(StreamingQueueTest, ErrorOnDisconnectionDuringWrite) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOneSingleStreamTestCalculator"
+  input_stream: "in"
+  output_stream: "out"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        1);
+
+    std::promise<void> signalPromise;
+    std::future<void> signalFuture = signalPromise.get_future();
+
+    prepareRequest(this->firstRequest, {{"in", 3.5f}});
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(DisconnectWhenNotified(signalFuture));
+
+    EXPECT_CALL(this->stream, Write(_, _))
+        .WillOnce(DisconnectOnWriteAndNotifyEnd(signalPromise));
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::MEDIAPIPE_EXECUTION_ERROR);
+}
+
+TEST_F(StreamingQueueTest, ErrorDuringFirstRequestDeserialization) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOneSingleStreamTestCalculator"
+  input_stream: "in"
+  output_stream: "out"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        1);
+
+    prepareInvalidRequest(this->firstRequest, {"in"});
+
+    std::promise<void> signalPromise;
+    std::future<void> signalFuture = signalPromise.get_future();
+
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(DisconnectWhenNotified(signalFuture));
+    EXPECT_CALL(this->stream, Write(_, _))
+        .WillOnce(SendErrorAndNotifyEnd(
+            Status(StatusCode::INVALID_CONTENT_SIZE).string() + std::string{" - Expected: 4 bytes; Actual: 0 bytes; input name: in; partial deserialization of first request"},
+            signalPromise));
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
+}
+
 // PYTHON CALCULATOR CASES
 #if (PYTHON_DISABLE == 0)
 
@@ -1230,7 +1437,7 @@ node {
         {"out3", mediapipe_packet_type_enum::OVTENSOR}},
         {"in1", "in2", "in3"},
         {"out1", "out2", "out3"},
-        {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1282,7 +1489,7 @@ node {
         {"out3", mediapipe_packet_type_enum::OVTENSOR}},
         {"in1", "in2", "in3"},
         {"out1", "out2", "out3"},
-        {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1317,7 +1524,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1351,7 +1558,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"wrong_name"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};  // cannot install observer due to wrong output name (should never happen due to validation)
+        {"in"}, {"wrong_name"}, {}, nullptr, this->reporter.get()};  // cannot install observer due to wrong output name (should never happen due to validation)
 
     EXPECT_CALL(this->stream, Read(_)).Times(0);
     EXPECT_CALL(this->stream, Write(_, _)).Times(0);
@@ -1376,7 +1583,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {});
     EXPECT_CALL(this->stream, Read(_))
@@ -1404,7 +1611,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1440,7 +1647,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
     ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
@@ -1463,7 +1670,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Invalid request - missing data in buffer
     prepareInvalidRequest(this->firstRequest, {"in"});  // no timestamp specified, server will assign one
@@ -1498,7 +1705,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise[3];
     std::future<void> signalFuture[3] = {
@@ -1545,7 +1752,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, 0);
     EXPECT_CALL(this->stream, Read(_))
@@ -1573,7 +1780,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
     setRequestTimestamp(this->firstRequest, std::string("not an int"));
@@ -1608,7 +1815,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Timestamps not allowed in stream
     // Expect continuity of operation and response with error message
@@ -1650,7 +1857,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Allowed in stream
     for (auto timestamp : std::vector<::mediapipe::Timestamp>{
@@ -1686,7 +1893,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests and disconnection
     prepareRequestWithParam(this->firstRequest, {{"in", 3.5f}}, {"val", 65});  // request with parameter val
@@ -1723,7 +1930,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving the invalid request and disconnection
     // Request with invalid param py (special pythons session side packet)
@@ -1752,7 +1959,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});  // missing required request param
     EXPECT_CALL(this->stream, Read(_)).Times(0);
@@ -1778,7 +1985,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 2 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, std::nullopt, this->name, this->version);  // no timestamp specified, server will assign one
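The recurring shrink from six positional `{}` arguments to a single `{}` in these executor constructions reflects folding the per-type side packet maps into one aggregate passed by reference. A sketch consistent with the member accesses in the test_utils.hpp hunks below (the two named fields are taken from this diff; any further members are assumed):

    struct GraphSidePackets {
        PythonNodeResourcesMap pythonNodeResourcesMap;
        GenAiServableMap genAiServableMap;
        // ... remaining side packet maps elided ...
    };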
@@ -1812,7 +2019,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
diff --git a/src/test/stress_test_utils.hpp b/src/test/stress_test_utils.hpp
index ccbdd60758..740e8af1d1 100644
--- a/src/test/stress_test_utils.hpp
+++ b/src/test/stress_test_utils.hpp
@@ -50,6 +50,7 @@
 #include "../server.hpp"
 #include "../status.hpp"
 #include "../stringutils.hpp"
+#include "src/timer.hpp"
 #include "../tfs_frontend/tfs_utils.hpp"
 #include "c_api_test_utils.hpp"
 #include "test_utils.hpp"
@@ -1067,7 +1068,99 @@ static const std::string basicMediapipeConfigWithNewGraphPath = R"({
     "mediapipe_config_list": [
     {
         "name":"pipeline1Dummy",
-        "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt"
+        "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt"
     }
     ]
 })";
+
+const std::string basicMediapipeQueueConfig = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy"
+            }
+        }
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    }
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithAddedGraph = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy"
+            }
+        }
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    },
+    {
+        "name":"pipeline2Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    }
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithRemovedGraph = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy"
+            }
+        }
+    ],
+    "mediapipe_config_list": [
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithRemovedModel = R"({
+    "model_config_list": [
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    }
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithReloadedModel = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy",
+                "nireq": 47
+            }
+        }
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    }
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithNewGraphPath = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy"
+            }
+        }
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt"
+    }
+    ]
+})";
@@ -1094,9 +1187,16 @@ static void mediacreate(std::unique_ptr& executorPtr, ov
         sc = static_cast<ovms::StatusCode>(code); \
     }
 
+enum StressTimerSlot : unsigned int {
+    STRESS_LOOP,
+    CREATE,
+    EXECUTE,
+    TIMER_END
+};
+
 class ConfigChangeStressTest : public TestWithTempDir {
 protected:
-    const uint32_t loadThreadCount = 20;
+    const uint32_t loadThreadCount = 16;
     const uint32_t beforeConfigChangeLoadTimeMs = 30;
     const uint32_t afterConfigChangeLoadTimeMs = 50;
     const int stressIterationsLimit = 10000;
@@ -1291,6 +1391,12 @@ class ConfigChangeStressTest : public TestWithTempDir {
         createConfigFileWithContent(ovmsConfig, configFilePath);
         SPDLOG_INFO("{} end", __FUNCTION__);
     }
+    void addNewMediapipeQueueGraph() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithAddedGraph);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
     void removeMediapipeGraph() {
         SPDLOG_INFO("{} start", __FUNCTION__);
         SetUpConfig(basicMediapipeConfigWithRemovedGraph);
@@ -1315,6 +1421,30 @@ class ConfigChangeStressTest : public TestWithTempDir {
         createConfigFileWithContent(ovmsConfig, configFilePath);
         SPDLOG_INFO("{} end", __FUNCTION__);
     }
+    void removeMediapipeQueueGraph() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithRemovedGraph);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
+    void removeMediapipeQueueGraphUsedModel() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithRemovedModel);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
+    void reloadMediapipeQueueGraphUsedModel() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithReloadedModel);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
+    void reloadMediapipeQueueGraph() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithNewGraphPath);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
     void checkMetricGreaterThan(const std::string& metricName, double value, std::string& metricOutput, bool& result) {
         ASSERT_THAT(metricOutput, ::testing::HasSubstr(metricName + std::string{"{name=\"dummy\",version=\"1\"} "})) << "cannot find dummys " << metricName << " metric\n"
                                                                                                                      << metricOutput;
@@ -1706,6 +1836,8 @@ class ConfigChangeStressTest : public TestWithTempDir {
         auto stressIterationsCounter = stressIterationsLimit;
         bool breakLoop = false;
         while (stressIterationsCounter-- > 0) {
+            ovms::Timer<TIMER_END> timer;
+            timer.start(STRESS_LOOP);
             auto futureWaitResult = stopSignal.wait_for(std::chrono::milliseconds(0));
             if (true == breakLoop) {
                 SPDLOG_INFO("Ending Load");
@@ -1725,6 +1857,7 @@ class ConfigChangeStressTest : public TestWithTempDir {
             RequestType request2;
             RequestType request = preparePipelinePredictRequest(request2);
             ovms::Status createPipelineStatus = StatusCode::UNKNOWN_ERROR;
+            timer.start(CREATE);
             if (typeid(ServableType) == typeid(ovms::Pipeline)) {
                 createPipelineStatus = this->manager->createPipeline(pipelinePtr, pipelineName, &request, &response);
 #if (MEDIAPIPE_DISABLE == 0)
@@ -1732,6 +1865,8 @@ class ConfigChangeStressTest : public TestWithTempDir {
                 mediacreate(executorPtr, *(this->manager), request, response, createPipelineStatus);
 #endif
             }
+            timer.stop(CREATE);
+            SPDLOG_TRACE("Servable creation time: {} us", timer.elapsed<std::chrono::microseconds>(CREATE));
             // we need to make sure that expected status happened and still accept
             // some that could happen but we may not hit them
             EXPECT_TRUE((requiredLoadResults.find(createPipelineStatus.getCode()) != requiredLoadResults.end()) ||
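The instrumentation added to the stress loop follows the ovms::Timer pattern; a minimal usage sketch under the assumption that the template parameter is the slot count and that elapsed() is templated on the chrono unit, matching the "us" in the trace above (the slot names here are illustrative only):

    enum DemoTimerSlot : unsigned int { PREPARE, INFER, DEMO_TIMER_END };

    ovms::Timer<DEMO_TIMER_END> timer;  // one stopwatch per slot, assumed
    timer.start(PREPARE);
    // ... timed section ...
    timer.stop(PREPARE);
    SPDLOG_TRACE("prepare time: {} us", timer.elapsed<std::chrono::microseconds>(PREPARE));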
@@ -1743,6 +1878,7 @@ class ConfigChangeStressTest : public TestWithTempDir {
             }
 
             ovms::Status executePipelineStatus = StatusCode::UNKNOWN_ERROR;
+            timer.start(EXECUTE);
             if (typeid(ServableType) == typeid(ovms::Pipeline)) {
                 executePipelineStatus = pipelinePtr->execute(ovms::ExecutionContext(
                     ovms::ExecutionContext::Interface::GRPC,
@@ -1752,6 +1888,7 @@ class ConfigChangeStressTest : public TestWithTempDir {
                 mediaexec(executorPtr, *(this->manager), request, response, executePipelineStatus);
 #endif
             }
+            timer.stop(EXECUTE);
             createPipelineRetCodesCounters[executePipelineStatus.getCode()]++;
             EXPECT_TRUE((requiredLoadResults.find(executePipelineStatus.getCode()) != requiredLoadResults.end()) ||
                         (allowedLoadResults.find(executePipelineStatus.getCode()) != allowedLoadResults.end()))
@@ -1763,6 +1900,7 @@ class ConfigChangeStressTest : public TestWithTempDir {
                 SPDLOG_INFO("Earlier fail detected. Stopping execution");
                 break;
             }
+            timer.stop(STRESS_LOOP);
         }
         for (auto& [retCode, counter] : createPipelineRetCodesCounters) {
             if (counter > 0) {
diff --git a/src/test/test_utils.hpp b/src/test/test_utils.hpp
index 879ab1313e..8a1e7dfd19 100644
--- a/src/test/test_utils.hpp
+++ b/src/test/test_utils.hpp
@@ -816,8 +816,8 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition {
     std::string inputConfig;
 #if (PYTHON_DISABLE == 0)
     ovms::PythonNodeResources* getPythonNodeResources(const std::string& nodeName) {
-        auto it = this->sidePacketMaps.pythonNodeResourcesMap.find(nodeName);
-        if (it == std::end(this->sidePacketMaps.pythonNodeResourcesMap)) {
+        auto it = this->sidePacketMaps->pythonNodeResourcesMap.find(nodeName);
+        if (it == std::end(this->sidePacketMaps->pythonNodeResourcesMap)) {
             return nullptr;
         } else {
             return it->second.get();
@@ -826,8 +826,8 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition {
 #endif
 
     ovms::GenAiServable* getGenAiServable(const std::string& nodeName) {
-        auto it = this->sidePacketMaps.genAiServableMap.find(nodeName);
-        if (it == std::end(this->sidePacketMaps.genAiServableMap)) {
+        auto it = this->sidePacketMaps->genAiServableMap.find(nodeName);
+        if (it == std::end(this->sidePacketMaps->genAiServableMap)) {
             return nullptr;
         } else {
             return it->second.get();
@@ -838,13 +838,15 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition {
         return this->validateForConfigLoadableness();
     }
 
-    ovms::GenAiServableMap& getGenAiServableMap() { return this->sidePacketMaps.genAiServableMap; }
+    ovms::GenAiServableMap& getGenAiServableMap() { return this->sidePacketMaps->genAiServableMap; }
 
     DummyMediapipeGraphDefinition(const std::string name,
         const ovms::MediapipeGraphConfig& config,
         std::string inputConfig,
         ovms::PythonBackend* pythonBackend = nullptr) :
-        ovms::MediapipeGraphDefinition(name, config, nullptr, nullptr, pythonBackend) { this->inputConfig = inputConfig; }
+        ovms::MediapipeGraphDefinition(name, config, nullptr, nullptr, pythonBackend) {
+        this->inputConfig = inputConfig;
+    }
 
     // Do not read from path - use predefined config contents
     ovms::Status validateForConfigFileExistence() override {