Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 0 additions & 94 deletions demos/common/export_models/export_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,6 @@ def add_common_arguments(parser):
parser_embeddings_ov.add_argument('--truncate', default=False, action='store_true', help='Truncate the prompts to fit to the embeddings model', dest='truncate')
parser_embeddings_ov.add_argument('--num_streams', default=1,type=int, help='The number of parallel execution streams to use for the model. Use at least 2 on 2 socket CPU systems.', dest='num_streams')

parser_rerank = subparsers.add_parser('rerank', help='[deprecated] export model for rerank endpoint with models split into separate, versioned directories')
add_common_arguments(parser_rerank)
parser_rerank.add_argument('--num_streams', default=1, type=int, help='The number of parallel execution streams to use for the model. Use at least 2 on 2 socket CPU systems.', dest='num_streams')
parser_rerank.add_argument('--max_doc_length', default=16000, type=int, help='Maximum length of input documents in tokens', dest='max_doc_length')
parser_rerank.add_argument('--version', default="1", help='version of the model', dest='version')

parser_rerank_ov = subparsers.add_parser('rerank_ov', help='export model for rerank endpoint with directory structure aligned with OpenVINO tools')
add_common_arguments(parser_rerank_ov)
parser_rerank_ov.add_argument('--num_streams', default=1, type=int, help='The number of parallel execution streams to use for the model. Use at least 2 on 2 socket CPU systems.', dest='num_streams')
Expand Down Expand Up @@ -190,34 +184,6 @@ def add_common_arguments(parser):
}
"""

# Jinja2 template for the MediaPipe graph that export_rerank_model renders and
# writes to <model_repository_path>/<model_name>/graph.pbtxt.
# The only placeholder is {{model_name}}: it prefixes the two servable names
# ("<model_name>_tokenizer_model" and "<model_name>_rerank_model").
# Graph layout as written below:
#   * two OpenVINOModelServerSessionCalculator nodes, each publishing a session
#     side packet ("tokenizer" and "rerank");
#   * one RerankCalculator node that consumes both side packets and maps
#     REQUEST_PAYLOAD:input to RESPONSE_PAYLOAD:output.
rerank_graph_template = """input_stream: "REQUEST_PAYLOAD:input"
output_stream: "RESPONSE_PAYLOAD:output"
node {
calculator: "OpenVINOModelServerSessionCalculator"
output_side_packet: "SESSION:tokenizer"
node_options: {
[type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
servable_name: "{{model_name}}_tokenizer_model"
}
}
}
node {
calculator: "OpenVINOModelServerSessionCalculator"
output_side_packet: "SESSION:rerank"
node_options: {
[type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
servable_name: "{{model_name}}_rerank_model"
}
}
}
node {
input_side_packet: "TOKENIZER_SESSION:tokenizer"
input_side_packet: "RERANK_SESSION:rerank"
calculator: "RerankCalculator"
input_stream: "REQUEST_PAYLOAD:input"
output_stream: "RESPONSE_PAYLOAD:output"
}
"""

text_generation_graph_template = """input_stream: "HTTP_REQUEST_PAYLOAD:input"
output_stream: "HTTP_RESPONSE_PAYLOAD:output"
Expand Down Expand Up @@ -273,24 +239,6 @@ def add_common_arguments(parser):
}
}"""

# Jinja2 template for the per-servable model config that export_rerank_model
# renders and writes to <model_repository_path>/<model_name>/subconfig.json.
# It declares two models whose names match the servable_name entries used in
# rerank_graph_template:
#   * "<model_name>_tokenizer_model" with base_path "tokenizer"
#   * "<model_name>_rerank_model" with base_path "rerank"
# Placeholders: {{model_name}}, {{target_device}} and {{num_streams}}.
# The `default(..., true)` filters make target_device fall back to "CPU" and
# NUM_STREAMS to 1 when the value is undefined or falsy at render time.
rerank_subconfig_template = """{
"model_config_list": [
{ "config":
{
"name": "{{model_name}}_tokenizer_model",
"base_path": "tokenizer"
}
},
{ "config":
{
"name": "{{model_name}}_rerank_model",
"base_path": "rerank",
"target_device": "{{target_device|default("CPU", true)}}",
"plugin_config": { "NUM_STREAMS": "{{num_streams|default(1, true)}}" }
}
}
]
}"""

image_generation_graph_template = """input_stream: "HTTP_REQUEST_PAYLOAD:input"
output_stream: "HTTP_RESPONSE_PAYLOAD:output"
Expand Down Expand Up @@ -558,46 +506,6 @@ def export_rerank_model_ov(model_repository_path, source_model, model_name, prec
print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
add_servable_to_config(config_file_path, model_name, os.path.relpath(os.path.join(model_repository_path, model_name), os.path.dirname(config_file_path)))

def export_rerank_model(model_repository_path, source_model, model_name, precision, task_parameters, version, config_file_path, max_doc_length):
    """Export a rerank model and its tokenizer into separate, versioned directories.

    The resulting layout under ``<model_repository_path>/<model_name>`` is::

        rerank/<version>/model.xml|bin
        tokenizer/<version>/model.xml|bin
        graph.pbtxt
        subconfig.json

    Two paths are supported:

    * If ``<model_name>/openvino_model.xml`` exists, the already converted
      ``openvino_model.*`` / ``openvino_tokenizer.*`` files found in
      ``<model_repository_path>/<model_name>`` are moved into the versioned
      directories and no conversion is run.
    * Otherwise ``source_model`` is passed to ``optimum-cli export openvino``
      and ``export_rerank_tokenizer``; the outputs are produced in a temporary
      directory and then moved into place. Each of the two exports is skipped
      when its target directory already exists, unless
      ``args['overwrite_models']`` (module-level ``args``) is truthy.

    Afterwards ``graph.pbtxt`` and ``subconfig.json`` are rendered from
    ``rerank_graph_template`` / ``rerank_subconfig_template`` and the servable
    is registered through ``add_servable_to_config``.

    NOTE(review): the existence check looks at ``model_name`` relative to the
    current working directory, while the files are moved from
    ``model_repository_path/model_name`` -- confirm this is intended.

    Args:
        model_repository_path: Root directory of the model repository.
        source_model: Model identifier handed to the export tools.
        model_name: Servable name; also the sub-directory name in the repository.
        precision: Value passed to ``--weight-format``.
        task_parameters: Template parameters; must contain
            ``'extra_quantization_params'``.
        version: Version directory name (string).
        config_file_path: Config file updated by ``add_servable_to_config``.
        max_doc_length: Forwarded to ``export_rerank_tokenizer``.

    Raises:
        ValueError: If the ``optimum-cli`` command returns a non-zero status.
    """

    def _emit_rendered(template_text, file_name, message):
        # Render one Jinja2 template for this servable, write it into the
        # servable directory and report the written path.
        rendered = jinja2.Environment(loader=jinja2.BaseLoader).from_string(template_text).render(model_name=model_name, **task_parameters)
        target = os.path.join(model_repository_path, model_name, file_name)
        with open(target, 'w') as out:
            out.write(rendered)
        print(message.format(target))

    model_extensions = ('.xml', '.bin')

    if os.path.isfile(os.path.join(model_name, 'openvino_model.xml')):
        print("OV model is source folder. Skipping conversion.")
        for subdir in ('rerank', 'tokenizer'):
            os.makedirs(os.path.join(model_repository_path, model_name, subdir, version), exist_ok=True)
        # Tokenizer files are relocated first, then the model files.
        for stem, subdir in (('openvino_tokenizer', 'tokenizer'), ('openvino_model', 'rerank')):
            for ext in model_extensions:
                shutil.move(
                    os.path.join(model_repository_path, model_name, stem + ext),
                    os.path.join(model_repository_path, model_name, subdir, version, 'model' + ext),
                )
    else:  # assume HF model name
        with tempfile.TemporaryDirectory() as tmpdirname:
            rerank_dir = os.path.join(model_repository_path, model_name, 'rerank', version)
            print("Exporting rerank model to ", rerank_dir)
            if args['overwrite_models'] or not os.path.isdir(rerank_dir):
                command_template = "optimum-cli export openvino --disable-convert-tokenizer --model {} --task text-classification --weight-format {} {} --trust-remote-code {}"
                optimum_command = command_template.format(source_model, precision, task_parameters['extra_quantization_params'], tmpdirname)
                exit_status = os.system(optimum_command)
                if exit_status:
                    raise ValueError("Failed to export rerank model", source_model)
                set_rt_info(tmpdirname, 'openvino_model.xml', 'config.json')
                os.makedirs(rerank_dir, exist_ok=True)
                for ext in model_extensions:
                    shutil.move(os.path.join(tmpdirname, 'openvino_model' + ext), os.path.join(rerank_dir, 'model' + ext))

            tokenizer_dir = os.path.join(model_repository_path, model_name, 'tokenizer', version)
            print("Exporting tokenizer to ", tokenizer_dir)
            if args['overwrite_models'] or not os.path.isdir(tokenizer_dir):
                export_rerank_tokenizer(source_model, tmpdirname, max_doc_length)
                set_rt_info(tmpdirname, 'openvino_tokenizer.xml', 'tokenizer_config.json')
                os.makedirs(tokenizer_dir, exist_ok=True)
                for ext in model_extensions:
                    shutil.move(os.path.join(tmpdirname, 'openvino_tokenizer' + ext), os.path.join(tokenizer_dir, 'model' + ext))

    _emit_rendered(rerank_graph_template, 'graph.pbtxt', "Created graph {}")
    _emit_rendered(rerank_subconfig_template, 'subconfig.json', "Created subconfig {}")

    servable_dir = os.path.join(model_repository_path, model_name)
    add_servable_to_config(config_file_path, model_name, os.path.relpath(servable_dir, os.path.dirname(config_file_path)))


def export_image_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path, num_streams):
Expand Down Expand Up @@ -670,8 +578,6 @@ def export_image_generation_model(model_repository_path, source_model, model_nam
elif args['task'] == 'embeddings_ov':
export_embeddings_model_ov(args['model_repository_path'], args['source_model'], args['model_name'], args['precision'], template_parameters, args['config_file_path'], args['truncate'])

elif args['task'] == 'rerank':
export_rerank_model(args['model_repository_path'], args['source_model'], args['model_name'] ,args['precision'], template_parameters, str(args['version']), args['config_file_path'], args['max_doc_length'])

elif args['task'] == 'rerank_ov':
export_rerank_model_ov(args['model_repository_path'], args['source_model'], args['model_name'] ,args['precision'], template_parameters, args['config_file_path'], args['max_doc_length'])
Expand Down
1 change: 0 additions & 1 deletion src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,6 @@ ovms_cc_library(
"//src/image_gen:imagegen_init",
"//src/llm:openai_completions_api_handler",
"//src/embeddings:embeddingscalculator_ov",
"//src/rerank:rerankcalculator",
"//src/rerank:rerankcalculator_ov",
"//src/llm:llmcalculator",],
}) + select({
Expand Down
28 changes: 1 addition & 27 deletions src/rerank/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,7 @@
load("@mediapipe//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library", "mediapipe_proto_library")
load("//:common_settings.bzl", "ovms_cc_library")

# Proto library built from rerank_calculator.proto, private to this package.
# The C++ target that other rules reference is "rerank_calculator_cc_proto"
# (see the inline note on `name`); the "rerankcalculator" library lists it in
# its deps.
mediapipe_proto_library(
name = "rerank_calculator_proto", # rerank_calculator_cc_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target
srcs = ["rerank_calculator.proto"],
visibility = ["//visibility:private"],
deps = [
"@mediapipe//mediapipe/framework:calculator_options_proto",
"@mediapipe//mediapipe/framework:calculator_proto",
],
)


ovms_cc_library(
name = "rerank_servable",
Expand All @@ -45,24 +37,6 @@ mediapipe_proto_library(
],
)

# Publicly visible library compiled from rerank_calculator.cc.
# Depends on the generated "rerank_calculator_cc_proto" target and on
# ":rerank_api_handler" from this package, plus MediaPipe, RapidJSON,
# model_api and several //src libraries.
# NOTE(review): alwayslink = 1 presumably keeps the calculator linked in even
# though nothing references its symbols directly -- confirm against how
# ovms_cc_library forwards this attribute.
ovms_cc_library(
name = "rerankcalculator",
srcs = ["rerank_calculator.cc"],
deps = [
"@mediapipe//mediapipe/framework:calculator_framework",
"@com_github_tencent_rapidjson//:rapidjson",
"@model_api//:model_api",
"//src:httppayload",
"//src:libhttpclientconnection",
"//src:libovmslogging",
"//src:libovmsprofiler",
"rerank_calculator_cc_proto",
":rerank_api_handler",
],
visibility = ["//visibility:public"],
alwayslink = 1,
)

ovms_cc_library(
name = "rerankcalculator_ov",
srcs = ["rerank_calculator_ov.cc"],
Expand Down
Loading
Loading