diff --git a/.gitignore b/.gitignore index 3ca5fb2..d4a4bbd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,10 @@ # project .idea +.envrc +docs +model models src/models -.envrc # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/actions/actions.py b/actions/actions.py index 517717e..35e6a46 100644 --- a/actions/actions.py +++ b/actions/actions.py @@ -16,6 +16,8 @@ from transformers import AutoTokenizer import src.config as cf from src.preprocessing import load_context_for_inference +import requests +import json class ActionCovid(Action): @@ -26,26 +28,30 @@ def name(self) -> Text: def run(self, dispatcher: CollectingDispatcher, tracker: Tracker, domain: Dict[Text, Any]) -> List[Dict[Text, Any]]: - question = tracker.latest_message['text'] text = load_context_for_inference("src/data/COVID-QA.json") - tokenizer = AutoTokenizer.from_pretrained(cf.setting["model_checkpoint"]) - model = TFAutoModelForQuestionAnswering.from_pretrained(cf.setting["model_checkpoint"]) + tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad") + # model = TFAutoModelForQuestionAnswering.from_pretrained(cf.setting["model_checkpoint"]) + + inputs = tokenizer(question, text, add_special_tokens=True, truncation=True) + input_ids = inputs["input_ids"] + + batch = [dict(inputs)] - inputs = tokenizer(question, text, add_special_tokens=True, return_tensors="tf", truncation=True) - input_ids = inputs["input_ids"].numpy()[0] + input_data = {'instances': batch} + r = requests.post('http://localhost:8501/v1/models/bert:predict', data=json.dumps(input_data)) + output = json.loads(r.text)['predictions'][0] - output = model(inputs) - answer_start = tf.argmax( - output.start_logits, axis=1 - ).numpy()[0] # Get the most likely beginning of answer with the argmax of the score - answer_end = ( - tf.argmax(output.end_logits, axis=1) + 1 - ).numpy()[0] # Get the most likely end of answer with the argmax of the score + # output = model(inputs) + answer_start = tf.argmax([output['start_logits']], axis=1).numpy()[ + 0] # Get the most likely beginning of answer with the argmax of the score + answer_end = (tf.argmax([output['end_logits']], axis=1) + 1).numpy()[ + 0] # Get the most likely end of answer with the argmax of the score answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])) - start = answer.index("SEP") + 4 - answer = answer[start:-5] + start = answer.index("SEP") + answer = answer[start:] + answer = answer.replace("SEP", "") dispatcher.utter_message(text=f"{answer}") return [] diff --git a/run_server.sh b/run_server.sh new file mode 100755 index 0000000..661f78d --- /dev/null +++ b/run_server.sh @@ -0,0 +1,12 @@ +echo -e "Starting base serving daemon ..." +docker pull tensorflow/serving +docker run -d --name serving_base tensorflow/serving + +echo -e "Exporting model as SavedModel object ..." +python3 serving/save_model.py +docker cp model/saved_model serving_base:/models/bert + +echo -e "Starting new serving container ..." +docker commit --change "ENV MODEL_NAME bert" serving_base qa_bert +docker kill serving_base +docker run -d -p 8501:8501 -p 8500:8500 --name bert qa_bert diff --git a/serving/save_model.py b/serving/save_model.py new file mode 100644 index 0000000..c379603 --- /dev/null +++ b/serving/save_model.py @@ -0,0 +1,17 @@ +import os +import logging + +from transformers import TFAutoModelForQuestionAnswering + + +def create_saved_model() -> None: + if not os.path.exists('model'): + model = TFAutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad") + model.save_pretrained('model', saved_model=True) + logging.info('SavedModel saved to model/ directory') + else: + logging.info('model/ directory already exists; no new SavedModel objects saved') + + +if __name__ == "__main__": + create_saved_model() \ No newline at end of file