From 9603a600f4ad51d421ed7be871cc4b474fa51838 Mon Sep 17 00:00:00 2001 From: Ann-He2026 Date: Thu, 12 Mar 2026 11:22:33 +0100 Subject: [PATCH 1/6] Relation-Extraction --- duui-relation-extraction/Dockerfile | 23 ++++++++++ duui-relation-extraction/Readme.md | 16 +++++++ duui-relation-extraction/requirements.txt | 7 +++ .../src/main/python/TypSystemRelation.xml | 28 ++++++++++++ .../src/tests/Rebel_Relation_Extractor/README | 24 ++++++++++ .../Rebel_Relation_Extractor/test_docker.sh | 44 +++++++++++++++++++ .../Rebel_Relation_Extractor/test_text.json | 8 ++++ 7 files changed, 150 insertions(+) create mode 100644 duui-relation-extraction/Dockerfile create mode 100644 duui-relation-extraction/Readme.md create mode 100644 duui-relation-extraction/requirements.txt create mode 100644 duui-relation-extraction/src/main/python/TypSystemRelation.xml create mode 100644 duui-relation-extraction/src/tests/Rebel_Relation_Extractor/README create mode 100644 duui-relation-extraction/src/tests/Rebel_Relation_Extractor/test_docker.sh create mode 100644 duui-relation-extraction/src/tests/Rebel_Relation_Extractor/test_text.json diff --git a/duui-relation-extraction/Dockerfile b/duui-relation-extraction/Dockerfile new file mode 100644 index 00000000..d23599eb --- /dev/null +++ b/duui-relation-extraction/Dockerfile @@ -0,0 +1,23 @@ +FROM python:3.10 + +WORKDIR /app + +COPY requirements.txt . 
+RUN pip install --no-cache-dir -r requirements.txt + +COPY ./src/main/python/TypSystemRelation.xml ./TypSystemRelation.xml +COPY ./src/main/python/duui_relation_extraction.py ./duui_relation_extraction.py +COPY ./src/main/python/duui_relation_extraction.lua ./duui_relation_extraction.lua +COPY ./src/main/python/relation_extraction.py ./relation_extraction.py + +COPY ./src/main/python/offline_models/rebel_large ./offline_models/rebel_large +COPY ./src/main/python/offline_models/knowgl-large ./offline_models/knowgl-large + +ARG MODEL_NAME="rebel_large" +ENV MODEL_NAME=$MODEL_NAME + +EXPOSE 8000 + +ENV DEVICE=cpu + +CMD ["uvicorn", "duui_relation_extraction:duui_relation_extraction", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/duui-relation-extraction/Readme.md b/duui-relation-extraction/Readme.md new file mode 100644 index 00000000..cf69905d --- /dev/null +++ b/duui-relation-extraction/Readme.md @@ -0,0 +1,16 @@ +# Relation Extraction + +DUUI implementation for selected Hugging-Face-based transformer [relation-extraction tools](https://huggingface.co/models?other=relation-extraction) models. + +## Included Models + +| Name | link | Revision | Languages | +|----------------------------------------|----------------------------------------------------------------------------------|-------------------------------------------|-------------------| +| Babelscape/rebel-large | https://huggingface.co/Babelscape/rebel-large | 44eb6cb4585df284ce6c4d6a7013f83fe473c052 | Multilingual | +| ibm-research/knowgl-large | https://huggingface.co/ibm-research/knowgl-large | 94596fd9f697498f7ee7363dbf4cc66f08d499e8 | Multilingual | + +## Execution + +1. Im Ordner 'src/main/python/offline_models' das gewünschte Modell runterladen, da der Container nicht mit dem Internet verbunden ist. +2. Docker bauen: docker build -t [IMAGE_NAME] . +3. 
Docker starten: docker run -p 8000:8000 -e MODEL_NAME=[MODEL_NAME] [IMAGE_NAME] \ No newline at end of file diff --git a/duui-relation-extraction/requirements.txt b/duui-relation-extraction/requirements.txt new file mode 100644 index 00000000..972cc78b --- /dev/null +++ b/duui-relation-extraction/requirements.txt @@ -0,0 +1,7 @@ +fastapi +uvicorn +torch +transformers +pydantic +pydantic-settings +sentencepiece \ No newline at end of file diff --git a/duui-relation-extraction/src/main/python/TypSystemRelation.xml b/duui-relation-extraction/src/main/python/TypSystemRelation.xml new file mode 100644 index 00000000..5a25a7a1 --- /dev/null +++ b/duui-relation-extraction/src/main/python/TypSystemRelation.xml @@ -0,0 +1,28 @@ + + + + + Relation + uima.tcas.Annotation + Relation extracted by REBEL + + + subject + uima.cas.String + + + predicate + uima.cas.String + + + object + uima.cas.String + + + confidence + uima.cas.Float + + + + + \ No newline at end of file diff --git a/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/README b/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/README new file mode 100644 index 00000000..b7a9e4bb --- /dev/null +++ b/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/README @@ -0,0 +1,24 @@ +# Docker-Test für das Modell https://huggingface.co/Babelscape/rebel-large + +## Ziel +Überprüfung, ob das Modell 'rebel-large' korrekt im Docker-Container offline läuft. +Offline, da es im Online-Modus beim Bauen des Docker Images immer zu Fehlern beim Download kam. + +## Testaufbau +- Modell: Babelscape/rebel-large +- Docker-Endpunkt: http://localhost:8000/v1/process +- 6 Sätze, die auf ihre Relation im Satz untersucht werden. +- Skript: test_docker.sh +- Testausgabe: predicted sentence + Triplets + +## Durchführung +1. Modell durch das Pythonskript rebel_large.py in den Ordner laden +2. Docker Image bauen : docker build -t relation:latest . +2.1 Man muss sich im Ordner duui-relation-extraction befinden. 
+2.2 Das Dockerfile muss direkt in dem Ordner sein und nicht im Unterordner docker, sonst werden die Pfade nicht gefunden. +3. Docker Image starten: docker run -p 8000:8000 \ + -e MODEL_NAME=Babelscape/rebel-large \ + relation:latest +4. Testskript ausführen + +Das Modell ibm-research/knowgl-large kann auf die gleiche Weise getestet werden. diff --git a/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/test_docker.sh b/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/test_docker.sh new file mode 100644 index 00000000..c92944ed --- /dev/null +++ b/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/test_docker.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# URL des Docker-Endpunkts +URL="http://localhost:8000/v1/process" + +# Datei mit Testtexten +TEST_FILE="test_text.json" + +echo "Starte Tests" + +# Schleife über alle Testtexte +jq -c '.[]' "$TEST_FILE" | while read t; do + raw_text=$(echo "$t" | jq -r '.text') + lang=$(echo "$t" | jq -r '.lang') + + # Wichtig: Prefix "sentence:" für REBEL + text="sentence: $raw_text" + + # JSON für POST-Request bauen + payload=$(jq -n --arg txt "$text" --arg l "$lang" '{ + doc_len: 1, + lang: $l, + selections: [ + { + selection: "1", + sentences: [ + { text: $txt, begin: 0, end: ($txt | length) } + ] + } + ] + }') + + # Anfrage an Docker-Endpunkt + response=$(curl -s -X POST "$URL" -H "Content-Type: application/json" -d "$payload") + + # Triplets ausgeben + relations=$(echo "$response" | jq '.relations[0][0]') + + echo "Text: $raw_text" + echo "Triplets: $relations" + echo "-----------------------------" +done + +echo "Alle Tests abgeschlossen." 
\ No newline at end of file diff --git a/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/test_text.json b/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/test_text.json new file mode 100644 index 00000000..79e47e15 --- /dev/null +++ b/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/test_text.json @@ -0,0 +1,8 @@ +[ + {"text": "Angela Merkel was born in Hamburg.", "lang": "en"}, + {"text": "Barack Obama was the 44th president of the USA.", "lang": "en"}, + {"text": "Elon Musk founded SpaceX in 2002.", "lang": "en"}, + {"text": "Der Eiffelturm befindet sich in Paris.", "lang": "de"}, + {"text": "Angela Merkel wurde 1954 geboren.", "lang": "de"}, + {"text": "Barack Obama war der 44. Präsident der USA.", "lang": "de"} +] \ No newline at end of file From c3a9abf4597165f85c9367e943db89161dd0d6bd Mon Sep 17 00:00:00 2001 From: Ann-He2026 Date: Thu, 12 Mar 2026 11:24:37 +0100 Subject: [PATCH 2/6] New Emotion --- duui-transformers-Emotion/Dockerfile | 110 ++++++++++++++++ duui-transformers-Emotion/Dockerfile-cuda | 117 ++++++++++++++++++ duui-transformers-Emotion/Readme.md | 2 + duui-transformers-Emotion/docker_build.sh | 7 ++ duui-transformers-Emotion/pom.xml | 8 +- .../src/main/docker/Dockerfile | 10 +- .../src/main/docker/Dockerfile-cuda | 4 +- .../transformers/emotion/EmotionTest.java | 44 +++++++ .../README | 22 ++++ ...rt_tiny2_russian_emotion_datection_cedr.py | 21 ++++ .../test_docker.sh | 38 ++++++ .../test_text.json | 11 ++ 12 files changed, 387 insertions(+), 7 deletions(-) create mode 100644 duui-transformers-Emotion/Dockerfile create mode 100644 duui-transformers-Emotion/Dockerfile-cuda create mode 100644 duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/README create mode 100644 duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/rubert_tiny2_russian_emotion_datection_cedr.py create mode 100644 
duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/test_docker.sh create mode 100644 duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/test_text.json diff --git a/duui-transformers-Emotion/Dockerfile b/duui-transformers-Emotion/Dockerfile new file mode 100644 index 00000000..a963ad8b --- /dev/null +++ b/duui-transformers-Emotion/Dockerfile @@ -0,0 +1,110 @@ +FROM python:3.10 + +WORKDIR /usr/src/app + +EXPOSE 9714 + +# dependencies +RUN pip install setuptools wheel +COPY ./requirements.txt ./requirements.txt +RUN pip install -r requirements.txt + +RUN #python -m spacy download en_core_web_lg + +RUN #python -c "import nltk; nltk.download('all', download_dir='nltk_data')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='02shanky/finetuned-twitter-xlm-roberta-base-emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='DReAMy-lib/xlm-roberta-large-DreamBank-emotion-presence')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='MilaNLProc/xlm-emo-t')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='j-hartmann/emotion-english-distilroberta-base')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='michellejieli/emotion_text_classifier')" +RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cardiffnlp/twitter-roberta-base-emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='finiteautomata/bertweet-base-emotion-analysis')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='ActivationAI/distilbert-base-uncased-finetuned-emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='SamLowe/roberta-base-go_emotions')" +#RUN python -c "from 
transformers import pipeline; pipeline('text-classification', model='mrm8488/t5-base-finetuned-emotion')" +#RUN python -c "from transformers import pipeline; pipeline(model='microsoft/mdeberta-v3-base')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='pranaydeeps/EXALT-Baseline')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='boltuix/bert-emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='MilaNLProc/feel-it-italian-emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cardiffnlp/twitter-roberta-base-emotion-multilabel-latest')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='finiteautomata/beto-emotion-analysis')" +#RUN python -c "from transformers import AutoTokenizer, AutoModelForSequenceClassification; AutoModelForSequenceClassification.from_pretrained('poltextlab/xlm-roberta-large-pooled-MORES', token=''); AutoTokenizer.from_pretrained('lytang/MiniCheck-Flan-T5-Large')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='daveni/twitter-xlm-roberta-emotion-es')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='ChrisLalk/German-Emotions')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='msgfrom96/xlm_emo_multi')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cointegrated/rubert-tiny2-cedr-emotion-detection')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Aniemore/rubert-tiny2-russian-emotion-detection')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Johnson8187/Chinese-Emotion-Small')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', 
model='Johnson8187/Chinese-Emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='AnasAlokla/multilingual_go_emotions')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Zoopa/emotion-classification-model')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='esuriddick/distilbert-base-uncased-finetuned-emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Panda0116/emotion-classification-model')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='lordtt13/emo-mobilebert')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='alex-shvets/roberta-large-emopillars-contextual-emocontext')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='AdapterHub/bert-base-uncased-pf-emo')" +#RUN python -c "from pytorch_transformers import (BertTokenizer, BertModel, BertConfig,); BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False); BertModel.from_pretrained('bert-base-multilingual-cased')" +RUN python -c "from transformers import pipeline; pipeline('text-classification', \ + model='visolex/phobert-emotion')" + +# copy scripts +COPY ./src/main/python/TypeSystemEmotion.xml ./TypeSystemEmotion.xml +COPY ./src/main/python/Emo_mDeBERTa2.py ./Emo_mDeBERTa2.py +COPY ./src/main/python/EmotionDetection.py ./EmotionDetection.py +COPY ./src/main/python/duui_transformers_emotion.py ./duui_transformers_emotion.py +COPY ./src/main/python/duui_emotion.lua ./duui_emotion.lua +COPY ./src/main/python/script.py ./script.py +COPY ./src/main/python/utils.py ./utils.py + +#Für das russische Modell, was zuvor über das Python Skript runtergeladen werden muss +COPY ./src/main/python/offline_models/rubert_tiny2_russian ./offline_models/rubert_tiny2_russian + + +#COPY 
./src/main/python/UniversalJoy/models-multilingual/small-all.pt ./UniversalJoy/models-multilingual/small-all.pt +#COPY ./src/main/python/UniversalJoy/models-multilingual/large-all.pt ./UniversalJoy/models-multilingual/large-all.pt +#COPY ./src/main/python/UniversalJoy/models-multilingual/combi-all.pt ./UniversalJoy/models-multilingual/combi-all.pt +#COPY ./src/main/python/UniversalJoy/models-huge-english/huge-en.pt ./UniversalJoy/models-huge-english/huge-en.pt +#COPY ./src/main/python/UniversalJoy/models-large-monolingual/large-en.pt ./UniversalJoy/models-large-monolingual/large-en.pt +#COPY ./src/main/python/UniversalJoy/models-large-monolingual/large-es.pt ./UniversalJoy/models-large-monolingual/large-es.pt +#COPY ./src/main/python/UniversalJoy/models-large-monolingual/large-pt.pt ./UniversalJoy/models-large-monolingual/large-pt.pt +#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-en.pt ./UniversalJoy/models-small-monolingual/small-en.pt +#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-es.pt ./UniversalJoy/models-small-monolingual/small-es.pt +#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-pt.pt ./UniversalJoy/models-small-monolingual/small-pt.pt +#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-tl.pt ./UniversalJoy/models-small-monolingual/small-tl.pt +#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-zh.pt ./UniversalJoy/models-small-monolingual/small-zh.pt +#COPY ./src/main/python/nltk_data/ ./nltk_data/ +#COPY ./src/main/python/pol_emo_mDeBERTa/ ./pol_emo_mDeBERTa/ + +# log level +ARG LOG_LEVEL="DEBUG" +ENV LOG_LEVEL=$LOG_LEVEL + +# config +ARG MODEL_CACHE_SIZE=3 +ENV MODEL_CACHE_SIZE=$MODEL_CACHE_SIZE + +# meta data +ARG ANNOTATOR_NAME="duui-transformers-emotion" +ENV ANNOTATOR_NAME=$ANNOTATOR_NAME +ARG ANNOTATOR_VERSION="unset" +ENV ANNOTATOR_VERSION=$ANNOTATOR_VERSION + +# Model Info +ARG MODEL_VERSION=0.1 +ENV MODEL_VERSION=$MODEL_VERSION +ARG MODEL_NAME="" 
+ENV MODEL_NAME=$MODEL_NAME +ARG MODEL_SOURCE="" +ENV MODEL_SOURCE=$MODEL_SOURCE +ARG MODEL_LANG="" +ENV MODEL_LANG=$MODEL_LANG + +# offline mode for huggingface +ARG DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE=1 +ENV TRANSFORMERS_OFFLINE=$DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE + + + + +ENTRYPOINT ["uvicorn", "duui_transformers_emotion:app", "--host", "0.0.0.0", "--port" ,"9714"] +CMD ["--workers", "1"] diff --git a/duui-transformers-Emotion/Dockerfile-cuda b/duui-transformers-Emotion/Dockerfile-cuda new file mode 100644 index 00000000..9aceb468 --- /dev/null +++ b/duui-transformers-Emotion/Dockerfile-cuda @@ -0,0 +1,117 @@ +FROM nvidia/cuda:11.8.0-base-ubuntu22.04 + +RUN apt update && \ + DEBIAN_FRONTEND=noninteractive \ + apt install --no-install-recommends -y build-essential software-properties-common && \ + add-apt-repository -y ppa:deadsnakes/ppa && \ + apt install --no-install-recommends -y python3.10 python3-pip python3-setuptools python3-distutils && \ + apt clean && rm -rf /var/lib/apt/lists/* + +RUN ln -s /usr/bin/python3 /usr/bin/python +RUN python -m pip install --upgrade pip + +WORKDIR /usr/src/app + +EXPOSE 9714 + +# dependencies +RUN pip install setuptools wheel +COPY ./requirements.txt ./requirements.txt +RUN apt remove -y python3-blinker || true +RUN pip install -r requirements.txt + +#RUN pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl + + +#RUN python -c "import nltk; nltk.download('all', download_dir='nltk_data')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='02shanky/finetuned-twitter-xlm-roberta-base-emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='DReAMy-lib/xlm-roberta-large-DreamBank-emotion-presence')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='MilaNLProc/xlm-emo-t')" +#RUN python -c "from transformers import pipeline; 
pipeline('text-classification', model='j-hartmann/emotion-english-distilroberta-base')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='michellejieli/emotion_text_classifier')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cardiffnlp/twitter-roberta-base-emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='finiteautomata/bertweet-base-emotion-analysis')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='ActivationAI/distilbert-base-uncased-finetuned-emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='SamLowe/roberta-base-go_emotions')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='mrm8488/t5-base-finetuned-emotion')" +#RUN python -c "from transformers import pipeline; pipeline(model='microsoft/mdeberta-v3-base')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='pranaydeeps/EXALT-Baseline')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='boltuix/bert-emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='MilaNLProc/feel-it-italian-emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cardiffnlp/twitter-roberta-base-emotion-multilabel-latest')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='finiteautomata/beto-emotion-analysis')" +#RUN python -c "from transformers import AutoTokenizer, AutoModelForSequenceClassification; AutoModelForSequenceClassification.from_pretrained('poltextlab/xlm-roberta-large-pooled-MORES', token=''); AutoTokenizer.from_pretrained('lytang/MiniCheck-Flan-T5-Large')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', 
model='daveni/twitter-xlm-roberta-emotion-es')" +RUN python -c "from transformers import pipeline; pipeline('text-classification', model='ChrisLalk/German-Emotions')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='msgfrom96/xlm_emo_multi')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cointegrated/rubert-tiny2-cedr-emotion-detection')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Aniemore/rubert-tiny2-russian-emotion-detection')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Johnson8187/Chinese-Emotion-Small')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Johnson8187/Chinese-Emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='AnasAlokla/multilingual_go_emotions')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Zoopa/emotion-classification-model')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='esuriddick/distilbert-base-uncased-finetuned-emotion')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Panda0116/emotion-classification-model')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='lordtt13/emo-mobilebert')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='alex-shvets/roberta-large-emopillars-contextual-emocontext')" +#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='AdapterHub/bert-base-uncased-pf-emo')" +#RUN python -c "from pytorch_transformers import (BertTokenizer, BertModel, BertConfig,); BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False); BertModel.from_pretrained('bert-base-multilingual-cased')" +RUN 
python -c "from transformers import pipeline; pipeline('text-classification', \ + model='visolex/phobert-emotion')" + +# copy scripts +COPY ./src/main/python/TypeSystemEmotion.xml ./TypeSystemEmotion.xml +COPY ./src/main/python/Emo_mDeBERTa2.py ./Emo_mDeBERTa2.py +COPY ./src/main/python/EmotionDetection.py ./EmotionDetection.py +COPY ./src/main/python/duui_transformers_emotion.py ./duui_transformers_emotion.py +COPY ./src/main/python/duui_emotion.lua ./duui_emotion.lua +COPY ./src/main/python/script.py ./script.py +COPY ./src/main/python/utils.py ./utils.py +#COPY ./src/main/python/UniversalJoy/models-multilingual/small-all.pt ./UniversalJoy/models-multilingual/small-all.pt +#COPY ./src/main/python/UniversalJoy/models-multilingual/large-all.pt ./UniversalJoy/models-multilingual/large-all.pt +#COPY ./src/main/python/UniversalJoy/models-multilingual/combi-all.pt ./UniversalJoy/models-multilingual/combi-all.pt +#COPY ./src/main/python/UniversalJoy/models-huge-english/huge-en.pt ./UniversalJoy/models-huge-english/huge-en.pt +#COPY ./src/main/python/UniversalJoy/models-large-monolingual/large-en.pt ./UniversalJoy/models-large-monolingual/large-en.pt +#COPY ./src/main/python/UniversalJoy/models-large-monolingual/large-es.pt ./UniversalJoy/models-large-monolingual/large-es.pt +#COPY ./src/main/python/UniversalJoy/models-large-monolingual/large-pt.pt ./UniversalJoy/models-large-monolingual/large-pt.pt +#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-en.pt ./UniversalJoy/models-small-monolingual/small-en.pt +#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-es.pt ./UniversalJoy/models-small-monolingual/small-es.pt +#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-pt.pt ./UniversalJoy/models-small-monolingual/small-pt.pt +#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-tl.pt ./UniversalJoy/models-small-monolingual/small-tl.pt +#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-zh.pt 
./UniversalJoy/models-small-monolingual/small-zh.pt +#COPY ./src/main/python/nltk_data/ ./nltk_data/ +#COPY ./src/main/python/pol_emo_mDeBERTa/ ./pol_emo_mDeBERTa/ + +# log level +ARG LOG_LEVEL="DEBUG" +ENV LOG_LEVEL=$LOG_LEVEL + +# config +ARG MODEL_CACHE_SIZE=3 +ENV MODEL_CACHE_SIZE=$MODEL_CACHE_SIZE + +# meta data +ARG ANNOTATOR_NAME="duui-transformers-emotion" +ENV ANNOTATOR_NAME=$ANNOTATOR_NAME +ARG ANNOTATOR_VERSION="unset" +ENV ANNOTATOR_VERSION=$ANNOTATOR_VERSION + +# Model Info +ARG MODEL_VERSION=0.1 +ENV MODEL_VERSION=$MODEL_VERSION +ARG MODEL_NAME="" +ENV MODEL_NAME=$MODEL_NAME +ARG MODEL_SOURCE="" +ENV MODEL_SOURCE=$MODEL_SOURCE +ARG MODEL_LANG="" +ENV MODEL_LANG=$MODEL_LANG + +# offline mode for huggingface +ARG DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE=1 +ENV TRANSFORMERS_OFFLINE=$DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE + + + + +ENTRYPOINT ["uvicorn", "duui_transformers_emotion:app", "--host", "0.0.0.0", "--port" ,"9714"] +CMD ["--workers", "1"] diff --git a/duui-transformers-Emotion/Readme.md b/duui-transformers-Emotion/Readme.md index 7655adee..41904eb5 100644 --- a/duui-transformers-Emotion/Readme.md +++ b/duui-transformers-Emotion/Readme.md @@ -53,6 +53,8 @@ DUUI implementation for selected Hugging-Face-based transformer [Emotion tools]( | universal-joy-pt-small | https://github.com/sotlampr/universal-joy | 6ab01e98c8106e610247e5e8f0712af08c007b67 | PT | | universal-joy-tl-small | https://github.com/sotlampr/universal-joy | 6ab01e98c8106e610247e5e8f0712af08c007b67 | TL | | universal-joy-zh-small | https://github.com/sotlampr/universal-joy | 6ab01e98c8106e610247e5e8f0712af08c007b67 | ZH | +| phobert-emotion | https://huggingface.co/visolex/phobert-emotion | 6099c5a6f91fc6c8175818e37f96fecad0c96b63 | VI | +|seara/rubert-tiny2-russian-emotion-detection-cedr|https://huggingface.co/seara/rubert-tiny2-russian-emotion-detection-cedr | 927ce911c4343c6113859325ab76c6c3c473ff0f | RU | # How To Use diff --git a/duui-transformers-Emotion/docker_build.sh 
b/duui-transformers-Emotion/docker_build.sh index 89229e33..777aac30 100644 --- a/duui-transformers-Emotion/docker_build.sh +++ b/duui-transformers-Emotion/docker_build.sh @@ -377,6 +377,13 @@ export MODEL_LANG="DE" #export MODEL_LANG="ZH" ####-------------------------------------------------------------------- +####--------------------------------------------------------------------- +export MODEL_NAME="visolex/phobert-emotion" +export MODEL_SPECNAME="phobert-emotion" +export MODEL_VERSION="90460fb946cf640ef9c56ae484cabb49d48ef14e" +export MODEL_SOURCE="https://huggingface.co/visolex/phobert-emotion" +export MODEL_LANG="VI" +####-------------------------------------------------------------------- docker build \ --build-arg ANNOTATOR_NAME \ diff --git a/duui-transformers-Emotion/pom.xml b/duui-transformers-Emotion/pom.xml index 4cfec8fc..a265f77a 100644 --- a/duui-transformers-Emotion/pom.xml +++ b/duui-transformers-Emotion/pom.xml @@ -99,9 +99,9 @@ ${ttlab.duui.version} --> - com.github.mevbagci + com.github.texttechnologylab DockerUnifiedUIMAInterface - 1.4.9 + 1.5.3 @@ -110,9 +110,9 @@ - com.github.mevbagci + com.github.texttechnologylab UIMATypeSystem - 3.0.13 + 02fb1a2f13 diff --git a/duui-transformers-Emotion/src/main/docker/Dockerfile b/duui-transformers-Emotion/src/main/docker/Dockerfile index aa9f53c9..23472e7f 100644 --- a/duui-transformers-Emotion/src/main/docker/Dockerfile +++ b/duui-transformers-Emotion/src/main/docker/Dockerfile @@ -44,7 +44,8 @@ RUN python -c "from transformers import pipeline; pipeline('text-classification' #RUN python -c "from transformers import pipeline; pipeline('text-classification', model='alex-shvets/roberta-large-emopillars-contextual-emocontext')" #RUN python -c "from transformers import pipeline; pipeline('text-classification', model='AdapterHub/bert-base-uncased-pf-emo')" #RUN python -c "from pytorch_transformers import (BertTokenizer, BertModel, BertConfig,); BertTokenizer.from_pretrained('bert-base-multilingual-cased', 
do_lower_case=False); BertModel.from_pretrained('bert-base-multilingual-cased')" - +RUN python -c "from transformers import pipeline; pipeline('text-classification', \ + model='visolex/phobert-emotion')" # copy scripts COPY ./src/main/python/TypeSystemEmotion.xml ./TypeSystemEmotion.xml @@ -54,6 +55,11 @@ COPY ./src/main/python/duui_transformers_emotion.py ./duui_transformers_emotion. COPY ./src/main/python/duui_emotion.lua ./duui_emotion.lua COPY ./src/main/python/script.py ./script.py COPY ./src/main/python/utils.py ./utils.py + +#Für das russische Modell, was zuvor über das Python Skript runtergeladen werden muss +COPY ./src/main/python/offline_models/rubert_tiny2_russian /usr/src/app/offline_models/rubert_tiny2_russian + + #COPY ./src/main/python/UniversalJoy/models-multilingual/small-all.pt ./UniversalJoy/models-multilingual/small-all.pt #COPY ./src/main/python/UniversalJoy/models-multilingual/large-all.pt ./UniversalJoy/models-multilingual/large-all.pt #COPY ./src/main/python/UniversalJoy/models-multilingual/combi-all.pt ./UniversalJoy/models-multilingual/combi-all.pt @@ -101,4 +107,4 @@ ENV TRANSFORMERS_OFFLINE=$DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE ENTRYPOINT ["uvicorn", "duui_transformers_emotion:app", "--host", "0.0.0.0", "--port" ,"9714"] -CMD ["--workers", "1"] \ No newline at end of file +CMD ["--workers", "1"] diff --git a/duui-transformers-Emotion/src/main/docker/Dockerfile-cuda b/duui-transformers-Emotion/src/main/docker/Dockerfile-cuda index 33bfef7e..9aceb468 100644 --- a/duui-transformers-Emotion/src/main/docker/Dockerfile-cuda +++ b/duui-transformers-Emotion/src/main/docker/Dockerfile-cuda @@ -56,6 +56,8 @@ RUN python -c "from transformers import pipeline; pipeline('text-classification' #RUN python -c "from transformers import pipeline; pipeline('text-classification', model='alex-shvets/roberta-large-emopillars-contextual-emocontext')" #RUN python -c "from transformers import pipeline; pipeline('text-classification', 
model='AdapterHub/bert-base-uncased-pf-emo')" #RUN python -c "from pytorch_transformers import (BertTokenizer, BertModel, BertConfig,); BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False); BertModel.from_pretrained('bert-base-multilingual-cased')" +RUN python -c "from transformers import pipeline; pipeline('text-classification', \ + model='visolex/phobert-emotion')" # copy scripts COPY ./src/main/python/TypeSystemEmotion.xml ./TypeSystemEmotion.xml @@ -112,4 +114,4 @@ ENV TRANSFORMERS_OFFLINE=$DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE ENTRYPOINT ["uvicorn", "duui_transformers_emotion:app", "--host", "0.0.0.0", "--port" ,"9714"] -CMD ["--workers", "1"] \ No newline at end of file +CMD ["--workers", "1"] diff --git a/duui-transformers-Emotion/src/test/java/org/hucompute/textimager/uima/transformers/emotion/EmotionTest.java b/duui-transformers-Emotion/src/test/java/org/hucompute/textimager/uima/transformers/emotion/EmotionTest.java index d0547ea1..9ff6e22a 100644 --- a/duui-transformers-Emotion/src/test/java/org/hucompute/textimager/uima/transformers/emotion/EmotionTest.java +++ b/duui-transformers-Emotion/src/test/java/org/hucompute/textimager/uima/transformers/emotion/EmotionTest.java @@ -225,4 +225,48 @@ public void TurkishTest() throws Exception { Assertions.assertEquals(expected_emotions.get(expected.indexOf(emotion)), key); } } + + + @Test + public void VietnameseTest() throws Exception { + composer.add( + new DUUIRemoteDriver.Component(url) + .withParameter("selection", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence") + ); + + List sentences = Arrays.asList( + "Tao ghét mày. Tao đang rất tức giận", + "Tôi rất vui khi được ở đây. Tôi yêu nơi này." 
+ ); + + createCas("vi", sentences); + composer.run(cas); + + Collection all_emotions = JCasUtil.select(cas, Emotion.class); + ArrayList> expected = new ArrayList<>(); + + for (Emotion emotion : all_emotions) { + System.out.println("Text: " + emotion.getCoveredText()); + Map emotions = new HashMap<>(); + FSArray emotions_all = emotion.getEmotions(); + + for (AnnotationComment comment : emotions_all) { + emotions.put(comment.getKey(), Float.parseFloat(comment.getValue())); + System.out.println(" " + comment.getKey() + ": " + comment.getValue()); + } + expected.add(emotions); + } + + // Expected: Anger, Enjoyment (CAPITALIZED to match model output) + ArrayList expected_emotions = new ArrayList<>(Arrays.asList("Anger", "Enjoyment")); + + for (int i = 0; i < expected.size(); i++) { + String top_emotion = Collections.max( + expected.get(i).entrySet(), + Map.Entry.comparingByValue() + ).getKey(); + Assertions.assertEquals(expected_emotions.get(i), top_emotion); + } + } + } \ No newline at end of file diff --git a/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/README b/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/README new file mode 100644 index 00000000..5779ccc9 --- /dev/null +++ b/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/README @@ -0,0 +1,22 @@ +# Docker-Test für das Modell https://huggingface.co/seara/rubert-tiny2-russian-emotion-detection-cedr + +## Ziel +Überprüfung, ob das Modell 'rubert_tiny2_russian' korrekt im Docker-Container offline läuft. +Offline, da es im Online-Modus beim Bauen des Docker Images immer zu Fehlern beim Download kam.
+ +## Testaufbau +- Modell: seara/rubert-tiny2-russian-emotion-detection-cedr +- Docker-Endpunkt: http://localhost:9714/v1/process +- 9 Texte, die jeweils verschiedene Emotionen haben, auch Emotionen, die nicht im Modell enthalten sind +- Skript: test_docker.sh +- Testausgabe: predicted label + score + erwartetes Label + +## Durchführung +1. Modell durch das Pythonskript rubert_tiny2_russian_emotion_datection_cedr.py in den Ordner laden +2. Docker Image bauen: docker build -t emotionslos:latest . +2.1 Man muss sich im Ordner duui-transformers-Emotion befinden. +2.2 Das Dockerfile muss direkt in dem Ordner sein und nicht im Unterordner docker, sonst werden die Pfade nicht gefunden. +3. Docker Image starten: docker run -p 9714:9714 \ + -e MODEL_NAME=seara/rubert-tiny2-russian-emotion-detection-cedr \ + emotionslos:latest +4. Testskript ausführen diff --git a/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/rubert_tiny2_russian_emotion_datection_cedr.py b/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/rubert_tiny2_russian_emotion_datection_cedr.py new file mode 100644 index 00000000..30492574 --- /dev/null +++ b/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/rubert_tiny2_russian_emotion_datection_cedr.py @@ -0,0 +1,21 @@ +""" +Python-Code, um das Modell manuell herunterzuladen, damit es dann im Offline-Modus läuft. +Das Modell wird im Ordner main/python/offline_models abgelegt. Das Dockerfile greift darauf zu.
+""" + + +from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline +from pathlib import Path + +# Model name +model_name = "seara/rubert-tiny2-russian-emotion-detection-cedr" + +# Download tokenizer and model (requires network access) +tokenizer = AutoTokenizer.from_pretrained(model_name) +model = AutoModelForSequenceClassification.from_pretrained(model_name) +local_dir = Path(__file__).resolve().parents[2] / "main" / "python" / "offline_models" / "rubert_tiny2_russian"  # anchored to this script, not the current working directory + +model.save_pretrained(local_dir) +tokenizer.save_pretrained(local_dir) + +print(f"Modell wurde heruntergeladen und in {local_dir} gespeichert.") \ No newline at end of file diff --git a/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/test_docker.sh b/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/test_docker.sh new file mode 100644 index 00000000..89135540 --- /dev/null +++ b/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/test_docker.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# URL of the Docker endpoint +URL="http://localhost:9714/v1/process" + +# File containing the test texts +TEST_FILE="test_text.json" + +# Loop over all test texts; read -r and quoting keep backslashes and whitespace in the JSON intact +jq -c '.[]' "$TEST_FILE" | while IFS= read -r t; do + text=$(printf '%s' "$t" | jq -r '.text') + expected=$(printf '%s' "$t" | jq -r '.expected') + + # Build the JSON body for the POST request + payload=$(jq -n --arg txt "$text" '{ + doc_len: ($txt | length), + lang: "ru", + selections: [ + { + selection: "1", + sentences: [ + { text: $txt, begin: 0, end: ($txt | length) } + ] + } + ] + }') + + # Send the request + response=$(curl -s -X POST "$URL" -H "Content-Type: application/json" -d "$payload") + + # Extract the result + label=$(printf '%s' "$response" | jq -r '.results[0][0]') + score=$(printf '%s' "$response" | jq -r '.factors[0][0]') + + echo "Text: $text" + echo "Predicted: $label (score: $score), Expected: $expected" + echo "-----------------------------" +done \ No newline at end of file diff --git
a/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/test_text.json b/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/test_text.json new file mode 100644 index 00000000..3df6a67f --- /dev/null +++ b/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/test_text.json @@ -0,0 +1,11 @@ +[ + {"text": "Я сегодня очень рад!", "expected": "JOY"}, + {"text": "Мне грустно сегодня.", "expected": "SADNESS"}, + {"text": "Это злит меня!", "expected": "ANGER"}, + {"text": "Я не знаю, что сказать.", "expected": "NEUTRAL"}, + {"text": "Удивительно!", "expected": "SURPRISE"}, + {"text": "Сегодня я проснулся с отличным настроением. Солнце светит, птицы поют, и я чувствую себя невероятно счастливым, что могу провести день с друзьями и семьёй", "expected": "JOY"}, + {"text": "Меня просто выводит из себя, когда люди обещают что-то сделать и не выполняют. Я устал терпеть их безответственность, это заставляет меня злиться сильнее с каждым днём", "expected": "ANGER"}, + {"text": "Я не могу спокойно спать, мысли о предстоящем собеседовании держат меня в тревоге. 
Сердце колотится, а руки дрожат.", "expected": "FEAR"}, + {"text": "Я так завидую своим братьям и сестрам ", "expected": "Niedriger Wert, da Neid nicht im Modell oder NEUTRAL"} +] \ No newline at end of file From ef49e1ab0dc08561b6f3e30b6e09b4bfd340a49e Mon Sep 17 00:00:00 2001 From: Ann-He2026 Date: Thu, 12 Mar 2026 11:27:52 +0100 Subject: [PATCH 3/6] New Sentiment Funktioniert nicht wegen offline-Modus From 0de72ebc62c7c2a255a6e21f85ae4446eefc4664 Mon Sep 17 00:00:00 2001 From: Ann-He2026 Date: Thu, 12 Mar 2026 11:31:24 +0100 Subject: [PATCH 4/6] New Sentiment Funktioniert nicht wegen offline-Modus --- .../models/rubert_base_cased_sentiment.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 duui-transformers-sentiment/src/main/python/models/rubert_base_cased_sentiment.py diff --git a/duui-transformers-sentiment/src/main/python/models/rubert_base_cased_sentiment.py b/duui-transformers-sentiment/src/main/python/models/rubert_base_cased_sentiment.py new file mode 100644 index 00000000..4d37bde7 --- /dev/null +++ b/duui-transformers-sentiment/src/main/python/models/rubert_base_cased_sentiment.py @@ -0,0 +1,42 @@ +""" +Das Modell stammt von Huggingface https://huggingface.co/blanchefort/rubert-base-cased-sentiment/tree/main + +Kann nicht getestet werden, da das Dockerimage sich aufgrund vom RUN des ersten Modells nicht bauen lässt, +wenn man das Dockerfile im Ordner duui-transformers-sentiment hat. Wenn Dockerfile im Unterordner docker ist, dann +die Pfade der Copy-Aufrufe nicht gefunden. + +Mittels preload(RUBERT) wird das Modell aber auf jeden Fall geladen. 
+""" + + + +def deeppavlov_preprocess(text: str) -> str: + new_text = [] + for t in text.split(" "): + t = '@user' if t.startswith('@') and len(t) > 1 else t + t = 'http' if t.startswith('http') else t + new_text.append(t) + return " ".join(new_text).strip()  # return the masked tokens, not the untouched input + +SUPPORTED_MODEL = { + "blanchefort/rubert-base-cased-sentiment": { + "version": "main", + "max_length": 512, + "mapping": { + "POSITIVE": 1, + "NEUTRAL": 0, + "NEGATIVE": -1 + }, + "3sentiment": { + "pos": ["POSITIVE"], + "neu": ["NEUTRAL"], + "neg": ["NEGATIVE"] + }, + "preprocess": deeppavlov_preprocess, + "languages": ["ru"] + } +} + +if __name__ == "__main__": + import preload_models + preload_models.preload(SUPPORTED_MODEL) \ No newline at end of file From 30c741149957f23a8674f844a96e32da59b32939 Mon Sep 17 00:00:00 2001 From: ann-kathrin Date: Sat, 14 Mar 2026 14:11:31 +0100 Subject: [PATCH 5/6] Online-Nutzung der Modelle --- duui-relation-extraction/docker_build.sh | 64 +++++++++ .../{ => src/main/docker}/Dockerfile | 4 +- .../src/main/docker/Dockerfile-cuda | 62 +++++++++ .../main/python/duui_relation_extraction.lua | 26 ++++ .../main/python/duui_relation_extraction.py | 126 ++++++++++++++++++ .../src/main/python/relation_extraction.py | 122 +++++++++++++++++ .../Rebel_Knowgl_Relation_Extractor/README | 20 +++ .../test_docker.sh | 0 .../test_text.json | 0 .../src/tests/Rebel_Relation_Extractor/README | 24 ---- duui-transformers-Emotion/Dockerfile | 110 --------------- duui-transformers-Emotion/Dockerfile-cuda | 117 ---------------- duui-transformers-Emotion/Readme.md | 2 +- duui-transformers-Emotion/docker_build.sh | 9 ++ .../src/main/docker/Dockerfile | 4 +- .../src/main/docker/Dockerfile-cuda | 2 +- .../src/main/python/EmotionDetection.py | 11 +- .../main/python/duui_transformers_emotion.py | 3 + .../README | 14 +- .../src/main/docker/Dockerfile | 2 +- 20 files changed, 455 insertions(+), 267 deletions(-) create mode 100644 duui-relation-extraction/docker_build.sh rename duui-relation-extraction/{ =>
src/main/docker}/Dockerfile (87%) create mode 100644 duui-relation-extraction/src/main/docker/Dockerfile-cuda create mode 100644 duui-relation-extraction/src/main/python/duui_relation_extraction.lua create mode 100644 duui-relation-extraction/src/main/python/duui_relation_extraction.py create mode 100644 duui-relation-extraction/src/main/python/relation_extraction.py create mode 100644 duui-relation-extraction/src/tests/Rebel_Knowgl_Relation_Extractor/README rename duui-relation-extraction/src/tests/{Rebel_Relation_Extractor => Rebel_Knowgl_Relation_Extractor}/test_docker.sh (100%) rename duui-relation-extraction/src/tests/{Rebel_Relation_Extractor => Rebel_Knowgl_Relation_Extractor}/test_text.json (100%) delete mode 100644 duui-relation-extraction/src/tests/Rebel_Relation_Extractor/README delete mode 100644 duui-transformers-Emotion/Dockerfile delete mode 100644 duui-transformers-Emotion/Dockerfile-cuda diff --git a/duui-relation-extraction/docker_build.sh b/duui-relation-extraction/docker_build.sh new file mode 100644 index 00000000..f0625a4c --- /dev/null +++ b/duui-relation-extraction/docker_build.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +set -euo pipefail + +#export ANNOTATOR_CUDA= +export ANNOTATOR_CUDA="-cuda" + +export ANNOTATOR_NAME=duui-relation-extraction +export ANNOTATOR_VERSION=0.4.0 +export LOG_LEVEL=DEBUG +export MODEL_CACHE_SIZE=3 +export DOCKER_REGISTRY="docker.texttechnologylab.org/" + +####--------------------------------------------------------------------- +export MODEL_NAME="Babelscape/rebel-large" +export MODEL_SPECNAME="rebel-large" +export MODEL_VERSION="44eb6cb4585df284ce6c4d6a7013f83fe473c052" +export MODEL_SOURCE="https://huggingface.co/Babelscape/rebel-large" +export MODEL_LANG="Multilingual" +####-------------------------------------------------------------------- + +docker build \ + --build-arg ANNOTATOR_NAME \ + --build-arg ANNOTATOR_VERSION \ + --build-arg LOG_LEVEL \ + --build-arg MODEL_CACHE_SIZE \ + --build-arg MODEL_NAME \ + 
--build-arg MODEL_VERSION \ + --build-arg MODEL_SOURCE \ + --build-arg MODEL_LANG \ + -t ${DOCKER_REGISTRY}${ANNOTATOR_NAME}"-"${MODEL_SPECNAME}:${ANNOTATOR_VERSION}${ANNOTATOR_CUDA} \ + -f src/main/docker/Dockerfile${ANNOTATOR_CUDA} \ + . + +docker tag \ + ${DOCKER_REGISTRY}${ANNOTATOR_NAME}"-"${MODEL_SPECNAME}:${ANNOTATOR_VERSION}${ANNOTATOR_CUDA} \ + ${DOCKER_REGISTRY}${ANNOTATOR_NAME}"-"${MODEL_SPECNAME}:latest${ANNOTATOR_CUDA} + + +####--------------------------------------------------------------------- +export MODEL_NAME="ibm-research/knowgl-large" +export MODEL_SPECNAME="knowgl-large" +export MODEL_VERSION="94596fd9f697498f7ee7363dbf4cc66f08d499e8" +export MODEL_SOURCE="https://huggingface.co/ibm-research/knowgl-large" +export MODEL_LANG="Multilingual" +####-------------------------------------------------------------------- + + +docker build \ + --build-arg ANNOTATOR_NAME \ + --build-arg ANNOTATOR_VERSION \ + --build-arg LOG_LEVEL \ + --build-arg MODEL_CACHE_SIZE \ + --build-arg MODEL_NAME \ + --build-arg MODEL_VERSION \ + --build-arg MODEL_SOURCE \ + --build-arg MODEL_LANG \ + -t ${DOCKER_REGISTRY}${ANNOTATOR_NAME}"-"${MODEL_SPECNAME}:${ANNOTATOR_VERSION}${ANNOTATOR_CUDA} \ + -f src/main/docker/Dockerfile${ANNOTATOR_CUDA} \ + . + +docker tag \ + ${DOCKER_REGISTRY}${ANNOTATOR_NAME}"-"${MODEL_SPECNAME}:${ANNOTATOR_VERSION}${ANNOTATOR_CUDA} \ + ${DOCKER_REGISTRY}${ANNOTATOR_NAME}"-"${MODEL_SPECNAME}:latest${ANNOTATOR_CUDA} + diff --git a/duui-relation-extraction/Dockerfile b/duui-relation-extraction/src/main/docker/Dockerfile similarity index 87% rename from duui-relation-extraction/Dockerfile rename to duui-relation-extraction/src/main/docker/Dockerfile index d23599eb..3c966455 100644 --- a/duui-relation-extraction/Dockerfile +++ b/duui-relation-extraction/src/main/docker/Dockerfile @@ -2,10 +2,10 @@ FROM python:3.10 WORKDIR /app -COPY requirements.txt . +COPY ../../../requirements.txt . 
RUN pip install --no-cache-dir -r requirements.txt -COPY ./src/main/python/TypSystemRelation.xml ./TypSystemRelation.xml +COPY ./src/main/python/TypSystemRelation.xml ./TypSystemRelation.xml COPY ./src/main/python/duui_relation_extraction.py ./duui_relation_extraction.py COPY ./src/main/python/duui_relation_extraction.lua ./duui_relation_extraction.lua COPY ./src/main/python/relation_extraction.py ./relation_extraction.py diff --git a/duui-relation-extraction/src/main/docker/Dockerfile-cuda b/duui-relation-extraction/src/main/docker/Dockerfile-cuda new file mode 100644 index 00000000..f896f8fd --- /dev/null +++ b/duui-relation-extraction/src/main/docker/Dockerfile-cuda @@ -0,0 +1,62 @@ +FROM nvidia/cuda:11.8.0-base-ubuntu22.04 + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive \ + apt-get install --no-install-recommends -y build-essential software-properties-common && \ + add-apt-repository -y ppa:deadsnakes/ppa && \ + apt-get install --no-install-recommends -y python3.10 python3-pip python3-setuptools python3-distutils && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN ln -s /usr/bin/python3 /usr/bin/python +RUN python -m pip install --upgrade pip + +WORKDIR /usr/src/app + +EXPOSE 8000 + +# dependencies +RUN pip install --no-cache-dir setuptools wheel +COPY ./requirements.txt ./requirements.txt +RUN apt-get remove -y python3-blinker || true +RUN pip install --no-cache-dir -r requirements.txt + +# preload the models with the same seq2seq classes relation_extraction.py uses, so offline mode finds them in the cache +RUN python -c "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM; AutoTokenizer.from_pretrained('Babelscape/rebel-large'); AutoModelForSeq2SeqLM.from_pretrained('Babelscape/rebel-large')" +RUN python -c "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM; AutoTokenizer.from_pretrained('ibm-research/knowgl-large'); AutoModelForSeq2SeqLM.from_pretrained('ibm-research/knowgl-large')" + +# copy scripts +COPY ./src/main/python/TypSystemRelation.xml ./TypSystemRelation.xml +COPY ./src/main/python/duui_relation_extraction.py ./duui_relation_extraction.py +COPY ./src/main/python/duui_relation_extraction.lua ./duui_relation_extraction.lua +COPY ./src/main/python/relation_extraction.py ./relation_extraction.py + +# log level +ARG LOG_LEVEL="DEBUG" +ENV
LOG_LEVEL=$LOG_LEVEL + +# config +ARG MODEL_CACHE_SIZE=3 +ENV MODEL_CACHE_SIZE=$MODEL_CACHE_SIZE + +# meta data +ARG ANNOTATOR_NAME="duui-relation-extraction" +ENV ANNOTATOR_NAME=$ANNOTATOR_NAME +ARG ANNOTATOR_VERSION="unset" +ENV ANNOTATOR_VERSION=$ANNOTATOR_VERSION + +# Model Info (MODEL_NAME must be a key of MODELS in duui_relation_extraction.py) +ARG MODEL_VERSION=0.1 +ENV MODEL_VERSION=$MODEL_VERSION +ARG MODEL_NAME="Babelscape/rebel-large" +ENV MODEL_NAME=$MODEL_NAME +ARG MODEL_SOURCE="" +ENV MODEL_SOURCE=$MODEL_SOURCE +ARG MODEL_LANG="" +ENV MODEL_LANG=$MODEL_LANG + +# offline mode for huggingface +ARG DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE=1 +ENV TRANSFORMERS_OFFLINE=$DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE + + +ENTRYPOINT ["uvicorn", "duui_relation_extraction:duui_relation_extraction", "--host", "0.0.0.0", "--port" ,"8000"] +CMD ["--workers", "1"] diff --git a/duui-relation-extraction/src/main/python/duui_relation_extraction.lua b/duui-relation-extraction/src/main/python/duui_relation_extraction.lua new file mode 100644 index 00000000..48e04825 --- /dev/null +++ b/duui-relation-extraction/src/main/python/duui_relation_extraction.lua @@ -0,0 +1,26 @@ +-- DUUI Lua mapping for Relation Extraction +-- Maps DUUIRequest → Python +-- Maps Python Response → DUUI Annotations + +function map_request(doc) + return { + doc_len = #doc.text, + lang = doc.lang or "en", + selections = doc.sentences + } +end + +function map_response(response) + local annotations = {} + for i, sentence_relations in ipairs(response.relations) do + for _, triplet in ipairs(sentence_relations) do + table.insert(annotations, { + subject = triplet.subject, + predicate = triplet.predicate, + object = triplet.object, + confidence = triplet.confidence + }) + end + end + return annotations +end \ No newline at end of file diff --git a/duui-relation-extraction/src/main/python/duui_relation_extraction.py b/duui-relation-extraction/src/main/python/duui_relation_extraction.py new file mode 100644 index 00000000..5aa5a433 --- /dev/null +++
b/duui-relation-extraction/src/main/python/duui_relation_extraction.py @@ -0,0 +1,126 @@ +""" +DUUI endpoint +""" + +from fastapi import FastAPI +from pydantic import BaseModel +from typing import List +from relation_extraction import RebelRelationExtraction, KnowGLRelationExtraction  # GLiNERRelationExtraction is not defined in relation_extraction.py +from functools import lru_cache +from threading import Lock +import torch +from time import time +import os + +model_lock = Lock() +device = os.getenv("DEVICE", "cpu") + +MODELS = { + "Babelscape/rebel-large": { + "path": "Babelscape/rebel-large", + "class": RebelRelationExtraction + }, + "ibm-research/knowgl-large": { + "path": "ibm-research/knowgl-large", + "class": KnowGLRelationExtraction + } +} + +model_name = os.getenv("MODEL_NAME") or "Babelscape/rebel-large"  # fall back when MODEL_NAME is unset or empty + +MODEL_PATH = MODELS[model_name]["path"] + +model_class = MODELS[model_name]["class"] + +class UimaSentence(BaseModel): + text: str + begin: int + end: int + +class UimaSentenceSelection(BaseModel): + selection: str + sentences: List[UimaSentence] + +class DUUIRequest(BaseModel): + doc_len: int + lang: str + selections: List[UimaSentenceSelection] + +class DocumentModification(BaseModel): + user: str + timestamp: int + comment: str + +class AnnotationMeta(BaseModel): + name: str + version: str + modelName: str + modelVersion: str + +class DUUIResponse(BaseModel): + meta: AnnotationMeta + modification_meta: DocumentModification + begins: List[int] + ends: List[int] + relations: List[List[dict]] + model_name: str + model_version: str + model_source: str + model_lang: str + +duui_relation_extraction = FastAPI(title="DUUI Relation Extraction", version="1.0") + +@lru_cache() +def load_model(): + return model_class(MODEL_PATH, device) + +def process_selection(selection: UimaSentenceSelection): + begins, ends, relations = [], [], [] + + texts = [f"sentence: {s.text}" for s in selection.sentences] + with model_lock: + model = load_model() + results = model.extract_relations(texts) + + for idx, sentence in
enumerate(selection.sentences): + begins.append(sentence.begin) + ends.append(sentence.end) + relations.append(results[idx]) + + return begins, ends, relations + +@duui_relation_extraction.post("/v1/process") +def post_process(request: DUUIRequest): + modification_timestamp_seconds = int(time()) + begins, ends, relations = [], [], [] + + meta = AnnotationMeta( + name="REBEL Relation Extractor", + version="1.0", + modelName=model_name,  # report the model that is actually loaded + modelVersion=os.getenv("MODEL_VERSION") or "1.0" + ) + + modification_meta = DocumentModification( + user="REBEL RE", + timestamp=modification_timestamp_seconds, + comment="Relation Extraction" + ) + + for selection in request.selections: + b, e, r = process_selection(selection) + begins.extend(b) + ends.extend(e) + relations.extend(r) + + return DUUIResponse( + meta=meta, + modification_meta=modification_meta, + begins=begins, + ends=ends, + relations=relations, + model_name=model_name, + model_version=os.getenv("MODEL_VERSION") or "1.0", + model_source="HuggingFace", + model_lang="multilingual" + ) \ No newline at end of file diff --git a/duui-relation-extraction/src/main/python/relation_extraction.py b/duui-relation-extraction/src/main/python/relation_extraction.py new file mode 100644 index 00000000..3062a7f9 --- /dev/null +++ b/duui-relation-extraction/src/main/python/relation_extraction.py @@ -0,0 +1,122 @@ +""" +Pipeline for the Hugging Face model loader +""" + +from transformers import AutoTokenizer, AutoModelForSeq2SeqLM +import torch +import re + +class RebelRelationExtraction: + + def __init__(self, model_path: str, device: str): + self.device = device + self.tokenizer = AutoTokenizer.from_pretrained(model_path) + self.model = AutoModelForSeq2SeqLM.from_pretrained(model_path).to(device) + + def extract_relations(self, texts): + inputs = self.tokenizer( + texts, + return_tensors="pt", + padding=True, + truncation=True + ).to(self.device) + + outputs = self.model.generate( + **inputs, + max_length=256, + num_beams=4 + ) + + decoded =
self.tokenizer.batch_decode(outputs, skip_special_tokens=True) + + relations = [self.parse_triplets(text) for text in decoded] + return relations + + @staticmethod + def parse_triplets(text): + triplets = [] + # Splitte Triplets durch Doppel-Leerzeichen oder Tabs + raw_triplets = re.split(r"\t+|\s{2,}", text.strip()) + + # Prüfen, ob genug Teile da sind + if len(raw_triplets) % 3 != 0: + # Kann passieren bei mehreren Triplets, Fehlermeldung ignorieren + print("[WARN] Triplet-Parsing unvollständig:", text) + return triplets + + # Jedes 3er-Paket = Subject, Object, Predicate + for i in range(0, len(raw_triplets), 3): + subject = raw_triplets[i].strip() + object_ = raw_triplets[i + 1].strip() + predicate = raw_triplets[i + 2].strip() + triplets.append({ + "subject": subject, + "predicate": predicate, + "object": object_, + "confidence": 1.0 + }) + + return triplets + + +class KnowGLRelationExtraction: + + def __init__(self, model_path: str, device: str = "cpu"): + self.device = device + self.tokenizer = AutoTokenizer.from_pretrained(model_path) + self.model = AutoModelForSeq2SeqLM.from_pretrained(model_path).to(device) + + def extract_relations(self, texts): + inputs = self.tokenizer( + texts, + return_tensors="pt", + padding=True, + truncation=True + ).to(self.device) + + outputs = self.model.generate( + **inputs, + max_length=256, + num_beams=4 + ) + + decoded = self.tokenizer.batch_decode(outputs, skip_special_tokens=True) + + relations = [self.parse_triplets(text) for text in decoded] + return relations + + @staticmethod + def parse_triplets(text): + """ + Parse KnowGL output: + [(subj_mention # subj_label # subj_type) | relation | (obj_mention # obj_label # obj_type)]$ + """ + triplets = [] + + raw_triplets = text.split("$") + + for triple in raw_triplets: + triple = triple.strip() + + match = re.match( + r"\[\((.*?)#(.*?)#(.*?)\)\s*\|\s*(.*?)\s*\|\s*\((.*?)#(.*?)#(.*?)\)\]", + triple + ) + + if not match: + continue + + subj_mention, subj_label, subj_type, 
relation, obj_mention, obj_label, obj_type = match.groups() + + triplets.append({ + "subject": subj_label.strip(), + "predicate": relation.strip(), + "object": obj_label.strip(), + "subject_mention": subj_mention.strip(), + "object_mention": obj_mention.strip(), + "subject_type": subj_type.strip(), + "object_type": obj_type.strip(), + "confidence": 1.0 + }) + + return triplets diff --git a/duui-relation-extraction/src/tests/Rebel_Knowgl_Relation_Extractor/README b/duui-relation-extraction/src/tests/Rebel_Knowgl_Relation_Extractor/README new file mode 100644 index 00000000..32f4278a --- /dev/null +++ b/duui-relation-extraction/src/tests/Rebel_Knowgl_Relation_Extractor/README @@ -0,0 +1,20 @@ +# Docker-Test für das Modell https://huggingface.co/Babelscape/rebel-large + +## Ziel +Überprüfung, ob das Modell 'rebel-large' korrekt im Docker-Container läuft. + +## Testaufbau +- Modell: Babelscape/rebel-large +- Docker-Endpunkt: http://localhost:8000/v1/process +- 6 Sätze, die auf ihre Relation im Satz untersucht werden. +- Skript: test_docker.sh +- Testausgabe: predicted sentence + Triplets + +## Durchführung +1. Docker Image bauen mit docker_build.sh +2. Docker Image starten: docker run -p 8000:8000 \ + -e MODEL_NAME=Babelscape/rebel-large \ + docker.texttechnologylab.org/duui-relation-extraction-rebel-large:latest-cuda +3. Testskript ausführen + +Das Modell ibm-research/knowgl-large kann auf die gleiche Weise getestet werden.
diff --git a/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/test_docker.sh b/duui-relation-extraction/src/tests/Rebel_Knowgl_Relation_Extractor/test_docker.sh similarity index 100% rename from duui-relation-extraction/src/tests/Rebel_Relation_Extractor/test_docker.sh rename to duui-relation-extraction/src/tests/Rebel_Knowgl_Relation_Extractor/test_docker.sh diff --git a/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/test_text.json b/duui-relation-extraction/src/tests/Rebel_Knowgl_Relation_Extractor/test_text.json similarity index 100% rename from duui-relation-extraction/src/tests/Rebel_Relation_Extractor/test_text.json rename to duui-relation-extraction/src/tests/Rebel_Knowgl_Relation_Extractor/test_text.json diff --git a/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/README b/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/README deleted file mode 100644 index b7a9e4bb..00000000 --- a/duui-relation-extraction/src/tests/Rebel_Relation_Extractor/README +++ /dev/null @@ -1,24 +0,0 @@ -# Docker-Test für das Modell https://huggingface.co/Babelscape/rebel-large - -## Ziel -Überprüfung, od das Modell 'rebel-large' korrekt im Docker-Container offline läuft. -Offline, da im Online-Modus beim Bauen des Docker Images es immer zu Fehlern beim Download kam. - -## Testaufbau -- Modell: Babelscape/rebel-large -- Docker-Endpunkt: http://localhost:8000/v1/process -- 6 Sätze, die auf ihre Relation im Satz untersucht werden. -- Skript: test_docker.sh -- Testausgabe: predicted sentence + Triplets - -## Durchführung -1. Modell durch das Pythonskript rebel_large.py in den Ordner laden -2. Docker Image bauen : docker build -t relation:latest . -2.1 Man muss sich im Ordner duui-trelation-extraction befinden. -2.2 Das Dockerfile muss direkt in dem Ordner sein und nicht im Unterordner docker, sonst werden die Pfade nicht gefunden. -3. 
Docker Image starten: docker run -p 8000:8000 \ - -e MODEL_NAME=Babelscape/rebel-large \ - relation:latest -4. Testskript ausführen - -Das Modell ibm-research/knowgl-large kann auf die gleiche Weise getestet werden. diff --git a/duui-transformers-Emotion/Dockerfile b/duui-transformers-Emotion/Dockerfile deleted file mode 100644 index a963ad8b..00000000 --- a/duui-transformers-Emotion/Dockerfile +++ /dev/null @@ -1,110 +0,0 @@ -FROM python:3.10 - -WORKDIR /usr/src/app - -EXPOSE 9714 - -# dependencies -RUN pip install setuptools wheel -COPY ./requirements.txt ./requirements.txt -RUN pip install -r requirements.txt - -RUN #python -m spacy download en_core_web_lg - -RUN #python -c "import nltk; nltk.download('all', download_dir='nltk_data')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='02shanky/finetuned-twitter-xlm-roberta-base-emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='DReAMy-lib/xlm-roberta-large-DreamBank-emotion-presence')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='MilaNLProc/xlm-emo-t')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='j-hartmann/emotion-english-distilroberta-base')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='michellejieli/emotion_text_classifier')" -RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cardiffnlp/twitter-roberta-base-emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='finiteautomata/bertweet-base-emotion-analysis')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='ActivationAI/distilbert-base-uncased-finetuned-emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='SamLowe/roberta-base-go_emotions')" -#RUN python -c 
"from transformers import pipeline; pipeline('text-classification', model='mrm8488/t5-base-finetuned-emotion')" -#RUN python -c "from transformers import pipeline; pipeline(model='microsoft/mdeberta-v3-base')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='pranaydeeps/EXALT-Baseline')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='boltuix/bert-emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='MilaNLProc/feel-it-italian-emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cardiffnlp/twitter-roberta-base-emotion-multilabel-latest')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='finiteautomata/beto-emotion-analysis')" -#RUN python -c "from transformers import AutoTokenizer, AutoModelForSequenceClassification; AutoModelForSequenceClassification.from_pretrained('poltextlab/xlm-roberta-large-pooled-MORES', token=''); AutoTokenizer.from_pretrained('lytang/MiniCheck-Flan-T5-Large')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='daveni/twitter-xlm-roberta-emotion-es')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='ChrisLalk/German-Emotions')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='msgfrom96/xlm_emo_multi')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cointegrated/rubert-tiny2-cedr-emotion-detection')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Aniemore/rubert-tiny2-russian-emotion-detection')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Johnson8187/Chinese-Emotion-Small')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', 
model='Johnson8187/Chinese-Emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='AnasAlokla/multilingual_go_emotions')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Zoopa/emotion-classification-model')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='esuriddick/distilbert-base-uncased-finetuned-emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Panda0116/emotion-classification-model')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='lordtt13/emo-mobilebert')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='alex-shvets/roberta-large-emopillars-contextual-emocontext')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='AdapterHub/bert-base-uncased-pf-emo')" -#RUN python -c "from pytorch_transformers import (BertTokenizer, BertModel, BertConfig,); BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False); BertModel.from_pretrained('bert-base-multilingual-cased')" -RUN python -c "from transformers import pipeline; pipeline('text-classification', \ - model='visolex/phobert-emotion')" - -# copy scripts -COPY ./src/main/python/TypeSystemEmotion.xml ./TypeSystemEmotion.xml -COPY ./src/main/python/Emo_mDeBERTa2.py ./Emo_mDeBERTa2.py -COPY ./src/main/python/EmotionDetection.py ./EmotionDetection.py -COPY ./src/main/python/duui_transformers_emotion.py ./duui_transformers_emotion.py -COPY ./src/main/python/duui_emotion.lua ./duui_emotion.lua -COPY ./src/main/python/script.py ./script.py -COPY ./src/main/python/utils.py ./utils.py - -#Für das russische Modell, was zuvor über das Python Skript runtergeladen werden muss -COPY ./src/main/python/offline_models/rubert_tiny2_russian ./offline_models/rubert_tiny2_russian - - -#COPY 
./src/main/python/UniversalJoy/models-multilingual/small-all.pt ./UniversalJoy/models-multilingual/small-all.pt -#COPY ./src/main/python/UniversalJoy/models-multilingual/large-all.pt ./UniversalJoy/models-multilingual/large-all.pt -#COPY ./src/main/python/UniversalJoy/models-multilingual/combi-all.pt ./UniversalJoy/models-multilingual/combi-all.pt -#COPY ./src/main/python/UniversalJoy/models-huge-english/huge-en.pt ./UniversalJoy/models-huge-english/huge-en.pt -#COPY ./src/main/python/UniversalJoy/models-large-monolingual/large-en.pt ./UniversalJoy/models-large-monolingual/large-en.pt -#COPY ./src/main/python/UniversalJoy/models-large-monolingual/large-es.pt ./UniversalJoy/models-large-monolingual/large-es.pt -#COPY ./src/main/python/UniversalJoy/models-large-monolingual/large-pt.pt ./UniversalJoy/models-large-monolingual/large-pt.pt -#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-en.pt ./UniversalJoy/models-small-monolingual/small-en.pt -#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-es.pt ./UniversalJoy/models-small-monolingual/small-es.pt -#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-pt.pt ./UniversalJoy/models-small-monolingual/small-pt.pt -#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-tl.pt ./UniversalJoy/models-small-monolingual/small-tl.pt -#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-zh.pt ./UniversalJoy/models-small-monolingual/small-zh.pt -#COPY ./src/main/python/nltk_data/ ./nltk_data/ -#COPY ./src/main/python/pol_emo_mDeBERTa/ ./pol_emo_mDeBERTa/ - -# log level -ARG LOG_LEVEL="DEBUG" -ENV LOG_LEVEL=$LOG_LEVEL - -# config -ARG MODEL_CACHE_SIZE=3 -ENV MODEL_CACHE_SIZE=$MODEL_CACHE_SIZE - -# meta data -ARG ANNOTATOR_NAME="duui-transformers-emotion" -ENV ANNOTATOR_NAME=$ANNOTATOR_NAME -ARG ANNOTATOR_VERSION="unset" -ENV ANNOTATOR_VERSION=$ANNOTATOR_VERSION - -# Model Info -ARG MODEL_VERSION=0.1 -ENV MODEL_VERSION=$MODEL_VERSION -ARG MODEL_NAME="" 
-ENV MODEL_NAME=$MODEL_NAME -ARG MODEL_SOURCE="" -ENV MODEL_SOURCE=$MODEL_SOURCE -ARG MODEL_LANG="" -ENV MODEL_LANG=$MODEL_LANG - -# offline mode for huggingface -ARG DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE=1 -ENV TRANSFORMERS_OFFLINE=$DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE - - - - -ENTRYPOINT ["uvicorn", "duui_transformers_emotion:app", "--host", "0.0.0.0", "--port" ,"9714"] -CMD ["--workers", "1"] diff --git a/duui-transformers-Emotion/Dockerfile-cuda b/duui-transformers-Emotion/Dockerfile-cuda deleted file mode 100644 index 9aceb468..00000000 --- a/duui-transformers-Emotion/Dockerfile-cuda +++ /dev/null @@ -1,117 +0,0 @@ -FROM nvidia/cuda:11.8.0-base-ubuntu22.04 - -RUN apt update && \ - DEBIAN_FRONTEND=noninteractive \ - apt install --no-install-recommends -y build-essential software-properties-common && \ - add-apt-repository -y ppa:deadsnakes/ppa && \ - apt install --no-install-recommends -y python3.10 python3-pip python3-setuptools python3-distutils && \ - apt clean && rm -rf /var/lib/apt/lists/* - -RUN ln -s /usr/bin/python3 /usr/bin/python -RUN python -m pip install --upgrade pip - -WORKDIR /usr/src/app - -EXPOSE 9714 - -# dependencies -RUN pip install setuptools wheel -COPY ./requirements.txt ./requirements.txt -RUN apt remove -y python3-blinker || true -RUN pip install -r requirements.txt - -#RUN pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl - - -#RUN python -c "import nltk; nltk.download('all', download_dir='nltk_data')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='02shanky/finetuned-twitter-xlm-roberta-base-emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='DReAMy-lib/xlm-roberta-large-DreamBank-emotion-presence')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='MilaNLProc/xlm-emo-t')" -#RUN python -c "from transformers import 
pipeline; pipeline('text-classification', model='j-hartmann/emotion-english-distilroberta-base')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='michellejieli/emotion_text_classifier')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cardiffnlp/twitter-roberta-base-emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='finiteautomata/bertweet-base-emotion-analysis')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='ActivationAI/distilbert-base-uncased-finetuned-emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='SamLowe/roberta-base-go_emotions')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='mrm8488/t5-base-finetuned-emotion')" -#RUN python -c "from transformers import pipeline; pipeline(model='microsoft/mdeberta-v3-base')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='pranaydeeps/EXALT-Baseline')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='boltuix/bert-emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='MilaNLProc/feel-it-italian-emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cardiffnlp/twitter-roberta-base-emotion-multilabel-latest')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='finiteautomata/beto-emotion-analysis')" -#RUN python -c "from transformers import AutoTokenizer, AutoModelForSequenceClassification; AutoModelForSequenceClassification.from_pretrained('poltextlab/xlm-roberta-large-pooled-MORES', token=''); AutoTokenizer.from_pretrained('lytang/MiniCheck-Flan-T5-Large')" -#RUN python -c "from transformers import pipeline; 
pipeline('text-classification', model='daveni/twitter-xlm-roberta-emotion-es')" -RUN python -c "from transformers import pipeline; pipeline('text-classification', model='ChrisLalk/German-Emotions')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='msgfrom96/xlm_emo_multi')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cointegrated/rubert-tiny2-cedr-emotion-detection')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Aniemore/rubert-tiny2-russian-emotion-detection')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Johnson8187/Chinese-Emotion-Small')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Johnson8187/Chinese-Emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='AnasAlokla/multilingual_go_emotions')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Zoopa/emotion-classification-model')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='esuriddick/distilbert-base-uncased-finetuned-emotion')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Panda0116/emotion-classification-model')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='lordtt13/emo-mobilebert')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='alex-shvets/roberta-large-emopillars-contextual-emocontext')" -#RUN python -c "from transformers import pipeline; pipeline('text-classification', model='AdapterHub/bert-base-uncased-pf-emo')" -#RUN python -c "from pytorch_transformers import (BertTokenizer, BertModel, BertConfig,); BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False); 
BertModel.from_pretrained('bert-base-multilingual-cased')" -RUN python -c "from transformers import pipeline; pipeline('text-classification', \ - model='visolex/phobert-emotion')" - -# copy scripts -COPY ./src/main/python/TypeSystemEmotion.xml ./TypeSystemEmotion.xml -COPY ./src/main/python/Emo_mDeBERTa2.py ./Emo_mDeBERTa2.py -COPY ./src/main/python/EmotionDetection.py ./EmotionDetection.py -COPY ./src/main/python/duui_transformers_emotion.py ./duui_transformers_emotion.py -COPY ./src/main/python/duui_emotion.lua ./duui_emotion.lua -COPY ./src/main/python/script.py ./script.py -COPY ./src/main/python/utils.py ./utils.py -#COPY ./src/main/python/UniversalJoy/models-multilingual/small-all.pt ./UniversalJoy/models-multilingual/small-all.pt -#COPY ./src/main/python/UniversalJoy/models-multilingual/large-all.pt ./UniversalJoy/models-multilingual/large-all.pt -#COPY ./src/main/python/UniversalJoy/models-multilingual/combi-all.pt ./UniversalJoy/models-multilingual/combi-all.pt -#COPY ./src/main/python/UniversalJoy/models-huge-english/huge-en.pt ./UniversalJoy/models-huge-english/huge-en.pt -#COPY ./src/main/python/UniversalJoy/models-large-monolingual/large-en.pt ./UniversalJoy/models-large-monolingual/large-en.pt -#COPY ./src/main/python/UniversalJoy/models-large-monolingual/large-es.pt ./UniversalJoy/models-large-monolingual/large-es.pt -#COPY ./src/main/python/UniversalJoy/models-large-monolingual/large-pt.pt ./UniversalJoy/models-large-monolingual/large-pt.pt -#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-en.pt ./UniversalJoy/models-small-monolingual/small-en.pt -#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-es.pt ./UniversalJoy/models-small-monolingual/small-es.pt -#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-pt.pt ./UniversalJoy/models-small-monolingual/small-pt.pt -#COPY ./src/main/python/UniversalJoy/models-small-monolingual/small-tl.pt ./UniversalJoy/models-small-monolingual/small-tl.pt -#COPY 
./src/main/python/UniversalJoy/models-small-monolingual/small-zh.pt ./UniversalJoy/models-small-monolingual/small-zh.pt -#COPY ./src/main/python/nltk_data/ ./nltk_data/ -#COPY ./src/main/python/pol_emo_mDeBERTa/ ./pol_emo_mDeBERTa/ - -# log level -ARG LOG_LEVEL="DEBUG" -ENV LOG_LEVEL=$LOG_LEVEL - -# config -ARG MODEL_CACHE_SIZE=3 -ENV MODEL_CACHE_SIZE=$MODEL_CACHE_SIZE - -# meta data -ARG ANNOTATOR_NAME="duui-transformers-emotion" -ENV ANNOTATOR_NAME=$ANNOTATOR_NAME -ARG ANNOTATOR_VERSION="unset" -ENV ANNOTATOR_VERSION=$ANNOTATOR_VERSION - -# Model Info -ARG MODEL_VERSION=0.1 -ENV MODEL_VERSION=$MODEL_VERSION -ARG MODEL_NAME="" -ENV MODEL_NAME=$MODEL_NAME -ARG MODEL_SOURCE="" -ENV MODEL_SOURCE=$MODEL_SOURCE -ARG MODEL_LANG="" -ENV MODEL_LANG=$MODEL_LANG - -# offline mode for huggingface -ARG DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE=1 -ENV TRANSFORMERS_OFFLINE=$DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE - - - - -ENTRYPOINT ["uvicorn", "duui_transformers_emotion:app", "--host", "0.0.0.0", "--port" ,"9714"] -CMD ["--workers", "1"] diff --git a/duui-transformers-Emotion/Readme.md b/duui-transformers-Emotion/Readme.md index 41904eb5..3c319b11 100644 --- a/duui-transformers-Emotion/Readme.md +++ b/duui-transformers-Emotion/Readme.md @@ -54,7 +54,7 @@ DUUI implementation for selected Hugging-Face-based transformer [Emotion tools]( | universal-joy-tl-small | https://github.com/sotlampr/universal-joy | 6ab01e98c8106e610247e5e8f0712af08c007b67 | TL | | universal-joy-zh-small | https://github.com/sotlampr/universal-joy | 6ab01e98c8106e610247e5e8f0712af08c007b67 | ZH | | phobert-emotion | https://huggingface.co/visolex/phobert-emotion | 6099c5a6f91fc6c8175818e37f96fecad0c96b63 | VI | -|seara/rubert-tiny2-russian-emotion-detection-cedr|https://huggingface.co/seara/rubert-tiny2-russian-emotion-detection-cedr | 927ce911c4343c6113859325ab76c6c3c473ff0f | RU | +|rubert-tiny2-russian-emotion-detection-cedr|https://huggingface.co/seara/rubert-tiny2-russian-emotion-detection-cedr | 
927ce911c4343c6113859325ab76c6c3c473ff0f | RU | # How To Use diff --git a/duui-transformers-Emotion/docker_build.sh b/duui-transformers-Emotion/docker_build.sh index 777aac30..49f3de27 100644 --- a/duui-transformers-Emotion/docker_build.sh +++ b/duui-transformers-Emotion/docker_build.sh @@ -385,6 +385,15 @@ export MODEL_SOURCE="https://huggingface.co/visolex/phobert-emotion" export MODEL_LANG="VI" ####-------------------------------------------------------------------- +####--------------------------------------------------------------------- +export MODEL_NAME="seara/rubert-tiny2-russian-emotion-detection-cedr" +export MODEL_SPECNAME="rubert-tiny2-russian-emotion-detection-cedr" +export MODEL_VERSION="927ce911c4343c6113859325ab76c6c3c473ff0f" +export MODEL_SOURCE="https://huggingface.co/seara/rubert-tiny2-russian-emotion-detection-cedr" +export MODEL_LANG="RU" +####-------------------------------------------------------------------- + + docker build \ --build-arg ANNOTATOR_NAME \ --build-arg ANNOTATOR_VERSION \ diff --git a/duui-transformers-Emotion/src/main/docker/Dockerfile b/duui-transformers-Emotion/src/main/docker/Dockerfile index 23472e7f..fb3595d5 100644 --- a/duui-transformers-Emotion/src/main/docker/Dockerfile +++ b/duui-transformers-Emotion/src/main/docker/Dockerfile @@ -45,7 +45,9 @@ RUN python -c "from transformers import pipeline; pipeline('text-classification' #RUN python -c "from transformers import pipeline; pipeline('text-classification', model='AdapterHub/bert-base-uncased-pf-emo')" #RUN python -c "from pytorch_transformers import (BertTokenizer, BertModel, BertConfig,); BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False); BertModel.from_pretrained('bert-base-multilingual-cased')" RUN python -c "from transformers import pipeline; pipeline('text-classification', \ - model='visolex/phobert-emotion')" + model='visolex/phobert-emotion')" +RUN python -c "from transformers import pipeline; 
pipeline('text-classification', \ + model='seara/rubert-tiny2-russian-emotion-detection-cedr')" # copy scripts COPY ./src/main/python/TypeSystemEmotion.xml ./TypeSystemEmotion.xml diff --git a/duui-transformers-Emotion/src/main/docker/Dockerfile-cuda b/duui-transformers-Emotion/src/main/docker/Dockerfile-cuda index 9aceb468..8c7cbb52 100644 --- a/duui-transformers-Emotion/src/main/docker/Dockerfile-cuda +++ b/duui-transformers-Emotion/src/main/docker/Dockerfile-cuda @@ -58,7 +58,7 @@ RUN python -c "from transformers import pipeline; pipeline('text-classification' #RUN python -c "from pytorch_transformers import (BertTokenizer, BertModel, BertConfig,); BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False); BertModel.from_pretrained('bert-base-multilingual-cased')" RUN python -c "from transformers import pipeline; pipeline('text-classification', \ model='visolex/phobert-emotion')" - +RUN python -c "from transformers import pipeline; pipeline('text-classification', model='seara/rubert-tiny2-russian-emotion-detection-cedr')" # copy scripts COPY ./src/main/python/TypeSystemEmotion.xml ./TypeSystemEmotion.xml COPY ./src/main/python/Emo_mDeBERTa2.py ./Emo_mDeBERTa2.py diff --git a/duui-transformers-Emotion/src/main/python/EmotionDetection.py b/duui-transformers-Emotion/src/main/python/EmotionDetection.py index 7802f8f1..abd1b922 100644 --- a/duui-transformers-Emotion/src/main/python/EmotionDetection.py +++ b/duui-transformers-Emotion/src/main/python/EmotionDetection.py @@ -34,7 +34,8 @@ def sigmoid(x): "SamLowe": "SamLowe/roberta-base-go_emotions", "michellejieli": "michellejieli/emotion_text_classifier", "EmoAtlas": "EmoAtlas", - "MRM8488": "mrm8488/t5-base-finetuned-emotion" + "MRM8488": "mrm8488/t5-base-finetuned-emotion", + "rubert": "rubert-tiny2-russian-emotion-detection-cedr", } map_emotion = { "DReAMy-lib/xlm-roberta-large-DreamBank-emotion-presence": { @@ -393,6 +394,14 @@ def sigmoid(x): 1: "happy", 2: "sad", 3: "angry" + }, + 
"seara/rubert-tiny2-russian-emotion-detection-cedr":{ + 0: "no_emotion", + 1: "joy", + 2: "sadness", + 3: "surprise", + 4: "fear", + 5: "anger", } } diff --git a/duui-transformers-Emotion/src/main/python/duui_transformers_emotion.py b/duui-transformers-Emotion/src/main/python/duui_transformers_emotion.py index 1b962f3a..b57f4f56 100644 --- a/duui-transformers-Emotion/src/main/python/duui_transformers_emotion.py +++ b/duui-transformers-Emotion/src/main/python/duui_transformers_emotion.py @@ -30,6 +30,7 @@ "mrm8488/t5-base-finetuned-emotion": "https://huggingface.co/mrm8488/t5-base-finetuned-emotion", "EmoAtlas": "https://github.com/alfonsosemeraro/emoatlas", "pysentimiento": "https://github.com/pysentimiento/pysentimiento/", + "rubert-tiny2-russian-emotion-detection-cedr": "https://huggingface.co/seara/rubert-tiny2-russian-emotion-detection-cedr", } languages = { @@ -46,6 +47,7 @@ "mrm8488/t5-base-finetuned-emotion": "en", "SamLowe/roberta-base-go_emotions": "en", "ActivationAI/distilbert-base-uncased-finetuned-emotion": "en", + "rubert-tiny2-russian-emotion-detection-cedr": "ru", } versions = { @@ -62,6 +64,7 @@ "mrm8488/t5-base-finetuned-emotion": "e44a316825f11230724b36412fbf1899c76e82de", "EmoAtlas": "adae44a80dd55c1d1c467c4e72bdb2d8cf63bf28", "pysentimiento": "60822acfd805ad5d95437c695daa33c18dbda060", + "rubert-tiny2-russian-emotion-detection-cedr": "927ce911c4343c6113859325ab76c6c3c473ff0f", } diff --git a/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/README b/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/README index 5779ccc9..b33b02b9 100644 --- a/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/README +++ b/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/README @@ -1,8 +1,7 @@ # Docker-Test für das Modell https://huggingface.co/seara/rubert-tiny2-russian-emotion-detection-cedr ## Ziel -Überprüfung, od das Modell 
'rubert_tiny2_russian' korrekt im Docker-Container offline läuft. -Offline, da im Online-Modus beim Bauen des Docker Images es immer zu Fehlern beim Download kam. +Überprüfung, ob das Modell 'rubert_tiny2_russian' korrekt im Docker-Container läuft. ## Testaufbau - Modell: seara/rubert-tiny2-russian-emotion-detectio @@ -12,11 +11,7 @@ Offline, da im Online-Modus beim Bauen des Docker Images es immer zu Fehlern bei - Testausgabe: predicted label + score + erwartetes Label ## Durchführung -1. Modell durch das Pythonskript rubert_tiny2_russian_emotion_datection_cedr.py in den Ordner laden -2. Docker Image bauen : docker build -t emotionslos:latest . -2.1 Man muss sich im Ordner duui-transformers-Emotion befinden. -2.2 Das Dockerfile muss direkt in dem Ordner sein und nicht im Unterordner docker, sonst werden die Pfade nicht gefunden. -3. Docker Image starten: docker run -p 9714:9714 \ - -e MODEL_NAME=seara/rubert-tiny2-russian-emotion-detection-cedr \ +1. Docker Image bauen über docker_build.sh. +2. Docker Image starten: docker run -p 9714:9714 \ + docker.texttechnologylab.org/duui-transformers-emotion-rubert-tiny2-russian-emotion-detection-cedr:latest-cuda - emotionslos:latest -4. Testskript ausführen +3. 
Testskript test_docker.sh ausführen diff --git a/duui-transformers-sentiment/src/main/docker/Dockerfile b/duui-transformers-sentiment/src/main/docker/Dockerfile index 09184046..37d13103 100644 --- a/duui-transformers-sentiment/src/main/docker/Dockerfile +++ b/duui-transformers-sentiment/src/main/docker/Dockerfile @@ -371,7 +371,7 @@ ARG TEXTIMAGER_DUUI_TRANSFORMERS_SENTIMENT_ANNOTATOR_VERSION="unset" ENV TEXTIMAGER_DUUI_TRANSFORMERS_SENTIMENT_ANNOTATOR_VERSION=$TEXTIMAGER_DUUI_TRANSFORMERS_SENTIMENT_ANNOTATOR_VERSION # offline mode for huggingface -ARG TEXTIMAGER_DUUI_TRANSFORMERS_SENTIMENT_TRANSFORMERS_OFFLINE=1 +ARG TEXTIMAGER_DUUI_TRANSFORMERS_SENTIMENT_TRANSFORMERS_OFFLINE=0 ENV TRANSFORMERS_OFFLINE=$TEXTIMAGER_DUUI_TRANSFORMERS_SENTIMENT_TRANSFORMERS_OFFLINE # copy scripts From 11613cd8d93b4e1e6450b7b0e1c9ce9d6b038ae9 Mon Sep 17 00:00:00 2001 From: ann-kathrin Date: Sat, 14 Mar 2026 15:07:08 +0100 Subject: [PATCH 6/6] Sentiment Modell --- duui-relation-extraction/Readme.md | 4 +- .../test_docker.sh | 2 + .../docker_build.sh | 9 ++++ .../src/main/docker/Dockerfile | 1 + .../src/main/docker/Dockerfile-cuda | 1 + .../src/main/python/SentimentSpeech.py | 5 ++ .../Test_text.json | 11 ++++ .../test_docker.sh | 52 +++++++++++++++++++ 8 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 duui-transformers-sentiment-atomar/src/test/test_rubert_base_case_sentiment/Test_text.json create mode 100644 duui-transformers-sentiment-atomar/src/test/test_rubert_base_case_sentiment/test_docker.sh diff --git a/duui-relation-extraction/Readme.md b/duui-relation-extraction/Readme.md index cf69905d..b85616f3 100644 --- a/duui-relation-extraction/Readme.md +++ b/duui-relation-extraction/Readme.md @@ -11,6 +11,4 @@ DUUI implementation for selected Hugging-Face-based transformer [relation-extrac ## Execution -1. Im Ordner 'models' das gewünschte Modell runterladen, da der Container nicht mit dem Internet verbunden ist. -2. Docker bauen: docker build -t [IMAGE_NAME] . -3. 
Docker starten: docker run -p 8000:8000 -e MODEL_NAME=[MODEL_NAME] [IMAGE_NAME] \ No newline at end of file +Docker starten: docker run -p 8000:8000 -e MODEL_NAME=[MODEL_NAME] [IMAGE_NAME] \ No newline at end of file diff --git a/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/test_docker.sh b/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/test_docker.sh index 89135540..54812b06 100644 --- a/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/test_docker.sh +++ b/duui-transformers-Emotion/src/test/test_rubert_tiny2_russian_emotion_detection_cedr/test_docker.sh @@ -6,6 +6,8 @@ URL="http://localhost:9714/v1/process" # Datei mit Testtexten TEST_FILE="test_text.json" +echo "Starte Tests" + # Schleife über alle Testtexte jq -c '.[]' $TEST_FILE | while read t; do text=$(echo $t | jq -r '.text') diff --git a/duui-transformers-sentiment-atomar/docker_build.sh b/duui-transformers-sentiment-atomar/docker_build.sh index 5427cda1..89550216 100644 --- a/duui-transformers-sentiment-atomar/docker_build.sh +++ b/duui-transformers-sentiment-atomar/docker_build.sh @@ -67,6 +67,15 @@ export MODEL_LANG="Multi" #export MODEL_LANG="DE" ####-------------------------------------------------------------------- +#Russische Modell +##--------------------------------------------------------------------- +export MODEL_NAME="blanchefort/rubert-base-cased-sentiment" +export MODEL_SPECNAME="rubert-base-cased-sentiment" +export MODEL_VERSION="2d4b5fc431489a87ca4e44b842d904058706e8d4" +export MODEL_SOURCE="https://huggingface.co/blanchefort/rubert-base-cased-sentiment" +export MODEL_LANG="RU" +##-------------------------------------------------------------------- + export DOCKER_REGISTRY="docker.texttechnologylab.org/" export DUUI_CUDA= diff --git a/duui-transformers-sentiment-atomar/src/main/docker/Dockerfile b/duui-transformers-sentiment-atomar/src/main/docker/Dockerfile index 
8945772d..743d5379 100644 --- a/duui-transformers-sentiment-atomar/src/main/docker/Dockerfile +++ b/duui-transformers-sentiment-atomar/src/main/docker/Dockerfile @@ -16,6 +16,7 @@ RUN python -c "from transformers import pipeline; pipeline('text-classification' #RUN python -c "from transformers import pipeline; pipeline('text-classification', model='j-hartmann/sentiment-roberta-large-english-3-classes')" #RUN python -c "from transformers import pipeline; pipeline('text-classification', model='bardsai/finance-sentiment-de-base')" RUN #python -c "from germansentiment import SentimentModel; model = SentimentModel()" +RUN python -c "from transformers import pipeline; pipeline('text-classification', model='blanchefort/rubert-base-cased-sentiment')" # copy scripts COPY ./src/main/python/TypeSystemSentiment.xml ./TypeSystemSentiment.xml diff --git a/duui-transformers-sentiment-atomar/src/main/docker/Dockerfile-cuda b/duui-transformers-sentiment-atomar/src/main/docker/Dockerfile-cuda index 12d392f0..2c484b37 100644 --- a/duui-transformers-sentiment-atomar/src/main/docker/Dockerfile-cuda +++ b/duui-transformers-sentiment-atomar/src/main/docker/Dockerfile-cuda @@ -38,6 +38,7 @@ RUN pip install -r requirements.txt #RUN python -c "from transformers import AutoModelForSequenceClassification, AutoTokenizer; AutoModelForSequenceClassification.from_pretrained('poltextlab/xlm-roberta-large-party-cap-v3', trust_remote_code=True); AutoTokenizer.from_pretrained('xlm-roberta-large')" #RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cardiffnlp/tweet-topic-latest-single')" RUN python -c "from transformers import pipeline; pipeline('text-classification', model='cardiffnlp/tweet-topic-large-multilingual')" +RUN python -c "from transformers import pipeline; pipeline('text-classification', model='blanchefort/rubert-base-cased-sentiment')" # log level ARG LOG_LEVEL="DEBUG" diff --git 
a/duui-transformers-sentiment-atomar/src/main/python/SentimentSpeech.py b/duui-transformers-sentiment-atomar/src/main/python/SentimentSpeech.py index a6e3bfab..fd309025 100644 --- a/duui-transformers-sentiment-atomar/src/main/python/SentimentSpeech.py +++ b/duui-transformers-sentiment-atomar/src/main/python/SentimentSpeech.py @@ -48,6 +48,11 @@ def sigmoid(x): 0: "positive", 1: "neutral", 2: "negative" + }, + "blanchefort/rubert-base-cased-sentiment":{ + 1: "positive", + 0: "neutral", + 2: "negative" } } diff --git a/duui-transformers-sentiment-atomar/src/test/test_rubert_base_case_sentiment/Test_text.json b/duui-transformers-sentiment-atomar/src/test/test_rubert_base_case_sentiment/Test_text.json new file mode 100644 index 00000000..679d6cdd --- /dev/null +++ b/duui-transformers-sentiment-atomar/src/test/test_rubert_base_case_sentiment/Test_text.json @@ -0,0 +1,11 @@ +[ + {"text": "Я очень доволен сегодняшним днём!", "expected": "POSITIVE"}, + {"text": "Мне понравился фильм, он был великолепен.", "expected": "POSITIVE"}, + {"text": "Это лучшее, что случалось со мной за последнее время.", "expected": "POSITIVE"}, + {"text": "Сегодня обычный день, ничего особенного.", "expected": "NEUTRAL"}, + {"text": "Я иду в магазин за хлебом.", "expected": "NEUTRAL"}, + {"text": "Это предложение содержит только факты.", "expected": "NEUTRAL"}, + {"text": "Меня расстроило то, что произошло вчера.", "expected": "NEGATIVE"}, + {"text": "Мне не нравится эта музыка.", "expected": "NEGATIVE"}, + {"text": "Эта ошибка испортила весь проект.", "expected": "NEGATIVE"} +] \ No newline at end of file diff --git a/duui-transformers-sentiment-atomar/src/test/test_rubert_base_case_sentiment/test_docker.sh b/duui-transformers-sentiment-atomar/src/test/test_rubert_base_case_sentiment/test_docker.sh new file mode 100644 index 00000000..2c61e468 --- /dev/null +++ b/duui-transformers-sentiment-atomar/src/test/test_rubert_base_case_sentiment/test_docker.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# URL des 
Docker-Endpunkts +URL="http://localhost:9714/v1/process" + +# File containing the test texts +TEST_FILE="Test_text.json" + +echo "Starte Tests" + +jq -c '.[]' "$TEST_FILE" | while IFS= read -r t; do + text=$(echo "$t" | jq -r '.text') + expected=$(echo "$t" | jq -r '.expected') + + # Build the JSON payload for the POST request + payload=$(jq -n --arg txt "$text" '{ + doc_len: 1, + lang: "ru", + selections: [ + { + selection: "1", + sentences: [ + { text: $txt, begin: 0, end: ($txt | length) } + ] + } + ] + }') + + # Send the request to the running container + response=$(curl -s -X POST "$URL" -H "Content-Type: application/json" -d "$payload") + + echo "Text: $text" + echo "Expected: $expected" + echo "Scores:" + + # Pair each label with its score by index; to_entries must run on $labels, not on the whole response + echo "$response" | jq -r ' + .results[0] as $labels | + .factors[0] as $scores | + $labels | to_entries[] | "\(.value): \($scores[.key])" + ' + + # Predicted = label with the highest score + max_index=$(echo "$response" | jq ' + .factors[0] | to_entries | max_by(.value) | .key + ') + pred_label=$(echo "$response" | jq -r ".results[0][$max_index]") + pred_score=$(echo "$response" | jq -r ".factors[0][$max_index]") + + echo "Predicted (highest score): $pred_label ($pred_score)" + echo "-----------------------------" +done \ No newline at end of file