diff --git a/duui-Climate/.dockerignore b/duui-Climate/.dockerignore
new file mode 100644
index 00000000..caab0c36
--- /dev/null
+++ b/duui-Climate/.dockerignore
@@ -0,0 +1,3 @@
+.idea/
+target/
+venv/
\ No newline at end of file
diff --git a/duui-Climate/.gitignore b/duui-Climate/.gitignore
new file mode 100644
index 00000000..d2092691
--- /dev/null
+++ b/duui-Climate/.gitignore
@@ -0,0 +1,3 @@
+.idea/
+target/
+venv*/
\ No newline at end of file
diff --git a/duui-Climate/Readme.md b/duui-Climate/Readme.md
new file mode 100644
index 00000000..52078b0b
--- /dev/null
+++ b/duui-Climate/Readme.md
@@ -0,0 +1,90 @@
+[](https://docker.texttechnologylab.org/v2/duui-transformers-topic/tags/list)
+[]()
+[]()
+[]()
+
+# Transformers Climate
+
+DUUI implementation for selected Hugging-Face-based transformer [Climate tools](https://huggingface.co/models?sort=trending&search=climatebert) models.
+## Included Models
+
+| Name | Source | Revision | Languages |
+|-------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------|--------------------------------|----------|
+| distilroberta-base-climate-detector | https://huggingface.co/climatebert/distilroberta-base-climate-detector | 2c3bc660d45a59e31b35f5d3e365ee4f59fdf76c | EN |
+| distilroberta-base-climate-tcfd | https://huggingface.co/climatebert/distilroberta-base-climate-tcfd | 970630beedc21db81a84156448ad2e3ac860153d | EN |
+| distilroberta-base-climate-commitment | https://huggingface.co/climatebert/distilroberta-base-climate-commitment | 17337c3292df16a8fe93b1505dfe4122d50a4c91 | EN |
+| distilroberta-base-climate-sentiment | https://huggingface.co/climatebert/distilroberta-base-climate-sentiment | e9f9a94ee4263f5ad5cfc97b8539a497fc88aa7d | EN |
+| distilroberta-base-climate-specificity | https://huggingface.co/climatebert/distilroberta-base-climate-specificity | 4ada96ed4bf5c3a7a711282e41f1ab9b29f0ddea | EN |
+
+# How To Use
+
+To use duui-climate as a DUUI image, you need the [Docker Unified UIMA Interface (DUUI)](https://github.com/texttechnologylab/DockerUnifiedUIMAInterface).
+
+## Start Docker container
+
+```
+docker run --rm -p 9714:9714 docker.texttechnologylab.org/duui-climate-[modelname]:latest
+
+```
+
+Find all available image tags here: [https://docker.texttechnologylab.org/v2/duui-climate-[modelname]/tags/list](https://docker.texttechnologylab.org/v2/duui-climate-[modelname]/tags/list)
+
+## Run within DUUI
+
+```
+composer.add(
+ new DUUIDockerDriver.Component("docker.texttechnologylab.org/duui-climate-[modelname]:latest")
+ .withParameter("selection", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence")
+);
+```
+
+### Parameters
+
+| Name | Description |
+| ---- | ----------- |
+| `selection` | Use `text` to process the full document text or any selectable UIMA type class name |
+
+# Cite
+
+If you use this DUUI image, please cite it as follows:
+
+Alexander Leonhardt, Giuseppe Abrami, Daniel Baumartz and Alexander Mehler. (2023). "Unlocking the Heterogeneous Landscape of Big Data NLP with DUUI." Findings of the Association for Computational Linguistics: EMNLP 2023, 385–399. [[LINK](https://aclanthology.org/2023.findings-emnlp.29)] [[PDF](https://aclanthology.org/2023.findings-emnlp.29.pdf)]
+
+## BibTeX
+
+```
+@inproceedings{Leonhardt:et:al:2023,
+ title = {Unlocking the Heterogeneous Landscape of Big Data {NLP} with {DUUI}},
+ author = {Leonhardt, Alexander and Abrami, Giuseppe and Baumartz, Daniel and Mehler, Alexander},
+ editor = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
+ booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
+ year = {2023},
+ address = {Singapore},
+ publisher = {Association for Computational Linguistics},
+ url = {https://aclanthology.org/2023.findings-emnlp.29},
+ pages = {385--399},
+ pdf = {https://aclanthology.org/2023.findings-emnlp.29.pdf},
+ abstract = {Automatic analysis of large corpora is a complex task, especially
+ in terms of time efficiency. This complexity is increased by the
+ fact that flexible, extensible text analysis requires the continuous
+ integration of ever new tools. Since there are no adequate frameworks
+ for these purposes in the field of NLP, and especially in the
+ context of UIMA, that are not outdated or unusable for security
+ reasons, we present a new approach to address the latter task:
+ Docker Unified UIMA Interface (DUUI), a scalable, flexible, lightweight,
+ and feature-rich framework for automatic distributed analysis
+ of text corpora that leverages Big Data experience and virtualization
+ with Docker. We evaluate DUUI{'}s communication approach against
+ a state-of-the-art approach and demonstrate its outstanding behavior
+ in terms of time efficiency, enabling the analysis of big text
+ data.}
+}
+
+@misc{Bagci:2026,
+ author = {Bagci, Mevlüt},
+ title = {Hugging-Face-based climate models as {DUUI} component},
+ year = {2026},
+ howpublished = {https://github.com/texttechnologylab/duui-uima/tree/main/duui-Climate}
+}
+
+```
diff --git a/duui-Climate/docker_build.sh b/duui-Climate/docker_build.sh
new file mode 100644
index 00000000..0abfc296
--- /dev/null
+++ b/duui-Climate/docker_build.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# Builds and tags the DUUI climate image for the model selected further down.
+set -euo pipefail
+
+# Suffix appended to the image tag and to the Dockerfile name:
+# empty selects src/main/docker/Dockerfile, "-cuda" selects Dockerfile-cuda.
+export ANNOTATOR_CUDA=
+#export ANNOTATOR_CUDA="-cuda"
+
+# Values forwarded to the image build via --build-arg.
+export ANNOTATOR_NAME=duui-climate
+export ANNOTATOR_VERSION=0.1.0
+export LOG_LEVEL=DEBUG
+export MODEL_CACHE_SIZE=3
+# Registry prefix of the image name (used for tagging only, not a build arg).
+export DOCKER_REGISTRY="docker.texttechnologylab.org/"
+
+###---------------------------------------------------------------------
+#export MODEL_NAME="climatebert/distilroberta-base-climate-detector"
+#export MODEL_SPECNAME="distilroberta-base-climate-detector"
+#export MODEL_VERSION="2c3bc660d45a59e31b35f5d3e365ee4f59fdf76c"
+#export MODEL_SOURCE="https://huggingface.co/climatebert/distilroberta-base-climate-detector"
+#export MODEL_LANG="EN"
+###--------------------------------------------------------------------
+
+###---------------------------------------------------------------------
+#export MODEL_NAME="climatebert/distilroberta-base-climate-tcfd"
+#export MODEL_SPECNAME="distilroberta-base-climate-tcfd"
+#export MODEL_VERSION="970630beedc21db81a84156448ad2e3ac860153d"
+#export MODEL_SOURCE="https://huggingface.co/climatebert/distilroberta-base-climate-tcfd"
+#export MODEL_LANG="EN"
+###--------------------------------------------------------------------
+
+###---------------------------------------------------------------------
+#export MODEL_NAME="climatebert/distilroberta-base-climate-commitment"
+#export MODEL_SPECNAME="distilroberta-base-climate-commitment"
+#export MODEL_VERSION="17337c3292df16a8fe93b1505dfe4122d50a4c91"
+#export MODEL_SOURCE="https://huggingface.co/climatebert/distilroberta-base-climate-commitment"
+#export MODEL_LANG="EN"
+###--------------------------------------------------------------------
+
+###---------------------------------------------------------------------
+#export MODEL_NAME="climatebert/distilroberta-base-climate-sentiment"
+#export MODEL_SPECNAME="distilroberta-base-climate-sentiment"
+#export MODEL_VERSION="e9f9a94ee4263f5ad5cfc97b8539a497fc88aa7d"
+#export MODEL_SOURCE="https://huggingface.co/climatebert/distilroberta-base-climate-sentiment"
+#export MODEL_LANG="EN"
+###--------------------------------------------------------------------
+
+##---------------------------------------------------------------------
+# Active model selection. MODEL_SPECNAME becomes part of the image name;
+# MODEL_NAME, MODEL_VERSION, MODEL_SOURCE and MODEL_LANG are forwarded to
+# the build as --build-arg values.
+export MODEL_NAME="climatebert/distilroberta-base-climate-specificity"
+export MODEL_SPECNAME="distilroberta-base-climate-specificity"
+export MODEL_VERSION="4ada96ed4bf5c3a7a711282e41f1ab9b29f0ddea"
+export MODEL_SOURCE="https://huggingface.co/climatebert/distilroberta-base-climate-specificity"
+export MODEL_LANG="EN"
+##--------------------------------------------------------------------
+
+
+
+# Image repository shared by the versioned and the "latest" tag.
+IMAGE_REPO="${DOCKER_REGISTRY}${ANNOTATOR_NAME}-${MODEL_SPECNAME}"
+
+# Build the image; value-less --build-arg flags take their value from the
+# exported environment variables above. All expansions are quoted so that
+# values containing spaces or glob characters cannot split into extra arguments.
+docker build \
+  --build-arg ANNOTATOR_NAME \
+  --build-arg ANNOTATOR_VERSION \
+  --build-arg LOG_LEVEL \
+  --build-arg MODEL_CACHE_SIZE \
+  --build-arg MODEL_NAME \
+  --build-arg MODEL_VERSION \
+  --build-arg MODEL_SOURCE \
+  --build-arg MODEL_LANG \
+  -t "${IMAGE_REPO}:${ANNOTATOR_VERSION}${ANNOTATOR_CUDA}" \
+  -f "src/main/docker/Dockerfile${ANNOTATOR_CUDA}" \
+  .
+
+# Additionally tag the freshly built image as "latest" (with the same CUDA suffix).
+docker tag \
+  "${IMAGE_REPO}:${ANNOTATOR_VERSION}${ANNOTATOR_CUDA}" \
+  "${IMAGE_REPO}:latest${ANNOTATOR_CUDA}"
diff --git a/duui-Climate/pom.xml b/duui-Climate/pom.xml
new file mode 100644
index 00000000..23c49fe7
--- /dev/null
+++ b/duui-Climate/pom.xml
@@ -0,0 +1,157 @@
+
+
+ 4.0.0
+
+ org.texttechnologylab.duui
+ duui-climate
+ 0.1.0
+
+
+
+ AGPL-3.0-or-later
+ https://www.gnu.org/licenses/agpl.txt
+ repo
+ GNU Affero General Public License v3.0 or later
+
+
+
+
+ Texttechnology Lab
+ https://www.texttechnologylab.org
+
+
+
+ mehler
+ Prof. Dr. Alexander Mehler
+ mehler@em.uni-frankfurt.de
+ https://www.texttechnologylab.org/team/alexander-abrami/
+ Goethe University Frankfurt / Texttechnology Lab
+ https://www.texttechnologylab.org
+
+ head of department
+
+
+
+ bagci
+ Mevlüt Bagci
+ bagci@em.uni-frankfurt.de
+ https://www.texttechnologylab.org/team/mevl%c3%bct-bagci/
+ Goethe University Frankfurt / Texttechnology Lab
+ https://www.texttechnologylab.org
+
+ lead developer
+
+ Europe/Berlin
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ 2.22.0
+
+
+ --illegal-access=permit
+ --add-opens java.base/java.util=ALL-UNNAMED
+
+
+
+
+
+
+
+
+
+ 17
+ 17
+ 2.4.0
+
+
+
+
+
+
+ jitpack.io
+ https://jitpack.io
+
+
+
+
+
+
+ org.dkpro.core
+ dkpro-core-asl
+ ${dkpro.core.version}
+ pom
+ import
+
+
+
+
+
+
+
+ com.github.mevbagci
+ DockerUnifiedUIMAInterface
+
+
+ ad501be374
+
+
+
+
+
+
+
+
+ com.github.mevbagci
+ UIMATypeSystem
+ 3.0.23.1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ org.junit.jupiter
+ junit-jupiter
+ 5.9.0
+ test
+
+
+
+ org.dkpro.core
+ dkpro-core-api-segmentation-asl
+ test
+
+
+
+ org.dkpro.core
+ dkpro-core-io-xmi-asl
+ test
+
+
+
+ org.dkpro.core
+ dkpro-core-api-resources-asl
+ test
+
+
+
\ No newline at end of file
diff --git a/duui-Climate/requirements.txt b/duui-Climate/requirements.txt
new file mode 100644
index 00000000..c8109fba
--- /dev/null
+++ b/duui-Climate/requirements.txt
@@ -0,0 +1,14 @@
+torch==2.11.0
+torchaudio==2.11.0
+torchvision==0.26.0
+scipy==1.17.1
+transformers==5.9.0
+sentencepiece==0.2.1
+protobuf==4.25.3
+numpy==2.4.6
+scikit-learn==1.8.0
+fastapi==0.110.0
+dkpro-cassis==0.9.1
+uvicorn[standard]==0.27.1
+pydantic-settings==2.0.2
+torchmetrics==1.2.0
\ No newline at end of file
diff --git a/duui-Climate/src/main/docker/Dockerfile b/duui-Climate/src/main/docker/Dockerfile
new file mode 100644
index 00000000..69b89a12
--- /dev/null
+++ b/duui-Climate/src/main/docker/Dockerfile
@@ -0,0 +1,55 @@
+FROM python:3.12
+
+WORKDIR /usr/src/app
+
+EXPOSE 9714
+
+# dependencies
+COPY ./requirements.txt ./requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# copy scripts
+COPY ./src/main/python/TypeSystemTopic.xml ./TypeSystemTopic.xml
+COPY ./src/main/python/duui_climate.py ./duui_climate.py
+COPY ./src/main/python/duui_climate.lua ./duui_climate.lua
+COPY ./src/main/python/Climate.py ./Climate.py
+
+# Model to bake into the image. Declared before the download step so that the
+# model cached at build time is the same one the service reads from MODEL_NAME
+# at runtime. The default matches the model that was previously hard-coded.
+ARG MODEL_NAME="climatebert/distilroberta-base-climate-specificity"
+ENV MODEL_NAME=$MODEL_NAME
+
+# Pre-download tokenizer and weights into the Hugging Face cache; the container
+# runs with TRANSFORMERS_OFFLINE=1 (see below) and cannot fetch them later.
+RUN python -c "import os; from transformers import AutoModelForSequenceClassification, AutoTokenizer; name = os.environ['MODEL_NAME']; AutoTokenizer.from_pretrained(name); AutoModelForSequenceClassification.from_pretrained(name)"
+
+# log level
+ARG LOG_LEVEL="DEBUG"
+ENV LOG_LEVEL=$LOG_LEVEL
+
+# config
+ARG MODEL_CACHE_SIZE=3
+ENV MODEL_CACHE_SIZE=$MODEL_CACHE_SIZE
+
+# meta data
+ARG ANNOTATOR_NAME="duui-climate"
+ENV ANNOTATOR_NAME=$ANNOTATOR_NAME
+ARG ANNOTATOR_VERSION="unset"
+ENV ANNOTATOR_VERSION=$ANNOTATOR_VERSION
+
+# Model Info (reported in the annotation metadata)
+ARG MODEL_VERSION=0.1
+ENV MODEL_VERSION=$MODEL_VERSION
+ARG MODEL_SOURCE=""
+ENV MODEL_SOURCE=$MODEL_SOURCE
+ARG MODEL_LANG=""
+ENV MODEL_LANG=$MODEL_LANG
+
+# offline mode for huggingface
+ARG TEXTIMAGER_DUUI_TRANSFORMERS_SENTIMENT_TRANSFORMERS_OFFLINE=1
+ENV TRANSFORMERS_OFFLINE=$TEXTIMAGER_DUUI_TRANSFORMERS_SENTIMENT_TRANSFORMERS_OFFLINE
+
+ENTRYPOINT ["uvicorn", "duui_climate:app", "--host", "0.0.0.0", "--port" ,"9714"]
+CMD ["--workers", "1"]
diff --git a/duui-Climate/src/main/docker/Dockerfile-cuda b/duui-Climate/src/main/docker/Dockerfile-cuda
new file mode 100644
index 00000000..8d902811
--- /dev/null
+++ b/duui-Climate/src/main/docker/Dockerfile-cuda
@@ -0,0 +1,74 @@
+FROM nvidia/cuda:11.8.0-base-ubuntu22.04
+
+# NOTE(review): confirm that the versions pinned in requirements.txt support
+# the Python interpreter installed here.
+RUN apt update && \
+    DEBIAN_FRONTEND=noninteractive \
+    apt install --no-install-recommends -y build-essential software-properties-common && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    apt install --no-install-recommends -y python3.10 python3-pip python3-setuptools python3-distutils && \
+    apt clean && rm -rf /var/lib/apt/lists/*
+
+RUN ln -s /usr/bin/python3 /usr/bin/python
+RUN python -m pip install --upgrade pip
+
+WORKDIR /usr/src/app
+
+EXPOSE 9714
+
+# dependencies
+RUN pip install setuptools wheel
+COPY ./requirements.txt ./requirements.txt
+RUN apt remove -y python3-blinker || true
+RUN pip install --no-cache-dir -r requirements.txt
+
+# copy scripts (must match the module named in ENTRYPOINT: duui_climate:app)
+COPY ./src/main/python/TypeSystemTopic.xml ./TypeSystemTopic.xml
+COPY ./src/main/python/duui_climate.py ./duui_climate.py
+COPY ./src/main/python/duui_climate.lua ./duui_climate.lua
+COPY ./src/main/python/Climate.py ./Climate.py
+
+# Model to bake into the image. Declared before the download step so that the
+# model cached at build time is the same one the service reads from MODEL_NAME
+# at runtime. The default matches the model that was previously hard-coded.
+ARG MODEL_NAME="climatebert/distilroberta-base-climate-specificity"
+ENV MODEL_NAME=$MODEL_NAME
+
+# Pre-download tokenizer and weights into the Hugging Face cache; the container
+# runs with TRANSFORMERS_OFFLINE=1 (see below) and cannot fetch them later.
+RUN python -c "import os; from transformers import AutoModelForSequenceClassification, AutoTokenizer; name = os.environ['MODEL_NAME']; AutoTokenizer.from_pretrained(name); AutoModelForSequenceClassification.from_pretrained(name)"
+
+# log level
+ARG LOG_LEVEL="DEBUG"
+ENV LOG_LEVEL=$LOG_LEVEL
+
+# config
+ARG MODEL_CACHE_SIZE=3
+ENV MODEL_CACHE_SIZE=$MODEL_CACHE_SIZE
+
+# meta data
+ARG ANNOTATOR_NAME="duui-climate"
+ENV ANNOTATOR_NAME=$ANNOTATOR_NAME
+ARG ANNOTATOR_VERSION="unset"
+ENV ANNOTATOR_VERSION=$ANNOTATOR_VERSION
+
+# Model Info (reported in the annotation metadata)
+ARG MODEL_VERSION=0.1
+ENV MODEL_VERSION=$MODEL_VERSION
+ARG MODEL_SOURCE=""
+ENV MODEL_SOURCE=$MODEL_SOURCE
+ARG MODEL_LANG=""
+ENV MODEL_LANG=$MODEL_LANG
+
+# offline mode for huggingface
+ARG TEXTIMAGER_DUUI_TRANSFORMERS_SENTIMENT_TRANSFORMERS_OFFLINE=1
+ENV TRANSFORMERS_OFFLINE=$TEXTIMAGER_DUUI_TRANSFORMERS_SENTIMENT_TRANSFORMERS_OFFLINE
+
+ENTRYPOINT ["uvicorn", "duui_climate:app", "--host", "0.0.0.0", "--port" ,"9714"]
+CMD ["--workers", "1"]
+
diff --git a/duui-Climate/src/main/python/Climate.py b/duui-Climate/src/main/python/Climate.py
new file mode 100644
index 00000000..b9ed6344
--- /dev/null
+++ b/duui-Climate/src/main/python/Climate.py
@@ -0,0 +1,53 @@
+import torch
+import math
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from scipy.special import softmax
+import numpy as np
+from typing import List
+
+# Maps the supported Hugging Face model ids to the short type name that the
+# service reports as "model_type" in its response.
+model_name_map = {
+    "climatebert/distilroberta-base-climate-detector": "ClimateDetector",
+    "climatebert/distilroberta-base-climate-tcfd": "ClimateTCFD",
+    "climatebert/distilroberta-base-climate-commitment": "ClimateCommitment",
+    "climatebert/distilroberta-base-climate-sentiment": "ClimateSentiment",
+    "climatebert/distilroberta-base-climate-specificity": "ClimateSpecificity",
+}
+
+def sigmoid(x):
+    """Return the logistic function 1 / (1 + e^-x) of a scalar ``x``.
+
+    The branch on the sign of ``x`` keeps the argument of ``math.exp``
+    non-positive, so very large negative inputs return 0.0 instead of
+    raising ``OverflowError``.
+    """
+    if x >= 0:
+        return 1 / (1 + math.exp(-x))
+    exp_x = math.exp(x)
+    return exp_x / (1 + exp_x)
+
+class ClimateBert:
+    """Sequence classifier wrapper around a Hugging Face ClimateBERT model."""
+
+    def __init__(self, model_name: str, device='cuda:0'):
+        """Load tokenizer and model for ``model_name`` and move the model to ``device``."""
+        self.device = device
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
+        # inference only: make sure dropout etc. are disabled
+        self.model.eval()
+        self.class_mapping = self.model.config.id2label
+        # Order labels by class id so that labels[i] always names logit column i,
+        # independent of the insertion order of the id2label dict.
+        self.labels = [self.class_mapping[idx] for idx in sorted(self.class_mapping)]
+
+    def prediction(self, texts: List[str]):
+        """Classify ``texts`` and return one ``{label: probability}`` dict per text.
+
+        Each dict is ordered by descending probability. Inputs are truncated to
+        512 tokens. An empty ``texts`` list yields an empty result without
+        calling the tokenizer or the model.
+        """
+        if not texts:
+            return []
+
+        with torch.no_grad():
+            inputs = self.tokenizer(
+                texts,
+                return_tensors="pt",
+                padding=True,
+                truncation=True,
+                max_length=512
+            ).to(self.device)
+
+            outputs = self.model(**inputs)
+            logits = outputs[0].float()  # convert bfloat16 -> float32
+            probs = torch.softmax(logits, dim=-1).cpu()
+
+        score_list = []
+        for prob in probs:
+            ranking = torch.argsort(prob, descending=True).tolist()
+            score_list.append({self.labels[i]: float(prob[i]) for i in ranking})
+        return score_list
+
diff --git a/duui-Climate/src/main/python/TypeSystemTopic.xml b/duui-Climate/src/main/python/TypeSystemTopic.xml
new file mode 100644
index 00000000..dc052a36
--- /dev/null
+++ b/duui-Climate/src/main/python/TypeSystemTopic.xml
@@ -0,0 +1,132 @@
+
+
+
+
+ org.texttechnologylab.annotation.AnnotatorMetaData
+
+ uima.cas.AnnotationBase
+
+
+ reference
+
+ uima.cas.TOP
+
+
+ name
+
+ uima.cas.String
+
+
+ version
+
+ uima.cas.String
+
+
+ modelName
+
+ uima.cas.String
+
+
+ modelVersion
+
+ uima.cas.String
+
+
+
+
+ uima.tcas.DocumentAnnotation
+
+ uima.tcas.Annotation
+
+
+ language
+
+ uima.cas.String
+
+
+
+
+ org.texttechnologylab.annotation.DocumentModification
+
+ uima.cas.AnnotationBase
+
+
+ user
+
+ uima.cas.String
+
+
+ timestamp
+
+ uima.cas.Long
+
+
+ comment
+
+ uima.cas.String
+
+
+
+
+ org.hucompute.textimager.uima.type.Sentiment
+
+ uima.tcas.Annotation
+
+
+ sentiment
+
+ uima.cas.Double
+
+
+ subjectivity
+
+ uima.cas.Double
+
+
+
+
+ org.hucompute.textimager.uima.type.CategorizedSentiment
+
+ org.hucompute.textimager.uima.type.Sentiment
+
+
+ pos
+
+ uima.cas.Double
+
+
+ neu
+
+ uima.cas.Double
+
+
+ neg
+
+ uima.cas.Double
+
+
+
+
+ org.texttechnologylab.annotation.AnnotationComment
+
+ uima.cas.AnnotationBase
+
+
+ reference
+
+ uima.cas.TOP
+
+
+ value
+
+ uima.cas.String
+
+
+ key
+
+ uima.cas.String
+
+
+
+
+
diff --git a/duui-Climate/src/main/python/duui_climate.lua b/duui-Climate/src/main/python/duui_climate.lua
new file mode 100644
index 00000000..fcd1740f
--- /dev/null
+++ b/duui-Climate/src/main/python/duui_climate.lua
@@ -0,0 +1,133 @@
+-- Java classes used by the (de)serialization functions below.
+StandardCharsets = luajava.bindClass("java.nio.charset.StandardCharsets")
+Class = luajava.bindClass("java.lang.Class")
+JCasUtil = luajava.bindClass("org.apache.uima.fit.util.JCasUtil")
+-- DUUI Lua helper class; used here to obtain the document text length
+TopicUtils = luajava.bindClass("org.texttechnologylab.DockerUnifiedUIMAInterface.lua.DUUILuaUtils")
+
+-- Serializes the CAS into the JSON request of the Python service.
+-- parameters["selection"] is a comma-separated list whose entries are either
+-- "text" (the whole document) or a UIMA type class name whose annotations are
+-- sent as individual spans. When the parameter is missing, "text" is used.
+function serialize(inputCas, outputStream, parameters)
+    local doc_lang = inputCas:getDocumentLanguage()
+    local doc_text = inputCas:getDocumentText()
+    local doc_len = TopicUtils:getDocumentTextLength(inputCas)
+
+    local selection_types = parameters["selection"]
+    if selection_types == nil then
+        -- string.gmatch fails on nil; fall back to processing the full document
+        selection_types = "text"
+    end
+
+    local selections = {}
+    for selection_type in string.gmatch(selection_types, "([^,]+)") do
+        local sentences = {}
+        if selection_type == "text" then
+            sentences[1] = {
+                text = doc_text,
+                begin = 0,
+                ['end'] = doc_len
+            }
+        else
+            local clazz = Class:forName(selection_type);
+            local sentences_it = JCasUtil:select(inputCas, clazz):iterator()
+            while sentences_it:hasNext() do
+                local sentence = sentences_it:next()
+                sentences[#sentences + 1] = {
+                    text = sentence:getCoveredText(),
+                    begin = sentence:getBegin(),
+                    ['end'] = sentence:getEnd()
+                }
+            end
+        end
+
+        selections[#selections + 1] = {
+            sentences = sentences,
+            selection = selection_type
+        }
+    end
+
+    outputStream:write(json.encode({
+        selections = selections,
+        lang = doc_lang,
+        doc_len = doc_len
+    }))
+end
+
+-- Reads the JSON response of the Python service and writes it into the CAS:
+-- one DocumentModification, one model MetaData, and per result span one
+-- Climate annotation holding an FSArray of AnnotationComment entries
+-- (key = label, value = score).
+function deserialize(inputCas, inputStream)
+    local inputString = luajava.newInstance("java.lang.String", inputStream:readAllBytes(), StandardCharsets.UTF_8)
+    local results = json.decode(inputString)
+
+    -- Nothing is written unless all required sections are present.
+    if results["modification_meta"] == nil or results["meta"] == nil or results["results"] == nil then
+        return
+    end
+
+    local modification_meta = results["modification_meta"]
+    local modification_anno = luajava.newInstance("org.texttechnologylab.annotation.DocumentModification", inputCas)
+    modification_anno:setUser(modification_meta["user"])
+    modification_anno:setTimestamp(modification_meta["timestamp"])
+    modification_anno:setComment(modification_meta["comment"])
+    modification_anno:addToIndexes()
+
+    local model_meta = luajava.newInstance("org.texttechnologylab.annotation.model.MetaData", inputCas)
+    model_meta:setModelVersion(results["model_version"])
+    model_meta:setModelName(results["model_name"])
+    model_meta:setSource(results["model_source"])
+    model_meta:setLang(results["model_lang"])
+    model_meta:addToIndexes()
+
+    -- Parallel lists: entry i of each list belongs to result span i.
+    local begin_climate = results["begin"]
+    local end_climate = results["end"]
+    local res_out = results["results"]
+    local res_len = results["len_results"]
+    local factors = results["factors"]
+    local maptype = results["model_type"]
+
+    for index_i, res in ipairs(res_out) do
+        local climate_i = luajava.newInstance("org.texttechnologylab.annotation.Climate", inputCas, begin_climate[index_i], end_climate[index_i])
+        local fsarray = luajava.newInstance("org.apache.uima.jcas.cas.FSArray", inputCas, res_len[index_i])
+        climate_i:setClimates(fsarray)
+
+        local factor_i = factors[index_i]
+        for index_j, climate_j in ipairs(res) do
+            -- declared local so the annotation does not leak into the global table
+            local climate_in_i = luajava.newInstance("org.texttechnologylab.annotation.AnnotationComment", inputCas)
+            climate_in_i:setReference(climate_i)
+            climate_in_i:setKey(climate_j)
+            climate_in_i:setValue(factor_i[index_j])
+            climate_in_i:addToIndexes()
+            -- FSArray positions are 0-based, Lua list indices are 1-based
+            climate_i:setClimates(index_j - 1, climate_in_i)
+        end
+
+        climate_i:setModel(model_meta)
+        climate_i:setClimateType(maptype)
+        climate_i:addToIndexes()
+    end
+end
diff --git a/duui-Climate/src/main/python/duui_climate.py b/duui-Climate/src/main/python/duui_climate.py
new file mode 100644
index 00000000..1fd9390c
--- /dev/null
+++ b/duui-Climate/src/main/python/duui_climate.py
@@ -0,0 +1,286 @@
+from pydantic import BaseModel
+from pydantic_settings import BaseSettings
+from typing import List, Optional, Dict, Union
+import logging
+from time import time
+from fastapi import FastAPI, Response
+from cassis import load_typesystem
+import torch
+from threading import Lock
+from functools import lru_cache
+from Climate import ClimateBert,model_name_map
+# from sp_correction import SentenceBestPrediction
+
+# Settings
+# These are automatically loaded from env variables
+from starlette.responses import PlainTextResponse
+
+model_lock = Lock()
+
+
+# One text span of the request: its text and its begin/end offsets
+class UimaSentence(BaseModel):
+    text: str
+    begin: int
+    end: int
+
+
+# All spans collected for one entry of the "selection" parameter
+class UimaSentenceSelection(BaseModel):
+    # selection entry the spans were collected for
+    selection: str
+    # spans to classify
+    sentences: List[UimaSentence]
+
+
+class Settings(BaseSettings):
+    # Name of this annotator
+    annotator_name: str
+    # Version of this annotator
+    annotator_version: str
+    # Log level
+    log_level: str
+    # Name of the model to load
+    model_name: str
+    # Version of the model
+    model_version: str
+    # Maximum number of models kept in the LRU model cache
+    model_cache_size: int
+    # URL of the model
+    model_source: str
+    # Language of the model
+    model_lang: str
+
+
+# Load settings from env vars
+settings = Settings()
+# LRU cache decorator sized by the configured model cache size (applied to load_model)
+lru_cache_with_size = lru_cache(maxsize=settings.model_cache_size)
+logging.basicConfig(level=settings.log_level)
+logger = logging.getLogger(__name__)
+
+# Use the first CUDA device when available, otherwise fall back to the CPU
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+# device = "cpu"
+logger.info(f'USING {device}')
+# Load the predefined typesystem that is needed for this annotator to work
+typesystem_filename = 'TypeSystemTopic.xml'
+logger.debug("Loading typesystem from \"%s\"", typesystem_filename)
+with open(typesystem_filename, 'rb') as f:
+    typesystem = load_typesystem(f)
+    logger.debug("Base typesystem:")
+    logger.debug(typesystem.to_xml())
+
+# Load the Lua communication script
+lua_communication_script_filename = "duui_climate.lua"
+logger.debug("Loading Lua communication script from \"%s\"", lua_communication_script_filename)
+
+
+# Request sent by DUUI
+# Note, this is transformed by the Lua script
+class DUUIRequest(BaseModel):
+    # Length of the document text
+    doc_len: int
+    # Language of the document
+    lang: str
+    # Spans to classify, grouped by selection entry
+    selections: List[UimaSentenceSelection]
+
+
+# UIMA type: mark modification of the document
+class DocumentModification(BaseModel):
+    # who modified the document (this service fills in the annotator name)
+    user: str
+    # modification time, taken from int(time()) by this service
+    timestamp: int
+    # free-text note about the modification
+    comment: str
+
+
+# UIMA type: adds metadata to each annotation
+class AnnotationMeta(BaseModel):
+    # annotator name and version
+    name: str
+    version: str
+    # name and version of the model that produced the annotation
+    modelName: str
+    modelVersion: str
+
+
+# Response sent by DUUI
+# Note, this is transformed by the Lua script
+class DUUIResponse(BaseModel):
+    # The list fields below are parallel: entry i of each list describes span i.
+    # Annotator/model metadata
+    meta: AnnotationMeta
+    # Modification meta, one per document
+    modification_meta: DocumentModification
+    # Begin offset of every classified span
+    begin: List[int]
+    # End offset of every classified span
+    end: List[int]
+    # Per span: the label names returned by the classifier
+    results: List
+    # Per span: the scores belonging to the labels in "results" (same order)
+    factors: List
+    # Per span: number of labels in "results"
+    len_results: List[int]
+    # Model information taken from the service settings
+    model_name: str
+    model_version: str
+    model_source: str
+    model_lang: str
+    # Short type name from model_name_map, or "others" for unmapped models
+    model_type: str
+
+
+# FastAPI application; the interactive API docs are served under /api
+app = FastAPI(
+    openapi_url="/openapi.json",
+    docs_url="/api",
+    redoc_url=None,
+    title=settings.annotator_name,
+    # describes this component (was a leftover "Factuality annotator" text)
+    description="Climate annotator",
+    version=settings.annotator_version,
+    terms_of_service="https://www.texttechnologylab.org/legal_notice/",
+    contact={
+        "name": "TTLab Team",
+        "url": "https://texttechnologylab.org",
+        "email": "bagci@em.uni-frankfurt.de",
+    },
+    license_info={
+        "name": "AGPL",
+        "url": "http://www.gnu.org/licenses/agpl-3.0.en.html",
+    },
+)
+
+# Read the Lua communication script once at start-up; it is served verbatim
+# by the /v1/communication_layer endpoint.
+with open(lua_communication_script_filename, 'rb') as f:
+    lua_communication_script = f.read().decode("utf-8")
+logger.debug("Lua communication script:")
+# log the script content itself (the filename is already logged before loading)
+logger.debug(lua_communication_script)
+
+
+# Serve the typesystem of this annotator as XML
+@app.get("/v1/typesystem")
+def get_typesystem() -> Response:
+    # TODO remove cassis dependency, as only needed for typesystem at the moment?
+    return Response(
+        content=typesystem.to_xml().encode("utf-8"),
+        media_type="application/xml"
+    )
+
+
+# Return the Lua communication script (read at start-up) as plain text
+@app.get("/v1/communication_layer", response_class=PlainTextResponse)
+def get_communication_layer() -> str:
+    return lua_communication_script
+
+
+# Return documentation info
+# NOTE(review): currently only returns the placeholder string "Test"
+@app.get("/v1/documentation")
+def get_documentation():
+    return "Test"
+
+
+# Create the classifier for model_name on the selected device; results are
+# memoized by the LRU cache so repeated calls with the same name reuse the instance
+@lru_cache_with_size
+def load_model(model_name):
+    model_i = ClimateBert(model_name, device)
+    return model_i
+
+
+def fix_unicode_problems(text):
+    # Round-trip the string through UTF-16 so that surrogate pairs (e.g. emoji)
+    # become regular code points; otherwise rendering the JSON response fails with
+    # File "/usr/local/lib/python3.8/site-packages/starlette/responses.py", line 190, in render
+    # UnicodeEncodeError: 'utf-8' codec can't encode characters in position xx-yy: surrogates not allowed
+    utf16_bytes = text.encode('utf-16', 'surrogatepass')
+    return utf16_bytes.decode('utf-16', 'surrogateescape')
+
+
+def process_selection(model_name, selection):
+    """Classify all spans of one selection.
+
+    Returns a dict of parallel lists ("begin", "end", "len_results", "results",
+    "factors"), one entry per span, plus "model_type" (short name from
+    model_name_map, or "others" for unmapped models). The span texts in
+    ``selection`` are cleaned in place before classification. A selection
+    without spans returns empty lists without loading or calling the model.
+    """
+    output = {
+        "begin": [],
+        "end": [],
+        "len_results": [],
+        "results": [],
+        "factors": [],
+        "model_type": model_name_map.get(model_name, "others")
+    }
+    if not selection.sentences:
+        return output
+
+    for s in selection.sentences:
+        s.text = fix_unicode_problems(s.text)
+    texts = [s.text for s in selection.sentences]
+    logger.debug("Preprocessed texts:")
+    logger.debug(texts)
+
+    # model loading and inference are serialized across requests
+    with model_lock:
+        classifier = load_model(model_name)
+        results = classifier.prediction(texts)
+
+    # results[i] is a {label: score} dict for selection.sentences[i]
+    for sentence_i, res in zip(selection.sentences, results):
+        output["begin"].append(sentence_i.begin)
+        output["end"].append(sentence_i.end)
+        output["len_results"].append(len(res))
+        output["results"].append(list(res.keys()))
+        output["factors"].append(list(res.values()))
+
+    return output
+
+
+# Process request from DUUI
+@app.post("/v1/process")
+def post_process(request: DUUIRequest):
+    """Classify every selection of the request and return the merged results.
+
+    Errors raised while processing the selections are logged; the response
+    then contains whatever was collected up to that point.
+    """
+    # Save modification start time for later
+    modification_timestamp_seconds = int(time())
+
+    # Metadata is built before the try block so the response below can always be
+    # constructed, even when processing a selection fails.
+    meta = AnnotationMeta(
+        name=settings.annotator_name,
+        version=settings.annotator_version,
+        modelName=settings.model_name,
+        modelVersion=settings.model_version,
+    )
+    modification_meta = DocumentModification(
+        user=settings.annotator_name,
+        timestamp=modification_timestamp_seconds,
+        # fixed: the comment used to end with a stray second ")"
+        comment=f"{settings.annotator_name} ({settings.annotator_version})"
+    )
+    model_type = model_name_map.get(settings.model_name, "others")
+
+    # Return data
+    begin = []
+    end = []
+    len_results = []
+    results = []
+    factors = []
+    try:
+        for selection in request.selections:
+            processed_sentences = process_selection(settings.model_name, selection)
+            begin.extend(processed_sentences["begin"])
+            end.extend(processed_sentences["end"])
+            len_results.extend(processed_sentences["len_results"])
+            results.extend(processed_sentences["results"])
+            factors.extend(processed_sentences["factors"])
+    except Exception as ex:
+        logger.exception(ex)
+    return DUUIResponse(meta=meta, modification_meta=modification_meta, begin=begin, end=end, results=results,
+                        len_results=len_results, factors=factors, model_name=settings.model_name,
+                        model_version=settings.model_version, model_source=settings.model_source,
+                        model_lang=settings.model_lang, model_type=model_type)
diff --git a/duui-Climate/src/test/java/org/hucompute/textimager/uima/climate/ClimateTest.java b/duui-Climate/src/test/java/org/hucompute/textimager/uima/climate/ClimateTest.java
new file mode 100644
index 00000000..23db65e7
--- /dev/null
+++ b/duui-Climate/src/test/java/org/hucompute/textimager/uima/climate/ClimateTest.java
@@ -0,0 +1,183 @@
+package org.hucompute.textimager.uima.climate;
+
+import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
+import org.apache.commons.compress.compressors.CompressorException;
+import org.apache.uima.UIMAException;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.util.XmlCasSerializer;
+import org.junit.jupiter.api.*;
+import org.texttechnologylab.DockerUnifiedUIMAInterface.DUUIComposer;
+import org.texttechnologylab.DockerUnifiedUIMAInterface.driver.DUUIRemoteDriver;
+import org.texttechnologylab.DockerUnifiedUIMAInterface.lua.DUUILuaContext;
+import org.xml.sax.SAXException;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.UnknownHostException;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.texttechnologylab.annotation.Climate;
+import org.texttechnologylab.annotation.AnnotationComment;
+
+public class ClimateTest {
+ // Pipeline composer shared by all tests; assigned in beforeAll() and shut down in afterAll().
+ static DUUIComposer composer;
+ // CAS shared by all tests; created in beforeAll() and reset in afterEach().
+ static JCas cas;
+
+ // Endpoint of the component under test; passed to DUUIRemoteDriver.Component in the tests.
+ static String url = "http://127.0.0.1:9714";
+ // NOTE(review): the two disabled lines below presumably stem from a topic-component test this
+ // class was copied from; nothing in the visible code references `model` - confirm and remove.
+// static String url = "http://tweentopic.service.component.duui.texttechnologylab.org";
+// static String model = "chkla/parlbert-topic-german";
+
+ /**
+  * One-time fixture setup: builds the shared composer (verification skipped, Lua context with
+  * the JSON library), registers a remote driver on it and creates the shared, empty CAS.
+  */
+ @BeforeAll
+ static void beforeAll() throws URISyntaxException, IOException, UIMAException, SAXException, CompressorException {
+     DUUIComposer unverified = new DUUIComposer().withSkipVerification(true);
+     composer = unverified.withLuaContext(new DUUILuaContext().withJsonLibrary());
+
+     // Only the remote driver is registered; the Docker variant is kept for reference:
+     //   composer.addDriver(new DUUIDockerDriver());
+     composer.addDriver(new DUUIRemoteDriver());
+
+     cas = JCasFactory.createJCas();
+ }
+
+ /**
+  * Shuts the shared composer down once all tests of this class have run.
+  *
+  * @throws UnknownHostException propagated from {@code DUUIComposer.shutdown()}
+  */
+ @AfterAll
+ static void afterAll() throws UnknownHostException {
+     // The composer is still null when beforeAll() failed before assigning it; skipping the
+     // shutdown then keeps a NullPointerException from piling onto the real setup failure.
+     if (composer != null) {
+         composer.shutdown();
+     }
+ }
+
+ /**
+  * Per-test cleanup: clears the composer's pipeline, prints the resulting CAS as XML to
+  * standard output for inspection, and resets the shared CAS.
+  *
+  * @throws IOException  if resetting the pipeline or serializing the CAS fails
+  * @throws SAXException if serializing the CAS fails
+  */
+ @AfterEach
+ public void afterEach() throws IOException, SAXException {
+     try {
+         composer.resetPipeline();
+
+         ByteArrayOutputStream stream = new ByteArrayOutputStream();
+         XmlCasSerializer.serialize(cas.getCas(), null, stream);
+         System.out.println(stream.toString(StandardCharsets.UTF_8));
+     } finally {
+         // Reset even when the dump above throws, so that annotations and text of this test
+         // never leak into the next one through the shared CAS.
+         cas.reset();
+     }
+ }
+
+ public void createCas(String language, List sentences) throws UIMAException {
+ cas.setDocumentLanguage(language);
+
+ StringBuilder sb = new StringBuilder();
+ for (String sentence : sentences) {
+ Sentence sentenceAnnotation = new Sentence(cas, sb.length(), sb.length()+sentence.length());
+ sentenceAnnotation.addToIndexes();
+ sb.append(sentence).append(" ");
+ }
+
+ cas.setDocumentText(sb.toString());
+ }
+
+ @Test
+ public void DeTest() throws Exception {
+ HashMap> expected1 = new HashMap<>();
+ ArrayList expected2 = new ArrayList<>();
+ expected2.add("Domestic");
+ expected2.add("Technology");
+ expected1.put("test", expected2);
+ composer.add(
+ new DUUIRemoteDriver.Component(url)
+ .withParameter("selection", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence")
+ );
+
+ List sentences = Arrays.asList(
+ "Ich bin ein Profi-Fußballspieler und spiele bei FC Barcelona in Spanien.",
+ "Das sind die Aktuellen Neuigkeiten aus den USA. Joe Biden hat die Wahl gewonnen."
+ );
+
+ createCas("de", sentences);
+ composer.run(cas);
+
+ Collection all_climates = JCasUtil.select(cas, Climate.class);
+ ArrayList