Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions duui-relation-extraction/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Relation Extraction

DUUI implementation for selected Hugging Face transformer [relation-extraction models](https://huggingface.co/models?other=relation-extraction).

## Included Models

| Name | link | Revision | Languages |
|----------------------------------------|----------------------------------------------------------------------------------|-------------------------------------------|-------------------|
| Babelscape/rebel-large | https://huggingface.co/Babelscape/rebel-large | 44eb6cb4585df284ce6c4d6a7013f83fe473c052 | Multilingual |
| ibm-research/knowgl-large | https://huggingface.co/ibm-research/knowgl-large | 94596fd9f697498f7ee7363dbf4cc66f08d499e8 | Multilingual |

## Execution

Start the container, where `[MODEL_NAME]` is one of the model names listed above:

`docker run -p 8000:8000 -e MODEL_NAME=[MODEL_NAME] [IMAGE_NAME]`
64 changes: 64 additions & 0 deletions duui-relation-extraction/docker_build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env bash
# Build and tag one DUUI relation-extraction image per supported model.
# Run from the annotator root directory: it is used as the Docker build context.
set -euo pipefail

# Image flavour: "-cuda" selects src/main/docker/Dockerfile-cuda and is appended
# to every tag; switch to the empty value to build from the CPU Dockerfile.
#export ANNOTATOR_CUDA=
export ANNOTATOR_CUDA="-cuda"

export ANNOTATOR_NAME=duui-relation-extraction
export ANNOTATOR_VERSION=0.4.0
export LOG_LEVEL=DEBUG
export MODEL_CACHE_SIZE=3
export DOCKER_REGISTRY="docker.texttechnologylab.org/"

# Build the image for a single model and additionally tag it as "latest".
#   $1 MODEL_NAME      Hugging Face model id
#   $2 MODEL_SPECNAME  short name used in the image name
#   $3 MODEL_VERSION   model revision
#   $4 MODEL_SOURCE    model URL
#   $5 MODEL_LANG      supported language(s)
build_model() {
  # Exported because "--build-arg NAME" without a value reads NAME from the
  # environment of the docker client.
  export MODEL_NAME="$1"
  export MODEL_SPECNAME="$2"
  export MODEL_VERSION="$3"
  export MODEL_SOURCE="$4"
  export MODEL_LANG="$5"

  local image="${DOCKER_REGISTRY}${ANNOTATOR_NAME}-${MODEL_SPECNAME}"

  docker build \
    --build-arg ANNOTATOR_NAME \
    --build-arg ANNOTATOR_VERSION \
    --build-arg LOG_LEVEL \
    --build-arg MODEL_CACHE_SIZE \
    --build-arg MODEL_NAME \
    --build-arg MODEL_VERSION \
    --build-arg MODEL_SOURCE \
    --build-arg MODEL_LANG \
    -t "${image}:${ANNOTATOR_VERSION}${ANNOTATOR_CUDA}" \
    -f "src/main/docker/Dockerfile${ANNOTATOR_CUDA}" \
    .

  docker tag \
    "${image}:${ANNOTATOR_VERSION}${ANNOTATOR_CUDA}" \
    "${image}:latest${ANNOTATOR_CUDA}"
}

####---------------------------------------------------------------------
build_model \
  "Babelscape/rebel-large" \
  "rebel-large" \
  "44eb6cb4585df284ce6c4d6a7013f83fe473c052" \
  "https://huggingface.co/Babelscape/rebel-large" \
  "Multilingual"

####---------------------------------------------------------------------
build_model \
  "ibm-research/knowgl-large" \
  "knowgl-large" \
  "94596fd9f697498f7ee7363dbf4cc66f08d499e8" \
  "https://huggingface.co/ibm-research/knowgl-large" \
  "Multilingual"

7 changes: 7 additions & 0 deletions duui-relation-extraction/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
fastapi
uvicorn
torch
transformers
pydantic
pydantic-settings
sentencepiece
23 changes: 23 additions & 0 deletions duui-relation-extraction/src/main/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# CPU image for the DUUI relation-extraction service.
# Build from the annotator root so that it is the build context, e.g.
#   docker build -f src/main/docker/Dockerfile .
FROM python:3.10

WORKDIR /app

# COPY sources are resolved against the build context root (not against the
# directory of this Dockerfile) and must not point outside of it.
COPY ./requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

COPY ./src/main/python/TypSystemRelation.xml ./TypSystemRelation.xml
COPY ./src/main/python/duui_relation_extraction.py ./duui_relation_extraction.py
COPY ./src/main/python/duui_relation_extraction.lua ./duui_relation_extraction.lua
COPY ./src/main/python/relation_extraction.py ./relation_extraction.py

# NOTE(review): these directories must exist in the build context; confirm they
# are still needed, since the service addresses models by their Hugging Face id.
COPY ./src/main/python/offline_models/rebel_large ./offline_models/rebel_large
COPY ./src/main/python/offline_models/knowgl-large ./offline_models/knowgl-large

# Must be a model id known to duui_relation_extraction.py
# ("Babelscape/rebel-large" or "ibm-research/knowgl-large"); any other value
# makes the service fail at start-up.
ARG MODEL_NAME="Babelscape/rebel-large"
ENV MODEL_NAME=$MODEL_NAME

EXPOSE 8000

# Read by duui_relation_extraction.py to choose the inference device.
ENV DEVICE=cpu

CMD ["uvicorn", "duui_relation_extraction:duui_relation_extraction", "--host", "0.0.0.0", "--port", "8000"]
62 changes: 62 additions & 0 deletions duui-relation-extraction/src/main/docker/Dockerfile-cuda
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# CUDA image for the DUUI relation-extraction service.
# Build from the annotator root so that it is the build context, e.g.
#   docker build -f src/main/docker/Dockerfile-cuda .
FROM nvidia/cuda:11.8.0-base-ubuntu22.04

RUN apt update && \
    DEBIAN_FRONTEND=noninteractive \
    apt install --no-install-recommends -y build-essential software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt install --no-install-recommends -y python3.10 python3-pip python3-setuptools python3-distutils && \
    apt clean && rm -rf /var/lib/apt/lists/*

RUN ln -s /usr/bin/python3 /usr/bin/python
RUN python -m pip install --upgrade pip

WORKDIR /usr/src/app

EXPOSE 8000

# dependencies
RUN pip install setuptools wheel
COPY ./requirements.txt ./requirements.txt
# Best effort: the build continues even if the package cannot be removed.
RUN apt remove -y python3-blinker || true
RUN pip install -r requirements.txt

# Download both models at build time so the container can run with
# TRANSFORMERS_OFFLINE=1 (set below).
# NOTE(review): the 'text-classification' task only serves to trigger the
# download here; confirm relation_extraction.py loads the same cached files.
RUN python -c "from transformers import pipeline; pipeline('text-classification', model='Babelscape/rebel-large')"
RUN python -c "from transformers import pipeline; pipeline('text-classification', model='ibm-research/knowgl-large')"

# copy scripts
COPY ./src/main/python/TypSystemRelation.xml ./TypSystemRelation.xml
COPY ./src/main/python/duui_relation_extraction.py ./duui_relation_extraction.py
COPY ./src/main/python/duui_relation_extraction.lua ./duui_relation_extraction.lua
COPY ./src/main/python/relation_extraction.py ./relation_extraction.py

# log level
ARG LOG_LEVEL="DEBUG"
ENV LOG_LEVEL=$LOG_LEVEL

# config
ARG MODEL_CACHE_SIZE=3
ENV MODEL_CACHE_SIZE=$MODEL_CACHE_SIZE

# Inference device read by duui_relation_extraction.py, which falls back to
# "cpu" when DEVICE is unset. Override with "-e DEVICE=cpu" at run time to run
# this image without a GPU.
ARG DEVICE="cuda"
ENV DEVICE=$DEVICE

# meta data
ARG ANNOTATOR_NAME="duui-relation-extraction"
ENV ANNOTATOR_NAME=$ANNOTATOR_NAME
ARG ANNOTATOR_VERSION="unset"
ENV ANNOTATOR_VERSION=$ANNOTATOR_VERSION

# Model Info
ARG MODEL_VERSION=0.1
ENV MODEL_VERSION=$MODEL_VERSION
# MODEL_NAME has no usable default: it must be passed as a build argument or
# set at run time to a model id known to duui_relation_extraction.py.
ARG MODEL_NAME=""
ENV MODEL_NAME=$MODEL_NAME
ARG MODEL_SOURCE=""
ENV MODEL_SOURCE=$MODEL_SOURCE
ARG MODEL_LANG=""
ENV MODEL_LANG=$MODEL_LANG

# offline mode for huggingface
ARG DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE=1
ENV TRANSFORMERS_OFFLINE=$DUUI_TRANSFORMERS_TRANSFORMERS_OFFLINE


# CMD supplies default extra arguments to the ENTRYPOINT; arguments given to
# "docker run" after the image name replace it.
ENTRYPOINT ["uvicorn", "duui_relation_extraction:duui_relation_extraction", "--host", "0.0.0.0", "--port" ,"8000"]
CMD ["--workers", "1"]
28 changes: 28 additions & 0 deletions duui-relation-extraction/src/main/python/TypSystemRelation.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- UIMA type system declaring the single annotation type used for extracted relations. -->
<TypeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
    <types>
        <!-- One annotation per extracted (subject, predicate, object) triple. -->
        <!-- NOTE(review): the type name has no package/namespace prefix, and the
             description mentions only REBEL; confirm both are intended. -->
        <typeDescription>
            <name>Relation</name>
            <supertypeName>uima.tcas.Annotation</supertypeName>
            <description>Relation extracted by REBEL</description>
            <features>
                <!-- Triple components, stored as plain strings. -->
                <featureDescription>
                    <name>subject</name>
                    <rangeTypeName>uima.cas.String</rangeTypeName>
                </featureDescription>
                <featureDescription>
                    <name>predicate</name>
                    <rangeTypeName>uima.cas.String</rangeTypeName>
                </featureDescription>
                <featureDescription>
                    <name>object</name>
                    <rangeTypeName>uima.cas.String</rangeTypeName>
                </featureDescription>
                <!-- Score attached to the triple, stored as a float. -->
                <featureDescription>
                    <name>confidence</name>
                    <rangeTypeName>uima.cas.Float</rangeTypeName>
                </featureDescription>
            </features>
        </typeDescription>
    </types>
</TypeSystemDescription>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
-- DUUI Lua mapping for Relation Extraction
-- Maps DUUIRequest → Python
-- Maps Python Response → DUUI Annotations

-- Build the request payload for the Python service from a document table.
-- Reads doc.text, doc.lang and doc.sentences; the language falls back to "en"
-- when doc.lang is nil or false.
function map_request(doc)
    local payload = {}
    payload.doc_len = #doc.text
    payload.lang = doc.lang or "en"
    payload.selections = doc.sentences
    return payload
end

-- Flatten the service response into a list of relation annotations.
--
-- response.relations holds one list of triples per sentence, and
-- response.begins / response.ends hold that sentence's offsets at the same
-- index. Each returned entry carries the triple fields plus the offsets of the
-- sentence it came from ("begin" / "end"), which are nil when the response
-- provides no offsets. A response without relations yields an empty list.
function map_response(response)
    local annotations = {}
    local begins = response.begins or {}
    local ends = response.ends or {}

    for i, sentence_relations in ipairs(response.relations or {}) do
        for _, triplet in ipairs(sentence_relations) do
            table.insert(annotations, {
                -- "end" is a reserved word in Lua, hence the bracket syntax.
                ["begin"] = begins[i],
                ["end"] = ends[i],
                subject = triplet.subject,
                predicate = triplet.predicate,
                object = triplet.object,
                confidence = triplet.confidence
            })
        end
    end

    return annotations
end
126 changes: 126 additions & 0 deletions duui-relation-extraction/src/main/python/duui_relation_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
"""
DUUI endpoint for relation extraction.
"""

from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
from relation_extraction import RebelRelationExtraction, KnowGLRelationExtraction, GLiNERRelationExtraction
from functools import lru_cache
from threading import Lock
import torch
from time import time
import os

# Held while the shared model instance is loaded and used for inference.
model_lock = Lock()
# Device string handed to the model wrapper; "cpu" unless DEVICE is set.
device = os.getenv("DEVICE", "cpu")

# Supported models: MODEL_NAME value -> model path and wrapper class.
MODELS = {
    "Babelscape/rebel-large": {
        "path": "Babelscape/rebel-large",
        "class": RebelRelationExtraction,
    },
    "ibm-research/knowgl-large": {
        "path": "ibm-research/knowgl-large",
        "class": KnowGLRelationExtraction,
    },
}

model_name = os.getenv("MODEL_NAME")
if model_name not in MODELS:
    # Fail at start-up with an actionable message rather than a bare
    # "KeyError: None" when the variable is missing or misspelled.
    raise ValueError(
        f"Unsupported or missing MODEL_NAME {model_name!r}; "
        f"expected one of: {', '.join(sorted(MODELS))}"
    )

MODEL_PATH = MODELS[model_name]["path"]

model_class = MODELS[model_name]["class"]

# A single sentence to run relation extraction on.
class UimaSentence(BaseModel):
    # Sentence text passed to the model.
    text: str
    # Sentence boundaries, copied unchanged into the response.
    # NOTE(review): presumably character offsets into the document - confirm.
    begin: int
    end: int

# A named group of sentences that is processed as one batch.
class UimaSentenceSelection(BaseModel):
    # Label of the selection; not read by the processing code in this file.
    selection: str
    # Sentences belonging to this selection, in processing order.
    sentences: List[UimaSentence]

# Request body accepted by the /v1/process endpoint.
class DUUIRequest(BaseModel):
    # Length of the document text (the Lua mapping sends #doc.text).
    doc_len: int
    # Document language (the Lua mapping falls back to "en").
    lang: str
    # Sentence selections to process.
    # NOTE(review): the Lua mapping passes doc.sentences here - confirm it has
    # the selection/sentences shape this model requires.
    selections: List[UimaSentenceSelection]

# Record of who produced the annotations and when.
class DocumentModification(BaseModel):
    # Name of the component that modified the document.
    user: str
    # Time of the modification; the endpoint fills in int(time()), i.e. seconds.
    timestamp: int
    # Free-text description of the modification.
    comment: str

# Metadata describing the annotator and the model behind the annotations.
class AnnotationMeta(BaseModel):
    # Annotator name and version.
    name: str
    version: str
    # Model name and version.
    modelName: str
    modelVersion: str

# Response body returned by the /v1/process endpoint.
class DUUIResponse(BaseModel):
    meta: AnnotationMeta
    modification_meta: DocumentModification
    # begins, ends and relations are parallel lists with one entry per
    # processed sentence, in request order.
    begins: List[int]
    ends: List[int]
    # Relations found in each sentence; the Lua mapping reads the keys
    # subject, predicate, object and confidence from every dict.
    relations: List[List[dict]]
    # Model information repeated at the top level of the response.
    model_name: str
    model_version: str
    model_source: str
    model_lang: str

# ASGI application object; the Dockerfiles start it with
# "uvicorn duui_relation_extraction:duui_relation_extraction".
duui_relation_extraction = FastAPI(title="DUUI Relation Extraction", version="1.0")

@lru_cache()
def load_model():
    """Instantiate the configured model wrapper.

    The result is memoised by ``lru_cache``, so the wrapper is constructed on
    the first call and the same instance is returned afterwards.
    """
    wrapper_cls, model_path, target_device = model_class, MODEL_PATH, device
    return wrapper_cls(model_path, target_device)

def process_selection(selection: UimaSentenceSelection):
    """Extract relations for every sentence of one selection.

    Each sentence text is prefixed with ``"sentence: "`` and the whole batch is
    passed to the model in a single call while ``model_lock`` is held.

    Returns:
        A ``(begins, ends, relations)`` tuple of parallel lists with one entry
        per sentence, in input order.
    """
    sentences = selection.sentences
    prompts = ["sentence: " + str(item.text) for item in sentences]

    with model_lock:
        extracted = load_model().extract_relations(prompts)

    begins = [item.begin for item in sentences]
    ends = [item.end for item in sentences]
    # Index by position so that a result list shorter than the input raises,
    # instead of silently dropping sentences.
    relations = [extracted[position] for position in range(len(sentences))]

    return begins, ends, relations

@duui_relation_extraction.post("/v1/process")
def post_process(request: DUUIRequest):
    """Run relation extraction on every selection of the request.

    The reported model name is the model actually configured through
    ``MODEL_NAME``. Annotator and model metadata are taken from the
    ``ANNOTATOR_NAME``, ``ANNOTATOR_VERSION``, ``MODEL_VERSION``,
    ``MODEL_SOURCE`` and ``MODEL_LANG`` environment variables, with the
    previous fixed values as fallback.

    Args:
        request: Selections of sentences to process.

    Returns:
        A ``DUUIResponse`` whose ``begins``, ``ends`` and ``relations`` lists
        hold one entry per processed sentence, in request order.
    """
    modification_timestamp_seconds = int(time())

    # "or" instead of a getenv default: the Docker image may define these
    # variables as empty strings, which must also fall back.
    annotator_name = os.getenv("ANNOTATOR_NAME") or "REBEL Relation Extractor"
    annotator_version = os.getenv("ANNOTATOR_VERSION") or "1.0"
    model_version = os.getenv("MODEL_VERSION") or "1.0"
    model_source = os.getenv("MODEL_SOURCE") or "HuggingFace"
    model_lang = os.getenv("MODEL_LANG") or "multilingual"

    meta = AnnotationMeta(
        name=annotator_name,
        version=annotator_version,
        modelName=model_name,
        modelVersion=model_version,
    )

    modification_meta = DocumentModification(
        user="REBEL RE",
        timestamp=modification_timestamp_seconds,
        comment="Relation Extraction",
    )

    begins, ends, relations = [], [], []
    for selection in request.selections:
        b, e, r = process_selection(selection)
        begins.extend(b)
        ends.extend(e)
        relations.extend(r)

    return DUUIResponse(
        meta=meta,
        modification_meta=modification_meta,
        begins=begins,
        ends=ends,
        relations=relations,
        model_name=model_name,
        model_version=model_version,
        model_source=model_source,
        model_lang=model_lang,
    )
Loading