diff --git a/duui-taxon-resolver/.gitignore b/duui-taxon-resolver/.gitignore
new file mode 100644
index 0000000..fc03531
--- /dev/null
+++ b/duui-taxon-resolver/.gitignore
@@ -0,0 +1,11 @@
+### IDE Files ###
+.idea/
+.vscode/
+
+### Java Environment ###
+target/
+
+### Python Environment ###
+__pycache__/
+*.pyc
+.venv/
diff --git a/duui-taxon-resolver/DOCKERFILE b/duui-taxon-resolver/DOCKERFILE
new file mode 100644
index 0000000..08473f5
--- /dev/null
+++ b/duui-taxon-resolver/DOCKERFILE
@@ -0,0 +1,21 @@
+# Container image for the DUUI taxon resolver (FastAPI app served by uvicorn on port 9714).
+FROM python:3.12.3
+
+WORKDIR /app
+
+RUN pip install --upgrade pip
+
+# Dependencies are installed before the sources are copied, so code-only changes
+# do not invalidate this layer.
+COPY requirements.txt .
+RUN pip install -r requirements.txt
+
+# In production mode the service reads the Lua script and the type system from /app,
+# next to the Python modules.
+COPY src/main/python/*.py ./
+COPY src/main/lua/communication_layer.lua ./
+COPY src/main/resources/typesystem.xml ./
+
+# Preload the backbone data to avoid doing it at runtime when the first request arrives
+RUN python -c "import taxref_loader; taxref_loader.preload_backbone()"
+
+# Selects the "production" branch of the service's path configuration.
+ENV TAXON_RESOLVER_EXECUTION_MODE=production
+
+EXPOSE 9714
+# ENTRYPOINT is fixed; CMD only carries default extra uvicorn arguments and can be
+# replaced when the container is started.
+ENTRYPOINT ["uvicorn", "taxon-resolver:app", "--host", "0.0.0.0", "--port", "9714"]
+CMD ["--workers", "1"]
diff --git a/duui-taxon-resolver/docker_build.sh b/duui-taxon-resolver/docker_build.sh
new file mode 100644
index 0000000..43495a7
--- /dev/null
+++ b/duui-taxon-resolver/docker_build.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# set default values for build args if not provided
+ANNOTATOR_NAME="${ANNOTATOR_NAME:-duui-taxon-resolver}"
+ANNOTATOR_VERSION="${ANNOTATOR_VERSION:-1.0.0}"
+LOG_LEVEL="${LOG_LEVEL:-INFO}"
+
+# Check if BUILD_TOOL is set, otherwise check for podman or docker
+if [ -n "${BUILD_TOOL:-}" ]; then
+ echo "⚙️ Using build tool: ${BUILD_TOOL}"
+# Test if docker is available and can be used
+elif (command -v docker > /dev/null 2>&1;) && (docker info > /dev/null 2>&1;) then
+ BUILD_TOOL="docker"
+ echo "⚙️ Using Docker as build tool"
+elif (command -v podman > /dev/null 2>&1;) && (podman info > /dev/null 2>&1;) then
+ BUILD_TOOL="podman"
+ echo "⚙️ Using Podman as build tool"
+else
+ echo "❌ Error: No build tool found or permissions missing. Please install Docker or Podman and ensure you have permission to run it."
+ exit 1
+fi
+
+${BUILD_TOOL} build \
+ --env TAXON_RESOLVER_ANNOTATOR_NAME="${ANNOTATOR_NAME}" \
+ --env TAXON_RESOLVER_ANNOTATOR_VERSION="${ANNOTATOR_VERSION}" \
+ --env TAXON_RESOLVER_LOG_LEVEL="${LOG_LEVEL}" \
+ -t "${ANNOTATOR_NAME}:${ANNOTATOR_VERSION}" \
+ -f DOCKERFILE \
diff --git a/duui-taxon-resolver/pom.xml b/duui-taxon-resolver/pom.xml
new file mode 100644
index 0000000..bbe6b9d
--- /dev/null
+++ b/duui-taxon-resolver/pom.xml
@@ -0,0 +1,94 @@
+
+
+ 4.0.0
+
+ org.texttechnologylab
+ taxon-resolver
+ 1.0-SNAPSHOT
+
+
+ 21
+ 21
+ UTF-8
+ UTF-8
+ 1.5.5
+ 3.0.14
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+
+ true
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ 2.22.0
+
+
+ --illegal-access=permit
+ --add-opens java.base/java.util=ALL-UNNAMED
+
+
+
+
+ org.apache.maven.plugins
+ maven-failsafe-plugin
+ 2.22.0
+
+
+ --illegal-access=permit
+ --add-opens java.base/java.util=ALL-UNNAMED
+
+
+
+
+
+
+
+
+ jitpack.io
+ https://jitpack.io
+
+
+
+
+ com.github.texttechnologylab
+ DockerUnifiedUIMAInterface
+ ${ttlab.duui.version}
+
+
+ com.github.texttechnologylab
+ UIMATypeSystem
+
+
+
+
+
+ com.github.texttechnologylab
+ UIMATypeSystem
+ ${ttlab.typesystem.version}
+
+
+
+ org.junit.jupiter
+ junit-jupiter
+ 6.1.0-M1
+ test
+
+
+
+ org.dkpro.core
+ dkpro-core-io-xmi-asl
+ 2.4.0
+ test
+
+
+
+
\ No newline at end of file
diff --git a/duui-taxon-resolver/requirements.txt b/duui-taxon-resolver/requirements.txt
new file mode 100644
index 0000000..bbb6961
--- /dev/null
+++ b/duui-taxon-resolver/requirements.txt
@@ -0,0 +1,14 @@
+annotated-types==0.7.0
+anyio==3.7.1
+attrs==25.4.0
+fastapi==0.104.1
+ipykernel==7.2.0
+pandas==3.0.3
+pydantic==2.13.4
+pydantic-settings==2.0.3
+python-dotenv==1.2.2
+requests==2.34.2
+# Taxoniq resolves NCBI taxonomic identifiers
+taxoniq==1.0.3
+urllib3==2.6.3
+uvicorn==0.46.0
diff --git a/duui-taxon-resolver/src/main/lua/communication_layer.lua b/duui-taxon-resolver/src/main/lua/communication_layer.lua
new file mode 100644
index 0000000..ded8396
--- /dev/null
+++ b/duui-taxon-resolver/src/main/lua/communication_layer.lua
@@ -0,0 +1,339 @@
+-- Bind static classes from java
+-- These bindings are assigned without `local`, so they are globals shared by all
+-- functions in this script.
+StandardCharsets = luajava.bindClass("java.nio.charset.StandardCharsets")
+JCasUtil = luajava.bindClass("org.apache.uima.fit.util.JCasUtil")
+Class = luajava.bindClass("java.lang.Class")
+-- java.lang.Class handles of the annotation types; passed to JCasUtil:select() and
+-- used for isInstance() checks in selectAnnotationComments.
+AnnotationCommentClass = Class:forName("org.texttechnologylab.annotation.AnnotationComment")
+TaxonClassBio = Class:forName("org.texttechnologylab.annotation.biofid.Taxon")
+TaxonClass = Class:forName("org.texttechnologylab.annotation.type.Taxon")
+
+--function instanceOf(clazz, object)
+ -- local object_class = object:getClass()
+ -- local object_class_name = tostring(object_class)
+ -- local clazz_name = tostring(clazz)
+ --local is_instance = object_class_name == clazz_name
+ -- return is_instance
+--end
+
+-- Collect the AnnotationComments of `view` whose reference is a taxon annotation
+-- (biofid Taxon or type Taxon).
+--
+-- Tools such as GNFinder do not add AnnotationComments, but only link the taxon to
+-- the recognized taxon. When no matching comment exists, one "linking" comment is
+-- created (and indexed) per biofid Taxon, carrying the taxon's identifier as value.
+--
+-- @param view the CAS view to read from (and, in the fallback case, to add comments to)
+-- @return a sequence of AnnotationComment objects
+function selectAnnotationComments(view)
+    local comments = {}
+
+    local comment_iter = JCasUtil:select(view, AnnotationCommentClass):iterator()
+    while comment_iter:hasNext() do
+        local comment = comment_iter:next()
+        local target = comment:getReference()
+        if TaxonClassBio:isInstance(target) or TaxonClass:isInstance(target) then
+            comments[#comments + 1] = comment
+        end
+    end
+
+    -- At least one existing comment refers to a taxon: nothing to synthesize.
+    if #comments > 0 then
+        return comments
+    end
+
+    local taxon_iter = JCasUtil:select(view, TaxonClassBio):iterator()
+    while taxon_iter:hasNext() do
+        local taxon = taxon_iter:next()
+        local synthesized = luajava.newInstance("org.texttechnologylab.annotation.AnnotationComment", view)
+
+        synthesized:setKey("linking")
+        synthesized:setValue(taxon:getIdentifier())
+        synthesized:setReference(taxon)
+        synthesized:addToIndexes()
+
+        comments[#comments + 1] = synthesized
+    end
+
+    return comments
+end
+
+-- Serialize the taxa of `inputCas` into the JSON request body of the resolver service.
+--
+-- Comments are grouped by the begin offset of the taxon they refer to; each group
+-- becomes one entry { begin, end, linkings } where `linkings` holds the values of
+-- all comments with key "linking". The entries are emitted in the order in which
+-- their begin offset was first seen, so the request body is deterministic.
+--
+-- @param inputCas the CAS to read the document text and annotations from
+-- @param outputStream stream that receives the JSON encoded request
+-- @param parameters unused
+function serialize(inputCas, outputStream, parameters)
+    local document_text = inputCas:getDocumentText()
+
+    local taxa_by_begin = {}
+    -- Separate sequence: iterating taxa_by_begin with pairs() would yield an
+    -- unspecified order.
+    local taxa_in_order = {}
+    for _, annotation_comment in ipairs(selectAnnotationComments(inputCas)) do
+        local taxon = annotation_comment:getReference()
+        local begin = taxon:getBegin()
+        -- insert taxon collection by begin position, if not already present
+        local recognized_taxon = taxa_by_begin[begin]
+        if recognized_taxon == nil then
+            recognized_taxon = { linkings = {} }
+            recognized_taxon["begin"] = begin
+            -- "end" is a Lua keyword, so the field needs bracket syntax
+            recognized_taxon["end"] = taxon:getEnd()
+            taxa_by_begin[begin] = recognized_taxon
+            taxa_in_order[#taxa_in_order + 1] = recognized_taxon
+        end
+        if annotation_comment:getKey() == "linking" then
+            local comment_value = annotation_comment:getValue()
+            -- a comment without a value carries no linking information
+            if comment_value ~= nil then
+                table.insert(recognized_taxon.linkings, comment_value)
+            end
+        end
+    end
+
+    outputStream:write(json.encode({
+        taxa = taxa_in_order,
+        document_text = document_text
+    }))
+end
+
+-- Copy the fields of one resolved linking onto a TaxonResolution annotation.
+--
+-- "provider" and "taxon_id" are always set. Text properties are only set when
+-- present; numeric id/count properties are set to -1 when absent.
+--
+-- @param taxon_resolution annotation object exposing the set<Field>() methods used below
+-- @param properties table decoded from one entry of "resolved_linkings"
+function populateTaxonResolution(taxon_resolution, properties)
+    -- Invoke the setter only when the property exists.
+    -- object[name](object, value) is the desugared form of object:name(value).
+    local function set_if_present(setter, key)
+        local value = properties[key]
+        if value ~= nil then
+            taxon_resolution[setter](taxon_resolution, value)
+        end
+    end
+
+    -- Always invoke the setter, substituting -1 for a missing property.
+    local function set_or_unknown(setter, key)
+        local value = properties[key]
+        if value == nil then
+            value = -1
+        end
+        taxon_resolution[setter](taxon_resolution, value)
+    end
+
+    taxon_resolution:setProvider(properties["provider"])
+    taxon_resolution:setTaxonId(properties["taxon_id"])
+
+    -- Each rank R maps "<r>_name" -> setRName and "<r>_key" -> setRId.
+    local ranks = {
+        "Kingdom", "Phylum", "Class", "Order", "Superfamily", "Family", "Subfamily",
+        "Tribe", "Subtribe", "Genus", "Subgenus", "Species", "Parent",
+    }
+    for _, rank_name in ipairs(ranks) do
+        local prefix = string.lower(rank_name)
+        set_if_present("set" .. rank_name .. "Name", prefix .. "_name")
+        set_or_unknown("set" .. rank_name .. "Id", prefix .. "_key")
+    end
+
+    local leading_text_fields = {
+        { "setScientificName", "scientific_name" },
+        { "setCanonicalName", "canonical_name" },
+        { "setVernacularName", "vernacular_name" },
+        { "setAcceptedNameUsage", "accepted_name_usage" },
+        { "setAuthorship", "authorship" },
+        { "setNameType", "name_type" },
+        { "setRank", "rank" },
+        { "setOrigin", "origin" },
+        { "setTaxonomicStatus", "taxonomic_status" },
+        { "setRemarks", "remarks" },
+        { "setReferences", "references" },
+        { "setPublishedIn", "published_in" },
+    }
+    for _, field in ipairs(leading_text_fields) do
+        set_if_present(field[1], field[2])
+    end
+
+    set_or_unknown("setNumDescendants", "num_descendants")
+
+    local trailing_text_fields = {
+        { "setLastCrawled", "last_crawled" },
+        { "setLastInterpreted", "last_interpreted" },
+        { "setSpeciesEpithet", "species_epithet" },
+        { "setInfraspecificEpithet", "infraspecific_epithet" },
+        { "setCultivarEpithet", "cultivar_epithet" },
+        { "setUrl", "url" },
+        { "setWikidataId", "wikidata_id" },
+        { "setWikidataUrl", "wikidata_url" },
+    }
+    for _, field in ipairs(trailing_text_fields) do
+        set_if_present(field[1], field[2])
+    end
+end
+
+-- Read the resolver response from `inputStream` and add its content to `inputCas`.
+--
+-- For every entry of "taxa" one RecognizedTaxon annotation is created; for each of
+-- its "resolved_linkings" one TaxonResolution annotation with the same span is
+-- created, populated via populateTaxonResolution and stored in the RecognizedTaxon's
+-- resolutions array.
+--
+-- @param inputCas the CAS that receives the new annotations
+-- @param inputStream stream holding the UTF-8 encoded JSON response
+function deserialize(inputCas, inputStream)
+    local input_string = luajava.newInstance("java.lang.String", inputStream:readAllBytes(), StandardCharsets.UTF_8)
+    local results = json.decode(input_string)
+
+    for _, taxon in ipairs(results["taxa"] or {}) do
+        local begin = taxon["begin"]
+        local end_ = taxon["end"]
+        local recognized_taxon = luajava.newInstance("org.texttechnologylab.annotation.type.RecognizedTaxon", inputCas)
+        recognized_taxon:setBegin(begin)
+        recognized_taxon:setEnd(end_)
+        recognized_taxon:setText(taxon["text"])
+        -- Same defensive default as for "taxa": a missing list counts as empty
+        -- instead of failing on the length operator below.
+        local linkings = taxon["resolved_linkings"] or {}
+        local resolutions = luajava.newInstance("org.apache.uima.jcas.cas.FSArray", inputCas, #linkings)
+        recognized_taxon:setResolutions(resolutions)
+        recognized_taxon:addToIndexes()
+
+        for i, linking in ipairs(linkings) do
+            local taxon_resolution = luajava.newInstance("org.texttechnologylab.annotation.type.TaxonResolution", inputCas)
+            taxon_resolution:setBegin(begin)
+            taxon_resolution:setEnd(end_)
+            taxon_resolution:setRecognizedTaxon(recognized_taxon)
+            populateTaxonResolution(taxon_resolution, linking)
+            taxon_resolution:addToIndexes()
+            -- Lua sequences are 1-based, the FSArray is 0-based
+            resolutions:set(i - 1, taxon_resolution)
+        end
+    end
+
+end
diff --git a/duui-taxon-resolver/src/main/python/gbif_api.py b/duui-taxon-resolver/src/main/python/gbif_api.py
new file mode 100644
index 0000000..8a3352f
--- /dev/null
+++ b/duui-taxon-resolver/src/main/python/gbif_api.py
@@ -0,0 +1,111 @@
+from typing import override
+
+from pydantic import BaseModel, Field
+import requests
+from shared_taxon import SharedTaxon, TaxonBase
+
+base_api_url = "https://api.gbif.org/v1"
+
+
+class GbifTaxon(BaseModel, TaxonBase):
+ key: int
+ taxon_id: str = Field(alias="taxonID")
+ kingdom: str | None = Field(default=None)
+ order: str | None = Field(default=None)
+ family: str | None = Field(default=None)
+ genus: str | None = Field(default=None)
+ species: str | None = Field(default=None)
+ kingdom_key: int | None = Field(alias="kingdomKey", default=None)
+ order_key: int | None = Field(alias="orderKey", default=None)
+ family_key: int | None = Field(alias="familyKey", default=None)
+ genus_key: int | None = Field(alias="genusKey", default=None)
+ species_key: int | None = Field(alias="speciesKey", default=None)
+ parent_key: int | None = Field(alias="parentKey", default=None)
+ parent: str | None = Field(default=None)
+ scientific_name: str | None = Field(alias="scientificName", default=None)
+ canonical_name: str | None = Field(alias="canonicalName", default=None)
+ vernacular_name: str | None = Field(alias="vernacularName", default=None)
+ authorship: str | None = Field(default=None)
+ name_type: str | None = Field(alias="nameType", default=None)
+ rank: str
+ origin: str | None = Field(default=None)
+ taxonomic_status: str | None = Field(alias="taxonomicStatus", default=None)
+ remarks: str | None = Field(default=None)
+ published_in: str | None = Field(alias="publishedIn", default=None)
+ num_descendants: int | None = Field(alias="numDescendants", default=None)
+ last_crawled: str | None = Field(alias="lastCrawled", default=None)
+ last_interpreted: str | None = Field(alias="lastInterpreted", default=None)
+ issues: list[str] = Field(default_factory=list)
+ class_: str | None = Field(alias="class", default=None)
+
+ @property
+ def raw_taxon_id(self) -> int:
+ return int(self.taxon_id.split(":")[-1])
+
+ @override
+ def as_shared(self) -> SharedTaxon:
+ return SharedTaxon(
+ provider="gbif",
+ taxon_id=self.raw_taxon_id,
+ kingdom_name=self.kingdom,
+ kingdom_key=self.kingdom_key,
+ order_name=self.order,
+ order_key=self.order_key,
+ family_name=self.family,
+ family_key=self.family_key,
+ genus_name=self.genus,
+ genus_key=self.genus_key,
+ species_name=self.species,
+ species_key=self.species_key,
+ parent_name=self.parent,
+ parent_key=self.parent_key,
+ scientific_name=self.scientific_name,
+ canonical_name=self.canonical_name,
+ vernacular_name=self.vernacular_name,
+ authorship=self.authorship,
+ name_type=self.name_type,
+ rank=self.rank,
+ origin=self.origin,
+ taxonomic_status=self.taxonomic_status,
+ remarks=self.remarks,
+ published_in=self.published_in,
+ num_descendants=self.num_descendants,
+ last_crawled=self.last_crawled,
+ last_interpreted=self.last_interpreted,
+ url=f"https://www.gbif.org/species/{self.key}",
+ )
+
+
+def get_taxon(taxon_id: int) -> GbifTaxon:
+ response = requests.get(f"{base_api_url}/species/{taxon_id}")
+ response.raise_for_status()
+ response_data = response.content
+ return GbifTaxon.model_validate_json(response_data)
+
+
+def main():
+ while True:
+ print("Enter taxon ID (or 'exit' to quit): ", end="")
+ user_input = input().strip()
+ if user_input.lower() == "exit":
+ break
+ try:
+ taxon_id = int(user_input)
+ except ValueError:
+ print(
+ f"Invalid input '{user_input}'. Please enter a valid integer taxon ID."
+ )
+ continue
+ try:
+ taxon = get_taxon(taxon_id)
+ print(taxon)
+ except ValueError as e:
+ print(f"Error parsing taxon data: {e}")
+ except requests.HTTPError as e:
+ print(f"HTTP error occurred: {e}")
+ except Exception as e:
+ print(f"An error occurred: {e}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/duui-taxon-resolver/src/main/python/ncbi_api.py b/duui-taxon-resolver/src/main/python/ncbi_api.py
new file mode 100644
index 0000000..c3c32a5
--- /dev/null
+++ b/duui-taxon-resolver/src/main/python/ncbi_api.py
@@ -0,0 +1,108 @@
+import taxoniq
+from typing import List, Self, override
+from shared_taxon import SharedTaxon, TaxonBase
+
+class NcbiTaxon(TaxonBase):
+ handle: taxoniq.Taxon
+
+ def __init__(self, handle: taxoniq.Taxon):
+ self.handle = handle
+
+ @classmethod
+ def from_tax_id(cls, tax_id: int) -> Self:
+ handle = taxoniq.Taxon(tax_id)
+ return cls(handle)
+
+ @property
+ def taxon_id(self) -> int | None:
+ return self.handle.tax_id
+
+ @property
+ def rank(self) -> str | None:
+ try:
+ enum_rank = self.handle.rank
+ return enum_rank.name if enum_rank is not None else None
+ except taxoniq.NoValue:
+ return None
+
+ @property
+ def scientific_name(self) -> str | None:
+ try:
+ return self.handle.scientific_name
+ except taxoniq.NoValue:
+ return None
+
+ @property
+ def common_name(self) -> str | None:
+ try:
+ return self.handle.common_name
+ except taxoniq.NoValue:
+ return None
+
+ @property
+ def lineage(self) -> List[Self] | None:
+ try:
+ return [NcbiTaxon(taxon) for taxon in self.handle.lineage]
+ except taxoniq.NoValue:
+ return None
+
+ @property
+ def ranked_lineage(self) -> List[Self] | None:
+ try:
+ return [NcbiTaxon(taxon) for taxon in self.handle.ranked_lineage]
+ except taxoniq.NoValue:
+ return None
+
+ @property
+ def parent(self) -> Self | None:
+ try:
+ return NcbiTaxon(self.handle.parent)
+ except taxoniq.NoValue:
+ return None
+
+ @property
+ def description(self) -> str | None:
+ try:
+ return self.handle.description
+ except taxoniq.NoValue:
+ return None
+
+ @property
+ def url(self) -> str:
+ return self.handle.url
+
+ @property
+ def wikidata_id(self) -> str | None:
+ try:
+ return self.handle.wikidata_id
+ except KeyError:
+ return None
+
+ @property
+ def wikidata_url(self) -> str | None:
+ try:
+ return self.handle.wikidata_url
+ except KeyError:
+ return None
+
+ @override
+ def as_shared(self) -> SharedTaxon:
+ tid = self.taxon_id
+ if tid is None:
+ raise ValueError("Taxon ID is required to convert to SharedTaxon")
+ parent = self.parent
+ parent_name = parent.scientific_name if parent is not None else None
+ parent_key = parent.taxon_id if parent is not None else None
+ return SharedTaxon(
+ provider="ncbi",
+ taxon_id=tid,
+ scientific_name=self.scientific_name,
+ vernacular_name=self.common_name,
+ parent_name=parent_name,
+ parent_key=parent_key,
+ rank=self.rank,
+ remarks=self.description,
+ url=self.url,
+ wikidata_id=self.wikidata_id,
+ wikidata_url=self.wikidata_url,
+ )
diff --git a/duui-taxon-resolver/src/main/python/shared_taxon.py b/duui-taxon-resolver/src/main/python/shared_taxon.py
new file mode 100644
index 0000000..250f7c2
--- /dev/null
+++ b/duui-taxon-resolver/src/main/python/shared_taxon.py
@@ -0,0 +1,60 @@
+from pydantic import BaseModel, Field
+from typing import Literal
+
+type TaxonProvider = Literal["gbif", "taxref", "ncbi"]
+
+class SharedTaxon(BaseModel):
+ provider: TaxonProvider
+ taxon_id: int
+ kingdom_name: str | None = Field(default=None)
+ kingdom_key: int | None = Field(default=None)
+ phylum_name: str | None = Field(default=None)
+ phylum_key: int | None = Field(default=None)
+ class_name: str | None = Field(default=None)
+ class_key: int | None = Field(default=None)
+ order_name: str | None = Field(default=None)
+ order_key: int | None = Field(default=None)
+ superfamily_name: str | None = Field(default=None)
+ superfamily_key: int | None = Field(default=None)
+ family_name: str | None = Field(default=None)
+ family_key: int | None = Field(default=None)
+ subfamily_name: str | None = Field(default=None)
+ subfamily_key: int | None = Field(default=None)
+ tribe_name: str | None = Field(default=None)
+ tribe_key: int | None = Field(default=None)
+ subtribe_name: str | None = Field(default=None)
+ subtribe_key: int | None = Field(default=None)
+ genus_name: str | None = Field(default=None)
+ genus_key: int | None = Field(default=None)
+ subgenus_name: str | None = Field(default=None)
+ subgenus_key: int | None = Field(default=None)
+ species_name: str | None = Field(default=None)
+ species_key: int | None = Field(default=None)
+ parent_name: str | None = Field(default=None)
+ parent_key: int | None = Field(default=None)
+ scientific_name: str | None = Field(default=None)
+ canonical_name: str | None = Field(default=None)
+ vernacular_name: str | None = Field(default=None)
+ accepted_name_usage: str | None = Field(default=None)
+ authorship: str | None = Field(default=None)
+ name_type: str | None = Field(default=None)
+ rank: str | None = Field(default=None)
+ origin: str | None = Field(default=None)
+ taxonomic_status: str | None = Field(default=None)
+ remarks: str | None = Field(default=None)
+ references: str | None = Field(default=None)
+ published_in: str | None = Field(default=None)
+ num_descendants: int | None = Field(default=None)
+ last_crawled: str | None = Field(default=None)
+ last_interpreted: str | None = Field(default=None)
+ species_epithet: str | None = Field(default=None)
+ infraspecific_epithet: str | None = Field(default=None)
+ cultivar_epithet: str | None = Field(default=None)
+ url: str | None = Field(default=None)
+ wikidata_id: str | None = Field(default=None)
+ wikidata_url: str | None = Field(default=None)
+
+class TaxonBase:
+    """Base class for taxon representations that can be converted to ``SharedTaxon``."""
+
+    def as_shared(self) -> SharedTaxon:
+        """Return this taxon as a ``SharedTaxon``; must be overridden by subclasses."""
+        raise NotImplementedError("Subclasses must implement as_shared method")
+
diff --git a/duui-taxon-resolver/src/main/python/taxon-resolver.py b/duui-taxon-resolver/src/main/python/taxon-resolver.py
new file mode 100644
index 0000000..9877c7a
--- /dev/null
+++ b/duui-taxon-resolver/src/main/python/taxon-resolver.py
@@ -0,0 +1,179 @@
+import logging
+from typing import List, Literal, Self
+
+from fastapi import FastAPI, Request, Response
+from fastapi.responses import PlainTextResponse
+from pydantic import BaseModel, Field
+from pydantic_settings import BaseSettings
+from urllib3 import request
+
+import ncbi_api, gbif_api, taxref_loader
+from shared_taxon import SharedTaxon, TaxonBase, TaxonProvider
+
+class Settings(BaseSettings):
+ annotator_name: str = Field("duui-taxon-resolver", env="ANNOTATOR_NAME")
+ annotator_version: str = Field("1.0", env="ANNOTATOR_VERSION")
+ log_level: str = Field("INFO", env="LOG_LEVEL")
+ execution_mode: Literal["development", "production"] = Field("development", env="EXECUTION_MODE")
+
+ class Config:
+ env_prefix = "TAXON_RESOLVER_"
+
+# Module-level setup: runs once when the module is imported.
+settings = Settings()
+
+# Locations of the Lua script and the type system. The development paths are
+# relative, so they are resolved against the current working directory.
+lua_communication_script_path: str
+typesystem_path: str
+if settings.execution_mode == "development":
+    lua_communication_script_path = "../lua/communication_layer.lua"
+    typesystem_path = "../resources/typesystem.xml"
+elif settings.execution_mode == "production":
+    lua_communication_script_path = "/app/communication_layer.lua"
+    typesystem_path = "/app/typesystem.xml"
+else:
+    raise ValueError(f"Unknown execution mode '{settings.execution_mode}'")
+
+# Init logger
+logging.basicConfig(
+    format="%(asctime)s %(levelname)-8s %(message)s",
+    level=settings.log_level,
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+logger = logging.getLogger(__name__)
+logger.info("TTLab Taxon Resolver started in %s mode", settings.execution_mode)
+logger.info("Name: %s", settings.annotator_name)
+logger.info("Version: %s", settings.annotator_version)
+
+# The Taxref tables are loaded eagerly, before the app is created.
+logger.info("Loading backbone data for Taxref...")
+taxref_loader.initialize_backbone()
+logger.info("Taxref backbone data loaded successfully")
+
+def load_communication_script() -> str:
+ with open(lua_communication_script_path, "r") as f:
+ communication_script = f.read()
+ logger.info("Loaded Lua communication script from %s", lua_communication_script_path)
+ return communication_script
+
+def load_typesystem() -> str:
+ with open(typesystem_path, "r") as f:
+ typesystem = f.read()
+ logger.info("Loaded type system from %s", typesystem_path)
+ return typesystem
+
+# In production both resources are read once at import time. In development they
+# stay None, and the endpoints below read the files again on each request.
+lua_communication_script: str | None = load_communication_script() if settings.execution_mode == "production" else None
+typesystem: str | None = load_typesystem() if settings.execution_mode == "production" else None
+
+# FastAPI app
+app = FastAPI(
+    title=settings.annotator_name,
+    description="Annotator for resolving and normalizing taxons in documents",
+    version=settings.annotator_version,
+    terms_of_service="https://www.texttechnologylab.org/legal_notice/",
+    license_info={
+        "name": "AGPL",
+        "url": "http://www.gnu.org/licenses/agpl-3.0.en.html",
+    },
+)
+
+# Return Lua communication script
+@app.get("/v1/communication_layer", response_class=PlainTextResponse)
+def get_communication_layer() -> str:
+ result = lua_communication_script
+ if result is None:
+ result = load_communication_script()
+ return result
+
+
+# Return typesystem
+@app.get("/v1/typesystem")
+def get_typesystem() -> Response:
+ result = typesystem
+ if result is None:
+ result = load_typesystem()
+ return Response(content=result, media_type="application/xml")
+
+class RecognizedTaxonLinking(BaseModel):
+ provider: TaxonProvider
+ taxon_id: int
+
+ @classmethod
+ def from_string(cls, linking_str: str) -> Self:
+ try:
+ provider_str, taxon_id_str = linking_str.split(":")
+ provider: TaxonProvider = provider_str.lower()
+ taxon_id = int(taxon_id_str)
+ return cls(provider=provider, taxon_id=taxon_id)
+ except Exception as e:
+ logger.error("Error parsing linking string '%s': %s", linking_str, e)
+ raise ValueError(f"Invalid linking string '{linking_str}': {e}")
+
+class RecognizedTaxon(BaseModel):
+ begin: int
+ end: int
+ text: str
+ linkings: List[RecognizedTaxonLinking]
+
+class RequestTaxon(BaseModel):
+ begin: int
+ end: int
+ linkings: List[str]
+
+ def to_recognized_taxon(self, document_text: str) -> RecognizedTaxon:
+ text = document_text[self.begin:self.end]
+ return RecognizedTaxon(
+ begin=self.begin,
+ end=self.end,
+ text=text,
+ linkings=[RecognizedTaxonLinking.from_string(linking_str) for linking_str in self.linkings]
+ )
+
+class DuuiRequest(BaseModel):
+ taxa: List[RequestTaxon]
+ document_text: str
+
+ @property
+ def recognized_taxa(self) -> List[RecognizedTaxon]:
+ return [taxon.to_recognized_taxon(self.document_text) for taxon in self.taxa]
+
+class ExportedTaxon(BaseModel):
+ begin: int
+ end: int
+ text: str
+ resolved_linkings: List[SharedTaxon]
+
+class DuuiResponse(BaseModel):
+ taxa: List[ExportedTaxon]
+
+def resolve_taxon_linking(linking: RecognizedTaxonLinking) -> TaxonBase:
+ match linking.provider:
+ case "ncbi":
+ return ncbi_api.NcbiTaxon.from_tax_id(linking.taxon_id)
+ case "gbif":
+ return gbif_api.get_taxon(linking.taxon_id)
+ case "taxref":
+ return taxref_loader.taxon_from_id(linking.taxon_id)
+ case _:
+ raise ValueError(f"Unknown taxon provider '{linking.provider}'")
+
+def resolve_taxon_linkings(linkings: List[RecognizedTaxonLinking]) -> List[TaxonBase]:
+ return [resolve_taxon_linking(linking) for linking in linkings]
+
+def resolve_recognized_taxon(recognized_taxon: RecognizedTaxon) -> ExportedTaxon:
+ resolved_linkings = resolve_taxon_linkings(recognized_taxon.linkings)
+ return ExportedTaxon(
+ begin=recognized_taxon.begin,
+ end=recognized_taxon.end,
+ text=recognized_taxon.text,
+ resolved_linkings=[linking.as_shared() for linking in resolved_linkings]
+ )
+
+def resolve_recognized_taxa(recognized_taxa: List[RecognizedTaxon]) -> List[ExportedTaxon]:
+ return [resolve_recognized_taxon(recognized_taxon) for recognized_taxon in recognized_taxa]
+
+@app.post("/v1/process")
+async def post_process(request: DuuiRequest) -> DuuiResponse:
+ recognized_taxa = request.recognized_taxa
+ resolved_taxa = resolve_recognized_taxa(recognized_taxa)
+ logger.debug("Resolved %d taxons", len(resolved_taxa))
+ print("AAAAAAAAAAAAAAAAAAAAA")
+ print(recognized_taxa)
+ return DuuiResponse(taxa=resolved_taxa)
\ No newline at end of file
diff --git a/duui-taxon-resolver/src/main/python/taxref_loader.py b/duui-taxon-resolver/src/main/python/taxref_loader.py
new file mode 100644
index 0000000..3a32140
--- /dev/null
+++ b/duui-taxon-resolver/src/main/python/taxref_loader.py
@@ -0,0 +1,209 @@
+import os
+import tempfile
+from typing import override
+from pydantic import BaseModel, Field
+from shared_taxon import SharedTaxon, TaxonBase
+import requests
+import zipfile
+
+import pandas as pd
+
+### SETUP ###
+
+
+def download_backbone(
+ output_path: str, url: str = "https://ipt.gbif.fr/archive.do?r=taxref"
+) -> None:
+ with tempfile.NamedTemporaryFile(suffix=".zip") as tmp:
+ # download the zip file
+ response = requests.get(url)
+ response.raise_for_status()
+ # write the content to the temporary file
+ tmp.write(response.content)
+ tmp.flush()
+ # extract the zip file
+ with zipfile.ZipFile(tmp.name, "r") as zip_ref:
+ zip_ref.extractall(output_path)
+
+
+def load_backbone(dir_path: str) -> tuple[pd.DataFrame, pd.DataFrame]:
+ vernacular_names_path = f"{dir_path}/vernacularname.txt"
+ taxonomy_path = f"{dir_path}/taxon.txt"
+ vernacular_names = pd.read_csv(vernacular_names_path, sep="\t", low_memory=False)
+ taxonomy = pd.read_csv(taxonomy_path, sep="\t", low_memory=False)
+ return vernacular_names, taxonomy
+
+
+def load_backbone_from_url(
+ url: str = "https://ipt.gbif.fr/archive.do?r=taxref",
+) -> tuple[pd.DataFrame, pd.DataFrame]:
+ with tempfile.TemporaryDirectory() as tmpdir:
+ download_backbone(tmpdir, url)
+ return load_backbone(tmpdir)
+
+
+def load_taxref() -> tuple[pd.DataFrame, pd.DataFrame]:
+ local_path = "backbone"
+ if not os.path.exists(local_path):
+ return load_backbone_from_url()
+ else:
+ return load_backbone(local_path)
+
+
+### Backbone data ###
+
+vernacular_names: pd.DataFrame
+taxonomy: pd.DataFrame
+
+def preload_backbone():
+ download_backbone("backbone")
+
+def initialize_backbone():
+ global vernacular_names, taxonomy
+ vernacular_names, taxonomy = load_taxref()
+
+### Utility methods ###
+
+
+def taxon_index(taxon_id: int) -> int:
+ return taxonomy.index[taxonomy["taxonID"] == taxon_id][0]
+
+
+def vernacular_name_index(vernacular_name_id: int) -> int:
+ return vernacular_names.index[vernacular_names["id"] == vernacular_name_id][0]
+
+
+### Wrapper classes ###
+
+
+class TaxrefTaxon(BaseModel, TaxonBase):
+    """Pydantic model for one row of the ``taxonomy`` table.
+
+    ``taxon_from_id`` constructs instances by passing a table row as keyword
+    arguments, so each field alias below is the column name it is read from.
+    All fields except ``id`` and ``taxonID`` are optional and default to ``None``.
+    """
+
+    # Trailing underscore avoids shadowing the ``id`` builtin; populated from ``id``.
+    id_: int = Field(alias="id")
+    taxon_id: int = Field(alias="taxonID")
+    scientific_name_id: int | None = Field(alias="scientificNameID", default=None)
+    accepted_name_usage_id: int | None = Field(
+        alias="acceptedNameUsageID", default=None
+    )
+    parent_name_usage_id: int | None = Field(alias="parentNameUsageID", default=None)
+    original_name_usage_id: int | None = Field(
+        alias="originalNameUsageID", default=None
+    )
+    scientific_name: str | None = Field(alias="scientificName", default=None)
+    accepted_name_usage: str | None = Field(alias="acceptedNameUsage", default=None)
+    kingdom: str | None = Field(default=None)
+    phylum: str | None = Field(default=None)
+    # ``class`` is a Python keyword, hence the trailing underscore and the alias.
+    class_: str | None = Field(alias="class", default=None)
+    order: str | None = Field(default=None)
+    superfamily: str | None = Field(default=None)
+    family: str | None = Field(default=None)
+    subfamily: str | None = Field(default=None)
+    tribe: str | None = Field(default=None)
+    subtribe: str | None = Field(default=None)
+    genus: str | None = Field(default=None)
+    subgenus: str | None = Field(default=None)
+    specific_epithet: str | None = Field(alias="specificEpithet", default=None)
+    infraspecific_epithet: str | None = Field(
+        alias="infraspecificEpithet", default=None
+    )
+    cultivar_epithet: str | None = Field(alias="cultivarEpithet", default=None)
+    taxon_rank: str | None = Field(alias="taxonRank", default=None)
+    scientific_name_authorship: str | None = Field(
+        alias="scientificNameAuthorship", default=None
+    )
+    vernacular_name: str | None = Field(alias="vernacularName", default=None)
+    taxon_remarks: str | None = Field(alias="taxonRemarks", default=None)
+    references: str | None = Field(default=None)
+
+    @override
+    def as_shared(self) -> SharedTaxon:
+        """Convert this record into the provider-independent ``SharedTaxon``.
+
+        The provider is fixed to ``"taxref"`` and ``taxon_id`` is passed as a
+        string. Identifier fields other than ``taxon_id``, as well as
+        ``infraspecific_epithet`` and ``cultivar_epithet``, are not forwarded.
+        """
+        # TODO: resolve parent name and key using parent_name_usage_id
+        # NOTE(review): ``species_name`` is filled from ``specific_epithet``;
+        # confirm that the epithet alone (not a full binomial) is intended here.
+        return SharedTaxon(
+            provider="taxref",
+            taxon_id=str(self.taxon_id),
+            kingdom_name=self.kingdom,
+            phylum_name=self.phylum,
+            class_name=self.class_,
+            order_name=self.order,
+            superfamily_name=self.superfamily,
+            family_name=self.family,
+            subfamily_name=self.subfamily,
+            tribe_name=self.tribe,
+            subtribe_name=self.subtribe,
+            genus_name=self.genus,
+            subgenus_name=self.subgenus,
+            species_name=self.specific_epithet,
+            scientific_name=self.scientific_name,
+            vernacular_name=self.vernacular_name,
+            accepted_name_usage=self.accepted_name_usage,
+            authorship=self.scientific_name_authorship,
+            rank=self.taxon_rank,
+            remarks=self.taxon_remarks,
+            references=self.references,
+        )
+
+
+class VernacularName(BaseModel):
+    """Pydantic model for one row of the ``vernacular_names`` table.
+
+    ``vernacular_name_from_id`` builds instances from a table row, so each
+    alias below is the column name the field is read from.
+    """
+
+    # Trailing underscore avoids shadowing the ``id`` builtin; populated from ``id``.
+    id_: int = Field(alias="id")
+    vernacular_name: str = Field(alias="vernacularName")
+    source: str | None = Field(default=None)
+    language: str | None = Field(default=None)
+    location_id: str | None = Field(alias="locationID", default=None)
+    country_code: str | None = Field(alias="countryCode", default=None)
+
+
+def taxon_from_id(taxon_id: int) -> TaxrefTaxon:
+ taxon_index_ = taxon_index(taxon_id)
+ taxon_data = taxonomy.loc[taxon_index_]
+ # convert NaN to None for optional fields
+ taxon_data = taxon_data.where(pd.notnull(taxon_data), None)
+ return TaxrefTaxon(**taxon_data)
+
+
+def vernacular_name_from_id(vernacular_name_id: int) -> VernacularName:
+ vernacular_name_index_ = vernacular_name_index(vernacular_name_id)
+ vernacular_name_data = vernacular_names.loc[vernacular_name_index_]
+ # convert NaN to None for optional fields
+ vernacular_name_data = vernacular_name_data.where(
+ pd.notnull(vernacular_name_data), None
+ )
+ return VernacularName(**vernacular_name_data)
+
+
+def main():
+ while True:
+ print(
+ "Enter taxon ID 't {id}' or vernacular name ID 'v {id}' (or 'exit' to quit): ",
+ end="",
+ )
+ user_input = input().strip()
+ if user_input.lower() == "exit":
+ break
+ if user_input.startswith("t "):
+ taxon_id_str = user_input[2:].strip()
+ try:
+ taxon_id = int(taxon_id_str)
+ taxon = taxon_from_id(taxon_id)
+ print(taxon)
+ except ValueError:
+ print(
+ f"Invalid taxon ID '{taxon_id_str}'. Please enter a valid integer taxon ID."
+ )
+ except IndexError:
+ print("Taxon ID not found. Please enter a valid taxon ID.")
+ elif user_input.startswith("v "):
+ vernacular_name_id_str = user_input[2:].strip()
+ try:
+ vernacular_name_id = int(vernacular_name_id_str)
+ vernacular_name = vernacular_name_from_id(vernacular_name_id)
+ print(vernacular_name)
+ except ValueError:
+ print(
+ f"Invalid vernacular name ID '{vernacular_name_id_str}'. Please enter a valid integer vernacular name ID."
+ )
+ except IndexError:
+ print(
+ "Vernacular name ID not found. Please enter a valid vernacular name ID."
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/duui-taxon-resolver/src/main/resources/typesystem.xml b/duui-taxon-resolver/src/main/resources/typesystem.xml
new file mode 100644
index 0000000..198e9c0
--- /dev/null
+++ b/duui-taxon-resolver/src/main/resources/typesystem.xml
@@ -0,0 +1,282 @@
+
+
+
+
+ org.texttechnologylab.annotation.type.RecognizedTaxon
+ Annotation type for recognized taxon mentions.
+ uima.tcas.Annotation
+
+
+ resolutions
+ List of taxon resolutions associated with this recognized taxon.
+ uima.cas.FSArray
+
+ true
+
+
+ text
+ The text of the recognized taxon.
+ uima.cas.String
+
+
+
+
+ org.texttechnologylab.annotation.type.TaxonResolution
+ Annotation type for matching taxon information to a taxonomic database.
+ uima.tcas.Annotation
+
+
+ recognizedTaxon
+ Reference to the taxon in the document that was recognized.
+ org.texttechnologylab.annotation.type.RecognizedTaxon
+
+
+ provider
+ The provider of the taxon resolution information.
+ uima.cas.String
+
+
+ taxonId
+ The identifier of the taxon in the taxonomic database.
+ uima.cas.Integer
+
+
+ kingdomName
+ The name of the kingdom to which the taxon belongs, if available.
+ uima.cas.String
+
+
+ kingdomId
+ The identifier of the kingdom to which the taxon belongs, if available.
+ uima.cas.Integer
+
+
+ phylumName
+ The name of the phylum to which the taxon belongs, if available.
+ uima.cas.String
+
+
+ phylumId
+ The identifier of the phylum to which the taxon belongs, if available.
+ uima.cas.Integer
+
+
+ className
+ The name of the class to which the taxon belongs, if available.
+ uima.cas.String
+
+
+ classId
+ The identifier of the class to which the taxon belongs, if available.
+ uima.cas.Integer
+
+
+ orderName
+ The name of the order to which the taxon belongs, if available.
+ uima.cas.String
+
+
+ orderId
+ The identifier of the order to which the taxon belongs, if available.
+ uima.cas.Integer
+
+
+ superfamilyName
+ The name of the superfamily to which the taxon belongs, if available.
+ uima.cas.String
+
+
+ superfamilyId
+ The identifier of the superfamily to which the taxon belongs, if available.
+ uima.cas.Integer
+
+
+ familyName
+ The name of the family to which the taxon belongs, if available.
+ uima.cas.String
+
+
+ familyId
+ The identifier of the family to which the taxon belongs, if available.
+ uima.cas.Integer
+
+
+ subfamilyName
+ The name of the subfamily to which the taxon belongs, if available.
+ uima.cas.String
+
+
+ subfamilyId
+ The identifier of the subfamily to which the taxon belongs, if available.
+ uima.cas.Integer
+
+
+ tribeName
+ The name of the tribe to which the taxon belongs, if available.
+ uima.cas.String
+
+
+ tribeId
+ The identifier of the tribe to which the taxon belongs, if available.
+ uima.cas.Integer
+
+
+ subtribeName
+ The name of the subtribe to which the taxon belongs, if available.
+ uima.cas.String
+
+
+ subtribeId
+ The identifier of the subtribe to which the taxon belongs, if available.
+ uima.cas.Integer
+
+
+ genusName
+ The name of the genus to which the taxon belongs, if available.
+ uima.cas.String
+
+
+ genusId
+ The identifier of the genus to which the taxon belongs, if available.
+ uima.cas.Integer
+
+
+ subgenusName
+ The name of the subgenus to which the taxon belongs, if available.
+ uima.cas.String
+
+
+ subgenusId
+ The identifier of the subgenus to which the taxon belongs, if available.
+ uima.cas.Integer
+
+
+ speciesName
+ The name of the species to which the taxon belongs, if available.
+ uima.cas.String
+
+
+ speciesId
+ The identifier of the species to which the taxon belongs, if available.
+ uima.cas.Integer
+
+
+ parentName
+ The name of the parent taxon, if available.
+ uima.cas.String
+
+
+ parentId
+ The identifier of the parent taxon, if available.
+ uima.cas.Integer
+
+
+
+ scientificName
+ The scientific name of the taxon, if available.
+ uima.cas.String
+
+
+ canonicalName
+ The canonical name of the taxon, if available.
+ uima.cas.String
+
+
+ vernacularName
+ The vernacular name of the taxon, if available.
+ uima.cas.String
+
+
+ acceptedNameUsage
+ The accepted name usage of the taxon, if available.
+ uima.cas.String
+
+
+ authorship
+ The authorship information for the taxon, if available.
+ uima.cas.String
+
+
+ nameType
+ The type of the taxon name, if available.
+ uima.cas.String
+
+
+ rank
+ The taxonomic rank of the taxon, if available.
+ uima.cas.String
+
+
+ origin
+ The origin of the taxon, if available.
+ uima.cas.String
+
+
+ taxonomicStatus
+ The taxonomic status of the taxon, if available.
+ uima.cas.String
+
+
+ remarks
+ Any additional remarks about the taxon, if available.
+ uima.cas.String
+
+
+ references
+ References for the taxon, if available.
+ uima.cas.String
+
+
+ publishedIn
+ The publication in which the taxon was published, if available.
+ uima.cas.String
+
+
+ numDescendants
+ The number of descendant taxa, if available.
+ uima.cas.Integer
+
+
+ lastCrawled
+ The date and time when the taxon was last crawled, if available.
+ uima.cas.String
+
+
+ lastInterpreted
+ The date and time when the taxon was last interpreted, if available.
+ uima.cas.String
+
+
+ speciesEpithet
+ The species epithet of the taxon, if available.
+ uima.cas.String
+
+
+ infraspecificEpithet
+ The infraspecific epithet of the taxon, if available.
+ uima.cas.String
+
+
+ cultivarEpithet
+ The cultivar epithet of the taxon, if available.
+ uima.cas.String
+
+
+ url
+ A web URL associated with the taxon, provided by the taxonomic database, if available.
+ uima.cas.String
+
+
+ wikidataId
+ The Wikidata ID of the taxon, if available.
+ uima.cas.String
+
+
+ wikidataUrl
+ The URL of the Wikidata page for the taxon, if available.
+ uima.cas.String
+
+
+
+
+
diff --git a/duui-taxon-resolver/src/test/java/org/texttechnologylab/TaxonResolverTest.java b/duui-taxon-resolver/src/test/java/org/texttechnologylab/TaxonResolverTest.java
new file mode 100644
index 0000000..e612320
--- /dev/null
+++ b/duui-taxon-resolver/src/test/java/org/texttechnologylab/TaxonResolverTest.java
@@ -0,0 +1,276 @@
+package org.texttechnologylab;
+
+import org.apache.uima.cas.impl.XmiCasSerializer;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.texttechnologylab.DockerUnifiedUIMAInterface.DUUIComposer;
+import org.texttechnologylab.DockerUnifiedUIMAInterface.driver.DUUIDockerDriver;
+import org.texttechnologylab.DockerUnifiedUIMAInterface.driver.DUUIPodmanDriver;
+import org.texttechnologylab.DockerUnifiedUIMAInterface.driver.DUUIRemoteDriver;
+import org.texttechnologylab.DockerUnifiedUIMAInterface.lua.DUUILuaContext;
+import org.texttechnologylab.annotation.AnnotationComment;
+import org.texttechnologylab.annotation.type.RecognizedTaxon;
+import org.texttechnologylab.annotation.type.Taxon;
+import org.texttechnologylab.annotation.type.TaxonResolution;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Collection;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+public class TaxonResolverTest {
+
+ enum Strategy {
+ REMOTE,
+ DOCKER,
+ PODMAN
+ }
+
+ static final String TAXON_RESOLVER_IMAGE = "localhost/duui-taxon-resolver:1.0.0";
+ static final String TAXON_RESOLVER_REMOTE_HOST = "http://localhost:12345";
+ static final Strategy STRATEGY = Strategy.REMOTE;
+ static final boolean STORE_OUTPUT = false;
+
+ DUUIComposer composer;
+
+ @BeforeEach
+ public void setup() throws Exception {
+ composer = new DUUIComposer()
+ .withSkipVerification(true)
+ .withLuaContext(new DUUILuaContext().withJsonLibrary());
+ switch (STRATEGY) {
+ case REMOTE -> composer.addDriver(new DUUIRemoteDriver());
+ case DOCKER -> composer.addDriver(new DUUIDockerDriver());
+ case PODMAN -> composer.addDriver(new DUUIPodmanDriver());
+ }
+ }
+
+ private void addTaxonResolver(String annotationsView) throws Exception {
+ switch (STRATEGY) {
+ case REMOTE -> {
+ var component = new DUUIRemoteDriver.Component(TAXON_RESOLVER_REMOTE_HOST);
+ if (annotationsView != null) {
+ component.withParameter("annotations_view", annotationsView);
+ }
+ composer.add(component.build());
+ }
+ case DOCKER -> {
+ var component = new DUUIDockerDriver.Component(TAXON_RESOLVER_IMAGE);
+ if (annotationsView != null) {
+ component.withParameter("annotations_view", annotationsView);
+ }
+ composer.add(component.build());
+ }
+ case PODMAN -> {
+ var component = new DUUIPodmanDriver.Component(TAXON_RESOLVER_IMAGE);
+ if (annotationsView != null) {
+ component.withParameter("annotations_view", annotationsView);
+ }
+ composer.add(component.build());
+ }
+ }
+ }
+
+ @Test
+ public void testEmpty() throws Exception {
+ addTaxonResolver(null);
+
+ JCas jcas = JCasFactory.createJCas();
+ jcas.setDocumentLanguage("en");
+ jcas.setDocumentText("This is a test.");
+
+ composer.run(jcas);
+
+ storeCas(jcas, "empty");
+
+ Collection taxa = JCasUtil.select(jcas, RecognizedTaxon.class);
+ assertTrue(taxa.isEmpty(), "Expected no taxa to be recognized");
+ }
+
+ @Test
+ public void testSingleAnnotationsSameView() throws Exception {
+ addTaxonResolver(null);
+
+ JCas jcas = JCasFactory.createJCas();
+ jcas.setDocumentLanguage("en");
+ jcas.setDocumentText("Ammophila arenaria");
+
+ // manually insert a taxon annotation for "Ammophila arenaria"
+ Taxon taxon = new Taxon(jcas, 0, 18);
+ taxon.addToIndexes();
+ AnnotationComment comment = new AnnotationComment(jcas);
+ comment.setReference(taxon);
+ comment.setKey("linking");
+ comment.setValue("GBIF:1347914");
+ comment.addToIndexes();
+
+ composer.run(jcas);
+
+ storeCas(jcas, "single_same_view");
+
+ Collection taxa = JCasUtil.select(jcas, RecognizedTaxon.class);
+ assertEquals(1, taxa.size(), "Expected exactly one taxon to be recognized");
+ RecognizedTaxon recognized = taxa.iterator().next();
+ assertEquals("Ammophila arenaria", recognized.getText(), "Expected taxon text to match");
+ assertEquals(1, recognized.getResolutions().size(), "Expected exactly one resolution");
+ assertInstanceOf(TaxonResolution.class, recognized.getResolutions(0), "Expected resolution to be of type TaxonResolution");
+ TaxonResolution resolution = (TaxonResolution) recognized.getResolutions(0);
+ assertEquals("gbif", resolution.getProvider(), "Expected provider to be gbif");
+ assertEquals("SCIENTIFIC", resolution.getNameType(), "Expected name type to be SCIENTIFIC");
+ assertEquals("SPECIES", resolution.getRank(), "Expected rank to be SPECIES");
+ assertEquals("Animalia", resolution.getKingdomName(), "Expected kingdom name to be 'Animalia'");
+ assertEquals(1, resolution.getKingdomId(), "Expected kingdom ID to be 1");
+ assertEquals("Hymenoptera", resolution.getOrderName(), "Expected order name to be 'Hymenoptera'");
+ assertEquals(1457, resolution.getOrderId(), "Expected order ID to be 1457");
+ assertEquals("Sphecidae", resolution.getFamilyName(), "Expected family name to be 'Sphecidae'");
+ assertEquals(4352, resolution.getFamilyId(), "Expected family ID to be 4352");
+ assertEquals("Podalonia", resolution.getGenusName(), "Expected genus name to be 'Podalonia'");
+ assertEquals(1347780, resolution.getGenusId(), "Expected genus ID to be 1347780");
+ assertEquals("Podalonia hirsuta", resolution.getSpeciesName(), "Expected species name to be 'Podalonia hirsuta'");
+ assertEquals(1347914, resolution.getSpeciesId(), "Expected species ID to be 1347914");
+ assertEquals("Podalonia hirsuta (Scopoli, 1763)", resolution.getScientificName(), "Expected scientific name to be 'Podalonia hirsuta (Scopoli, 1763)'");
+ }
+
+ @Test
+ public void testSingleAnnotationsDifferentView() throws Exception {
+ String annotationsView = "taxons";
+ addTaxonResolver(annotationsView);
+
+ JCas jcas = JCasFactory.createJCas();
+ jcas.setDocumentLanguage("en");
+ jcas.setDocumentText("Ammophila arenaria");
+
+ // manually insert a taxon annotation for "Ammophila arenaria" in the specified view
+ JCas annotationsJCas = jcas.createView(annotationsView);
+ Taxon taxon = new Taxon(annotationsJCas, 0, 18);
+ taxon.addToIndexes();
+ AnnotationComment comment = new AnnotationComment(annotationsJCas);
+ comment.setReference(taxon);
+ comment.setKey("linking");
+ comment.setValue("GBIF:1347914");
+ comment.addToIndexes();
+
+ composer.run(jcas);
+
+ storeCas(jcas, "single_different_view");
+
+ Collection taxa = JCasUtil.select(jcas, RecognizedTaxon.class);
+ assertEquals(1, taxa.size(), "Expected exactly one taxon to be recognized");
+ RecognizedTaxon recognized = taxa.iterator().next();
+ assertEquals("Ammophila arenaria", recognized.getText(), "Expected taxon text to match");
+ assertEquals(1, recognized.getResolutions().size(), "Expected exactly one resolution");
+ assertInstanceOf(TaxonResolution.class, recognized.getResolutions(0), "Expected resolution to be of type TaxonResolution");
+ TaxonResolution resolution = (TaxonResolution) recognized.getResolutions(0);
+ assertEquals("gbif", resolution.getProvider(), "Expected provider to be gbif");
+ assertEquals(1347914, resolution.getSpeciesId(), "Expected species ID to be 1347914");
+ }
+
+ @Test
+ public void testMultipleAnnotationsDifferentProviders() throws Exception {
+ addTaxonResolver(null);
+
+ JCas jcas = JCasFactory.createJCas();
+ jcas.setDocumentLanguage("en");
+ jcas.setDocumentText("Ammophila arenaria");
+
+ // manually insert taxon annotations for "Ammophila arenaria" and "Ulex europaeus"
+ Taxon taxon = new Taxon(jcas, 0, 18);
+ taxon.addToIndexes();
+
+ AnnotationComment comment1 = new AnnotationComment(jcas);
+ comment1.setReference(taxon);
+ comment1.setKey("linking");
+ comment1.setValue("GBIF:1347914");
+ comment1.addToIndexes();
+
+ AnnotationComment comment2 = new AnnotationComment(jcas);
+ comment2.setReference(taxon);
+ comment2.setKey("linking");
+ comment2.setValue("NCBI:96047");
+ comment2.addToIndexes();
+
+ AnnotationComment comment3 = new AnnotationComment(jcas);
+ comment3.setReference(taxon);
+ comment3.setKey("linking");
+ comment3.setValue("TAXREF:82139");
+ comment3.addToIndexes();
+
+ composer.run(jcas);
+
+ storeCas(jcas, "multiple_different_providers");
+
+ Collection taxa = JCasUtil.select(jcas, RecognizedTaxon.class);
+ assertEquals(1, taxa.size(), "Expected exactly one taxon to be recognized");
+ RecognizedTaxon recognized = taxa.iterator().next();
+ assertEquals("Ammophila arenaria", recognized.getText(), "Expected taxon text to match");
+ assertEquals(3, recognized.getResolutions().size(), "Expected exactly three resolutions");
+ boolean hasGbif = false;
+ boolean hasNcbi = false;
+ boolean hasTaxref = false;
+ for (int i = 0; i < recognized.getResolutions().size(); i++) {
+ assertInstanceOf(TaxonResolution.class, recognized.getResolutions(i), "Expected resolution to be of type TaxonResolution");
+ TaxonResolution resolution = (TaxonResolution) recognized.getResolutions(i);
+ switch (resolution.getProvider()) {
+ case "gbif" -> {
+ hasGbif = true;
+ assertEquals(1347914, resolution.getSpeciesId(), "Expected species ID to be 1347914 for GBIF");
+ }
+ case "ncbi" -> {
+ hasNcbi = true;
+ assertEquals(96047, resolution.getTaxonId(), "Expected taxon ID to be 96047 for NCBI");
+ assertEquals("Calamagrostis", resolution.getParentName(), "Expected parent name to be 'Calamagrostis' for NCBI");
+ assertEquals(15376, resolution.getParentId(), "Expected parent ID to be 15376 for NCBI");
+ assertEquals("Calamagrostis arenaria", resolution.getScientificName(), "Expected scientific name to be 'Calamagrostis arenaria' for NCBI");
+ assertEquals("species", resolution.getRank(), "Expected rank to be 'species' for NCBI");
+ }
+ case "taxref" -> {
+ hasTaxref = true;
+ assertEquals(82139, resolution.getTaxonId(), "Expected taxon ID to be 82139 for TAXREF");
+ assertEquals("Plantae", resolution.getKingdomName(), "Expected kingdom name to be 'Plantae' for TAXREF");
+ assertEquals("Equisetopsida", resolution.getClassName(), "Expected class name to be 'Equisetopsida' for TAXREF");
+ assertEquals("Poales", resolution.getOrderName(), "Expected order name to be 'Poales' for TAXREF");
+ assertEquals("Poaceae", resolution.getFamilyName(), "Expected family name to be 'Poaceae' for TAXREF");
+ assertEquals("Pooideae", resolution.getSubfamilyName(), "Expected subfamily name to be 'Pooideae' for TAXREF");
+ assertEquals("Poeae", resolution.getTribeName(), "Expected tribe name to be 'Poeae' for TAXREF");
+ assertEquals("Ammophila arenaria", resolution.getSpeciesName(), "Expected species name to be 'Ammophila arenaria' for TAXREF");
+ assertEquals("Ammophila arenaria", resolution.getScientificName(), "Expected scientific name to be 'Ammophila arenaria' for TAXREF");
+ assertEquals("Oyat des sables, Ammophile des sables, Oyat, Chiendent marin, Roseau des sables, Gourbet", resolution.getVernacularName(), "Expected vernacular name to match for TAXREF");
+ assertEquals("Ammophila arenaria (L.) Link, 1827", resolution.getAcceptedNameUsage(), "Expected accepted name usage to be 'Ammophila arenaria (L.) Link, 1827' for TAXREF");
+ assertEquals("(L.) Link, 1827", resolution.getAuthorship(), "Expected authorship to be '(L.) Link, 1827' for TAXREF");
+ assertEquals("species", resolution.getRank(), "Expected rank to be 'species' for TAXREF");
+ assertEquals("https://taxref.mnhn.fr/taxref-web/taxa/82139", resolution.getReferences(), "Expeceted references to be 'https://taxref.mnhn.fr/taxref-web/taxa/82139' for TAXREF");
+ }
+ default -> fail("Unexpected provider: " + resolution.getProvider());
+ }
+ }
+ assertTrue(hasGbif, "Expected a resolution from GBIF");
+ assertTrue(hasNcbi, "Expected a resolution from NCBI");
+ assertTrue(hasTaxref, "Expected a resolution from TAXREF");
+ }
+
+ static void storeCas(JCas cas, String name) {
+ if (!STORE_OUTPUT) {
+ return;
+ }
+ Path folderPath = Path.of(".", "outputs");
+ try {
+ Files.createDirectories(folderPath);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ Path path = folderPath.resolve(name + ".xmi");
+ try (OutputStream os = Files.newOutputStream(path)) {
+ XmiCasSerializer.serialize(cas.getCas(), os);
+ System.out.println("CAS stored at: " + path.toAbsolutePath());
+ } catch (Exception e) {
+ System.err.println("Error serializing CAS: " + e.getMessage());
+ }
+ }
+
+}