diff --git a/liquid_node/configuration.py b/liquid_node/configuration.py index 8248518d..f3458452 100644 --- a/liquid_node/configuration.py +++ b/liquid_node/configuration.py @@ -8,7 +8,7 @@ from .util import import_string from .docker import docker from liquid_node.jobs import Job, liquid, hoover, dokuwiki, rocketchat, \ - nextcloud, hypothesis, codimd, ci + nextcloud, hypothesis, codimd, ci, newsleak def split_lang_codes(option): @@ -20,7 +20,7 @@ def split_lang_codes(option): class Configuration: ALL_APPS = ('hoover', 'dokuwiki', 'rocketchat', 'nextcloud', - 'hypothesis', 'codimd',) + 'hypothesis', 'codimd', 'newsleak') # The core apps can't be turned off. CORE_APPS = ('liquid', 'ingress',) @@ -36,7 +36,8 @@ class Configuration: 'dokuwiki': 'is a wiki system used as a knowledge base for processed information.', 'codimd': 'is a real-time collaboration pad.', 'nextcloud': 'has a file share system and a contact list of users.', - 'rocketchat': 'is the chat app.' + 'rocketchat': 'is the chat app.', + 'newsleak': 'is the University of Hamburg entity extraction and visualization tool.' } ALL_JOBS = [ @@ -68,6 +69,9 @@ class Configuration: ci.Drone(), ci.Deps(), ci.DroneWorkers(), + newsleak.Newsleak(), + newsleak.Deps(), + newsleak.Proxy(), ] def __init__(self): @@ -160,6 +164,8 @@ def __init__(self): self.tika_count = self.ini.getint('liquid', 'tika_count', fallback=1) self.tika_memory_limit = self.ini.getint('liquid', 'tika_memory_limit', fallback=800) + self.nlp_memory_limit = self.ini.getint('liquid', 'nlp_memory_limit', fallback=800) + self.hypothesis_memory_limit = \ self.ini.getint('liquid', 'hypothesis_memory_limit', @@ -304,7 +310,7 @@ def tag(name): client = tag('h-client') return f'h: {h}, client: {client}' - if name in ['dokuwiki', 'nextcloud']: + if name in ['dokuwiki', 'nextcloud', 'newsleak']: return tag('liquid-' + name) return tag(name) diff --git a/liquid_node/jobs/newsleak.py b/liquid_node/jobs/newsleak.py new file mode 100644 index 00000000..e172a100 --- 
/dev/null +++ b/liquid_node/jobs/newsleak.py @@ -0,0 +1,31 @@ +from liquid_node import jobs + + +class Newsleak(jobs.Job): + name = 'newsleak' + template = jobs.TEMPLATES / f'{name}.nomad' + app = 'newsleak' + stage = 2 + core_oauth_apps = [ + { + 'name': 'newsleak', + 'vault_path': 'liquid/newsleak/auth.oauth2', + 'callback': '/oauth2/callback', + }, + ] + generate_oauth2_proxy_cookie = True + + + +class Proxy(jobs.Job): + name = 'newsleak-proxy' + template = jobs.TEMPLATES / f'{name}.nomad' + app = 'newsleak-ui' + stage = 4 + + +class Deps(jobs.Job): + name = 'newsleak-deps' + template = jobs.TEMPLATES / f'{name}.nomad' + app = 'newsleak-ui' + stage = 1 diff --git a/production-versions.ini b/production-versions.ini index 10454da3..24f0703b 100644 --- a/production-versions.ini +++ b/production-versions.ini @@ -9,5 +9,7 @@ liquid-authproxy = liquidinvestigations/oauth-proxy:0.0.3 liquid-core = liquidinvestigations/core:0.5.0 liquid-dokuwiki = liquidinvestigations/liquid-dokuwiki:0.1.0 liquid-nextcloud = liquidinvestigations/liquid-nextcloud:0.2.3 +liquid-newsleak = liquidinvestigations/newsleak:latest rocketchat = rocketchat/rocket.chat:3.9.1 rocketchat-mongo = mongo:4.4 +nlp-service = liquidinvestigations/nlp-service:initial_droned_service diff --git a/staging-versions.ini b/staging-versions.ini index d8c5d381..9f54ad98 100644 --- a/staging-versions.ini +++ b/staging-versions.ini @@ -11,3 +11,4 @@ liquid-dokuwiki = liquidinvestigations/liquid-dokuwiki:release liquid-nextcloud = liquidinvestigations/liquid-nextcloud:0.2.3 rocketchat = rocketchat/rocket.chat:3.9.1 rocketchat-mongo = mongo:4.4 +nlp-service = liquidinvestigations/nlp-service:initial_droned_service diff --git a/templates/hoover-deps.nomad b/templates/hoover-deps.nomad index 1f6de320..b2267ee1 100644 --- a/templates/hoover-deps.nomad +++ b/templates/hoover-deps.nomad @@ -353,6 +353,59 @@ job "hoover-deps" { } } + group "nlp-service" { + ${ continuous_reschedule() } + ${ group_disk() } + + task "nlp" { + 
${ task_logs() } + + constraint { + attribute = "{% raw %}${meta.liquid_volumes}{% endraw %}" + operator = "is_set" + } + + driver = "docker" + config { + image = "${config.image('nlp-service')}" + volumes = [ + "{% raw %}${meta.liquid_volumes}{% endraw %}/nlp-service/data:/data", + ] + port_map { + nlp = 5000 + } + labels { + liquid_task = "hoover-nlp" + } + memory_hard_limit = ${4 * config.nlp_memory_limit} + } + env { + NLP_SERVICE_PRESET = "full_sm" + } + resources { + memory = ${config.nlp_memory_limit} + cpu = 1500 + network { + mbits = 1 + port "nlp" {} + } + } + service { + name = "hoover-nlp-service" + tags = ["fabio-/_nlp strip=/_nlp"] + port = "nlp" + check { + name = "http" + initial_status = "critical" + type = "http" + path = "/config" + interval = "${check_interval}" + timeout = "600s" + } + } + } + } + group "rabbitmq" { ${ continuous_reschedule() } ${ group_disk() } diff --git a/templates/hoover-workers.nomad b/templates/hoover-workers.nomad index 63eeee94..88dc8da9 100644 --- a/templates/hoover-workers.nomad +++ b/templates/hoover-workers.nomad @@ -61,6 +61,7 @@ job "hoover-workers" { # exec tail -f /dev/null if [ -z "$SNOOP_TIKA_URL" ] \ || [ -z "$SNOOP_DB" ] \ + || [ -z "$SNOOP_NLP_URL" ] \ || [ -z "$SNOOP_ES_URL" ] \ || [ -z "$SNOOP_AMQP_URL" ]; then echo "incomplete configuration!" 
@@ -76,6 +77,7 @@ job "hoover-workers" { env { SNOOP_ES_URL = "http://{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_es" SNOOP_TIKA_URL = "http://{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_tika/" + SNOOP_NLP_URL = "http://{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_nlp" SNOOP_RABBITMQ_HTTP_URL = "{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_rabbit/" SNOOP_COLLECTIONS = ${ config.snoop_collections | tojson | tojson } diff --git a/templates/hoover.nomad b/templates/hoover.nomad index ae578890..e5e1b009 100644 --- a/templates/hoover.nomad +++ b/templates/hoover.nomad @@ -188,6 +188,7 @@ job "hoover" { env { SNOOP_ES_URL = "http://{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_es" SNOOP_TIKA_URL = "http://{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_tika/" + SNOOP_NLP_URL = "http://{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_nlp" SNOOP_RABBITMQ_HTTP_URL = "{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_rabbit/" SNOOP_COLLECTIONS = ${ config.snoop_collections | tojson | tojson } } @@ -278,6 +279,7 @@ job "hoover" { # exec tail -f /dev/null if [ -z "$SNOOP_TIKA_URL" ] \ || [ -z "$SNOOP_DB" ] \ + || [ -z "$SNOOP_NLP_URL" ] \ || [ -z "$SNOOP_ES_URL" ] \ || [ -z "$SNOOP_AMQP_URL" ]; then echo "incomplete configuration!" 
@@ -293,6 +295,7 @@ job "hoover" { env { SNOOP_ES_URL = "http://{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_es" SNOOP_TIKA_URL = "http://{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_tika/" + SNOOP_NLP_URL = "http://{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_nlp" SNOOP_RABBITMQ_HTTP_URL = "{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_rabbit/" SNOOP_COLLECTIONS = ${ config.snoop_collections | tojson | tojson } @@ -400,6 +403,7 @@ job "hoover" { env { SNOOP_ES_URL = "http://{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_es" SNOOP_TIKA_URL = "http://{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_tika/" + SNOOP_NLP_URL = "http://{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_nlp" SNOOP_RABBITMQ_HTTP_URL = "{% raw %}${attr.unique.network.ip-address}{% endraw %}:9990/_rabbit/" SNOOP_COLLECTIONS = ${ config.snoop_collections | tojson | tojson } } diff --git a/templates/newsleak-deps.nomad b/templates/newsleak-deps.nomad new file mode 100644 index 00000000..6ffd3e9d --- /dev/null +++ b/templates/newsleak-deps.nomad @@ -0,0 +1,178 @@ +{% from '_lib.hcl' import shutdown_delay, authproxy_group, continuous_reschedule, set_pg_password_template, task_logs, group_disk with context -%} + +job "newsleak-deps" { + datacenters = ["dc1"] + type = "service" + priority = 60 + + spread { attribute = {% raw %}"${attr.unique.hostname}"{% endraw %} } + + group "newsleak-pg" { + ${ continuous_reschedule() } + ${ group_disk() } + + task "newsleak-pg" { + ${ task_logs() } + + constraint { + attribute = "{% raw %}${meta.liquid_volumes}{% endraw %}" + operator = "is_set" + } + + affinity { + attribute = "{% raw %}${meta.liquid_large_databases}{% endraw %}" + value = "true" + weight = 100 + } + + driver = "docker" + + config { + image = "postgres:9.6" + volumes = [ + "{% raw %}${meta.liquid_volumes}{% endraw %}/newsleak/pg/data:/var/lib/postgresql/data" + ] + labels { + liquid_task = 
"newsleak-pg" + } + port_map { + pg = 5432 + } + # 128MB, matching PostgreSQL's default shared memory usage (shared_buffers) + shm_size = 134217728 + memory_hard_limit = 1500 + } + template { + data = <