From 2c2347fc220d574f8ddcd0dde3dc7a1bb180fcdc Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 11 Mar 2026 11:15:27 +0100 Subject: [PATCH 01/18] firstboot: harden credential handling a bit Credentials are highly privileged things, but still, let's do some validation, because we can. --- src/firstboot/firstboot.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/firstboot/firstboot.c b/src/firstboot/firstboot.c index 38e3adaed6eca..3a1067194d2ac 100644 --- a/src/firstboot/firstboot.c +++ b/src/firstboot/firstboot.c @@ -412,13 +412,16 @@ static int prompt_keymap(int rfd, sd_varlink **mute_console_link) { if (arg_keymap) return 0; - r = read_credential("firstboot.keymap", (void**) &arg_keymap, NULL); + _cleanup_free_ char *km = NULL; + r = read_credential("firstboot.keymap", (void**) &km, NULL); if (r < 0) log_debug_errno(r, "Failed to read credential firstboot.keymap, ignoring: %m"); - else { + else if (keymap_is_valid(km)) { log_debug("Acquired keymap from credential."); + arg_keymap = TAKE_PTR(km); return 0; - } + } else + log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Keymap '%s' supplied via credential is not valid, ignoring.", km); bool b; if (arg_prompt_keymap_auto) { @@ -540,13 +543,16 @@ static int prompt_timezone(int rfd, sd_varlink **mute_console_link) { if (arg_timezone) return 0; - r = read_credential("firstboot.timezone", (void**) &arg_timezone, NULL); + _cleanup_free_ char *tz = NULL; + r = read_credential("firstboot.timezone", (void**) &tz, NULL); if (r < 0) log_debug_errno(r, "Failed to read credential firstboot.timezone, ignoring: %m"); - else { + else if (timezone_is_valid(tz, LOG_DEBUG)) { log_debug("Acquired timezone from credential."); + arg_timezone = TAKE_PTR(tz); return 0; - } + } else + log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Timezone '%s' supplied via credential is not valid, ignoring.", tz); if (!arg_prompt_timezone) { log_debug("Prompting for timezone was not requested."); From 
47b45453dde42131139f1fba2a2772c6609b3fbd Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 5 Mar 2026 10:39:28 +0100 Subject: [PATCH 02/18] firstboot: permit setting the static hostname via a system credential For the IMDS case there's value in being able to set the static hostname, instead of just the transient one. Let's introduce firstboot.hostname, which only applies to first boot, and write the static hostname. This is different from system.hostname which applies to any boot, and writes the transient hostname. --- man/systemd-firstboot.xml | 10 ++++++++++ man/systemd.system-credentials.xml | 23 +++++++++++++++++++---- src/firstboot/firstboot.c | 11 +++++++++++ 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/man/systemd-firstboot.xml b/man/systemd-firstboot.xml index 86a85f0bf2855..8d5e9a724d4dd 100644 --- a/man/systemd-firstboot.xml +++ b/man/systemd-firstboot.xml @@ -447,6 +447,16 @@ + + + firstboot.hostname + + This credential specifies the static system hostname setting to set during first + boot, in place of prompting the user. Note that this controls the static hostname, not the transient + hostname, and only has an effect on first boot, unlike system.hostname. + + + Note that by default the systemd-firstboot.service unit file is set up to diff --git a/man/systemd.system-credentials.xml b/man/systemd.system-credentials.xml index e3e2887207784..4ea8c6a14c9ef 100644 --- a/man/systemd.system-credentials.xml +++ b/man/systemd.system-credentials.xml @@ -44,7 +44,7 @@ firstboot.keymap - The console key mapping to set (e.g. de). Read by + The console key mapping to set (e.g. de). Read by systemd-firstboot1, and only honoured if no console keymap has been configured before. @@ -52,6 +52,20 @@ + + firstboot.hostname + + This credential specifies the static system hostname setting to set during first + boot, in place of prompting the user. 
Note that this controls the static hostname, not the transient + hostname, and only has an effect on first boot, unlike system.hostname (see + below). Read by + systemd-firstboot1 + and only honoured if no static hostname has been configured before. + + + + + firstboot.locale firstboot.locale-messages @@ -398,9 +412,10 @@ system.hostname Accepts a (transient) hostname to configure during early boot. The static hostname specified - in /etc/hostname, if configured, takes precedence over this setting. - Interpreted by the service manager (PID 1). For details see - systemd1. + in /etc/hostname, if configured, takes precedence over this setting. + Interpreted by the service manager (PID 1). For details see + systemd1. Also + see firstboot.hostname above. diff --git a/src/firstboot/firstboot.c b/src/firstboot/firstboot.c index 3a1067194d2ac..f1cec62adb1ce 100644 --- a/src/firstboot/firstboot.c +++ b/src/firstboot/firstboot.c @@ -653,6 +653,17 @@ static int prompt_hostname(int rfd, sd_varlink **mute_console_link) { if (arg_hostname) return 0; + _cleanup_free_ char *hn = NULL; + r = read_credential("firstboot.hostname", (void**) &hn, NULL); + if (r < 0) + log_debug_errno(r, "Failed to read credential firstboot.hostname, ignoring: %m"); + else if (hostname_is_valid(hn, VALID_HOSTNAME_TRAILING_DOT|VALID_HOSTNAME_QUESTION_MARK)) { + log_debug("Acquired hostname from credentials."); + arg_hostname = TAKE_PTR(hn); + return 0; + } else + log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Hostname '%s' supplied via credential is not valid, ignoring.", hn); + if (!arg_prompt_hostname) { log_debug("Prompting for hostname was not requested."); return 0; From e26654b934a53407fe953422b3717353f4197751 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 5 Mar 2026 09:30:48 +0100 Subject: [PATCH 03/18] macro: split out DEFER_VOID_CALL() into its own header This stuff needs assert() defined, but we don't really want to pull in assert-fundamental.h into macro.h just for this, hence split 
this out. --- src/basic/defer-util.h | 18 ++++++++++++++++++ src/basic/macro.h | 13 ------------- src/firstboot/firstboot.c | 1 + src/home/homectl.c | 1 + 4 files changed, 20 insertions(+), 13 deletions(-) create mode 100644 src/basic/defer-util.h diff --git a/src/basic/defer-util.h b/src/basic/defer-util.h new file mode 100644 index 0000000000000..320301a068ade --- /dev/null +++ b/src/basic/defer-util.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "assert-fundamental.h" +#include "macro.h" + +typedef void (*void_func_t)(void); + +static inline void dispatch_void_func(void_func_t *f) { + assert(f); + assert(*f); + (*f)(); +} + +/* Inspired by Go's "defer" construct, but much more basic. This basically just calls a void function when + * the current scope is left. Doesn't do function parameters (i.e. no closures). */ +#define DEFER_VOID_CALL(x) _DEFER_VOID_CALL(UNIQ, x) +#define _DEFER_VOID_CALL(uniq, x) _unused_ _cleanup_(dispatch_void_func) void_func_t UNIQ_T(defer, uniq) = (x) diff --git a/src/basic/macro.h b/src/basic/macro.h index 7001c331399d6..390a9fab38ca3 100644 --- a/src/basic/macro.h +++ b/src/basic/macro.h @@ -205,16 +205,3 @@ static inline size_t size_add(size_t x, size_t y) { for (typeof(entry) _va_sentinel_[1] = {}, _entries_[] = { __VA_ARGS__ __VA_OPT__(,) _va_sentinel_[0] }, *_current_ = _entries_; \ ((long)(_current_ - _entries_) < (long)(ELEMENTSOF(_entries_) - 1)) && ({ entry = *_current_; true; }); \ _current_++) - -typedef void (*void_func_t)(void); - -static inline void dispatch_void_func(void_func_t *f) { - assert(f); - assert(*f); - (*f)(); -} - -/* Inspired by Go's "defer" construct, but much more basic. This basically just calls a void function when - * the current scope is left. Doesn't do function parameters (i.e. no closures). 
*/ -#define DEFER_VOID_CALL(x) _DEFER_VOID_CALL(UNIQ, x) -#define _DEFER_VOID_CALL(uniq, x) _unused_ _cleanup_(dispatch_void_func) void_func_t UNIQ_T(defer, uniq) = (x) diff --git a/src/firstboot/firstboot.c b/src/firstboot/firstboot.c index f1cec62adb1ce..769c78e0430cc 100644 --- a/src/firstboot/firstboot.c +++ b/src/firstboot/firstboot.c @@ -19,6 +19,7 @@ #include "chase.h" #include "copy.h" #include "creds-util.h" +#include "defer-util.h" #include "dissect-image.h" #include "env-file.h" #include "errno-util.h" diff --git a/src/home/homectl.c b/src/home/homectl.c index 2b92ab6481eae..bc75b3451f99c 100644 --- a/src/home/homectl.c +++ b/src/home/homectl.c @@ -18,6 +18,7 @@ #include "cgroup-util.h" #include "chase.h" #include "creds-util.h" +#include "defer-util.h" #include "dirent-util.h" #include "dns-domain.h" #include "env-util.h" From a42958ef5bd52996d1c643cecc0761e74c22c893 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 27 Feb 2026 10:05:16 +0100 Subject: [PATCH 04/18] fileio: introduce write_data_file_atomic_at() helper This is very similar to write_string_file_atomic(), but is intentionally kept separate (after long consideration). It focusses on arbitrary struct iovec data, not just strings, and hence also doesn't do stdio at all. It's hence a lot more low-level. We might want to consider moving write_string_file*() on top of write_data_file_atomic_at(), but for now don't. 
--- src/basic/fileio.c | 62 ++++++++++++++++++++++++++++++++++++++++++ src/basic/fileio.h | 6 ++++ src/test/test-fileio.c | 52 +++++++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+) diff --git a/src/basic/fileio.c b/src/basic/fileio.c index 90436f6ecf820..66d06484dc981 100644 --- a/src/basic/fileio.c +++ b/src/basic/fileio.c @@ -7,12 +7,15 @@ #include #include "alloc-util.h" +#include "chase.h" #include "errno-util.h" #include "extract-word.h" #include "fd-util.h" #include "fileio.h" #include "fs-util.h" #include "hexdecoct.h" +#include "io-util.h" +#include "iovec-util.h" #include "label.h" #include "log.h" #include "mkdir.h" @@ -1655,3 +1658,62 @@ int warn_file_is_world_accessible(const char *filename, struct stat *st, const c filename, st->st_mode & 07777); return 0; } + +int write_data_file_atomic_at( + int dir_fd, + const char *path, + const struct iovec *iovec, + WriteDataFileFlags flags) { + + int r; + + assert(dir_fd >= 0 || IN_SET(dir_fd, AT_FDCWD, XAT_FDROOT)); + + /* This is a cousin of write_string_file_atomic(), but operates with arbitrary struct iovec binary + * data (rather than strings), works without FILE* streams, and does direct syscalls instead. */ + + _cleanup_free_ char *dn = NULL, *fn = NULL; + r = path_split_prefix_filename(path, &dn, &fn); + if (IN_SET(r, -EADDRNOTAVAIL, O_DIRECTORY)) + return -EISDIR; /* path refers to "." or "/" (which are dirs, which we cannot write), or is suffixed with "/" */ + if (r < 0) + return r; + + _cleanup_close_ int mfd = -EBADF; + if (dn) { + /* If there's a directory component, readjust our position */ + r = chaseat(dir_fd, + dn, + FLAGS_SET(flags, WRITE_DATA_FILE_MKDIR_0755) ? 
CHASE_MKDIR_0755 : 0, + /* ret_path= */ NULL, + &mfd); + if (r < 0) + return r; + + dir_fd = mfd; + } + + _cleanup_free_ char *t = NULL; + _cleanup_close_ int fd = open_tmpfile_linkable_at(dir_fd, fn, O_WRONLY|O_CLOEXEC, &t); + if (fd < 0) + return fd; + + CLEANUP_TMPFILE_AT(dir_fd, t); + + if (iovec_is_set(iovec)) { + r = loop_write(fd, iovec->iov_base, iovec->iov_len); + if (r < 0) + return r; + } + + r = fchmod_umask(fd, 0644); + if (r < 0) + return r; + + r = link_tmpfile_at(fd, dir_fd, t, fn, LINK_TMPFILE_REPLACE); + if (r < 0) + return r; + + t = mfree(t); /* disarm CLEANUP_TMPFILE_AT */ + return 0; +} diff --git a/src/basic/fileio.h b/src/basic/fileio.h index 578c16c0ee394..3e2372c4dddbc 100644 --- a/src/basic/fileio.h +++ b/src/basic/fileio.h @@ -163,3 +163,9 @@ int safe_fgetc(FILE *f, char *ret); int warn_file_is_world_accessible(const char *filename, struct stat *st, const char *unit, unsigned line); int fopen_mode_to_flags(const char *mode); + +typedef enum WriteDataFileFlags { + WRITE_DATA_FILE_MKDIR_0755 = 1 << 0, +} WriteDataFileFlags; + +int write_data_file_atomic_at(int dir_fd, const char *path, const struct iovec *iovec, WriteDataFileFlags flags); diff --git a/src/test/test-fileio.c b/src/test/test-fileio.c index 38d92299467a7..575e2c52ed7df 100644 --- a/src/test/test-fileio.c +++ b/src/test/test-fileio.c @@ -9,6 +9,7 @@ #include "fd-util.h" #include "fileio.h" #include "fs-util.h" +#include "iovec-util.h" #include "memfd-util.h" #include "parse-util.h" #include "path-util.h" @@ -695,4 +696,55 @@ TEST(fdopen_independent) { f = safe_fclose(f); } +TEST(write_data_file_atomic_at) { + struct iovec a = IOVEC_MAKE_STRING("hallo"); + ASSERT_OK(write_data_file_atomic_at(AT_FDCWD, "/tmp/wdfa", &a, /* flags= */ 0)); + + _cleanup_(iovec_done) struct iovec ra = {}; + ASSERT_OK(read_full_file("/tmp/wdfa", (char**) &ra.iov_base, &ra.iov_len)); + ASSERT_EQ(iovec_memcmp(&a, &ra), 0); + ASSERT_OK_ERRNO(unlink("/tmp/wdfa")); + + 
ASSERT_OK(write_data_file_atomic_at(XAT_FDROOT, "tmp/wdfa", &a, /* flags= */ 0)); + iovec_done(&ra); + ASSERT_OK(read_full_file("/tmp/wdfa", (char**) &ra.iov_base, &ra.iov_len)); + ASSERT_EQ(iovec_memcmp(&a, &ra), 0); + ASSERT_OK_ERRNO(unlink("/tmp/wdfa")); + + ASSERT_ERROR(write_data_file_atomic_at(AT_FDCWD, NULL, &a, /* flags= */ 0), EINVAL); + ASSERT_ERROR(write_data_file_atomic_at(AT_FDCWD, "", &a, /* flags= */ 0), EINVAL); + ASSERT_ERROR(write_data_file_atomic_at(AT_FDCWD, "/", &a, /* flags= */ 0), EISDIR); + ASSERT_ERROR(write_data_file_atomic_at(AT_FDCWD, ".", &a, /* flags= */ 0), EISDIR); + ASSERT_ERROR(write_data_file_atomic_at(AT_FDCWD, "/tmp/", &a, /* flags= */ 0), EISDIR); + + _cleanup_free_ char *cwd = NULL; + ASSERT_OK(safe_getcwd(&cwd)); + ASSERT_OK_ERRNO(chdir("/tmp")); + + ASSERT_OK(write_data_file_atomic_at(AT_FDCWD, "wdfa", &a, /* flags= */ 0)); + iovec_done(&ra); + ASSERT_OK(read_full_file("/tmp/wdfa", (char**) &ra.iov_base, &ra.iov_len)); + ASSERT_EQ(iovec_memcmp(&a, &ra), 0); + ASSERT_OK_ERRNO(unlink("/tmp/wdfa")); + + ASSERT_OK(write_data_file_atomic_at(XAT_FDROOT, "tmp/wdfa", &a, /* flags= */ 0)); + iovec_done(&ra); + ASSERT_OK(read_full_file("/tmp/wdfa", (char**) &ra.iov_base, &ra.iov_len)); + ASSERT_EQ(iovec_memcmp(&a, &ra), 0); + ASSERT_OK_ERRNO(unlink("/tmp/wdfa")); + + ASSERT_OK_ERRNO(chdir(cwd)); + + ASSERT_ERROR(write_data_file_atomic_at(XAT_FDROOT, "tmp/zzz/wdfa", &a, /* flags= */ 0), ENOENT); + ASSERT_OK(write_data_file_atomic_at(XAT_FDROOT, "tmp/zzz/wdfa", &a, WRITE_DATA_FILE_MKDIR_0755)); + iovec_done(&ra); + ASSERT_OK(read_full_file("/tmp/zzz/wdfa", (char**) &ra.iov_base, &ra.iov_len)); + ASSERT_EQ(iovec_memcmp(&a, &ra), 0); + ASSERT_OK_ERRNO(unlink("/tmp/zzz/wdfa")); + + ASSERT_ERROR(write_data_file_atomic_at(AT_FDCWD, "/tmp/zzz", &a, /* flags= */ 0), EEXIST); + + ASSERT_OK_ERRNO(rmdir("/tmp/zzz")); +} + DEFINE_TEST_MAIN(LOG_DEBUG); From be34e982c64b10bd8802c42d5d686347edeab7c5 Mon Sep 17 00:00:00 2001 From: Lennart Poettering 
Date: Thu, 26 Feb 2026 11:24:21 +0100 Subject: [PATCH 05/18] udev: tag DMI id device with "systemd", so that we can order units after it For various use cases it is useful to read relevant data from the DMI udev device, but to do so in a race-free way we need a way to wait until the device has been probed. Hence tag it with "systemd", so that sys-devices-virtual-dmi-id.device can be used as a synchronization point. --- rules.d/60-dmi-id.rules | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/rules.d/60-dmi-id.rules b/rules.d/60-dmi-id.rules index 10b1fe000ca18..ecea74ec60d1c 100644 --- a/rules.d/60-dmi-id.rules +++ b/rules.d/60-dmi-id.rules @@ -2,24 +2,28 @@ ACTION=="remove", GOTO="dmi_end" SUBSYSTEM!="dmi", GOTO="dmi_end" +KERNEL!="id", GOTO="dmi_end" ENV{ID_SYS_VENDOR_IS_RUBBISH}!="1", ENV{ID_VENDOR}="$attr{sys_vendor}" ENV{ID_SYSFS_ATTRIBUTE_MODEL}=="", ENV{ID_PRODUCT_NAME_IS_RUBBISH}!="1", ENV{ID_MODEL}="$attr{product_name}" ENV{ID_SYSFS_ATTRIBUTE_MODEL}=="product_name", ENV{ID_MODEL}="$attr{product_name}" ENV{ID_SYSFS_ATTRIBUTE_MODEL}=="product_version", ENV{ID_MODEL}="$attr{product_version}" -# fallback to board information +# Fallback to board information ENV{ID_VENDOR}=="", ENV{ID_VENDOR}="$attr{board_vendor}" ENV{ID_MODEL}=="", ENV{ID_MODEL}="$attr{board_name}" -# stock keeping unit +# Stock keeping unit ENV{ID_PRODUCT_SKU_IS_RUBBISH}!="1", ENV{ID_SKU}="$attr{product_sku}" -# hardware version +# Hardware version ENV{ID_PRODUCT_VERSION_IS_RUBBISH}!="1", ENV{ID_HARDWARE_VERSION}="$attr{product_version}" ENV{ID_HARDWARE_VERSION}=="", ENV{ID_BOARD_VERSION_IS_RUBBISH}!="1", ENV{ID_HARDWARE_VERSION}="$attr{board_version}" -# chassis asset tag +# Chassis asset tag ENV{MODALIAS}!="", ATTR{chassis_asset_tag}!="", IMPORT{builtin}="hwdb '$attr{modalias}cat$attr{chassis_asset_tag}:'" ENV{ID_CHASSIS_ASSET_TAG_IS_RUBBISH}!="1", ENV{ID_CHASSIS_ASSET_TAG}="$attr{chassis_asset_tag}" +# Allow units to be ordered after the DMI device +TAG+="systemd" + 
LABEL="dmi_end" From c749f8671dd1b82baaa4b44914502a37feab0db9 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 26 Feb 2026 10:10:53 +0100 Subject: [PATCH 06/18] iovec-util: introduce IOVEC_MAKE_BYTE() helper --- src/basic/iovec-util.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/basic/iovec-util.h b/src/basic/iovec-util.h index 0d1d4a7a94d86..00cbb89a7790b 100644 --- a/src/basic/iovec-util.h +++ b/src/basic/iovec-util.h @@ -24,6 +24,12 @@ struct iovec* iovec_make_string(struct iovec *iovec, const char *s); .iov_len = STRLEN(s), \ } +#define IOVEC_MAKE_BYTE(c) \ + (const struct iovec) { \ + .iov_base = (char*) ((const char[]) { c }), \ + .iov_len = 1, \ + } + void iovec_done_erase(struct iovec *iovec); char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value); From 93485fdaf69cfb243b199e76177d2b4fe95b572a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 26 Feb 2026 15:49:24 +0100 Subject: [PATCH 07/18] json-util: add json_dispatch_in6_addr() helper --- src/libsystemd/sd-json/json-util.c | 23 ++++++++++++++++++++++- src/libsystemd/sd-json/json-util.h | 1 + 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/libsystemd/sd-json/json-util.c b/src/libsystemd/sd-json/json-util.c index 7f90b7fc7930c..d9167d9e83baa 100644 --- a/src/libsystemd/sd-json/json-util.c +++ b/src/libsystemd/sd-json/json-util.c @@ -194,7 +194,28 @@ int json_dispatch_in_addr(const char *name, sd_json_variant *variant, sd_json_di return r; if (iov.iov_len != sizeof(struct in_addr)) - return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is array of unexpected size.", strna(name)); + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Expected JSON field '%s' to be an array of %zu bytes.", strna(name), sizeof(struct in_addr)); + + memcpy(address, iov.iov_base, iov.iov_len); + return 0; +} + +int json_dispatch_in6_addr(const char *name, sd_json_variant *variant, 
sd_json_dispatch_flags_t flags, void *userdata) { + struct in6_addr *address = ASSERT_PTR(userdata); + _cleanup_(iovec_done) struct iovec iov = {}; + int r; + + if (sd_json_variant_is_null(variant)) { + *address = (struct in6_addr) {}; + return 0; + } + + r = json_dispatch_byte_array_iovec(name, variant, flags, &iov); + if (r < 0) + return r; + + if (iov.iov_len != sizeof(struct in6_addr)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Expected JSON field '%s' to be an array of %zu bytes.", strna(name), sizeof(struct in6_addr)); memcpy(address, iov.iov_base, iov.iov_len); return 0; diff --git a/src/libsystemd/sd-json/json-util.h b/src/libsystemd/sd-json/json-util.h index 847725a41e292..478d2a2a2122b 100644 --- a/src/libsystemd/sd-json/json-util.h +++ b/src/libsystemd/sd-json/json-util.h @@ -115,6 +115,7 @@ int json_dispatch_user_group_name(const char *name, sd_json_variant *variant, sd int json_dispatch_const_user_group_name(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); int json_dispatch_const_unit_name(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); int json_dispatch_in_addr(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); +int json_dispatch_in6_addr(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); int json_dispatch_path(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); int json_dispatch_const_path(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); int json_dispatch_strv_path(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); From 0cd86c15fcf5c5a538443691343ddc278aa604ce Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 26 Feb 2026 15:50:26 +0100 Subject: [PATCH 08/18] dns-rr: add dns_resource_record_from_json() This only parses a small 
subset of RR types for now, but we can add more later. Covered are the most important RR types: A, AAAA, PTR. --- src/resolve/test-dns-rr.c | 27 +++++++++++- src/shared/dns-rr.c | 89 +++++++++++++++++++++++++++++++++++++++ src/shared/dns-rr.h | 1 + 3 files changed, 115 insertions(+), 2 deletions(-) diff --git a/src/resolve/test-dns-rr.c b/src/resolve/test-dns-rr.c index e45f1d34238b0..b75604251f5a3 100644 --- a/src/resolve/test-dns-rr.c +++ b/src/resolve/test-dns-rr.c @@ -7,6 +7,21 @@ #include "dns-type.h" #include "tests.h" +static void test_to_json_from_json(DnsResourceRecord *rr) { + int r; + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *j = NULL; + ASSERT_OK(dns_resource_record_to_json(rr, &j)); + + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr2 = NULL; + r = dns_resource_record_from_json(j, &rr2); + if (r == -EOPNOTSUPP) + return; + ASSERT_OK(r); + + ASSERT_TRUE(dns_resource_record_equal(rr, rr2)); +} + /* ================================================================ * DNS_RESOURCE_RECORD_RDATA() * ================================================================ */ @@ -802,6 +817,8 @@ TEST(dns_resource_record_new_address_ipv4) { ASSERT_EQ(rr->key->type, DNS_TYPE_A); ASSERT_STREQ(dns_resource_key_name(rr->key), "www.example.com"); ASSERT_EQ(rr->a.in_addr.s_addr, addr.in.s_addr); + + test_to_json_from_json(rr); } TEST(dns_resource_record_new_address_ipv6) { @@ -818,6 +835,8 @@ TEST(dns_resource_record_new_address_ipv6) { ASSERT_EQ(rr->key->type, DNS_TYPE_AAAA); ASSERT_STREQ(dns_resource_key_name(rr->key), "www.example.com"); ASSERT_EQ(memcmp(&rr->aaaa.in6_addr, &addr.in6, sizeof(struct in6_addr)), 0); + + test_to_json_from_json(rr); } /* ================================================================ @@ -1003,11 +1022,13 @@ TEST(dns_resource_record_equal_cname_copy) { a = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_CNAME, "www.example.com"); ASSERT_NOT_NULL(a); - a->cname.name = strdup("example.com"); + a->cname.name = 
ASSERT_PTR(strdup("example.com")); b = dns_resource_record_copy(a); ASSERT_NOT_NULL(b); ASSERT_TRUE(dns_resource_record_equal(a, b)); + + test_to_json_from_json(a); } TEST(dns_resource_record_equal_cname_fail) { @@ -1220,11 +1241,13 @@ TEST(dns_resource_record_equal_ptr_copy) { a = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_PTR, "127.1.168.192.in-addr-arpa"); ASSERT_NOT_NULL(a); - a->ptr.name = strdup("example.com"); + a->ptr.name = ASSERT_PTR(strdup("example.com")); b = dns_resource_record_copy(a); ASSERT_NOT_NULL(b); ASSERT_TRUE(dns_resource_record_equal(a, b)); + + test_to_json_from_json(a); } TEST(dns_resource_record_equal_ptr_fail) { diff --git a/src/shared/dns-rr.c b/src/shared/dns-rr.c index 0fa730c13baa2..a0d308213d3e8 100644 --- a/src/shared/dns-rr.c +++ b/src/shared/dns-rr.c @@ -2513,6 +2513,95 @@ int dns_resource_record_to_json(DnsResourceRecord *rr, sd_json_variant **ret) { } } +int dns_resource_record_from_json(sd_json_variant *v, DnsResourceRecord **ret) { + int r; + + assert(v); + assert(ret); + + sd_json_variant *k = sd_json_variant_by_key(v, "key"); + if (!k) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Resource record entry lacks key field, refusing."); + + _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL; + r = dns_resource_key_from_json(k, &key); + if (r < 0) + return r; + + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; + rr = dns_resource_record_new(key); + if (!rr) + return log_oom_debug(); + + /* Note, for now we only support the most common subset of RRs for decoding here. Please send patches for more. 
*/ + switch (key->type) { + + case DNS_TYPE_PTR: + case DNS_TYPE_NS: + case DNS_TYPE_CNAME: + case DNS_TYPE_DNAME: { + _cleanup_free_ char *name = NULL; + + static const struct sd_json_dispatch_field table[] = { + { "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_string, 0, SD_JSON_MANDATORY }, + { "key", SD_JSON_VARIANT_OBJECT, NULL, 0, SD_JSON_MANDATORY }, + {} + }; + + r = sd_json_dispatch(v, table, /* flags= */ 0, &name); + if (r < 0) + return r; + + r = dns_name_is_valid(name); + if (r < 0) + return r; + + rr->ptr.name = TAKE_PTR(name); + break; + } + + case DNS_TYPE_A: { + struct in_addr addr = {}; + + static const struct sd_json_dispatch_field table[] = { + { "address", SD_JSON_VARIANT_ARRAY, json_dispatch_in_addr, 0, SD_JSON_MANDATORY }, + { "key", SD_JSON_VARIANT_OBJECT, NULL, 0, SD_JSON_MANDATORY }, + {} + }; + + r = sd_json_dispatch(v, table, /* flags= */ 0, &addr); + if (r < 0) + return r; + + rr->a.in_addr = addr; + break; + } + + case DNS_TYPE_AAAA: { + struct in6_addr addr = {}; + + static const struct sd_json_dispatch_field table[] = { + { "address", SD_JSON_VARIANT_ARRAY, json_dispatch_in6_addr, 0, SD_JSON_MANDATORY }, + { "key", SD_JSON_VARIANT_OBJECT, NULL, 0, SD_JSON_MANDATORY }, + {} + }; + + r = sd_json_dispatch(v, table, /* flags= */ 0, &addr); + if (r < 0) + return r; + + rr->aaaa.in6_addr = addr; + break; + } + + default: + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Decoding DNS type %i is currently not supported.", key->type); + } + + *ret = TAKE_PTR(rr); + return 0; +} + static const char* const dnssec_algorithm_table[_DNSSEC_ALGORITHM_MAX_DEFINED] = { /* Mnemonics as listed on https://www.iana.org/assignments/dns-sec-alg-numbers/dns-sec-alg-numbers.xhtml */ [DNSSEC_ALGORITHM_RSAMD5] = "RSAMD5", diff --git a/src/shared/dns-rr.h b/src/shared/dns-rr.h index c30cd71cfa5c7..d747083aa8a81 100644 --- a/src/shared/dns-rr.h +++ b/src/shared/dns-rr.h @@ -419,6 +419,7 @@ int dns_resource_record_new_from_raw(DnsResourceRecord **ret, const 
void *data, int dns_resource_key_to_json(DnsResourceKey *key, sd_json_variant **ret); int dns_resource_key_from_json(sd_json_variant *v, DnsResourceKey **ret); int dns_resource_record_to_json(DnsResourceRecord *rr, sd_json_variant **ret); +int dns_resource_record_from_json(sd_json_variant *v, DnsResourceRecord **ret); void dns_resource_key_hash_func(const DnsResourceKey *k, struct siphash *state); int dns_resource_key_compare_func(const DnsResourceKey *x, const DnsResourceKey *y); From 6920752eceb29cfe89af748a66fba580620ddc1c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 26 Feb 2026 15:51:09 +0100 Subject: [PATCH 09/18] dns-rr: tighten rules on parsing RR keys from JSON let's ensure the name is actually a valid DNS name. --- src/shared/dns-rr.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/shared/dns-rr.c b/src/shared/dns-rr.c index a0d308213d3e8..aa7e5ce8434c1 100644 --- a/src/shared/dns-rr.c +++ b/src/shared/dns-rr.c @@ -2215,6 +2215,12 @@ int dns_resource_key_from_json(sd_json_variant *v, DnsResourceKey **ret) { if (r < 0) return r; + r = dns_name_is_valid(p.name); + if (r < 0) + return r; + if (!r) + return -EBADMSG; + key = dns_resource_key_new(p.class, p.type, p.name); if (!key) return -ENOMEM; @@ -2555,6 +2561,8 @@ int dns_resource_record_from_json(sd_json_variant *v, DnsResourceRecord **ret) { r = dns_name_is_valid(name); if (r < 0) return r; + if (!r) + return -EBADMSG; rr->ptr.name = TAKE_PTR(name); break; @@ -2564,8 +2572,8 @@ int dns_resource_record_from_json(sd_json_variant *v, DnsResourceRecord **ret) { struct in_addr addr = {}; static const struct sd_json_dispatch_field table[] = { - { "address", SD_JSON_VARIANT_ARRAY, json_dispatch_in_addr, 0, SD_JSON_MANDATORY }, - { "key", SD_JSON_VARIANT_OBJECT, NULL, 0, SD_JSON_MANDATORY }, + { "address", SD_JSON_VARIANT_ARRAY, json_dispatch_in_addr, 0, SD_JSON_MANDATORY }, + { "key", SD_JSON_VARIANT_OBJECT, NULL, 0, SD_JSON_MANDATORY }, {} }; From 
24f457d9ab33f48e591ae68272de0cc0de01c43d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 4 Mar 2026 17:08:03 +0100 Subject: [PATCH 10/18] resolved: also flush /etc/hosts on reload When we are told to reload our configuration also flush out /etc/hosts explicitly. This is particularly relevant since we suppress too frequent reloads, and hence a synchronous way to force a reload is very useful. --- src/resolve/resolved-etc-hosts.c | 1 + src/resolve/resolved-manager.c | 1 + 2 files changed, 2 insertions(+) diff --git a/src/resolve/resolved-etc-hosts.c b/src/resolve/resolved-etc-hosts.c index e9100de5229d0..00c76a9977f85 100644 --- a/src/resolve/resolved-etc-hosts.c +++ b/src/resolve/resolved-etc-hosts.c @@ -72,6 +72,7 @@ void etc_hosts_clear(EtcHosts *hosts) { void manager_etc_hosts_flush(Manager *m) { etc_hosts_clear(&m->etc_hosts); m->etc_hosts_stat = (struct stat) {}; + m->etc_hosts_last = USEC_INFINITY; } static int parse_line(EtcHosts *hosts, unsigned nr, const char *line) { diff --git a/src/resolve/resolved-manager.c b/src/resolve/resolved-manager.c index a0fb74ec3567a..19ff92bfca5a9 100644 --- a/src/resolve/resolved-manager.c +++ b/src/resolve/resolved-manager.c @@ -659,6 +659,7 @@ static int manager_dispatch_reload_signal(sd_event_source *s, const struct signa m->unicast_scope = dns_scope_free(m->unicast_scope); m->delegates = hashmap_free(m->delegates); dns_trust_anchor_flush(&m->trust_anchor); + manager_etc_hosts_flush(m); manager_set_defaults(m); From 5e8275b50e0af867858d19b5ac38b7da1d63a282 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 26 Feb 2026 15:54:14 +0100 Subject: [PATCH 11/18] resolved: add ability to define additional local RRs via drop-ins This is an extension of the /etc/hosts concept, but can provide any kind of RRs (well, actually, we only parse A/AAAA/PTR for now, but the concept is open for more). 
Fixes: #17791 --- man/rules/meson.build | 1 + man/systemd-resolved.service.xml | 1 + man/systemd.rr.xml | 92 +++++++++++ src/resolve/meson.build | 1 + src/resolve/resolved-dns-query.c | 36 +++++ src/resolve/resolved-gperf.gperf | 1 + src/resolve/resolved-manager.c | 5 + src/resolve/resolved-manager.h | 6 + src/resolve/resolved-static-records.c | 215 ++++++++++++++++++++++++++ src/resolve/resolved-static-records.h | 7 + test/units/TEST-75-RESOLVED.sh | 32 ++++ 11 files changed, 397 insertions(+) create mode 100644 man/systemd.rr.xml create mode 100644 src/resolve/resolved-static-records.c create mode 100644 src/resolve/resolved-static-records.h diff --git a/man/rules/meson.build b/man/rules/meson.build index d69793150be97..2502b3f1dd5c1 100644 --- a/man/rules/meson.build +++ b/man/rules/meson.build @@ -1260,6 +1260,7 @@ manpages = [ ['systemd.pcrlock', '5', ['systemd.pcrlock.d'], ''], ['systemd.preset', '5', [], ''], ['systemd.resource-control', '5', [], ''], + ['systemd.rr', '5', [], 'ENABLE_RESOLVE'], ['systemd.scope', '5', [], ''], ['systemd.service', '5', [], ''], ['systemd.slice', '5', [], ''], diff --git a/man/systemd-resolved.service.xml b/man/systemd-resolved.service.xml index a5ab48d2fa05c..f4a542162a53c 100644 --- a/man/systemd-resolved.service.xml +++ b/man/systemd-resolved.service.xml @@ -510,6 +510,7 @@ search foobar.com barbar.com systemd1 resolved.conf5 systemd.dns-delegate5 + systemd.rr5 systemd.dnssd5 dnssec-trust-anchors.d5 nss-resolve8 diff --git a/man/systemd.rr.xml b/man/systemd.rr.xml new file mode 100644 index 0000000000000..58a6f76bd5b9e --- /dev/null +++ b/man/systemd.rr.xml @@ -0,0 +1,92 @@ + + + + + + + + systemd.rr + systemd + + + + systemd.rr + 5 + + + + systemd.rr + Local static DNS resource record definitions + + + + + /etc/systemd/resolve/static.d/*.rr + /run/systemd/resolve/static.d/*.rr + /usr/lib/systemd/resolve/static.d/*.rr + + + + + Description + + *.rr files may be used to define resource record sets ("RRsets") that shall be + 
resolvable locally, similar in style to address records defined by /etc/hosts (see + hosts5 for + details). These files are read by + systemd-resolved.service8, + and are used to synthesize local responses to local queries matching the defined resource record set. + + These drop-in files are in JSON format. Each file may contain either a single top-level DNS RR + object, or an array of one or more DNS RR objects. Each RR object has at least a key + subobject consisting of a name string field and a type integer field + (which contains the RR type in numeric form). Depending on the chosen type the RR object also has the + following fields: + + + For A/AAAA RRs, the RR object should have an address + field set to an array consisting of 4 or 16 8-bit unsigned integers for the IP + address. + + For PTR/NS/CNAME/DNAME RRs, the RR object should have a name field + set to the name the record shall point to. + + + This JSON serialization of DNS RRs matches the one returned by resolvectl. + + Currently no other RR types are supported. + + + + Examples + + Consider a file /run/systemd/resolve/static.d/foobar.rr: + + +{ + "key" : { + "type" : 1, + "name" : "foobar.example.com" + }, + "address" : [ 192, 168, 100, 1 ] +} + + This ensures that local address lookups for foobar.example.com will resolve to the + 192.168.100.1 IPv4 address.
+ + + + + See Also + + systemd1 + systemd-resolved.service8 + hosts5 + resolvectl1 + + + + diff --git a/src/resolve/meson.build b/src/resolve/meson.build index be2979343f3f0..b9b2e24b18123 100644 --- a/src/resolve/meson.build +++ b/src/resolve/meson.build @@ -36,6 +36,7 @@ systemd_resolved_extract_sources = files( 'resolved-mdns.c', 'resolved-resolv-conf.c', 'resolved-socket-graveyard.c', + 'resolved-static-records.c', 'resolved-util.c', 'resolved-varlink.c', ) diff --git a/src/resolve/resolved-dns-query.c b/src/resolve/resolved-dns-query.c index a0ef750447179..6ec6569ae7639 100644 --- a/src/resolve/resolved-dns-query.c +++ b/src/resolve/resolved-dns-query.c @@ -21,6 +21,7 @@ #include "resolved-etc-hosts.h" #include "resolved-hook.h" #include "resolved-manager.h" +#include "resolved-static-records.h" #include "resolved-timeouts.h" #include "set.h" #include "string-util.h" @@ -910,6 +911,33 @@ static int dns_query_try_etc_hosts(DnsQuery *q) { return 1; } +static int dns_query_try_static_records(DnsQuery *q) { + int r; + + assert(q); + + if (FLAGS_SET(q->flags, SD_RESOLVED_NO_SYNTHESIZE)) + return 0; + + _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL; + r = manager_static_records_lookup( + q->manager, + q->question_bypass ? 
q->question_bypass->question : q->question_utf8, + &answer); + if (r <= 0) + return r; + + dns_query_reset_answer(q); + + q->answer = TAKE_PTR(answer); + q->answer_rcode = DNS_RCODE_SUCCESS; + q->answer_protocol = dns_synthesize_protocol(q->flags); + q->answer_family = dns_synthesize_family(q->flags); + q->answer_query_flags = SD_RESOLVED_AUTHENTICATED|SD_RESOLVED_CONFIDENTIAL|SD_RESOLVED_SYNTHETIC; + + return 1; +} + static int dns_query_go_scopes(DnsQuery *q) { int r; @@ -1038,6 +1066,14 @@ int dns_query_go(DnsQuery *q) { q->state != DNS_TRANSACTION_NULL) return 0; + r = dns_query_try_static_records(q); + if (r < 0) + return r; + if (r > 0) { + dns_query_complete(q, DNS_TRANSACTION_SUCCESS); + return 1; + } + r = dns_query_try_etc_hosts(q); if (r < 0) return r; diff --git a/src/resolve/resolved-gperf.gperf b/src/resolve/resolved-gperf.gperf index c548320449b6f..8b8a66d0369bf 100644 --- a/src/resolve/resolved-gperf.gperf +++ b/src/resolve/resolved-gperf.gperf @@ -31,6 +31,7 @@ Resolve.DNSOverTLS, config_parse_dns_over_tls_mode, 0, Resolve.Cache, config_parse_dns_cache_mode, DNS_CACHE_MODE_YES, offsetof(Manager, enable_cache) Resolve.DNSStubListener, config_parse_dns_stub_listener_mode, 0, offsetof(Manager, dns_stub_listener_mode) Resolve.ReadEtcHosts, config_parse_bool, 0, offsetof(Manager, read_etc_hosts) +Resolve.ReadStaticRecords, config_parse_bool, 0, offsetof(Manager, read_static_records) Resolve.ResolveUnicastSingleLabel, config_parse_bool, 0, offsetof(Manager, resolve_unicast_single_label) Resolve.DNSStubListenerExtra, config_parse_dns_stub_listener_extra, 0, offsetof(Manager, dns_extra_stub_listeners) Resolve.CacheFromLocalhost, config_parse_bool, 0, offsetof(Manager, cache_from_localhost) diff --git a/src/resolve/resolved-manager.c b/src/resolve/resolved-manager.c index 19ff92bfca5a9..25a51ed02b042 100644 --- a/src/resolve/resolved-manager.c +++ b/src/resolve/resolved-manager.c @@ -49,6 +49,7 @@ #include "resolved-mdns.h" #include "resolved-resolv-conf.h" 
#include "resolved-socket-graveyard.h" +#include "resolved-static-records.h" #include "resolved-util.h" #include "resolved-varlink.h" #include "set.h" @@ -637,6 +638,7 @@ static void manager_set_defaults(Manager *m) { m->enable_cache = DNS_CACHE_MODE_YES; m->dns_stub_listener_mode = DNS_STUB_LISTENER_YES; m->read_etc_hosts = true; + m->read_static_records = true; m->resolve_unicast_single_label = false; m->cache_from_localhost = false; m->stale_retention_usec = 0; @@ -660,6 +662,7 @@ static int manager_dispatch_reload_signal(sd_event_source *s, const struct signa m->delegates = hashmap_free(m->delegates); dns_trust_anchor_flush(&m->trust_anchor); manager_etc_hosts_flush(m); + manager_static_records_flush(m); manager_set_defaults(m); @@ -730,6 +733,7 @@ int manager_new(Manager **ret) { .read_resolv_conf = true, .need_builtin_fallbacks = true, .etc_hosts_last = USEC_INFINITY, + .static_records_last = USEC_INFINITY, .sigrtmin18_info.memory_pressure_handler = manager_memory_pressure, .sigrtmin18_info.memory_pressure_userdata = m, @@ -918,6 +922,7 @@ Manager* manager_free(Manager *m) { dns_trust_anchor_flush(&m->trust_anchor); manager_etc_hosts_flush(m); + manager_static_records_flush(m); while ((sb = hashmap_first(m->dns_service_browsers))) dns_service_browser_free(sb); diff --git a/src/resolve/resolved-manager.h b/src/resolve/resolved-manager.h index 4f595e6d04c24..d6afe10c7c0a1 100644 --- a/src/resolve/resolved-manager.h +++ b/src/resolve/resolved-manager.h @@ -123,6 +123,12 @@ typedef struct Manager { struct stat etc_hosts_stat; bool read_etc_hosts; + /* Data from {/etc,/run,/usr/lib}/systemd/resolve/static.d/ */ + Hashmap *static_records; + usec_t static_records_last; + Set *static_records_stat; + bool read_static_records; + /* List of refused DNS Record Types */ Set *refuse_record_types; diff --git a/src/resolve/resolved-static-records.c b/src/resolve/resolved-static-records.c new file mode 100644 index 0000000000000..205159417fa9d --- /dev/null +++ 
b/src/resolve/resolved-static-records.c @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-json.h" + +#include "alloc-util.h" +#include "conf-files.h" +#include "constants.h" +#include "dns-answer.h" +#include "dns-domain.h" +#include "dns-question.h" +#include "dns-rr.h" +#include "fd-util.h" +#include "fileio.h" +#include "hashmap.h" +#include "json-util.h" +#include "log.h" +#include "resolved-manager.h" +#include "resolved-static-records.h" +#include "set.h" +#include "stat-util.h" + +/* This implements a mechanism to extend what systemd-resolved resolves locally, via .rr drop-ins in + * {/etc,/run,/usr/lib}/systemd/resolve/static.d/. These files are in JSON format, and are RR serializations, + * that match the usual way we serialize RRs to JSON. + * + * Note that this deliberately doesn't use the (probably more user-friendly) classic DNS zone file format, + * to keep things a bit simpler, and symmetric to the places we currently already generate JSON + * serializations of DNS RRs. Also note the semantics are different from DNS zone file format, for example + * regarding delegation (i.e. the RRs defined here have no effect on subdomains), which is probably nicer for + * one-off mappings of domains to specific resources. Or in other words, this is supposed to be a drop-in + * based alternative to /etc/hosts, not an alternative to DNS zone files. (The JSON format is also a lot more + * extensible to us, for example we could teach it to map certain lookups to specific DNS errors, or extend + * it so that subdomains always get NXDOMAIN or similar). + * + * (That said, if there's a good reason, we can also support *.zone files too one day).
+ */ + +/* Recheck static records at most once every 2s */ +#define STATIC_RECORDS_RECHECK_USEC (2*USEC_PER_SEC) + +DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR( + answer_by_name_hash_ops, + char, + dns_name_hash_func, + dns_name_compare_func, + DnsAnswer, + dns_answer_unref); + +static int load_static_record_file_item(sd_json_variant *rj, Hashmap **records) { + int r; + + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; + r = dns_resource_record_from_json(rj, &rr); + if (r < 0) + return log_error_errno(r, "Failed to parse DNS record from JSON: %m"); + + _cleanup_(dns_answer_unrefp) DnsAnswer *a = + hashmap_remove(*records, dns_resource_key_name(rr->key)); + + r = dns_answer_add_extend_full(&a, rr, /* ifindex= */ 0, DNS_ANSWER_AUTHENTICATED, /* rrsig= */ NULL, /* until= */ USEC_INFINITY); + if (r < 0) + return log_error_errno(r, "Failed append RR to DNS answer: %m"); + + DnsAnswerItem *item = ASSERT_PTR(ordered_set_first(a->items)); + + r = hashmap_ensure_put(records, &answer_by_name_hash_ops, dns_resource_key_name(item->rr->key), a); + if (r < 0) + return log_error_errno(r, "Failed to add RR to static record set: %m"); + + TAKE_PTR(a); + + log_debug("Added static resource record: %s", dns_resource_record_to_string(rr)); + return 1; +} + +static int load_static_record_file(const ConfFile *cf, Hashmap **records, Set **stats) { + int r; + + assert(cf); + assert(records); + + if (set_contains(*stats, &cf->st)) + return 0; + + _cleanup_free_ struct stat *st_copy = memdup(&cf->st, sizeof(cf->st)); + if (!st_copy) + return log_oom(); + + if (set_ensure_consume(stats, &inode_hash_ops, TAKE_PTR(st_copy)) < 0) + return log_oom(); + + _cleanup_fclose_ FILE *f = NULL; + r = xfopenat(cf->fd, /* path= */ NULL, "re", /* open_flags= */ 0, &f); + if (r < 0) { + log_warning_errno(r, "Failed to open '%s', skipping: %m", cf->result); + return 0; + } + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *j = NULL; + unsigned line = 0, column = 0; + r = 
sd_json_parse_file(f, cf->result, /* flags= */ 0, &j, &line, &column); + if (r < 0) { + if (line > 0) + log_syntax(/* unit= */ NULL, LOG_WARNING, cf->result, line, r, "Failed to parse JSON, skipping: %m"); + else + log_warning_errno(r, "Failed to parse JSON file '%s', skipping: %m", cf->result); + return 0; + } + + if (sd_json_variant_is_array(j)) { + sd_json_variant *i; + JSON_VARIANT_ARRAY_FOREACH(i, j) { + r = load_static_record_file_item(i, records); + if (r < 0) + return r; + } + } else if (sd_json_variant_is_object(j)) { + r = load_static_record_file_item(j, records); + if (r < 0) + return r; + } else { + log_warning("JSON file '%s' contains neither array nor object, skipping.", cf->result); + return 0; + } + + return 1; +} + +static int manager_static_records_read(Manager *m) { + int r; + + usec_t ts; + assert_se(sd_event_now(m->event, CLOCK_BOOTTIME, &ts) >= 0); + + /* See if we check the static records db recently already */ + if (m->static_records_last != USEC_INFINITY && usec_add(m->static_records_last, STATIC_RECORDS_RECHECK_USEC) > ts) + return 0; + + m->static_records_last = ts; + + ConfFile **files = NULL; + size_t n_files = 0; + CLEANUP_ARRAY(files, n_files, conf_file_free_many); + + r = conf_files_list_nulstr_full( + ".rr", + /* root= */ NULL, + CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED|CONF_FILES_WARN, + CONF_PATHS_NULSTR("systemd/resolve/static.d/"), + &files, + &n_files); + if (r < 0) + return log_error_errno(r, "Failed to enumerate static record drop-ins: %m"); + + bool reload; + if (set_size(m->static_records_stat) != n_files) + reload = true; + else { + reload = false; + FOREACH_ARRAY(f, files, n_files) + if (!set_contains(m->static_records_stat, &(*f)->st)) { + reload = true; + break; + } + } + + if (!reload) { + log_debug("No static record files changed, not re-reading."); + return 0; + } + + _cleanup_(hashmap_freep) Hashmap *records = NULL; + _cleanup_(set_freep) Set *stats = NULL; + FOREACH_ARRAY(f, files, n_files) + (void) 
load_static_record_file(*f, &records, &stats); + + hashmap_free(m->static_records); + m->static_records = TAKE_PTR(records); + + set_free(m->static_records_stat); + m->static_records_stat = TAKE_PTR(stats); + + return 0; +} + +int manager_static_records_lookup(Manager *m, DnsQuestion *q, DnsAnswer **answer) { + int r; + + assert(m); + assert(q); + assert(answer); + + if (!m->read_static_records) + return 0; + + (void) manager_static_records_read(m); + + DnsAnswer *f = hashmap_get(m->static_records, dns_question_first_name(q)); + if (!f) + return 0; + + r = dns_answer_extend(answer, f); + if (r < 0) + return r; + + return 1; +} + +void manager_static_records_flush(Manager *m) { + assert(m); + + m->static_records = hashmap_free(m->static_records); + m->static_records_stat = set_free(m->static_records_stat); + m->static_records_last = USEC_INFINITY; +} diff --git a/src/resolve/resolved-static-records.h b/src/resolve/resolved-static-records.h new file mode 100644 index 0000000000000..f50c70ef459a6 --- /dev/null +++ b/src/resolve/resolved-static-records.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "resolved-forward.h" + +void manager_static_records_flush(Manager *m); +int manager_static_records_lookup(Manager *m, DnsQuestion* q, DnsAnswer **answer); diff --git a/test/units/TEST-75-RESOLVED.sh b/test/units/TEST-75-RESOLVED.sh index b3656da94043a..0e69beb438547 100755 --- a/test/units/TEST-75-RESOLVED.sh +++ b/test/units/TEST-75-RESOLVED.sh @@ -1487,6 +1487,38 @@ EOF grep -qF "1.2.3.4" "$RUN_OUT" } +testcase_static_record() { + mkdir -p /run/systemd/resolve/static.d/ + cat >/run/systemd/resolve/static.d/statictest.rr </run/systemd/resolve/static.d/statictest2.rr < Date: Wed, 4 Mar 2026 15:07:09 +0100 Subject: [PATCH 12/18] hwdb: add database for basic IMDS properties --- hwdb.d/40-imds.hwdb | 93 ++++++++++++++++++++++++++++++++++++++++++++ hwdb.d/meson.build | 1 + hwdb.d/parse_hwdb.py | 20 +++++++++- 3 files changed, 113 
insertions(+), 1 deletion(-) create mode 100644 hwdb.d/40-imds.hwdb diff --git a/hwdb.d/40-imds.hwdb b/hwdb.d/40-imds.hwdb new file mode 100644 index 0000000000000..e098e20fbb8df --- /dev/null +++ b/hwdb.d/40-imds.hwdb @@ -0,0 +1,93 @@ +# This file is part of systemd + +# This provides various properties that declare if and +# how IMDS is available on the local system, i.e. we are running in a major +# cloud service that provides something resembling AWS' or Azure's Instance +# Metadata Service. +# +# General IMDS endpoint data: +# IMDS_VENDOR= → Indicates IMDS is available, and which vendor it is +# IMDS_TOKEN_URL= → The URL to request an API token from. If not set, no API token is requested. +# IMDS_REFRESH_HEADER_NAME= → The HTTP request header field (everything before the ":") that contains the refresh TTL when requesting a token. +# IMDS_DATA_URL= → The base URL to request actual IMDS data fields from +# IMDS_DATA_URL_SUFFIX= → Parameters to suffix the URLs with +# IMDS_TOKEN_HEADER_NAME= → The HTTP request header field (everything before the ":") used to pass the token +# IMDS_EXTRA_HEADER=, IMDS_EXTRA_HEADER2=, IMDS_EXTRA_HEADER3=, … +# → Additional HTTP headers to pass when requesting a data field (full header, including ":") +# IMDS_ADDRESS_IPV4= → IPv4 address of the IMDS server +# IMDS_ADDRESS_IPV6= → IPv6 address of the IMDS server +# +# Well-known IMDS keys: +# IMDS_KEY_HOSTNAME= → IMDS key for the hostname +# IMDS_KEY_REGION= → IMDS key for the region, if that concept applies +# IMDS_KEY_ZONE= → IMDS key for the zone, if that concept applies +# IMDS_KEY_IPV4_PUBLIC= → IMDS key for the primary public IPv4 address if there is any +# IMDS_KEY_IPV6_PUBLIC= → IMDS key for the primary public IPv6 address if there is any +# IMDS_KEY_SSH_KEY= → IMDS key for an SSH public key to install in the root account +# IMDS_KEY_USERDATA= → IMDS key for arbitrary userdata (if there's only one) +# IMDS_KEY_USERDATA_BASE= → IMDS key for arbitrary userdata (if there are
multiple, this is the common prefix) +# IMDS_KEY_USERDATA_BASE64= → IMDS key for arbitrary userdata (if there's only one, but it is base64 encoded) + +# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html +dmi:bvnAmazonEC2:* + IMDS_VENDOR=amazon-ec2 + IMDS_TOKEN_URL=http://169.254.169.254/latest/api/token + IMDS_REFRESH_HEADER_NAME=X-aws-ec2-metadata-token-ttl-seconds + IMDS_DATA_URL=http://169.254.169.254/latest + IMDS_TOKEN_HEADER_NAME=X-aws-ec2-metadata-token + IMDS_ADDRESS_IPV4=169.254.169.254 + IMDS_ADDRESS_IPV6=fd00:ec2::254 + IMDS_KEY_HOSTNAME=/meta-data/hostname + IMDS_KEY_REGION=/meta-data/placement/region + IMDS_KEY_ZONE=/meta-data/placement/availability-zone + IMDS_KEY_IPV4_PUBLIC=/meta-data/public-ipv4 + IMDS_KEY_IPV6_PUBLIC=/meta-data/ipv6 + IMDS_KEY_SSH_KEY=/meta-data/public-keys/0/openssh-key + IMDS_KEY_USERDATA=/user-data + +# https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service#instance-metadata +dmi:*:cat7783-7084-3265-9085-8269-3286-77:* + IMDS_VENDOR=microsoft-azure + IMDS_DATA_URL=http://169.254.169.254/metadata + IMDS_DATA_URL_SUFFIX=?api-version=2025-04-07&format=text + IMDS_EXTRA_HEADER=Metadata: true + IMDS_ADDRESS_IPV4=169.254.169.254 + IMDS_KEY_HOSTNAME=/instance/compute/osProfile/computerName + IMDS_KEY_REGION=/instance/compute/location + IMDS_KEY_ZONE=/instance/compute/physicalZone + IMDS_KEY_IPV4_PUBLIC=/instance/network/interface/0/ipv4/ipAddress/0/publicIpAddress + IMDS_KEY_IPV6_PUBLIC=/instance/network/interface/0/ipv6/ipAddress/0/publicIpAddress + IMDS_KEY_SSH_KEY=/instance/compute/publicKeys/0/keyData + IMDS_KEY_USERDATA_BASE64=/instance/compute/userData + +# https://docs.cloud.google.com/compute/docs/metadata/predefined-metadata-keys +dmi:*:pnGoogleComputeEngine:* + IMDS_VENDOR=google-gcp + IMDS_DATA_URL=http://169.254.169.254/computeMetadata/v1 + IMDS_EXTRA_HEADER=Metadata-Flavor: Google + IMDS_ADDRESS_IPV4=169.254.169.254 + IMDS_KEY_HOSTNAME=/instance/hostname + 
IMDS_KEY_REGION=/instance/region + IMDS_KEY_ZONE=/instance/zone + IMDS_KEY_IPV4_PUBLIC=/instance/network-interfaces/0/access-configs/0/external-ip + IMDS_KEY_USERDATA_BASE=/instance/attributes + +# https://docs.hetzner.cloud/reference/cloud#description/server-metadata +dmi:bvnHetzner:* + IMDS_VENDOR=hetzner + IMDS_DATA_URL=http://169.254.169.254/hetzner/v1/metadata + IMDS_ADDRESS_IPV4=169.254.169.254 + IMDS_KEY_HOSTNAME=/hostname + IMDS_KEY_REGION=/region + IMDS_KEY_ZONE=/availability-zone + IMDS_KEY_IPV4_PUBLIC=/public-ipv4 + IMDS_KEY_SSH_KEY=/public-keys/0 + IMDS_KEY_USERDATA=/userdata + +# https://www.scaleway.com/en/docs/instances/how-to/use-cloud-init/ +dmi:*:svnScaleway:* + IMDS_VENDOR=scaleway + IMDS_DATA_URL=http://169.254.42.42 + IMDS_ADDRESS_IPV4=169.254.42.42 + IMDS_ADDRESS_IPV6=fd00:42::42 + IMDS_KEY_USERDATA=/user_data diff --git a/hwdb.d/meson.build b/hwdb.d/meson.build index 9ba73b21d6393..3299eaf8a75bf 100644 --- a/hwdb.d/meson.build +++ b/hwdb.d/meson.build @@ -19,6 +19,7 @@ hwdb_files_notest = files( hwdb_files_test = files( '20-dmi-id.hwdb', '20-net-ifname.hwdb', + '40-imds.hwdb', '60-autosuspend.hwdb', '60-autosuspend-fingerprint-reader.hwdb', '60-evdev.hwdb', diff --git a/hwdb.d/parse_hwdb.py b/hwdb.d/parse_hwdb.py index e98510839b73f..e70b0ff04e94e 100755 --- a/hwdb.d/parse_hwdb.py +++ b/hwdb.d/parse_hwdb.py @@ -125,7 +125,7 @@ def hwdb_grammar(): matchline = (matchline_typed | matchline_general) + EOL propertyline = (White(' ', exact=1).suppress() + - Combine(UDEV_TAG - '=' - Optional(Word(alphanums + '_=:@*.!-;, "/')) + Combine(UDEV_TAG - '=' - Optional(Word(alphanums + '_=:@*.!-;, "/?&')) - Optional(pythonStyleComment)) + EOL) propertycomment = White(' ', exact=1) + pythonStyleComment + EOL @@ -215,6 +215,24 @@ def property_grammar(): ('ID_NET_NAME_FROM_DATABASE', name_literal), ('ID_NET_NAME_INCLUDE_DOMAIN', zero_one), ('TPM2_BROKEN_NVPCR', zero_one), + ('IMDS_VENDOR', name_literal), + ('IMDS_TOKEN_URL', name_literal), + 
('IMDS_REFRESH_HEADER_NAME', name_literal), + ('IMDS_DATA_URL', name_literal), + ('IMDS_DATA_URL_SUFFIX', name_literal), + ('IMDS_TOKEN_HEADER_NAME', name_literal), + ('IMDS_EXTRA_HEADER', name_literal), + ('IMDS_ADDRESS_IPV4', name_literal), + ('IMDS_ADDRESS_IPV6', name_literal), + ('IMDS_KEY_HOSTNAME', name_literal), + ('IMDS_KEY_REGION', name_literal), + ('IMDS_KEY_ZONE', name_literal), + ('IMDS_KEY_IPV4_PUBLIC', name_literal), + ('IMDS_KEY_IPV6_PUBLIC', name_literal), + ('IMDS_KEY_SSH_KEY', name_literal), + ('IMDS_KEY_USERDATA', name_literal), + ('IMDS_KEY_USERDATA_BASE', name_literal), + ('IMDS_KEY_USERDATA_BASE64', name_literal), ) fixed_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE') for name, val in props] From 7668b2e048a73c60557493907fae3398b4db1578 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 4 Mar 2026 15:10:37 +0100 Subject: [PATCH 13/18] imds: add new systemd-imdsd.service that makes IMDS data accessible locally --- man/kernel-command-line.xml | 13 + man/rules/meson.build | 6 + man/systemd-imdsd@.service.xml | 226 ++ meson.build | 8 + meson_options.txt | 4 + src/imds/imds-util.c | 50 + src/imds/imds-util.h | 38 + src/imds/imdsd.c | 3024 +++++++++++++++++ src/imds/io.systemd.imds.policy | 30 + src/imds/meson.build | 18 + src/shared/meson.build | 1 + .../varlink-io.systemd.InstanceMetadata.c | 101 + .../varlink-io.systemd.InstanceMetadata.h | 6 + src/test/test-varlink-idl.c | 2 + sysusers.d/meson.build | 3 +- sysusers.d/systemd-imds.conf.in | 8 + units/meson.build | 12 + units/systemd-imds-early-network.service.in | 23 + units/systemd-imdsd.socket | 28 + units/systemd-imdsd@.service.in | 27 + 20 files changed, 3627 insertions(+), 1 deletion(-) create mode 100644 man/systemd-imdsd@.service.xml create mode 100644 src/imds/imds-util.c create mode 100644 src/imds/imds-util.h create mode 100644 src/imds/imdsd.c create mode 100644 src/imds/io.systemd.imds.policy create mode 100644 src/imds/meson.build create mode 100644 
src/shared/varlink-io.systemd.InstanceMetadata.c create mode 100644 src/shared/varlink-io.systemd.InstanceMetadata.h create mode 100644 sysusers.d/systemd-imds.conf.in create mode 100644 units/systemd-imds-early-network.service.in create mode 100644 units/systemd-imdsd.socket create mode 100644 units/systemd-imdsd@.service.in diff --git a/man/kernel-command-line.xml b/man/kernel-command-line.xml index 4da1796a97ca2..98673e0a51674 100644 --- a/man/kernel-command-line.xml +++ b/man/kernel-command-line.xml @@ -793,6 +793,19 @@ + + systemd.imds= + systemd.imds.*= + + Controls various Instance Metadata Service (IMDS) cloud aspects, see + systemd-imdsd@.service8 + and + systemd-imds-generator8 + for details. + + + + diff --git a/man/rules/meson.build b/man/rules/meson.build index 2502b3f1dd5c1..ccf41743c672f 100644 --- a/man/rules/meson.build +++ b/man/rules/meson.build @@ -1025,6 +1025,12 @@ manpages = [ ['systemd-hostnamed.service', '8', ['systemd-hostnamed'], 'ENABLE_HOSTNAMED'], ['systemd-hwdb', '8', [], 'ENABLE_HWDB'], ['systemd-id128', '1', [], ''], + ['systemd-imdsd@.service', + '8', + ['systemd-imdsd', + 'systemd-imdsd-early-network.service', + 'systemd-imdsd.socket'], + 'ENABLE_IMDS'], ['systemd-import-generator', '8', [], ''], ['systemd-importd.service', '8', ['systemd-importd'], 'ENABLE_IMPORTD'], ['systemd-inhibit', '1', [], ''], diff --git a/man/systemd-imdsd@.service.xml b/man/systemd-imdsd@.service.xml new file mode 100644 index 0000000000000..a92b0ef98ac92 --- /dev/null +++ b/man/systemd-imdsd@.service.xml @@ -0,0 +1,226 @@ + + + + + + + + systemd-imdsd@.service + systemd + + + + systemd-imdsd@.service + 8 + + + + systemd-imdsd@.service + systemd-imdsd + systemd-imdsd.socket + systemd-imdsd-early-network.service + Cloud IMDS (Instance Metadata Service) client + + + + systemd-imdsd@.service + systemd-imdsd.socket + systemd-imdsd-early-network.service + /usr/lib/systemd/systemd-imdsd + + + + Description + + systemd-imdsd@.service is a system service that 
provides local access to IMDS + (Instance Metadata Service; or equivalent) functionality, as provided by many public clouds. + + The + systemd-imds1 tool may + be used to query information from IMDS from shell environments. + + The service provides a Varlink IPC interface via + /run/systemd/io.systemd.InstanceMetadata to query IMDS fields. + + systemd-imdsd-early-network.service is a system service that generates a + systemd-networkd.service8 + compatible + systemd.network5 file + for configuring the early-boot network in order to be able to contact the IMDS endpoint. + + + + Kernel Command Line Options + + The IMDS endpoint is typically determined automatically via + hwdb7 records, but can + also be configured explicitly via the kernel command line, via the following options: + + + + systemd.imds.network= + + Takes one of off, locked, + unlocked. Controls whether and how to set up networking for IMDS endpoint + access. Unless set to off early boot networking is enabled, ensuring that the + IMDS endpoint can be reached. If set to locked (the default, if not configured + otherwise) direct access to the IMDS endpoint by regular unprivileged processes is disabled via a + "prohibit" route, so that any access must be done through + systemd-imdsd@.service or its associated tools. If set to + unlocked this "prohibit" route is not created, and regular unprivileged + processes can directly contact IMDS. + + + + + + + systemd.imds.vendor= + + A short string identifying the cloud vendor. + + Example: systemd.imds.vendor=foobarcloud + + + + + + + systemd.imds.token_url= + + If a bearer token must be acquired to talk to the IMDS service, this is the URL to acquire it + from. + + + + + + + systemd.imds.refresh_header_name= + + Takes a HTTP header field name (excluding the :) that declares the header + field for passing the TTL value (in seconds) to the HTTP server when acquiring a token. Only + applies if systemd.imds.token_url= is set too. 
+ + + + + + + systemd.imds.data_url= + + Takes the base URL to acquire the IMDS data from (the IMDS "endpoint"). All data fields are + acquired from below this URL. This URL should typically not end in /. + + The data URLs are concatenated from this base URL, the IMDS "key" and the suffix configured + via systemd.imds.data_url_suffix= below. Well-known IMDS "keys" can be + configured via the systemd.imds.key.*= options below. + + Example: systemd.imds.data_url=http://169.254.169.254/metadata + + + + + + + systemd.imds.data_url_suffix= + + If specified, this field is appended to the end of the data URL (after appending the IMDS + "key" to the data base URL), see above. + + Example: systemd.imds.data_url_suffix=?api-version=2025-04-07&format=text + + + + + + + systemd.imds.token_header_name= + + Takes a HTTP header field name (excluding the :) that declares the header + field to pass the bearer token acquired from the token URL (see above) in. Only applies if + systemd.imds.token_url= is set too. + + + + + + + systemd.imds.extra_header= + + Takes a full HTTP header expression (both field name and value, separated by a colon + :) to pass to the HTTP server when requesting data. May be used multiple times + to set multiple headers. + + Example: systemd.imds.extra_header=Metadata:true + + + + + + + systemd.imds.ipv4_address= + + Configures the IPv4 address the IMDS endpoint is contacted on. This should typically be the + IP address also configured via systemd.imds.data_url= (if IPv4 is used) and is + used to set up IP routing. + + Example: systemd.imds.ipv4_address=169.254.169.254 + + + + + + + systemd.imds.ipv6_address= + + Configures the IPv6 address the IMDS endpoint is contacted on. This should typically be the + IP address also configured via systemd.imds.data_url= (if IPv6 is used) and is + used to set up IP routing.
+ + + + + + + systemd.imds.key.hostname= + systemd.imds.key.region= + systemd.imds.key.zone= + systemd.imds.key.ipv4_public= + systemd.imds.key.ipv6_public= + systemd.imds.key.ssh_key= + systemd.imds.key.userdata= + systemd.imds.key.userdata_base= + systemd.imds.key.userdata_base64= + + Configures strings to concatenate to the data base URL (see above) to acquire data for + various "well-known" fields. These strings must begin with a /. They should + return the relevant data in plain text. + + A special case are the three "userdata" keys: the option + systemd.imds.key.userdata_base= should be used if the IMDS service knows a + concept of multiple userdata fields, and a field identifier thus still needs to be appended to the + userdata base URL. The option systemd.imds.key.userdata= should be used if only + a single userdata field is supported. The option systemd.imds.key.userdata_base64= + should be used in the same case, but only if the userdata field is encoded in Base64. + + Example: systemd.imds.key.hostname=/instance/compute/osProfile/computerName + + + + + + + + + See Also + + systemd1 + systemd-imds1 + systemd-imds-generator8 + systemd-networkd.service8 + + + + diff --git a/meson.build b/meson.build index 3672005d75b17..612aff951991d 100644 --- a/meson.build +++ b/meson.build @@ -895,6 +895,7 @@ foreach option : ['adm-gid', 'video-gid', 'wheel-gid', 'systemd-journal-gid', + 'systemd-imds-uid', 'systemd-network-uid', 'systemd-resolve-uid', 'systemd-timesync-uid'] @@ -1528,6 +1529,11 @@ conf.set('DEFAULT_DNSSEC_MODE', 'DNSSEC_' + default_dnssec.underscorify().to_upper()) conf.set_quoted('DEFAULT_DNSSEC_MODE_STR', default_dnssec) +have = get_option('imds').require( + conf.get('HAVE_LIBCURL') == 1, + error_message : 'curl required').allowed() +conf.set10('ENABLE_IMDS', have) + have = get_option('importd').require( conf.get('HAVE_LIBCURL') == 1 and conf.get('HAVE_OPENSSL') == 1 and @@ -2363,6 +2369,7 @@ subdir('src/home') subdir('src/hostname') subdir('src/hwdb')
subdir('src/id128') +subdir('src/imds') subdir('src/import') subdir('src/integritysetup') subdir('src/journal') @@ -3134,6 +3141,7 @@ foreach tuple : [ ['homed'], ['hostnamed'], ['hwdb'], + ['imds'], ['importd'], ['initrd'], ['kernel-install'], diff --git a/meson_options.txt b/meson_options.txt index c1af7ce237492..7835f716662d9 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -142,6 +142,8 @@ option('timedated', type : 'boolean', description : 'install the systemd-timedated daemon') option('timesyncd', type : 'boolean', description : 'install the systemd-timesyncd daemon') +option('imds', type : 'feature', + description : 'install the systemd-imds stack') option('journal-storage-default', type : 'combo', choices : ['persistent', 'auto', 'volatile', 'none'], description : 'default storage mode for journald (main namespace)') option('remote', type : 'feature', deprecated : { 'true' : 'enabled', 'false' : 'disabled' }, @@ -334,6 +336,8 @@ option('systemd-resolve-uid', type : 'integer', value : 0, description : 'soft-static allocation for the systemd-resolve user') option('systemd-timesync-uid', type : 'integer', value : 0, description : 'soft-static allocation for the systemd-timesync user') +option('systemd-imds-uid', type : 'integer', value : 0, + description : 'soft-static allocation for the systemd-imds user') option('dev-kvm-mode', type : 'string', value : '0666', description : '/dev/kvm access mode') diff --git a/src/imds/imds-util.c b/src/imds/imds-util.c new file mode 100644 index 0000000000000..3c67417e4ba5f --- /dev/null +++ b/src/imds/imds-util.c @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "imds-util.h" +#include "string-table.h" +#include "string-util.h" +#include "utf8.h" + +bool imds_key_is_valid(const char *key) { + /* Just some pretty superficial validation. 
*/ + + if (!key) + return false; + + if (!startswith(key, "/")) + return false; + + if (!ascii_is_valid(key)) + return false; + + if (string_has_cc(key, /* ok= */ NULL)) + return false; + + return true; +} + +static const char* const imds_well_known_table[_IMDS_WELL_KNOWN_MAX] = { + [IMDS_BASE] = "base", + [IMDS_HOSTNAME] = "hostname", + [IMDS_REGION] = "region", + [IMDS_ZONE] = "zone", + [IMDS_IPV4_PUBLIC] = "ipv4-public", + [IMDS_IPV6_PUBLIC] = "ipv6-public", + [IMDS_SSH_KEY] = "ssh-key", + [IMDS_USERDATA] = "userdata", + [IMDS_USERDATA_BASE] = "userdata-base", + [IMDS_USERDATA_BASE64] = "userdata-base64", +}; + +DEFINE_STRING_TABLE_LOOKUP(imds_well_known, ImdsWellKnown); + + +static const char* const imds_network_mode_table[_IMDS_NETWORK_MODE_MAX] = { + [IMDS_NETWORK_OFF] = "off", + [IMDS_NETWORK_LOCKED] = "locked", + [IMDS_NETWORK_UNLOCKED] = "unlocked", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(imds_network_mode, ImdsNetworkMode, IMDS_NETWORK_LOCKED); diff --git a/src/imds/imds-util.h b/src/imds/imds-util.h new file mode 100644 index 0000000000000..55ab79510f44e --- /dev/null +++ b/src/imds/imds-util.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "macro.h" +#include "string-table.h" /* IWYU pragma: keep */ + +typedef enum ImdsNetworkMode { + IMDS_NETWORK_OFF, /* No automatic pre-IMDS network configuration, something else has to do this. 
(Also: no "prohibit" route) */ + IMDS_NETWORK_LOCKED, /* "Prohibit" route for the IMDS server, unless you have SO_MARK set to 0x7FFF0815 */ + IMDS_NETWORK_UNLOCKED, /* No "prohibit" route for the IMDS server */ + _IMDS_NETWORK_MODE_MAX, + _IMDS_NETWORK_MODE_INVALID = -EINVAL, +} ImdsNetworkMode; + +/* Various well-known keys */ +typedef enum ImdsWellKnown { + IMDS_BASE, /* The same as "/", typically suffixed */ + IMDS_HOSTNAME, + IMDS_REGION, + IMDS_ZONE, + IMDS_IPV4_PUBLIC, + IMDS_IPV6_PUBLIC, + IMDS_SSH_KEY, + IMDS_USERDATA, + IMDS_USERDATA_BASE, /* typically suffixed */ + IMDS_USERDATA_BASE64, + _IMDS_WELL_KNOWN_MAX, + _IMDS_WELL_KNOWN_INVALID = -EINVAL, +} ImdsWellKnown; + +static inline bool imds_well_known_can_suffix(ImdsWellKnown wk) { + return IN_SET(wk, IMDS_BASE, IMDS_USERDATA_BASE); +} + +bool imds_key_is_valid(const char *key); + +DECLARE_STRING_TABLE_LOOKUP(imds_well_known, ImdsWellKnown); +DECLARE_STRING_TABLE_LOOKUP(imds_network_mode, ImdsNetworkMode); diff --git a/src/imds/imdsd.c b/src/imds/imdsd.c new file mode 100644 index 0000000000000..3caf8d9a00263 --- /dev/null +++ b/src/imds/imdsd.c @@ -0,0 +1,3024 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include + +#include "sd-bus.h" +#include "sd-device.h" +#include "sd-event.h" +#include "sd-json.h" +#include "sd-netlink.h" + +#include "alloc-util.h" +#include "build-path.h" +#include "build.h" +#include "bus-polkit.h" +#include "chase.h" +#include "copy.h" +#include "device-private.h" +#include "dns-rr.h" +#include "errno-util.h" +#include "escape.h" +#include "event-util.h" +#include "fd-util.h" +#include "format-ifname.h" +#include "hash-funcs.h" +#include "hashmap.h" +#include "imds-util.h" +#include "in-addr-util.h" +#include "io-util.h" +#include "iovec-util.h" +#include "json-util.h" +#include "log.h" +#include "main-func.h" +#include "netlink-util.h" +#include "parse-argument.h" +#include "parse-util.h" +#include "path-util.h" +#include 
"pretty-print.h" +#include "proc-cmdline.h" +#include "socket-util.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "tmpfile-util.h" +#include "utf8.h" +#include "varlink-io.systemd.InstanceMetadata.h" +#include "varlink-util.h" +#include "web-util.h" +#include "xattr-util.h" + +#include "../import/curl-util.h" + +/* This implements a client to the AWS' and Azure's "Instance Metadata Service", as well as GCP's "VM + * Metadata", i.e.: + * + * https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html + * https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service + * https://docs.cloud.google.com/compute/docs/metadata/overview + * https://docs.hetzner.cloud/reference/cloud#description/server-metadata + * + * Some notes: + * - IMDS service are heavily rate limited, and hence we want to centralize requests in one place and cache + * - In order to isolate IMDS access this expects that traffic to the IMDS address 169.254.169.254 is + * generally prohibited (via a prohibit route), but our service uses fwmark 0x7FFF0815, which (via source + * routing) can bypass this route. + * - To be robust to situations with multiple interfaces, if we have no hint which interface we shall use, + * we'll fork our own binary off, once for each interface, and communicate to it via Varlink. + * - This is supposed to run under its own UID, but with CAP_NET_ADMIN held (since we want to use + * SO_BINDTODEVICE + SO_MARK) + * - This daemon either be invoked manually from the command line, to do a single request, mostly for + * debugging purposes. Or it can be invoked as a Varlink service, which is the primary intended mode of + * operation. 
+ */ + +#define TOKEN_SIZE_MAX (4096U) +#define DATA_SIZE_MAX (4*1024*1024U) +#define FWMARK_DEFAULT UINT32_C(0x7FFF0815) +#define REFRESH_USEC_DEFAULT (15U * USEC_PER_MINUTE) +#define REFRESH_USEC_MIN (1U * USEC_PER_SEC) +#define DIRECT_OVERALL_TIMEOUT_SEC (40U * USEC_PER_SEC) /* a bit shorter than the default D-Bus/Varlink method call time-out */ +#define INDIRECT_OVERALL_TIMEOUT_SEC (DIRECT_OVERALL_TIMEOUT_SEC + 5U * USEC_PER_SEC) +#define RETRY_MIN_USEC (20U * USEC_PER_MSEC) +#define RETRY_MAX_USEC (3U * USEC_PER_SEC) +#define RETRY_MAX 10U + +/* Which endpoint configuration source has been used, in order of preference */ +typedef enum EndpointSource { + ENDPOINT_USER, /* Explicit command line options */ + ENDPOINT_ENVIRONMENT, /* Fallback environment variables */ + ENDPOINT_PROC_CMDLINE, /* Acquired via kernel command line */ + ENDPOINT_UDEV, /* Acquired via udev SMBIOS object */ + _ENDPOINT_SOURCE_MAX, + _ENDPOINT_SOURCE_INVALID = -EINVAL, +} EndpointSource; + +static char *arg_ifname = NULL; +static usec_t arg_refresh_usec = REFRESH_USEC_DEFAULT; +static uint32_t arg_fwmark = FWMARK_DEFAULT; +static bool arg_fwmark_set = true; +static ImdsWellKnown arg_well_known = _IMDS_WELL_KNOWN_INVALID; +static char* arg_key = NULL; +static bool arg_cache = true; +static bool arg_wait = false; +static bool arg_varlink = false; +static ImdsNetworkMode arg_network_mode = _IMDS_NETWORK_MODE_INVALID; +static bool arg_setup_network = false; + +/* The following configure the IMDS service endpoint details */ +static EndpointSource arg_endpoint_source = _ENDPOINT_SOURCE_INVALID; +static char *arg_vendor = NULL; +static char *arg_token_url = NULL; +static char *arg_refresh_header_name = NULL; +static char *arg_data_url = NULL; +static char *arg_data_url_suffix = NULL; +static char *arg_token_header_name = NULL; +static char **arg_extra_header = NULL; +static struct in_addr arg_address_ipv4 = {}; +static struct in6_addr arg_address_ipv6 = {}; +static char
*arg_well_known_key[_IMDS_WELL_KNOWN_MAX] = {}; + +static void imds_well_known_key_free(typeof(arg_well_known_key) *array) { + FOREACH_ARRAY(i, *array, _IMDS_WELL_KNOWN_MAX) + free(*i); +} + +STATIC_DESTRUCTOR_REGISTER(arg_ifname, freep); +STATIC_DESTRUCTOR_REGISTER(arg_key, freep); +STATIC_DESTRUCTOR_REGISTER(arg_vendor, freep); +STATIC_DESTRUCTOR_REGISTER(arg_token_url, freep); +STATIC_DESTRUCTOR_REGISTER(arg_refresh_header_name, freep); +STATIC_DESTRUCTOR_REGISTER(arg_data_url, freep); +STATIC_DESTRUCTOR_REGISTER(arg_data_url_suffix, freep); +STATIC_DESTRUCTOR_REGISTER(arg_token_header_name, freep); +STATIC_DESTRUCTOR_REGISTER(arg_extra_header, strv_freep); +STATIC_DESTRUCTOR_REGISTER(arg_well_known_key, imds_well_known_key_free); + +typedef struct Context Context; + +typedef struct ChildData { + /* If there are multiple network interfaces, and we are not sure where to look for things, we'll fork + * additional instances of ourselves, one for each interface. */ + Context *context; + int ifindex; + sd_varlink *link; /* outgoing varlink connection towards the child */ + bool retry; /* If true then new information came to light and we should restart the request */ +} ChildData; + +struct Context { + /* Fields shared between requests (these remain allocated between Varlink requests) */ + sd_event *event; + sd_netlink *rtnl; + bool rtnl_attached; + sd_bus *system_bus; /* for polkit */ + CurlGlue *glue; + struct iovec token; /* token in binary */ + char *token_string; /* token as string, once complete and validated */ + int cache_dir_fd; + Hashmap *polkit_registry; + + /* Request-specific fields (these get reset whenever we start processing a new Varlink call) */ + int ifindex; + usec_t timestamp; /* CLOCK_BOOTTIME */ + int cache_fd; + char *cache_filename, *cache_temporary_filename; + uint64_t data_size; + usec_t refresh_usec; + char *key; + ImdsWellKnown well_known; + bool write_stdout; + struct iovec write_iovec; + bool cache; + bool wait; + sd_varlink *current_link;
/* incoming varlink connection we are processing */ + uint32_t fwmark; + bool fwmark_set; + sd_event_source *overall_timeout_source; + + /* Mode 1 "direct": we go directly to the network (this is done if we know the interface index to + * use) */ + CURL *curl_token; + CURL *curl_data; + struct curl_slist *request_header_token, *request_header_data; + sd_event_source *retry_source; + unsigned n_retry; + usec_t retry_interval_usec; + + /* Mode 2 "indirect": we fork off a number of children which go to the network on behalf of us, + * because we have multiple network interfaces to deal with. */ + Hashmap *child_data; + sd_netlink_slot *address_change_slot; +}; + +#define CONTEXT_NULL \ + (Context) { \ + .cache_dir_fd = -EBADF, \ + .cache_fd = -EBADF, \ + .well_known = _IMDS_WELL_KNOWN_INVALID, \ + } + +/* Log helpers that cap at debug logging if we are operating on behalf of a Varlink client */ +#define context_log_errno(c, level, r, fmt, ...) \ + log_full_errno((c)->current_link ? LOG_DEBUG : (level), r, fmt, ##__VA_ARGS__) +#define context_log(c, level, fmt, ...) \ + log_full((c)->current_link ? LOG_DEBUG : (level), fmt, ##__VA_ARGS__) +#define context_log_oom(c) \ + (c)->current_link ?
log_oom_debug() : log_oom() + +static int context_acquire_data(Context *c); +static int context_acquire_token(Context *c); +static int context_spawn_child(Context *c, int ifindex, sd_varlink **ret); + +static ChildData* child_data_free(ChildData *cd) { + if (!cd) + return NULL; + + if (cd->context) + hashmap_remove(cd->context->child_data, INT_TO_PTR(cd->ifindex)); + + sd_varlink_close_unref(cd->link); + return mfree(cd); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(ChildData*, child_data_free); + +static void context_reset_token(Context *c) { + assert(c); + + iovec_done(&c->token); + c->token_string = mfree(c->token_string); +} + +static void context_flush_token(Context *c) { + + if (c->cache_dir_fd >= 0) + (void) unlinkat(c->cache_dir_fd, "token", /* flags= */ 0); + + context_reset_token(c); +} + +static void context_reset_for_refresh(Context *c) { + assert(c); + + /* Flush out all fields, up to the point we can restart the current request */ + + if (c->curl_token) { + curl_glue_remove_and_free(c->glue, c->curl_token); + c->curl_token = NULL; + } + + if (c->curl_data) { + curl_glue_remove_and_free(c->glue, c->curl_data); + c->curl_data = NULL; + } + + curl_slist_free_all(c->request_header_token); + c->request_header_token = NULL; + curl_slist_free_all(c->request_header_data); + c->request_header_data = NULL; + + c->cache_fd = safe_close(c->cache_fd); + c->cache_filename = mfree(c->cache_filename); + + if (c->cache_temporary_filename && c->cache_dir_fd >= 0) + (void) unlinkat(c->cache_dir_fd, c->cache_temporary_filename, /* flags= */ 0); + + c->cache_temporary_filename = mfree(c->cache_temporary_filename); + + iovec_done(&c->write_iovec); + + c->child_data = hashmap_free(c->child_data); + c->data_size = 0; + + sd_event_source_set_enabled(c->retry_source, SD_EVENT_OFF); +} + +static void context_reset_full(Context *c) { + assert(c); + + /* Flush out all fields relevant to the current request, comprehensively */ + + context_reset_for_refresh(c); + c->key = mfree(c->key); + 
c->well_known = _IMDS_WELL_KNOWN_INVALID; + c->current_link = sd_varlink_unref(c->current_link); + c->address_change_slot = sd_netlink_slot_unref(c->address_change_slot); + c->retry_source = sd_event_source_unref(c->retry_source); + c->overall_timeout_source = sd_event_source_unref(c->overall_timeout_source); + c->cache_dir_fd = safe_close(c->cache_dir_fd); +} + +static void context_new_request(Context *c) { + assert(c); + + /* Flush everything out from the previous request */ + context_reset_full(c); + + /* Reinitialize settings from defaults. */ + c->ifindex = 0; + c->timestamp = now(CLOCK_BOOTTIME); + c->refresh_usec = arg_refresh_usec; + c->cache = arg_cache; + c->wait = arg_wait; + c->fwmark = arg_fwmark; + c->fwmark_set = arg_fwmark_set; + c->n_retry = 0; +} + +static void context_done(Context *c) { + assert(c); + + /* Flush out everything specific to the current request first */ + context_reset_full(c); + context_reset_token(c); + + /* And then also flush out everything shared between requests */ + c->glue = curl_glue_unref(c->glue); + c->rtnl = sd_netlink_unref(c->rtnl); + c->event = sd_event_unref(c->event); + c->polkit_registry = hashmap_free(c->polkit_registry); + c->system_bus = sd_bus_flush_close_unref(c->system_bus); +} + +static void context_fail_full(Context *c, int r, const char *varlink_error) { + assert(c); + assert(r != 0); + + /* Called whenever the current retrieval fails asynchronously */ + + r = -abs(r); + + if (varlink_error) + context_log_errno(c, LOG_ERR, r, "Operation failed (%s).", varlink_error); + else + context_log_errno(c, LOG_ERR, r, "Operation failed (%m)."); + + /* If we are running in Varlink mode, return the error on the connection */ + if (c->current_link) { + if (varlink_error) + (void) sd_varlink_error(c->current_link, varlink_error, NULL); + else + (void) sd_varlink_error_errno(c->current_link, r); + } else + /* Otherwise terminate the whole process. 
*/ + sd_event_exit(c->event, r); + + context_reset_full(c); +} + +static void context_fail(Context *c, int r) { + context_fail_full(c, r, /* varlink_error= */ NULL); +} + +static void context_success(Context *c) { + int r; + + assert(c); + + /* Called whenever the current retrieval succeeds asynchronously */ + + context_log(c, LOG_DEBUG, "Operation succeeded."); + + if (c->current_link) { + r = sd_varlink_replybo( + c->current_link, + JSON_BUILD_PAIR_IOVEC_BASE64("data", &c->write_iovec), + SD_JSON_BUILD_PAIR_CONDITION(c->ifindex > 0, "interface", SD_JSON_BUILD_INTEGER(c->ifindex))); + if (r < 0) + context_log_errno(c, LOG_WARNING, r, "Failed to reply to Varlink call, ignoring: %m"); + } else + sd_event_exit(c->event, 0); + + context_reset_full(c); +} + +static int setsockopt_callback(void *userdata, curl_socket_t curlfd, curlsocktype purpose) { + Context *c = ASSERT_PTR(userdata); + int r; + + assert(curlfd >= 0); + + if (purpose != CURLSOCKTYPE_IPCXN) + return CURL_SOCKOPT_OK; + + r = socket_set_unicast_if(curlfd, AF_UNSPEC, c->ifindex); + if (r < 0) { + context_fail(c, context_log_errno(c, LOG_ERR, r, "Failed to bind HTTP socket to interface: %m")); + return CURL_SOCKOPT_ERROR; + } + + if (c->fwmark_set && + setsockopt(curlfd, SOL_SOCKET, SO_MARK, &c->fwmark, sizeof(c->fwmark)) < 0) { + context_fail(c, context_log_errno(c, LOG_ERR, errno, "Failed to set firewall mark on HTTP socket: %m")); + return CURL_SOCKOPT_ERROR; + } + + return CURL_SOCKOPT_OK; +} + +static int context_combine_key(Context *c, char **ret) { + assert(ret); + + /* Combines the well known key with the explicitly configured key */ + + char *s; + if (c->well_known < 0 || c->well_known == IMDS_BASE) { + if (!c->key) + return -ENODATA; + + s = strdup(c->key); + } else { + const char *wk = arg_well_known_key[c->well_known]; + if (!wk) + return -ENODATA; + if (c->key) + s = strjoin(wk, c->key); + else + s = strdup(wk); + } + if (!s) + return -ENOMEM; + + *ret = TAKE_PTR(s); + return 0; +} + +static 
const char *context_get_runtime_directory(Context *c) { + assert(c); + + /* Returns the discovered runtime directory, but only if caching is enabled. */ + + if (!c->cache) { + context_log(c, LOG_DEBUG, "Cache disabled."); + return NULL; + } + + const char *e = secure_getenv("RUNTIME_DIRECTORY"); + if (!e) { + context_log(c, LOG_DEBUG, "Not using cache as $RUNTIME_DIRECTORY is not set."); + return NULL; + } + + return e; +} + +static int context_save_ifname(Context *c) { + int r; + + assert(c); + + /* Saves the used interface name for later retrievals, so that we don't have to wildcard search on + * all interfaces anymore. */ + + if (c->ifindex <= 0) + return 0; + + const char *d = context_get_runtime_directory(c); + if (!d) + return 0; + + _cleanup_close_ int dirfd = open(d, O_PATH|O_CLOEXEC); + if (dirfd < 0) + return context_log_errno(c, LOG_ERR, errno, "Failed to open runtime directory: %m"); + + _cleanup_free_ char *ifname = NULL; + r = rtnl_get_ifname_full(&c->rtnl, c->ifindex, &ifname, /* ret_altnames= */ NULL); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to resolve interface index %i: %m", c->ifindex); + + r = write_string_file_at(dirfd, "ifname", ifname, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to write 'ifname' file: %m"); + + return 1; +} + +typedef enum CacheResult { + CACHE_RESULT_DISABLED, /* caching is disabled */ + CACHE_RESULT_HIT, /* found a positive entry */ + CACHE_RESULT_MISS, /* did not find an entry */ + CACHE_RESULT_KEY_NOT_FOUND, /* found a negative entry */ + CACHE_RESULT_NOT_CACHEABLE, /* not suitable for caching */ + _CACHE_RESULT_MAX, + _CACHE_RESULT_INVALID = -EINVAL, + _CACHE_RESULT_ERRNO_MAX = -ERRNO_MAX, +} CacheResult; + +static CacheResult context_process_cache(Context *c) { + int r; + + assert(c); + + assert(c->key || c->well_known >= 0); + assert(c->cache_fd < 0); + assert(c->cache_dir_fd < 0); + assert(!c->cache_filename); + 
assert(!c->cache_temporary_filename); + + /* Checks the local cache – if we have one – for the current request */ + + const char *e = context_get_runtime_directory(c); + if (!e) + return CACHE_RESULT_DISABLED; + + char ifname[IF_NAMESIZE]; + r = format_ifname(c->ifindex, ifname); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to format interface name: %m"); + + if (!filename_is_valid(ifname)) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EINVAL), "Network interface name '%s' is not a valid filename, refusing.", ifname); + + _cleanup_free_ char *cache_dir = path_join("cache", ifname); + if (!cache_dir) + return context_log_oom(c); + + r = chase(cache_dir, + e, + CHASE_PROHIBIT_SYMLINKS|CHASE_MKDIR_0755|CHASE_MUST_BE_DIRECTORY|CHASE_PREFIX_ROOT, + /* ret_path= */ NULL, + &c->cache_dir_fd); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to open cache directory: %m"); + + _cleanup_free_ char *k = NULL; + r = context_combine_key(c, &k); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to combine IMDS key: %m"); + + _cleanup_free_ char *escaped = xescape(k, "/."); + if (!escaped) + return context_log_oom(c); + + _cleanup_free_ char *fn = strjoin("key-", escaped); + if (!fn) + return context_log_oom(c); + + if (!filename_is_valid(fn)) { + context_log(c, LOG_WARNING, "Cache filename for '%s' is not valid, not caching.", fn); + return CACHE_RESULT_NOT_CACHEABLE; + } + + c->cache_filename = TAKE_PTR(fn); + + _cleanup_close_ int fd = openat(c->cache_dir_fd, c->cache_filename, O_RDONLY|O_CLOEXEC); + if (fd < 0) { + if (errno != ENOENT) + return context_log_errno(c, LOG_ERR, errno, "Failed to open cache file '%s': %m", c->cache_filename); + } else { + _cleanup_free_ char *d = NULL; + size_t l; + + context_log(c, LOG_DEBUG, "Found cached file '%s'.", c->cache_filename); + + r = fgetxattr_malloc(fd, "user.imds.timestamp", &d, &l); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to read timestamp from cache 
file: %m"); + if (l != sizeof(usec_t)) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EBADMSG), "Invalid timestamp xattr on cache file '%s': %m", c->cache_filename); + + usec_t *u = (usec_t*) d; + if (usec_add(*u, c->refresh_usec) > c->timestamp) { + _cleanup_free_ char *result = NULL; + r = fgetxattr_malloc(fd, "user.imds.result", &result, /* ret_size= */ NULL); + if (r == -ENODATA) { + /* No user.imds.result xattr means: hit! */ + if (c->write_stdout) { + r = copy_bytes(fd, STDOUT_FILENO, /* max_bytes= */ UINT64_MAX, /* copy_flags= */ 0); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to write cached data to standard output: %m"); + } else { + assert(!iovec_is_set(&c->write_iovec)); + r = read_full_file_at(fd, /* filename= */ NULL, (char**) &c->write_iovec.iov_base, &c->write_iovec.iov_len); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to read cache data: %m"); + } + + return CACHE_RESULT_HIT; + } + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to read 'user.imds.result' extended attribute: %m"); + + if (streq(result, "key-not-found")) + return CACHE_RESULT_KEY_NOT_FOUND; + + context_log(c, LOG_WARNING, "Unexpected 'user.imds.result' extended attribute value, ignoring: %s", result); + (void) unlinkat(c->cache_dir_fd, c->cache_filename, /* flags= */ 0); + } else { + context_log(c, LOG_DEBUG, "Cached data is older than '%s', ignoring.", FORMAT_TIMESPAN(c->refresh_usec, 0)); + (void) unlinkat(c->cache_dir_fd, c->cache_filename, /* flags= */ 0); + } + } + + /* So the above was not conclusive, let's then at least try to reuse the token */ + _cleanup_(sd_json_variant_unrefp) sd_json_variant *j = NULL; + r = sd_json_parse_file_at(/* f= */ NULL, c->cache_dir_fd, "token", /* flags= */ 0, &j, /* reterr_line= */ NULL, /* reterr_column= */ NULL); + if (r == -ENOENT) { + context_log_errno(c, LOG_DEBUG, r, "No cached token"); + return CACHE_RESULT_MISS; + } + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed 
to read cached token: %m"); + + struct { + const char *token; + uint64_t until; + } d = {}; + + static const sd_json_dispatch_field table[] = { + { "token", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(d, token), SD_JSON_MANDATORY }, + { "validUntilUSec", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, voffsetof(d, until), SD_JSON_MANDATORY }, + {} + }; + + r = sd_json_dispatch(j, table, SD_JSON_ALLOW_EXTENSIONS, &d); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to decode cached token data: %m"); + + if (d.until > c->timestamp) { + c->token_string = strdup(d.token); + if (!c->token_string) + return context_log_oom(c); + + context_log(c, LOG_INFO, "Reusing cached token."); + } else + context_log(c, LOG_DEBUG, "Cached token is stale, not using."); + + return CACHE_RESULT_MISS; +} + +static int on_retry(sd_event_source *s, uint64_t usec, void *userdata) { + Context *c = ASSERT_PTR(userdata); + int r; + + assert(s); + + /* Invoked whenever the retry timer event elapses and we need to retry again */ + + context_log(c, LOG_DEBUG, "Retrying..."); + + /* Maybe some other instance was successful in the meantime and already found something?
*/ + CacheResult cr = context_process_cache(c); + if (cr < 0) { + context_fail(c, cr); + return 0; + } + if (cr == CACHE_RESULT_HIT) { + context_success(c); + return 0; + } + if (cr == CACHE_RESULT_KEY_NOT_FOUND) { + context_fail(c, context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(ENOENT), "Cache reports: key not found")); + return 0; + } + + r = context_acquire_token(c); + if (r < 0) { + context_fail(c, r); + return 0; + } + + r = context_acquire_data(c); + if (r < 0) + context_fail(c, r); + + return 0; +} + +static int context_schedule_retry(Context *c) { + int r; + + assert(c); + + /* Schedules a new retry via a timer event */ + + if (c->n_retry >= RETRY_MAX) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EUCLEAN), "Retry limits reached, refusing."); + + if (c->n_retry == 0) + c->retry_interval_usec = RETRY_MIN_USEC; + else if (c->retry_interval_usec < RETRY_MAX_USEC / 2) + c->retry_interval_usec *= 2; + else + c->retry_interval_usec = RETRY_MAX_USEC; + + c->n_retry++; + context_log(c, LOG_DEBUG, "Retry attempt #%u in %s...", c->n_retry, FORMAT_TIMESPAN(c->retry_interval_usec, USEC_PER_MSEC)); + + context_reset_for_refresh(c); + + r = event_reset_time_relative( + c->event, + &c->retry_source, + CLOCK_BOOTTIME, + c->retry_interval_usec, + /* accuracy= */ 0, + on_retry, + c, + /* priority= */ 0, + "imds-retry", + /* force_reset= */ true); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to reset retry timer event source: %m"); + + return 0; +} + +static int context_acquire_http_status(Context *c, CURL *curl, long *ret_status) { + assert(c); + assert(ret_status); + + /* Acquires the HTTP status code, and does some generic validation that applies to both the token and + * the data transfer. 
+ * + * Error handling as per: + * https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html#instance-metadata-returns + * https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service#rate-limiting + */ + + long status; + CURLcode code = curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status); + if (code != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to retrieve response code: %s", curl_easy_strerror(code)); + + context_log(c, LOG_DEBUG, "Got HTTP error code %li.", status); + + if (status == 403) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EADDRNOTAVAIL), "IMDS is not available"); + + /* Automatically retry on some transient errors from HTTP */ + if (IN_SET(status, + 503, /* AWS + GCP */ + 429 /* Azure + GCP */)) + return context_schedule_retry(c); + + if (status < 200 || status > 600) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "HTTP request finished with unexpected code %li.", status); + + *ret_status = status; + return 0; +} + +static int context_validate_token_http_status(Context *c, long status) { + assert(c); + + /* Specific HTTP status checks for the token transfer */ + + if (status >= 300) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "HTTP request for token finished with unexpected code %li.", status); + + return 0; +} + +static int context_validate_data_http_status(Context *c, long status) { + int r; + + assert(c); + + /* Specific HTTP status checks for the data transfer */ + + if (status == 401 && arg_token_url) { + /* We need a new token */ + context_log(c, LOG_DEBUG, "Server requested a new token..."); + + /* Count token requests as a retry */ + if (c->n_retry >= RETRY_MAX) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EUCLEAN), "Retry limits reached, refusing."); + c->n_retry++; + + context_flush_token(c); + context_reset_for_refresh(c); + + r = context_acquire_token(c); + if (r < 0) + return r; + + return
context_acquire_data(c); + } + + if (status == 404) { + _cleanup_free_ char *key = NULL; + r = context_combine_key(c, &key); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to combine IMDS key: %m"); + + /* Do negative caching for not found */ + if (c->cache_fd >= 0) { + if (fsetxattr(c->cache_fd, "user.imds.result", "key-not-found", STRLEN("key-not-found"), /* flags= */ 0) < 0) + context_log_errno(c, LOG_DEBUG, errno, "Failed to set result xattr on '%s', ignoring: %m", c->cache_filename); + else { + r = link_tmpfile_at(c->cache_fd, c->cache_dir_fd, c->cache_temporary_filename, c->cache_filename, LINK_TMPFILE_REPLACE); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to move cache file into place: %m"); + + c->cache_fd = safe_close(c->cache_fd); + c->cache_temporary_filename = mfree(c->cache_temporary_filename); + + context_log(c, LOG_DEBUG, "Cached negative entry for '%s'.", key); + } + } + + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(ENOENT), "Key '%s' not found.", key); + } + + if (status >= 300) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "HTTP request for data finished with unexpected code %li.", status); + + return 0; +} + +static int context_validate_token(Context *c) { + int r; + + assert(c); + + /* Validates that the downloaded token data actually forms a valid string */ + + _cleanup_free_ char *t = NULL; + r = make_cstring( + c->token.iov_base, + c->token.iov_len, + MAKE_CSTRING_REFUSE_TRAILING_NUL, + &t); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to convert token into C string: %m"); + + if (string_has_cc(t, NULL) || + !utf8_is_valid(t)) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EINVAL), "Token not valid UTF-8 or contains control characters, refusing."); + + free_and_replace(c->token_string, t); + return 0; +} + +static int context_save_token(Context *c) { + int r; + + assert(c); + assert(c->token_string); + + /* Save the acquired token in the cache, so that we can 
reuse it later */ + + if (c->cache_dir_fd < 0) + return 0; + + /* Only store half the valid time, to make sure we have ample time to use it */ + usec_t until = usec_add(c->timestamp, c->refresh_usec/2); + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *j = NULL; + r = sd_json_buildo( + &j, + SD_JSON_BUILD_PAIR_STRING("token", c->token_string), + SD_JSON_BUILD_PAIR_UNSIGNED("validUntilUSec", until)); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to build token JSON: %m"); + + _cleanup_free_ char *t = NULL; + r = sd_json_variant_format(j, SD_JSON_FORMAT_NEWLINE, &t); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to format JSON: %m"); + + r = write_string_file_at(c->cache_dir_fd, "token", t, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC|WRITE_STRING_FILE_MODE_0600); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to write token cache file: %m"); + + return 0; +} + +static int context_save_data(Context *c) { + int r; + + assert(c); + + /* Finalize saving of the acquired data in the cache */ + + if (c->cache_fd < 0) + return 0; + + r = link_tmpfile_at(c->cache_fd, c->cache_dir_fd, c->cache_temporary_filename, c->cache_filename, LINK_TMPFILE_REPLACE); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to move cache file into place: %m"); + + c->cache_fd = safe_close(c->cache_fd); + c->cache_temporary_filename = mfree(c->cache_temporary_filename); + + context_log(c, LOG_DEBUG, "Cached data."); + return 0; +} + +static void curl_glue_on_finished(CurlGlue *g, CURL *curl, CURLcode result) { + int r; + + assert(g); + + /* Called whenever libcurl did its thing and reports a download being complete or having failed */ + + Context *c = NULL; + if (curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char**) &c) != CURLE_OK) + return; + + switch (result) { + + case CURLE_OK: /* yay! */ + break; + + case CURLE_WRITE_ERROR: + /* CURLE_WRITE_ERROR we'll see if the data callbacks failed already. 
But if they did, they + * already cancelled the whole operation, and we don't have to do anything here */ + return; + + case CURLE_COULDNT_CONNECT: + case CURLE_OPERATION_TIMEDOUT: + case CURLE_GOT_NOTHING: + case CURLE_SEND_ERROR: + case CURLE_RECV_ERROR: + context_log(c, LOG_INFO, "Connection error from curl: %s", curl_easy_strerror(result)); + + /* Automatically retry on some transient errors from curl itself */ + r = context_schedule_retry(c); + if (r < 0) + return context_fail(c, r); + + return; + + default: + return context_fail_full( + c, + context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EHOSTDOWN), "Transfer failed: %s", curl_easy_strerror(result)), + "io.systemd.InstanceMetadata.CommunicationFailure"); + } + + /* If we managed to get a HTTP reply, this is good enough, let's pin the interface now for later calls */ + (void) context_save_ifname(c); + + long status; + r = context_acquire_http_status(c, curl, &status); + if (r == -EADDRNOTAVAIL) + return context_fail_full(c, r, "io.systemd.InstanceMetadata.NotAvailable"); + if (r < 0) + return context_fail(c, r); + + if (curl == c->curl_token) { + r = context_validate_token_http_status(c, status); + if (r < 0) + return context_fail(c, r); + + r = context_validate_token(c); + if (r < 0) + return context_fail(c, r); + + context_log(c, LOG_DEBUG, "Token successfully acquired: %s", c->token_string); + + r = context_save_token(c); + if (r < 0) + return context_fail(c, r); + + r = context_acquire_data(c); + if (r < 0) + return context_fail(c, r); + + } else if (curl == c->curl_data) { + + r = context_validate_data_http_status(c, status); + if (r == -ENOENT) + return context_fail_full(c, r, "io.systemd.InstanceMetadata.KeyNotFound"); + if (r < 0) + return context_fail(c, r); + + context_log(c, LOG_DEBUG, "Data download successful."); + + r = context_save_data(c); + if (r < 0) + return context_fail(c, r); + + context_success(c); + } else + assert_not_reached(); +} + +static int context_acquire_glue(Context *c) { + int 
r; + + assert(c); + + /* Allocates a curl object if we don't have one yet */ + + if (c->glue) + return 0; + + r = curl_glue_new(&c->glue, c->event); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to allocate curl glue: %m"); + + c->glue->on_finished = curl_glue_on_finished; + c->glue->userdata = c; + + return 0; +} + +static size_t data_write_callback(void *contents, size_t size, size_t nmemb, void *userdata) { + Context *c = ASSERT_PTR(userdata); + size_t sz = size * nmemb; + int r; + + /* Called whenever we receive new payload from the server */ + assert(contents); + + /* If we managed to get a HTTP reply, this is good enough, let's pin the interface now for later calls */ + (void) context_save_ifname(c); + + /* Before we use acquired data, let's verify the HTTP status */ + long status; + r = context_acquire_http_status(c, c->curl_data, &status); + if (r == -EADDRNOTAVAIL) { + context_fail_full(c, r, "io.systemd.InstanceMetadata.NotAvailable"); + return 0; + } + if (r < 0) { + context_fail(c, r); + return 0; + } + + r = context_validate_data_http_status(c, status); + if (r == -ENOENT) { + context_fail_full(c, r, "io.systemd.InstanceMetadata.KeyNotFound"); + return 0; + } + if (r < 0) { + context_fail(c, r); + return 0; + } + + if (sz > UINT64_MAX - c->data_size || + c->data_size + sz > DATA_SIZE_MAX) { + context_fail(c, context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(E2BIG), "Data too large, refusing.")); + return 0; + } + + c->data_size += sz; + + if (c->write_stdout) + fwrite(contents, sz, nmemb, stdout); + else if (!iovec_append(&c->write_iovec, &IOVEC_MAKE(contents, sz))) { + context_fail(c, context_log_oom(c)); + return 0; + } + + if (c->cache_fd >= 0) { + r = loop_write(c->cache_fd, contents, sz); + if (r < 0) { + context_fail(c, context_log_errno(c, LOG_ERR, r, "Failed to write data to cache: %m")); + return 0; + } + } + + return sz; +} + +static int context_acquire_data(Context *c) { + int r; + + assert(c); + assert(c->key || c->well_known 
>= 0); + + /* Called to initiate getting the actual IMDS key payload */ + + if (arg_token_url && !c->token_string) + return 0; /* If we need a token first, let's not do anything */ + + _cleanup_free_ char *k = NULL; + r = context_combine_key(c, &k); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to combine key: %m"); + + context_log(c, LOG_INFO, "Requesting data for key '%s'.", k); + + if (c->cache_dir_fd >= 0 && + c->cache_filename && + c->cache_fd < 0) { + c->cache_fd = open_tmpfile_linkable_at(c->cache_dir_fd, c->cache_filename, O_WRONLY|O_CLOEXEC, &c->cache_temporary_filename); + if (c->cache_fd < 0) + return context_log_errno(c, LOG_ERR, c->cache_fd, "Failed to create cache file '%s': %m", c->cache_filename); + + if (fchmod(c->cache_fd, 0600) < 0) + return context_log_errno(c, LOG_ERR, errno, "Failed to adjust cache node access mode: %m"); + + if (fsetxattr(c->cache_fd, "user.imds.timestamp", &c->timestamp, sizeof(c->timestamp), /* flags= */ 0) < 0) + return context_log_errno(c, LOG_ERR, errno, "Failed to set timestamp xattr on '%s': %m", c->cache_filename); + } + + r = context_acquire_glue(c); + if (r < 0) + return r; + + _cleanup_free_ char *url = strjoin(arg_data_url, k, arg_data_url_suffix); + if (!url) + return context_log_oom(c); + + r = curl_glue_make(&c->curl_data, url, c); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to create CURL request for data: %m"); + + if (c->token_string) { + _cleanup_free_ char *token_header = strjoin(arg_token_header_name, ": ", c->token_string); + if (!token_header) + return context_log_oom(c); + + struct curl_slist *n = curl_slist_append(c->request_header_data, token_header); + if (!n) + return context_log_oom(c); + + c->request_header_data = n; + } + + STRV_FOREACH(i, arg_extra_header) { + struct curl_slist *n = curl_slist_append(c->request_header_data, *i); + if (!n) + return context_log_oom(c); + + c->request_header_data = n; + } + + if (c->request_header_data) + if 
(curl_easy_setopt(c->curl_data, CURLOPT_HTTPHEADER, c->request_header_data) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set HTTP request header."); + + if (curl_easy_setopt(c->curl_data, CURLOPT_WRITEFUNCTION, data_write_callback) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL write function."); + + if (curl_easy_setopt(c->curl_data, CURLOPT_WRITEDATA, c) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL write function userdata."); + + if (curl_easy_setopt(c->curl_data, CURLOPT_SOCKOPTFUNCTION, setsockopt_callback) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt funcion."); + + if (curl_easy_setopt(c->curl_data, CURLOPT_SOCKOPTDATA, c) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt funcion userdata."); + + if (curl_easy_setopt(c->curl_data, CURLOPT_LOCALPORT, 1L) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt local port"); + + if (curl_easy_setopt(c->curl_data, CURLOPT_LOCALPORTRANGE, 1023L) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt local port range"); + + r = curl_glue_add(c->glue, c->curl_data); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to add CURL request to glue: %m"); + + return 0; +} + +static size_t token_write_callback(void *contents, size_t size, size_t nmemb, void *userdata) { + Context *c = ASSERT_PTR(userdata); + size_t sz = size * nmemb; + int r; + + /* Called whenever we get data from the token download */ + assert(contents); + + /* If we managed to get a HTTP reply, this is good enough, let's pin the interface now for later calls */ + (void) context_save_ifname(c); + + /* Before we use acquired data, let's verify the HTTP status */ + long status; + r = 
context_acquire_http_status(c, c->curl_token, &status); + if (r == -EADDRNOTAVAIL) { + context_fail_full(c, r, "io.systemd.InstanceMetadata.NotAvailable"); + return 0; + } + if (r < 0) { + context_fail(c, r); + return 0; + } + + r = context_validate_token_http_status(c, status); + if (r < 0) { + context_fail(c, r); + return 0; + } + + if (sz > SIZE_MAX - c->token.iov_len || + c->token.iov_len + sz > TOKEN_SIZE_MAX) { + context_fail(c, context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(E2BIG), "IMDS token too large.")); + return 0; + } + + if (!iovec_append(&c->token, &IOVEC_MAKE(contents, sz))) { + context_fail(c, context_log_oom(c)); + return 0; + } + + return sz; +} + +static int context_acquire_token(Context *c) { + int r; + + assert(c); + + /* Called to initiate getting the token if we need one. */ + + if (c->token_string || !arg_token_url) + return 0; + + context_log(c, LOG_INFO, "Requesting token."); + + r = context_acquire_glue(c); + if (r < 0) + return r; + + r = curl_glue_make(&c->curl_token, arg_token_url, c); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to create CURL request for API token: %m"); + + if (arg_refresh_header_name) { + _cleanup_free_ char *ttl_header = NULL; + if (asprintf(&ttl_header, + "%s: %" PRIu64, + arg_refresh_header_name, + DIV_ROUND_UP(c->refresh_usec, USEC_PER_SEC)) < 0) + return context_log_oom(c); + + c->request_header_token = curl_slist_new(ttl_header, NULL); + if (!c->request_header_token) + return context_log_oom(c); + } + + if (curl_easy_setopt(c->curl_token, CURLOPT_HTTPHEADER, c->request_header_token) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set HTTP request header."); + + if (curl_easy_setopt(c->curl_token, CURLOPT_CUSTOMREQUEST, "PUT") != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set HTTP request method."); + + if (curl_easy_setopt(c->curl_token, CURLOPT_WRITEFUNCTION, token_write_callback) != CURLE_OK) + return 
context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL write function."); + + if (curl_easy_setopt(c->curl_token, CURLOPT_WRITEDATA, c) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL write function userdata."); + + if (curl_easy_setopt(c->curl_token, CURLOPT_SOCKOPTFUNCTION, setsockopt_callback) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt funcion."); + + if (curl_easy_setopt(c->curl_token, CURLOPT_SOCKOPTDATA, c) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt funcion userdata."); + + r = curl_glue_add(c->glue, c->curl_token); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to add CURL request to glue: %m"); + + return 0; +} + +static int vl_on_reply(sd_varlink *link, sd_json_variant *m, const char *error_id, sd_varlink_reply_flags_t flags, void *userdata) { + ChildData *cd = ASSERT_PTR(userdata); + Context *c = ASSERT_PTR(cd->context); + int r; + + assert(link); + assert(m); + + /* When we spawned off worker instances of ourselves (one for each local network interface), then + * we'll get a response from them via a Varlink reply. Handle it. 
*/ + + if (error_id) { + r = sd_varlink_error_to_errno(error_id, m); + if (r == -EBADR) + context_log_errno(c, LOG_WARNING, r, "Varlink error from interface %i: %s", cd->ifindex, error_id); + else + context_log_errno(c, LOG_WARNING, r, "Varlink error from interface %i: %m", cd->ifindex); + + /* Propagate these errors immediately */ + if (streq(error_id, "io.systemd.InstanceMetadata.KeyNotFound")) { + context_fail_full(c, -ENOENT, error_id); + return 0; + } + if (streq(error_id, "io.systemd.InstanceMetadata.WellKnownKeyUnset")) { + context_fail_full(c, -ENODATA, error_id); + return 0; + } + if (streq(error_id, "io.systemd.InstanceMetadata.NotAvailable")) { + context_fail_full(c, -EADDRNOTAVAIL, error_id); + return 0; + } + + /* The other errors we consider transient. Let's see if we shall immediately restart the request. */ + if (cd->retry) { + context_log(c, LOG_DEBUG, "Child for network interface %i was scheduled for immediate retry, executing now.", cd->ifindex); + cd->link = sd_varlink_close_unref(cd->link); + cd->retry = false; + + r = context_spawn_child(c, cd->ifindex, &cd->link); + if (r < 0) { + context_fail(c, r); + return 0; + } + + sd_varlink_set_userdata(cd->link, cd); + return 0; + } + + /* We shall no retry immediately. In that case, we give up on the child, and propagate the + * error if it was the last child, otherwise we continue until the last one dies too. 
*/ + cd = child_data_free(cd); + + if (hashmap_isempty(c->child_data) && !c->wait) { + /* This is the last child, propagate the error */ + context_log(c, LOG_DEBUG, "Last child failed, propagating error."); + + if (streq(error_id, "io.systemd.InstanceMetadata.CommunicationFailure")) + context_fail_full(c, -EHOSTDOWN, error_id); + else if (streq(error_id, "io.systemd.InstanceMetadata.Timeout")) + context_fail_full(c, -ETIMEDOUT, error_id); + else + context_fail_full(c, r, error_id); + + return 0; + } + + context_log(c, LOG_DEBUG, "Pending children remaining, continuing to wait."); + return 0; + } + + assert(!iovec_is_set(&c->write_iovec)); + + static const sd_json_dispatch_field table[] = { + { "data", SD_JSON_VARIANT_STRING, json_dispatch_unbase64_iovec, offsetof(Context, write_iovec), SD_JSON_MANDATORY }, + { "ifindex", _SD_JSON_VARIANT_TYPE_INVALID, json_dispatch_ifindex, offsetof(Context, ifindex), 0 }, + {} + }; + + r = sd_json_dispatch(m, table, SD_JSON_ALLOW_EXTENSIONS, c); + if (r < 0) { + context_fail(c, context_log_errno(c, LOG_ERR, r, "Failed to decode reply data: %m")); + return 0; + } + + if (c->write_stdout) { + r = loop_write(STDOUT_FILENO, c->write_iovec.iov_base, c->write_iovec.iov_len); + if (r < 0) { + context_fail(c, context_log_errno(c, LOG_ERR, r, "Failed to output data: %m")); + return 0; + } + } + + context_success(c); + return 0; +} + +static int context_load_ifname(Context *c) { + int r; + + assert(c); + + /* Tries to load the previously used interface name, so that we don't have to wildcard search on all + * interfaces. 
*/ + + const char *e = context_get_runtime_directory(c); + if (!e) + return 0; + + _cleanup_close_ int dirfd = open(e, O_PATH|O_CLOEXEC); + if (dirfd < 0) + return context_log_errno(c, LOG_ERR, errno, "Failed to open runtime directory: %m"); + + _cleanup_free_ char *ifname = NULL; + r = read_one_line_file_at(dirfd, "ifname", &ifname); + if (r == -ENOENT) + return 0; + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to load 'ifname' file from runtime directory: %m"); + + if (!ifname_valid(ifname)) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EINVAL), "Loaded interface name not valid, refusing: %s", ifname); + + c->ifindex = rtnl_resolve_interface(&c->rtnl, ifname); + if (c->ifindex < 0) { + (void) unlinkat(dirfd, "ifname", /* flags= */ 0); + context_log_errno(c, LOG_ERR, c->ifindex, "Failed to resolve saved interface name '%s', assuming interface disappeared, ignoring: %m", ifname); + return c->ifindex; + } + + log_debug("Using previously pinned interface '%s' (ifindex: %i).", ifname, c->ifindex); + return 1; +} + +DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR( + child_data_hash_ops, + void, + trivial_hash_func, + trivial_compare_func, + ChildData, + child_data_free); + +static int context_spawn_child(Context *c, int ifindex, sd_varlink **ret) { + int r; + + assert(c); + assert(ifindex > 0); + assert(ret); + + /* If we don't know yet on which network interface the IMDS server can be found, let's spawn separate + * instances of ourselves, one for earch interface, and collect the results. We communicate with + * each one via Varlink, the same way as clients talk to us. 
*/ + + context_log(c, LOG_DEBUG, "Spawning child for interface '%i'.", ifindex); + + _cleanup_free_ char *p = NULL; + _cleanup_close_ int fd = pin_callout_binary(LIBEXECDIR "/systemd-imdsd", &p); + if (fd < 0) + return context_log_errno(c, LOG_ERR, fd, "Failed to find imdsd binary: %m"); + + _cleanup_strv_free_ char **argv = strv_new( + p, + "--vendor", strempty(arg_vendor), + "--token-url", strempty(arg_token_url), + "--refresh-header-name", strempty(arg_refresh_header_name), + "--data-url", strempty(arg_data_url), + "--data-url-suffix", strempty(arg_data_url_suffix), + "--token-header-name", strempty(arg_token_header_name), + "--address-ipv4", in4_addr_is_null(&arg_address_ipv4) ? "" : IN4_ADDR_TO_STRING(&arg_address_ipv4), + "--address-ipv6", in6_addr_is_null(&arg_address_ipv6) ? "" : IN6_ADDR_TO_STRING(&arg_address_ipv6)); + if (!argv) + return log_oom(); + + STRV_FOREACH(i, arg_extra_header) + if (strv_extend_strv(&argv, STRV_MAKE("--extra-header", *i), /* filter_duplicates= */ false) < 0) + return log_oom(); + + for (ImdsWellKnown wk = 0; wk < _IMDS_WELL_KNOWN_MAX; wk++) { + if (!arg_well_known_key[wk]) + continue; + + if (strv_extendf(&argv, "--well-known-key=%s:%s", imds_well_known_to_string(wk), arg_well_known_key[wk]) < 0) + return log_oom(); + } + + if (DEBUG_LOGGING) { + _cleanup_free_ char *cmdline = quote_command_line(argv, SHELL_ESCAPE_EMPTY); + log_debug("About to fork off: %s", strnull(cmdline)); + } + + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + r = sd_varlink_connect_exec(&vl, p, argv); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to fork off imdsd binary for interface %i: %m", ifindex); + + r = sd_varlink_attach_event( + vl, + c->event, + SD_EVENT_PRIORITY_NORMAL); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to attach Varlink connection to event loop: %m"); + + r = sd_varlink_bind_reply(vl, vl_on_reply); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to bind reply callback: %m"); + 
+ r = sd_varlink_invokebo( + vl, + "io.systemd.InstanceMetadata.Get", + JSON_BUILD_PAIR_STRING_NON_EMPTY("key", c->key), + SD_JSON_BUILD_PAIR_CONDITION(c->well_known >= 0, "wellKnown", JSON_BUILD_STRING_UNDERSCORIFY(imds_well_known_to_string(c->well_known))), + SD_JSON_BUILD_PAIR_INTEGER("interface", ifindex), + SD_JSON_BUILD_PAIR_INTEGER("refreshUSec", c->refresh_usec), + SD_JSON_BUILD_PAIR_BOOLEAN("cache", c->cache), + SD_JSON_BUILD_PAIR_CONDITION(c->fwmark_set, "firewallMark", SD_JSON_BUILD_UNSIGNED(c->fwmark)), + SD_JSON_BUILD_PAIR_CONDITION(!c->fwmark_set, "firewallMark", SD_JSON_BUILD_NULL)); /* explicitly turn of fwmark, if not set */ + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to issue Get() command to Varlink child: %m"); + + *ret = TAKE_PTR(vl); + return 0; +} + +static int context_spawn_new_child(Context *c, int ifindex) { + int r; + + assert(c); + + /* Spawn a child, and keep track of it */ + + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + r = context_spawn_child(c, ifindex, &vl); + if (r < 0) + return r; + + _cleanup_(child_data_freep) ChildData *cd = new(ChildData, 1); + if (!cd) + return context_log_oom(c); + + *cd = (ChildData) { + .ifindex = ifindex, + .link = sd_varlink_ref(vl), + }; + + sd_varlink_set_userdata(vl, cd); + + if (hashmap_ensure_put(&c->child_data, &child_data_hash_ops, INT_TO_PTR(ifindex), cd) < 0) + return context_log_oom(c); + + cd->context = c; + TAKE_PTR(cd); + + return 0; +} + +static int on_address_change(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) { + Context *c = ASSERT_PTR(userdata); + int ifindex, r; + + assert(rtnl); + assert(m); + + /* Called whenever an address appears on the network stack. 
We use that as hint that it is worth to + * invoke a child processing that interface (either for the first time, or again) */ + + r = sd_rtnl_message_addr_get_ifindex(m, &ifindex); + if (r < 0) { + context_log_errno(c, LOG_WARNING, r, "rtnl: could not get ifindex from message, ignoring: %m"); + return 0; + } + if (ifindex <= 0) { + context_log(c, LOG_WARNING, "rtnl: received address message with invalid ifindex %d, ignoring.", ifindex); + return 0; + } + + if (ifindex == LOOPBACK_IFINDEX) { + context_log(c, LOG_DEBUG, "Ignoring loopback device."); + return 0; + } + + if (!c->key && c->well_known < 0) + return 0; + + ChildData *existing = hashmap_get(c->child_data, INT_TO_PTR(ifindex)); + if (existing) { + /* We already have an attempt ongoing for this one? Remember there's a reason now to retry + * this, because new connectivity appeared. */ + context_log(c, LOG_DEBUG, "Child for network interface %i already spawned off, scheduling for immediate retry.", ifindex); + existing->retry = true; + return 0; + } + + return context_spawn_new_child(c, ifindex); +} + +static int context_acquire_rtnl_with_match(Context *c) { + int r; + + assert(c); + assert(c->event); + + /* Acquire a netlink connection and a match if we don't have one yet */ + + if (!c->rtnl) { + r = sd_netlink_open(&c->rtnl); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to connect to netlink: %m"); + } + + if (!c->rtnl_attached) { + /* The netlink connection might have created previously via rtnl_resolve_interface() – which + * however didn't attach it to our event loop. Do so now. 
*/ + r = sd_netlink_attach_event(c->rtnl, c->event, SD_EVENT_PRIORITY_NORMAL); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to attach netlink socket to event loop: %m"); + + c->rtnl_attached = true; + } + + if (!c->address_change_slot) { + r = sd_netlink_add_match(c->rtnl, &c->address_change_slot, RTM_NEWADDR, on_address_change, /* destroy_callback= */ NULL, c, "newaddr"); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to subscribe to RTM_NEWADDR events: %m"); + } + + return 0; +} + +static int context_spawn_children(Context *c) { + int r; + + assert(c); + assert(c->key || c->well_known >= 0); + + /* If we we don't know yet on which interface to query, let's see which interfaces there are and + * spawwn ourselves, once on each */ + + r = context_acquire_rtnl_with_match(c); + if (r < 0) + return r; + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL; + r = sd_rtnl_message_new_addr(c->rtnl, &req, RTM_GETADDR, /* ifindex= */ 0, AF_INET); + if (r < 0) + return r; + + r = sd_netlink_message_set_request_dump(req, true); + if (r < 0) + return r; + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *reply = NULL; + r = sd_netlink_call(c->rtnl, req, 0, &reply); + if (r < 0) + return r; + + for (sd_netlink_message *i = reply; i; i = sd_netlink_message_next(i)) { + r = on_address_change(c->rtnl, i, c); + if (r < 0) + return r; + } + + return 0; +} + +static int imds_configured(int level) { + /* Checks if we have enough endpoint information to operate */ + + if (arg_endpoint_source < 0) + return log_full_errno(level, SYNTHETIC_ERRNO(EOPNOTSUPP), "No IMDS endpoint information provided or detected, cannot operate."); + + if (!arg_data_url) + return log_full_errno(level, SYNTHETIC_ERRNO(EOPNOTSUPP), "No data base URL provided."); + + if (!!arg_token_url != !!arg_token_header_name) + return log_full_errno(level, SYNTHETIC_ERRNO(EOPNOTSUPP), "Incomplete token parameters configured for endpoint."); + + return 0; +} + 
+static int setup_network(void) { + int r; + + /* Generates a .network file based on the IMDS endpoint information we have */ + + if (arg_network_mode == IMDS_NETWORK_OFF) { + log_debug("IMDS networking turned off, not generating .network file."); + return 0; + } + + _cleanup_close_ int network_dir_fd = -EBADF; + r = chase("/run/systemd/network", + /* root= */ NULL, + CHASE_MKDIR_0755|CHASE_MUST_BE_DIRECTORY, + /* ret_path= */ NULL, + &network_dir_fd); + if (r < 0) + return log_error_errno(r, "Failed to open .network directory: %m"); + + _cleanup_free_ char *t = NULL; + _cleanup_fclose_ FILE *f = NULL; + r = fopen_tmpfile_linkable_at(network_dir_fd, "85-imds-early.network", O_WRONLY|O_CLOEXEC, &t, &f); + if (r < 0) + return log_error_errno(r, "Failed to create 85-imds-early.network file: %m"); + + CLEANUP_TMPFILE_AT(network_dir_fd, t); + + fputs("# Generated by systemd-imdsd, do not edit.\n" + "#\n" + "# This configures Ethernet devices on cloud hosts that support IMDS, given that\n" + "# before doing IMDS we need to activate the network.\n", f); + + if (arg_network_mode != IMDS_NETWORK_UNLOCKED && + (in4_addr_is_set(&arg_address_ipv4) || in6_addr_is_set(&arg_address_ipv6))) + fputs("#\n" + "# Note: this will create a 'prohibit' route to the IMDS endpoint,\n" + "# blocking direct access to IMDS. 
Direct IMDS access is then only\n" + "# available to traffic marked with fwmark 0x7FFF0815, which can be\n" + "# set via SO_MARK and various other methods, which require\n" + "# privileges.\n", + f); + + fputs("\n" + "[Match]\n" + "Type=ether\n" + "Kind=!*\n" + "\n" + "[Network]\n" + "DHCP=yes\n" + "LinkLocalAddressing=ipv6\n" + "\n" + "[DHCP]\n" + "UseTimezone=yes\n" + "UseHostname=yes\n" + "UseMTU=yes\n", f); + + if (in4_addr_is_set(&arg_address_ipv4)) + fputs("\n" + "[Link]\n" + "RequiredFamilyForOnline=ipv4\n", f); + else if (in6_addr_is_set(&arg_address_ipv6)) + fputs("\n" + "[Link]\n" + "RequiredFamilyForOnline=ipv6\n", f); + + if (arg_network_mode != IMDS_NETWORK_UNLOCKED) { + if (in4_addr_is_set(&arg_address_ipv4)) + fprintf(f, + "\n" + "# Prohibit regular access to IMDS (IPv4)\n" + "[Route]\n" + "Destination=%s\n" + "Type=prohibit\n", + IN4_ADDR_TO_STRING(&arg_address_ipv4)); + + if (in6_addr_is_set(&arg_address_ipv6)) + fprintf(f, + "\n" + "# Prohibit regular access to IMDS (IPv6)\n" + "[Route]\n" + "Destination=%s\n" + "Type=prohibit\n", + IN6_ADDR_TO_STRING(&arg_address_ipv6)); + } + + if (in4_addr_is_set(&arg_address_ipv4)) + fprintf(f, + "\n" + "# Always allow IMDS access via a special routing table (IPv4)\n" + "[Route]\n" + "Destination=%s\n" + "Scope=link\n" + "Table=0x7FFF0815\n" + "\n" + "# Sockets marked with firewall mark 0x7FFF0815 get access to the IMDS route by\n" + "# using the 0x7FFF0815 table populated above.\n" + "[RoutingPolicyRule]\n" + "Family=ipv4\n" + "FirewallMark=0x7FFF0815\n" + "Table=0x7FFF0815\n", + IN4_ADDR_TO_STRING(&arg_address_ipv4)); + + if (in6_addr_is_set(&arg_address_ipv6)) + fprintf(f, + "\n" + "# Always allow IMDS access via a special routing table (IPv6)\n" + "[Route]\n" + "Destination=%s\n" + "Table=0x7FFF0815\n" + "\n" + "# Sockets marked with firewall mark 0x7FFF0815 get access to the IMDS route by\n" + "# using the 0x7FFF0815 table populated above.\n" + "[RoutingPolicyRule]\n" + "Family=ipv6\n" + 
"FirewallMark=0x7FFF0815\n" + "Table=0x7FFF0815\n", + IN6_ADDR_TO_STRING(&arg_address_ipv6)); + + if (fchmod(fileno(f), 0644) < 0) + return log_error_errno(errno, "Failed set access mode for 85-imds-early.network: %m"); + + r = flink_tmpfile_at(f, network_dir_fd, t, "85-imds-early.network", LINK_TMPFILE_REPLACE); + if (r < 0) + return log_error_errno(r, "Failed to move 85-imds-early.network into place: %m"); + + log_info("Created 85-imds-early.network."); + return 0; +} + +static int add_address_to_json_array(sd_json_variant **array, int family, const union in_addr_union *addr) { + int r; + + assert(array); + assert(IN_SET(family, AF_INET, AF_INET6)); + assert(addr); + + /* Appends the specified IP address, turned into A/AAAA RRs to the specified JSON array */ + + if (in_addr_is_null(family, addr)) + return 0; + + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; + if (dns_resource_record_new_address(&rr, family, addr, "_imds") < 0) + return log_oom(); + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *rrj = NULL; + r = dns_resource_record_to_json(rr, &rrj); + if (r < 0) + return log_error_errno(r, "Failed to convert A RR to JSON: %m"); + + r = sd_json_variant_append_array(array, rrj); + if (r < 0) + return log_error_errno(r, "Failed to append A RR to JSON array: %m"); + + log_debug("Writing IMDS RR for: %s", dns_resource_record_to_string(rr)); + return 1; +} + +static int setup_address_rrs(void) { + int r; + + /* Creates local RRs (honoured by systemd-resolved) for the IMDS endpoint addresses. 
*/ + + if (arg_network_mode == IMDS_NETWORK_OFF) { + log_debug("IMDS networking turned off, not generating .rr file."); + return 0; + } + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *aj = NULL; + + union in_addr_union u = { .in = arg_address_ipv4 }; + r = add_address_to_json_array(&aj, AF_INET, &u); + if (r < 0) + return r; + + u = (union in_addr_union) { .in6 = arg_address_ipv6 }; + r = add_address_to_json_array(&aj, AF_INET6, &u); + if (r < 0) + return r; + + if (sd_json_variant_elements(aj) == 0) { + log_debug("No IMDS endpoint addresses known, not writing out RRs."); + return 0; + } + + _cleanup_free_ char *text = NULL; + r = sd_json_variant_format(aj, SD_JSON_FORMAT_NEWLINE, &text); + if (r < 0) + return log_error_errno(r, "Failed to format JSON text: %m"); + + r = write_string_file("/run/systemd/resolve/static.d/imds-endpoint.rr", text, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC|WRITE_STRING_FILE_MKDIR_0755); + if (r < 0) + return log_error_errno(r, "Failed to write IMDS RR data: %m"); + + log_info("Created imds-endpoint.rr."); + return 0; +} + +static int on_overall_timeout(sd_event_source *s, uint64_t usec, void *userdata) { + Context *c = ASSERT_PTR(userdata); + + assert(s); + + /* Invoked whenever the overall time-out event elapses, and we just give up */ + + context_fail_full(c, context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(ETIMEDOUT), "Overall timeout reached."), "io.systemd.InstanceMetadata.Timeout"); + return 0; +} + +static int context_start_overall_timeout(Context *c, usec_t usec) { + int r; + + assert(c); + + r = event_reset_time_relative( + c->event, + &c->overall_timeout_source, + CLOCK_BOOTTIME, + usec, + /* accuracy= */ 0, + on_overall_timeout, + c, + /* priority= */ 0, + "imds-overall-timeout", + /* force_reset= */ true); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to reset retry timer event source: %m"); + + return 0; +} + +static int cmdline_run(void) { + int r; + + /* Process the request when invoked via 
the command line (i.e. not via Varlink) */ + + r = imds_configured(LOG_ERR); + if (r < 0) + return r; + + if (arg_setup_network) { + r = setup_network(); + return RET_GATHER(r, setup_address_rrs()); + } + + assert(arg_key || arg_well_known >= 0); + + _cleanup_(context_done) Context c = CONTEXT_NULL; + c.write_stdout = true; + context_new_request(&c); + + c.well_known = arg_well_known; + if (arg_key) { + c.key = strdup(arg_key); + if (!c.key) + return context_log_oom(&c); + } + + if (arg_ifname) { + c.ifindex = rtnl_resolve_interface_or_warn(&c.rtnl, arg_ifname); + if (c.ifindex < 0) + return c.ifindex; + } else { + /* Try to load the previously cached interface */ + r = context_load_ifname(&c); + if (r < 0) + return r; + } + + r = sd_event_default(&c.event); + if (r < 0) + return context_log_errno(&c, LOG_ERR, r, "Failed to allocate event loop: %m"); + + if (c.ifindex > 0) { + CacheResult cr = context_process_cache(&c); + if (cr < 0) + return cr; + if (cr == CACHE_RESULT_HIT) + return 0; + if (cr == CACHE_RESULT_KEY_NOT_FOUND) + return context_log_errno(&c, LOG_ERR, SYNTHETIC_ERRNO(ENOENT), "Cache reports: key not found"); + + r = context_acquire_token(&c); + if (r < 0) + return r; + + r = context_acquire_data(&c); + if (r < 0) + return r; + + r = context_start_overall_timeout(&c, DIRECT_OVERALL_TIMEOUT_SEC); + if (r < 0) + return r; + } else { + /* Couldn't find anything, let's spawn off parallel clients for all interfaces */ + r = context_spawn_children(&c); + if (r < 0) + return r; + + r = context_start_overall_timeout(&c, INDIRECT_OVERALL_TIMEOUT_SEC); + if (r < 0) + return r; + } + + r = sd_event_loop(c.event); + if (r < 0) + return r; + + return 0; +} + +static int context_acquire_system_bus(Context *c) { + int r; + + assert(c); + + /* Connect to the bus if we haven't yet */ + + if (c->system_bus) + return 0; + + r = sd_bus_default_system(&c->system_bus); + if (r < 0) + return r; + + r = sd_bus_attach_event(c->system_bus, c->event, SD_EVENT_PRIORITY_NORMAL); 
+ if (r < 0) + return r; + + return 0; +} + +static JSON_DISPATCH_ENUM_DEFINE(dispatch_well_known, ImdsWellKnown, imds_well_known_from_string); + +static int dispatch_fwmark(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata) { + Context *c = ASSERT_PTR(userdata); + int r; + + /* Parses a firewall mark passed via Varlink/JSON. Note that any 32bit fwmark is valid, hence we keep + * track if it is set or not in a separate boolean. */ + + if (sd_json_variant_is_null(variant)) { + c->fwmark_set = false; + return 0; + } + + r = sd_json_dispatch_uint32(name, variant, flags, &c->fwmark); + if (r < 0) + return r; + + c->fwmark_set = true; + return 0; +} + +static int vl_method_get(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { + Context *c = ASSERT_PTR(userdata); + int r; + + assert(link); + + if (!c->event) + c->event = sd_event_ref(sd_varlink_get_event(link)); + + context_new_request(c); + + static const sd_json_dispatch_field dispatch_table[] = { + { "wellKnown", SD_JSON_VARIANT_STRING, dispatch_well_known, offsetof(Context, well_known), 0 }, + { "key", SD_JSON_VARIANT_STRING, sd_json_dispatch_string, offsetof(Context, key), 0 }, + { "interface", _SD_JSON_VARIANT_TYPE_INVALID, json_dispatch_ifindex, offsetof(Context, ifindex), 0 }, + { "refreshUSec", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(Context, refresh_usec), 0 }, + { "firewallMark", _SD_JSON_VARIANT_TYPE_INVALID, dispatch_fwmark, 0, 0 }, + { "cache", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(Context, cache), 0 }, + { "wait", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(Context, wait), 0 }, + VARLINK_DISPATCH_POLKIT_FIELD, + {} + }; + + r = sd_varlink_dispatch(link, parameters, dispatch_table, c); + if (r != 0) + return r; + + if (c->key) { + if (!imds_key_is_valid(c->key)) + return sd_varlink_error_invalid_parameter_name(link, "key"); + + if (c->well_known < 0) + 
c->well_known = IMDS_BASE; + else if (!imds_well_known_can_suffix(c->well_known)) + return sd_varlink_error_invalid_parameter_name(link, "key"); + } else if (c->well_known < 0) + return sd_varlink_error_invalid_parameter_name(link, "key"); + + if (c->refresh_usec < REFRESH_USEC_MIN) + c->refresh_usec = REFRESH_USEC_MIN; + + uid_t peer_uid; + r = sd_varlink_get_peer_uid(link, &peer_uid); + if (r < 0) + return r; + + if (peer_uid != 0 && peer_uid != getuid()) { + /* Ask polkit if client is not privileged */ + + r = context_acquire_system_bus(c); + if (r < 0) + return r; + + const char* l[5]; + size_t k = 0; + if (c->well_known >= 0) { + l[k++] = "wellKnown"; + l[k++] = imds_well_known_to_string(c->well_known); + } + if (c->key) { + l[k++] = "key"; + l[k++] = c->key; + } + l[k] = NULL; + + r = varlink_verify_polkit_async( + link, + c->system_bus, + "io.freedesktop.imds.get", + l, + &c->polkit_registry); + if (r <= 0) + return r; + } + + if (imds_configured(LOG_DEBUG) < 0) + return sd_varlink_error(link, "io.systemd.InstanceMetadata.NotSupported", NULL); + + /* Up to this point we only validated/parsed stuff. Now we actually execute stuff, hence from now on + * we need to go through context_fail() when failing (context_success() if we succeed early), to + * release resources we might have allocated. 
*/ + assert(!c->current_link); + c->current_link = sd_varlink_ref(link); + + _cleanup_free_ char *k = NULL; /* initialize here, to avoid that this remains uninitialized due to the gotos below */ + + if (c->ifindex <= 0) { + /* Try to load the previously used network interface */ + r = context_load_ifname(c); + if (r < 0) + goto fail; + } + + r = context_combine_key(c, &k); + if (r == -ENODATA) { + context_fail_full(c, r, "io.systemd.InstanceMetadata.WellKnownKeyUnset"); + return r; + } + if (r < 0) + goto fail; + + context_log(c, LOG_DEBUG, "Will request '%s' now.", k); + + if (c->ifindex > 0) { + CacheResult cr = context_process_cache(c); + if (cr < 0) { + r = cr; + goto fail; + } + if (cr == CACHE_RESULT_HIT) { + context_success(c); + return 0; + } + if (cr == CACHE_RESULT_KEY_NOT_FOUND) { + r = context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(ENOENT), "Cache reports: key not found"); + context_fail_full(c, r, "io.systemd.InstanceMetadata.KeyNotFound"); + return r; + } + + r = context_acquire_token(c); + if (r < 0) + goto fail; + + r = context_acquire_data(c); + if (r < 0) + goto fail; + + r = context_start_overall_timeout(c, DIRECT_OVERALL_TIMEOUT_SEC); + if (r < 0) + goto fail; + } else { + r = context_spawn_children(c); + if (r < 0) + goto fail; + + r = context_start_overall_timeout(c, INDIRECT_OVERALL_TIMEOUT_SEC); + if (r < 0) + goto fail; + } + + context_log(c, LOG_DEBUG, "Incoming method call is now pending"); + return 1; + +fail: + context_fail(c, r); + return r; +} + +static int vl_method_get_vendor_info(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { + Context *c = ASSERT_PTR(userdata); + int r; + + assert(link); + + r = sd_varlink_dispatch(link, parameters, /* dispatch_table= */ NULL, c); + if (r != 0) + return r; + + /* NB! 
We allow access to this call without Polkit */ + + if (imds_configured(LOG_DEBUG) < 0) + return sd_varlink_error(link, "io.systemd.InstanceMetadata.NotSupported", NULL); + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *wkj = NULL; + for (ImdsWellKnown i = 0; i < _IMDS_WELL_KNOWN_MAX; i++) { + if (!arg_well_known_key[i]) + continue; + + r = sd_json_variant_set_field_string(&wkj, imds_well_known_to_string(i), arg_well_known_key[i]); + if (r < 0) + return r; + } + + return sd_varlink_replybo( + link, + JSON_BUILD_PAIR_STRING_NON_EMPTY("vendor", arg_vendor), + JSON_BUILD_PAIR_STRING_NON_EMPTY("tokenUrl", arg_token_url), + JSON_BUILD_PAIR_STRING_NON_EMPTY("refreshHeaderName", arg_refresh_header_name), + JSON_BUILD_PAIR_STRING_NON_EMPTY("dataUrl", arg_data_url), + JSON_BUILD_PAIR_STRING_NON_EMPTY("dataUrlSuffix", arg_data_url_suffix), + JSON_BUILD_PAIR_STRING_NON_EMPTY("tokenHeaderName", arg_token_header_name), + JSON_BUILD_PAIR_STRV_NON_EMPTY("extraHeader", arg_extra_header), + JSON_BUILD_PAIR_IN4_ADDR_NON_NULL("addressIPv4", &arg_address_ipv4), + JSON_BUILD_PAIR_IN6_ADDR_NON_NULL("addressIPv6", &arg_address_ipv6), + JSON_BUILD_PAIR_VARIANT_NON_EMPTY("wellKnown", wkj)); +} + +static int vl_server(void) { + _cleanup_(context_done) Context c = CONTEXT_NULL; + int r; + + /* Invocation as Varlink service */ + + _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *varlink_server = NULL; + r = varlink_server_new( + &varlink_server, + SD_VARLINK_SERVER_INHERIT_USERDATA, + &c); + if (r < 0) + return log_error_errno(r, "Failed to allocate Varlink server: %m"); + + r = sd_varlink_server_add_interface(varlink_server, &vl_interface_io_systemd_InstanceMetadata); + if (r < 0) + return log_error_errno(r, "Failed to add Varlink interface: %m"); + + r = sd_varlink_server_bind_method_many( + varlink_server, + "io.systemd.InstanceMetadata.Get", vl_method_get, + "io.systemd.InstanceMetadata.GetVendorInfo", vl_method_get_vendor_info); + if (r < 0) + return log_error_errno(r, "Failed 
to bind Varlink methods: %m"); + + r = sd_varlink_server_loop_auto(varlink_server); + if (r < 0) + return log_error_errno(r, "Failed to run Varlink event loop: %m"); + + return 0; +} + +static int help(void) { + _cleanup_free_ char *link = NULL; + int r; + + r = terminal_urlify_man("systemd-imdsd", "1", &link); + if (r < 0) + return log_oom(); + + printf("%1$s [OPTIONS...] [KEY]\n" + "\n%5$sLow-level IMDS data acquisition.%6$s\n" + "\n%3$sOptions:%4$s\n" + " -h --help Show this help\n" + " --version Show package version\n" + " -i --interface=INTERFACE\n" + " Use the specified interface\n" + " --refresh=SEC Set token refresh time\n" + " --fwmark=INTEGER Choose firewall mark for HTTP traffic\n" + " --cache=no Disable cache use\n" + " -w --wait=yes Wait for connectivity\n" + " -K --well-known= Select well-known key\n" + " --setup-network Generate .network and .rr files\n" + "\n%3$sManual Endpoint Configuration:%4$s\n" + " --vendor=VENDOR Specify IMDS vendor literally\n" + " --token-url=URL URL for acquiring token\n" + " --refresh-header-name=NAME\n" + " Header name for passing refresh time\n" + " --data-url=URL Base URL for acquiring data\n" + " --data-url-suffix=STRING\n" + " Suffix to append to data URL\n" + " --token-header-name=NAME\n" + " Header name for passing token string\n" + " --extra-header='NAME: VALUE'\n" + " Additional header to pass to data transfer\n" + " --address-ipv4=ADDRESS\n" + " --address-ipv6=ADDRESS\n" + " Configure the IPv4 and IPv6 address of the IMDS server\n" + " --well-known-key=NAME:KEY\n" + " Configure the location of well-known keys\n" + "\nSee the %2$s for details.\n", + program_invocation_short_name, + link, + ansi_underline(), + ansi_normal(), + ansi_highlight(), + ansi_normal()); + + return 0; +} + +static bool http_header_name_valid(const char *a) { + return a && ascii_is_valid(a) && !string_has_cc(a, /* ok= */ NULL) && !strchr(a, ':'); +} + +static bool http_header_valid(const char *a) { + return a && ascii_is_valid(a) && 
!string_has_cc(a, /* ok= */ NULL) && strchr(a, ':'); +} + +static int parse_argv(int argc, char *argv[]) { + + enum { + ARG_VERSION = 0x100, + ARG_REFRESH, + ARG_FWMARK, + ARG_CACHE, + ARG_WAIT, + ARG_VENDOR, + ARG_TOKEN_URL, + ARG_REFRESH_HEADER_NAME, + ARG_DATA_URL, + ARG_DATA_URL_SUFFIX, + ARG_TOKEN_HEADER_NAME, + ARG_EXTRA_HEADER, + ARG_ADDRESS_IPV4, + ARG_ADDRESS_IPV6, + ARG_WELL_KNOWN_KEY, + ARG_SETUP_NETWORK, + }; + + static const struct option options[] = { + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, ARG_VERSION }, + { "interface", required_argument, NULL, 'i' }, + { "refresh", required_argument, NULL, ARG_REFRESH }, + { "fwmark", required_argument, NULL, ARG_FWMARK }, + { "cache", required_argument, NULL, ARG_CACHE }, + { "wait", required_argument, NULL, ARG_WAIT }, + { "well-known", required_argument, NULL, 'K' }, + { "setup-network", no_argument, NULL, ARG_SETUP_NETWORK }, + + /* The following all configure endpoint information explicitly */ + { "vendor", required_argument, NULL, ARG_VENDOR }, + { "token-url", required_argument, NULL, ARG_TOKEN_URL }, + { "refresh-header-name", required_argument, NULL, ARG_REFRESH_HEADER_NAME }, + { "data-url", required_argument, NULL, ARG_DATA_URL }, + { "data-url-suffix", required_argument, NULL, ARG_DATA_URL_SUFFIX }, + { "token-header-name", required_argument, NULL, ARG_TOKEN_HEADER_NAME }, + { "extra-header", required_argument, NULL, ARG_EXTRA_HEADER }, + { "address-ipv4", required_argument, NULL, ARG_ADDRESS_IPV4 }, + { "address-ipv6", required_argument, NULL, ARG_ADDRESS_IPV6 }, + { "well-known-key", required_argument, NULL, ARG_WELL_KNOWN_KEY }, + {} + }; + + int c, r; + + assert(argc >= 0); + assert(argv); + + while ((c = getopt_long(argc, argv, "hi:wK:", options, NULL)) >= 0) { + + switch (c) { + + case 'h': + return help(); + + case ARG_VERSION: + return version(); + + case 'i': + if (isempty(optarg)) { + arg_ifname = mfree(arg_ifname); + break; + } + + if 
(!ifname_valid_full(optarg, IFNAME_VALID_ALTERNATIVE|IFNAME_VALID_NUMERIC)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Interface name '%s' is not valid.", optarg); + + r = free_and_strdup_warn(&arg_ifname, optarg); + if (r < 0) + return r; + + break; + + case ARG_REFRESH: { + if (isempty(optarg)) { + arg_refresh_usec = REFRESH_USEC_DEFAULT; + break; + } + + usec_t t; + r = parse_sec(optarg, &t); + if (r < 0) + return log_error_errno(r, "Failed to parse refresh timeout: %s", optarg); + if (t < REFRESH_USEC_MIN) { + log_warning("Increasing specified refresh time to %s, lower values are not supported.", FORMAT_TIMESPAN(REFRESH_USEC_MIN, 0)); + arg_refresh_usec = REFRESH_USEC_MIN; + } else + arg_refresh_usec = t; + break; + } + + case ARG_FWMARK: + if (isempty(optarg)) { + arg_fwmark_set = false; + break; + } + + if (streq(optarg, "default")) { + arg_fwmark = FWMARK_DEFAULT; + arg_fwmark_set = true; + break; + } + + r = safe_atou32(optarg, &arg_fwmark); + if (r < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse --fwmark= parameter: %s", optarg); + + arg_fwmark_set = true; + break; + + case ARG_CACHE: + r = parse_boolean_argument("--cache", optarg, &arg_cache); + if (r < 0) + return r; + + break; + + case ARG_WAIT: + r = parse_boolean_argument("--wait", optarg, &arg_wait); + if (r < 0) + return r; + + break; + + case 'w': + arg_wait = true; + break; + + case 'K': { + if (isempty(optarg)) { + arg_well_known = _IMDS_WELL_KNOWN_INVALID; + break; + } + + ImdsWellKnown wk = imds_well_known_from_string(optarg); + if (wk < 0) + return log_error_errno(wk, "Failed to parse --well-known= parameter: %m"); + + arg_well_known = wk; + break; + } + + case ARG_VENDOR: + if (isempty(optarg)) { + arg_vendor = mfree(arg_vendor); + break; + } + + r = free_and_strdup_warn(&arg_vendor, optarg); + if (r < 0) + return r; + break; + + case ARG_TOKEN_URL: + if (isempty(optarg)) { + arg_token_url = mfree(arg_token_url); + break; + } + + if (!http_url_is_valid(optarg)) 
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid URL: %s", optarg); + + r = free_and_strdup_warn(&arg_token_url, optarg); + if (r < 0) + return r; + + break; + + case ARG_REFRESH_HEADER_NAME: + if (isempty(optarg)) { + arg_refresh_header_name = mfree(arg_refresh_header_name); + break; + } + + if (!http_header_name_valid(optarg)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid HTTP header name: %s", optarg); + + r = free_and_strdup_warn(&arg_refresh_header_name, optarg); + if (r < 0) + return r; + + break; + + case ARG_DATA_URL: + if (isempty(optarg)) { + arg_data_url = mfree(arg_data_url); + break; + } + + if (!http_url_is_valid(optarg)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid URL: %s", optarg); + + r = free_and_strdup_warn(&arg_data_url, optarg); + if (r < 0) + return r; + + break; + + case ARG_DATA_URL_SUFFIX: + if (isempty(optarg)) { + arg_data_url_suffix = mfree(arg_data_url_suffix); + break; + } + + if (!ascii_is_valid(optarg) || string_has_cc(optarg, /* ok= */ NULL)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid URL suffix: %s", optarg); + + r = free_and_strdup_warn(&arg_data_url_suffix, optarg); + if (r < 0) + return r; + + break; + + case ARG_TOKEN_HEADER_NAME: + if (isempty(optarg)) { + arg_token_header_name = mfree(arg_token_header_name); + break; + } + + if (!http_header_name_valid(optarg)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid HTTP header name: %s", optarg); + + r = free_and_strdup_warn(&arg_token_header_name, optarg); + if (r < 0) + return r; + + break; + + case ARG_EXTRA_HEADER: + if (isempty(optarg)) { + arg_extra_header = strv_free(arg_extra_header); + break; + } + + if (!http_header_valid(optarg)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid HTTP header: %s", optarg); + + if (strv_extend(&arg_extra_header, optarg) < 0) + return log_oom(); + + break; + + case ARG_ADDRESS_IPV4: { + if (isempty(optarg)) { + arg_address_ipv4 = (struct in_addr) {}; + break; + } + + 
union in_addr_union u; + r = in_addr_from_string(AF_INET, optarg, &u); + if (r < 0) + return log_error_errno(r, "Failed to parse IPv4 address: %s", optarg); + arg_address_ipv4 = u.in; + break; + } + + case ARG_ADDRESS_IPV6: { + if (isempty(optarg)) { + arg_address_ipv6 = (struct in6_addr) {}; + break; + } + + union in_addr_union u; + r = in_addr_from_string(AF_INET6, optarg, &u); + if (r < 0) + return log_error_errno(r, "Failed to parse IPv6 address: %s", optarg); + arg_address_ipv6 = u.in6; + break; + } + + case ARG_WELL_KNOWN_KEY: { + if (isempty(optarg)) { + for (ImdsWellKnown wk = 0; wk < _IMDS_WELL_KNOWN_MAX; wk++) + arg_well_known_key[wk] = mfree(arg_well_known_key[wk]); + break; + } + + const char *e = strchr(optarg, ':'); + if (!e) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--well-known-key= expects colon separate name and key pairs."); + + _cleanup_free_ char *name = strndup(optarg, e - optarg); + if (!name) + return log_oom(); + + ImdsWellKnown wk = imds_well_known_from_string(name); + if (wk < 0) + return log_error_errno(wk, "Failed to parse --well-known-key= argument: %m"); + + e++; + if (!imds_key_is_valid(e)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Well known key '%s' is not valid.", e); + + r = free_and_strdup_warn(arg_well_known_key + wk, e); + if (r < 0) + return r; + + break; + } + + case ARG_SETUP_NETWORK: + arg_setup_network = true; + break; + + case '?': + return -EINVAL; + + default: + assert_not_reached(); + } + } + + if (arg_vendor || arg_token_url || arg_refresh_header_name || arg_data_url || arg_data_url_suffix || arg_token_header_name || arg_extra_header) + arg_endpoint_source = ENDPOINT_USER; + + r = sd_varlink_invocation(SD_VARLINK_ALLOW_ACCEPT); + if (r < 0) + return log_error_errno(r, "Failed to check if invoked in Varlink mode: %m"); + + arg_varlink = r; + + if (!arg_varlink) { + + if (arg_setup_network) { + if (optind != argc) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No argument expected."); + } else { + 
if (arg_well_known < 0) { + /* if no --well-known= parameter was specified we require an argument */ + if (argc != optind+1) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "A single argument expected."); + } else if (argc > optind+1) /* if not, then the additional parameter is optional */ + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "At most a single argument expected."); + + if (argc > optind) { + if (!imds_key_is_valid(argv[optind])) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified IMDS key is not valid, refusing: %s", argv[optind]); + + r = free_and_strdup_warn(&arg_key, argv[optind]); + if (r < 0) + return r; + } + } + } + + return 1; +} + +static int device_get_property_ip_address( + sd_device *d, + const char *name, + int family, + union in_addr_union *ret) { + + int r; + + /* Parses an IP address stored in the udev database for a device */ + + assert(d); + assert(name); + assert(IN_SET(family, AF_INET, AF_INET6)); + + const char *v = NULL; + r = sd_device_get_property_value(d, name, &v); + if (r < 0) + return r; + + return in_addr_from_string(family, v, ret); +} + +static const char * const imds_well_known_udev_table[_IMDS_WELL_KNOWN_MAX] = { + [IMDS_HOSTNAME] = "IMDS_KEY_HOSTNAME", + [IMDS_REGION] = "IMDS_KEY_REGION", + [IMDS_ZONE] = "IMDS_KEY_ZONE", + [IMDS_IPV4_PUBLIC] = "IMDS_KEY_IPV4_PUBLIC", + [IMDS_IPV6_PUBLIC] = "IMDS_KEY_IPV6_PUBLIC", + [IMDS_SSH_KEY] = "IMDS_KEY_SSH_KEY", + [IMDS_USERDATA] = "IMDS_KEY_USERDATA", + [IMDS_USERDATA_BASE] = "IMDS_KEY_USERDATA_BASE", + [IMDS_USERDATA_BASE64] = "IMDS_KEY_USERDATA_BASE64", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(imds_well_known_udev, ImdsWellKnown); + +static int smbios_server_info(void) { + int r; + + /* Acquires IMDS server information from udev/hwdb */ + + if (arg_endpoint_source >= 0) + return 0; + + _cleanup_(sd_device_unrefp) sd_device *d = NULL; + r = sd_device_new_from_syspath(&d, "/sys/class/dmi/id/"); + if (ERRNO_IS_NEG_DEVICE_ABSENT(r)) { + log_debug_errno(r, 
"Failed to open /sys/class/dmi/id/ device, ignoring: %m"); + return 0; + } + if (r < 0) + return log_error_errno(r, "Failed to open /sys/class/dmi/id/ device: %m"); + + const char *vendor; + r = sd_device_get_property_value(d, "IMDS_VENDOR", &vendor); + if (r == -ENOENT) { + log_debug_errno(r, "IMDS_VENDOR= property not set on DMI device, skipping."); + return 0; + } + if (r < 0) + return log_error_errno(r, "Failed to read IMDS_SUPPORTED= property of DMI device: %m"); + + log_debug("Detected IMDS vendor support '%s'.", vendor); + + r = free_and_strdup_warn(&arg_vendor, vendor); + if (r < 0) + return r; + + struct { + const char *property; + char **variable; + } table[] = { + { "IMDS_TOKEN_URL", &arg_token_url }, + { "IMDS_REFRESH_HEADER_NAME", &arg_refresh_header_name }, + { "IMDS_DATA_URL", &arg_data_url }, + { "IMDS_DATA_URL_SUFFIX", &arg_data_url_suffix }, + { "IMDS_TOKEN_HEADER_NAME", &arg_token_header_name }, + }; + + FOREACH_ELEMENT(i, table) { + const char *v = NULL; + + r = sd_device_get_property_value(d, i->property, &v); + if (r < 0 && r != -ENOENT) + return log_error_errno(r, "Failed to read property '%s' of DMI: %m", i->property); + + r = free_and_strdup_warn(i->variable, v); + if (r < 0) + return r; + } + + for (size_t i = 0; i < 64U; i++) { + _cleanup_free_ char *property = NULL; + const char *p = NULL; + if (i > 0) { + if (asprintf(&property, "IMDS_EXTRA_HEADER%zu", i + 1) < 0) + return log_oom(); + p = property; + } else + p = "IMDS_EXTRA_HEADER"; + + const char *v = NULL; + r = sd_device_get_property_value(d, p, &v); + if (r < 0 && r != -ENOENT) + return log_error_errno(r, "Failed to read property '%s' of DMI: %m", p); + + if (v) + if (strv_extend(&arg_extra_header, v) < 0) + return log_oom(); + } + + union in_addr_union u; + r = device_get_property_ip_address(d, "IMDS_ADDRESS_IPV4", AF_INET, &u); + if (r < 0 && r != -ENOENT) + return log_error_errno(r, "Failed to read property 'IMDS_ADDRESS_IPV4' of DMI: %m"); + else if (r >= 0) + arg_address_ipv4 
= u.in; + + r = device_get_property_ip_address(d, "IMDS_ADDRESS_IPV6", AF_INET6, &u); + if (r < 0 && r != -ENOENT) + return log_error_errno(r, "Failed to read property 'IMDS_ADDRESS_IPV6' of DMI: %m"); + else if (r >= 0) + arg_address_ipv6 = u.in6; + + for (ImdsWellKnown k = 0; k < _IMDS_WELL_KNOWN_MAX; k++) { + const char *p = imds_well_known_udev_to_string(k); + if (!p) + continue; + + const char *v = NULL; + r = sd_device_get_property_value(d, p, &v); + if (r < 0 && r != -ENOENT) + return log_error_errno(r, "Failed to read property '%s' of DMI: %m", p); + + r = free_and_strdup_warn(arg_well_known_key + k, v); + if (r < 0) + return r; + } + + log_debug("IMDS endpoint data set from SMBIOS device."); + arg_endpoint_source = ENDPOINT_UDEV; + return 0; +} + +static int secure_getenv_ip_address( + const char *name, + int family, + union in_addr_union *ret) { + + assert(name); + assert(IN_SET(family, AF_INET, AF_INET6)); + + /* Parses and IP address specified in an environment variable */ + + const char *e = secure_getenv(name); + if (!e) + return -ENXIO; + + return in_addr_from_string(family, e, ret); +} + +static const char * const imds_well_known_environment_table[_IMDS_WELL_KNOWN_MAX] = { + [IMDS_HOSTNAME] = "SYSTEMD_IMDS_KEY_HOSTNAME", + [IMDS_REGION] = "SYSTEMD_IMDS_KEY_REGION", + [IMDS_ZONE] = "SYSTEMD_IMDS_KEY_ZONE", + [IMDS_IPV4_PUBLIC] = "SYSTEMD_IMDS_KEY_IPV4_PUBLIC", + [IMDS_IPV6_PUBLIC] = "SYSTEMD_IMDS_KEY_IPV6_PUBLIC", + [IMDS_SSH_KEY] = "SYSTEMD_IMDS_KEY_SSH_KEY", + [IMDS_USERDATA] = "SYSTEMD_IMDS_KEY_USERDATA", + [IMDS_USERDATA_BASE] = "SYSTEMD_IMDS_KEY_USERDATA_BASE", + [IMDS_USERDATA_BASE64] = "SYSTEMD_IMDS_KEY_USERDATA_BASE64", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(imds_well_known_environment, ImdsWellKnown); + +static int environment_server_info(void) { + int r; + + /* Acquires IMDS endpoint info from environment variables */ + + if (arg_endpoint_source >= 0) + return 0; + + static const struct { + const char *name; + char **variable; 
+ } table[] = { + { "SYSTEMD_IMDS_VENDOR", &arg_vendor }, + { "SYSTEMD_IMDS_TOKEN_URL", &arg_token_url }, + { "SYSTEMD_IMDS_REFRESH_HEADER_NAME", &arg_refresh_header_name }, + { "SYSTEMD_IMDS_DATA_URL", &arg_data_url }, + { "SYSTEMD_IMDS_DATA_URL_SUFFIX", &arg_data_url_suffix }, + { "SYSTEMD_IMDS_TOKEN_HEADER_NAME", &arg_token_header_name }, + }; + + FOREACH_ELEMENT(i, table) { + const char *e = secure_getenv(i->name); + if (!e) + continue; + + r = free_and_strdup_warn(i->variable, e); + if (r < 0) + return r; + + arg_endpoint_source = ENDPOINT_ENVIRONMENT; + } + + for (unsigned u = 1; u < 64; u++) { + _cleanup_free_ char *name = NULL; + + if (u > 1 && asprintf(&name, "SYSTEMD_IMDS_EXTRA_HEADER%u", u) < 0) + return log_oom(); + + const char *e = secure_getenv(name ?: "SYSTEMD_IMDS_EXTRA_HEADER"); + if (!e) + break; + + if (strv_extend(&arg_extra_header, e) < 0) + return log_oom(); + + arg_endpoint_source = ENDPOINT_ENVIRONMENT; + } + + union in_addr_union u; + r = secure_getenv_ip_address("SYSTEMD_IMDS_ADDRESS_IPV4", AF_INET, &u); + if (r < 0 && r != -ENXIO) + return log_error_errno(r, "Failed read IPv4 address from environment variable 'SYSTEMD_IMDS_ADDRESS_IPV4': %m"); + if (r >= 0) { + arg_address_ipv4 = u.in; + arg_endpoint_source = ENDPOINT_ENVIRONMENT; + } + + r = secure_getenv_ip_address("SYSTEMD_IMDS_ADDRESS_IPV6", AF_INET6, &u); + if (r < 0 && r != -ENXIO) + return log_error_errno(r, "Failed read IPv6 address from environment variable 'SYSTEMD_IMDS_ADDRESS_IPV6': %m"); + if (r >= 0) { + arg_address_ipv6 = u.in6; + arg_endpoint_source = ENDPOINT_ENVIRONMENT; + } + + for (ImdsWellKnown k = 0; k < _IMDS_WELL_KNOWN_MAX; k++) { + const char *n = imds_well_known_environment_to_string(k); + if (!n) + continue; + + const char *e = secure_getenv(n); + if (!e) + continue; + + r = free_and_strdup_warn(arg_well_known_key + k, e); + if (r < 0) + return r; + } + + if (arg_endpoint_source >= 0) + log_debug("IMDS endpoint data set from environment."); + + return 0; +} + 
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) { + int r; + + assert(key); + + /* Called for each kernel command line option. */ + + if (proc_cmdline_key_streq(key, "systemd.imds.network")) { + if (proc_cmdline_value_missing(key, value)) + return 0; + + ImdsNetworkMode m = imds_network_mode_from_string(value); + if (m < 0) + return log_warning_errno(m, "Failed to parse systemd.imds.network= value: %m"); + + arg_network_mode = m; + return 0; + } + + /* The other kernel command line options configured IMDS endpoint data. We'll only check it if no + * other configuration source for it has been used */ + if (arg_endpoint_source >= 0 && arg_endpoint_source != ENDPOINT_PROC_CMDLINE) + return 0; + + static const struct { + const char *key; + char **variable; + } table[] = { + { "systemd.imds.vendor", &arg_vendor }, + { "systemd.imds.token_url", &arg_token_url }, + { "systemd.imds.refresh_header_name", &arg_refresh_header_name }, + { "systemd.imds.data_url", &arg_data_url }, + { "systemd.imds.data_url_suffix", &arg_data_url_suffix }, + { "systemd.imds.token_header_name", &arg_token_header_name }, + }; + + FOREACH_ELEMENT(i, table) { + if (!proc_cmdline_key_streq(key, i->key)) + continue; + + if (proc_cmdline_value_missing(key, value)) + return 0; + + r = free_and_strdup_warn(i->variable, value); + if (r < 0) + return r; + + arg_endpoint_source = ENDPOINT_PROC_CMDLINE; + return 0; + } + + if (proc_cmdline_key_streq(key, "systemd.imds.extra_header")) { + if (proc_cmdline_value_missing(key, value)) + return 0; + + if (isempty(value)) + arg_extra_header = strv_free(arg_extra_header); + else if (strv_extend(&arg_extra_header, value) < 0) + return log_oom(); + + arg_endpoint_source = ENDPOINT_PROC_CMDLINE; + return 0; + } + + if (proc_cmdline_key_streq(key, "systemd.imds.address_ipv4")) { + if (proc_cmdline_value_missing(key, value)) + return 0; + + union in_addr_union u; + r = in_addr_from_string(AF_INET, value, &u); + if (r < 0) + return 
log_error_errno(r, "Failed to parse 'systemd.imds.address_ipv4=' parameter: %s", value); + + arg_address_ipv4 = u.in; + arg_endpoint_source = ENDPOINT_PROC_CMDLINE; + return 0; + } + + if (proc_cmdline_key_streq(key, "systemd.imds.address_ipv6")) { + if (proc_cmdline_value_missing(key, value)) + return 0; + + union in_addr_union u; + r = in_addr_from_string(AF_INET6, value, &u); + if (r < 0) + return log_error_errno(r, "Failed to parse 'systemd.imds.address_ipv6=' parameter: %s", value); + + arg_address_ipv6 = u.in6; + arg_endpoint_source = ENDPOINT_PROC_CMDLINE; + return 0; + } + + static const char * const well_known_table[_IMDS_WELL_KNOWN_MAX] = { + [IMDS_HOSTNAME] = "systemd.imds.key.hostname", + [IMDS_REGION] = "systemd.imds.key.region", + [IMDS_ZONE] = "systemd.imds.key.zone", + [IMDS_IPV4_PUBLIC] = "systemd.imds.key.ipv4_public", + [IMDS_IPV6_PUBLIC] = "systemd.imds.key.ipv6_public", + [IMDS_SSH_KEY] = "systemd.imds.key.ssh_key", + [IMDS_USERDATA] = "systemd.imds.key.userdata", + [IMDS_USERDATA_BASE] = "systemd.imds.key.userdata_base", + [IMDS_USERDATA_BASE64] = "systemd.imds.key.userdata_base64", + }; + + for (ImdsWellKnown wk = 0; wk < _IMDS_WELL_KNOWN_MAX; wk++) { + const char *k = well_known_table[wk]; + if (!k) + continue; + + if (!proc_cmdline_key_streq(key, k)) + continue; + + r = free_and_strdup_warn(arg_well_known_key + wk, value); + if (r < 0) + return r; + + arg_endpoint_source = ENDPOINT_PROC_CMDLINE; + return 0; + } + + return 0; +} + +static int run(int argc, char* argv[]) { + int r; + + log_setup(); + + r = parse_argv(argc, argv); + if (r <= 0) + return r; + + r = environment_server_info(); + if (r < 0) + return r; + + r = proc_cmdline_parse(parse_proc_cmdline_item, /* userdata= */ NULL, PROC_CMDLINE_STRIP_RD_PREFIX); + if (r < 0) + log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m"); + + r = smbios_server_info(); + if (r < 0) + return r; + + if (arg_varlink) + return vl_server(); + + return cmdline_run(); +} + 
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run); diff --git a/src/imds/io.systemd.imds.policy b/src/imds/io.systemd.imds.policy new file mode 100644 index 0000000000000..e844f60b600bc --- /dev/null +++ b/src/imds/io.systemd.imds.policy @@ -0,0 +1,30 @@ + + + + + + + + The systemd Project + https://systemd.io + + + Acquire IMDS instance metadata. + Authentication is required for an application to acquire IMDS instance metadata. + + auth_admin_keep + auth_admin_keep + auth_admin_keep + + + diff --git a/src/imds/meson.build b/src/imds/meson.build new file mode 100644 index 0000000000000..65c5a7c7998b4 --- /dev/null +++ b/src/imds/meson.build @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +if conf.get('ENABLE_IMDS') != 1 + subdir_done() +endif + +executables += [ + libexec_template + { + 'name' : 'systemd-imdsd', + 'public' : true, + 'sources' : files('imdsd.c', '../import/curl-util.c', 'imds-util.c'), + 'dependencies' : [ libcurl ], + }, +] + +install_data( + 'io.systemd.imds.policy', + install_dir : polkitpolicydir) diff --git a/src/shared/meson.build b/src/shared/meson.build index bbc0307999324..f24b28efe651f 100644 --- a/src/shared/meson.build +++ b/src/shared/meson.build @@ -204,6 +204,7 @@ shared_sources = files( 'varlink-io.systemd.FactoryReset.c', 'varlink-io.systemd.Hostname.c', 'varlink-io.systemd.Import.c', + 'varlink-io.systemd.InstanceMetadata.c', 'varlink-io.systemd.Journal.c', 'varlink-io.systemd.JournalAccess.c', 'varlink-io.systemd.Login.c', diff --git a/src/shared/varlink-io.systemd.InstanceMetadata.c b/src/shared/varlink-io.systemd.InstanceMetadata.c new file mode 100644 index 0000000000000..eb5992dbffa95 --- /dev/null +++ b/src/shared/varlink-io.systemd.InstanceMetadata.c @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-polkit.h" +#include "varlink-io.systemd.InstanceMetadata.h" + +static SD_VARLINK_DEFINE_ENUM_TYPE( + WellKnown, + SD_VARLINK_DEFINE_ENUM_VALUE(base), + 
SD_VARLINK_DEFINE_ENUM_VALUE(hostname), + SD_VARLINK_DEFINE_ENUM_VALUE(region), + SD_VARLINK_DEFINE_ENUM_VALUE(zone), + SD_VARLINK_DEFINE_ENUM_VALUE(ipv4_public), + SD_VARLINK_DEFINE_ENUM_VALUE(ipv6_public), + SD_VARLINK_DEFINE_ENUM_VALUE(ssh_key), + SD_VARLINK_DEFINE_ENUM_VALUE(userdata), + SD_VARLINK_DEFINE_ENUM_VALUE(userdata_base), + SD_VARLINK_DEFINE_ENUM_VALUE(userdata_base64)); + +static SD_VARLINK_DEFINE_METHOD( + Get, + SD_VARLINK_FIELD_COMMENT("The key to retrieve"), + SD_VARLINK_DEFINE_INPUT(key, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Start with a well-known key"), + SD_VARLINK_DEFINE_INPUT_BY_TYPE(wellKnown, WellKnown, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The network interface to use"), + SD_VARLINK_DEFINE_INPUT(interface, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Refresh cached data if older (CLOCK_BOOTTIME, µs)"), + SD_VARLINK_DEFINE_INPUT(refreshUSec, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Whether to accept cached data"), + SD_VARLINK_DEFINE_INPUT(cache, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The firewall mark value to use"), + SD_VARLINK_DEFINE_INPUT(firewallMark, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + VARLINK_DEFINE_POLKIT_INPUT, + SD_VARLINK_FIELD_COMMENT("The data in Base64 encoding."), + SD_VARLINK_DEFINE_OUTPUT(data, SD_VARLINK_STRING, 0), + SD_VARLINK_FIELD_COMMENT("The interface the data was found on."), + SD_VARLINK_DEFINE_OUTPUT(interface, SD_VARLINK_INT, SD_VARLINK_NULLABLE)); + +static SD_VARLINK_DEFINE_METHOD( + GetVendorInfo, + SD_VARLINK_FIELD_COMMENT("The detected cloud vendor"), + SD_VARLINK_DEFINE_OUTPUT(vendor, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The URL to acquire the token from"), + SD_VARLINK_DEFINE_OUTPUT(tokenUrl, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The HTTP header to configure the refresh timeout for the token in"), + 
SD_VARLINK_DEFINE_OUTPUT(refreshHeaderName, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The base URL to acquire the data from"), + SD_VARLINK_DEFINE_OUTPUT(dataUrl, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("A suffix to append to the data URL"), + SD_VARLINK_DEFINE_OUTPUT(dataUrlSuffix, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The HTTP header to pass the token in when requesting data"), + SD_VARLINK_DEFINE_OUTPUT(tokenHeaderName, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Additional HTTP headers to pass when acquiring data"), + SD_VARLINK_DEFINE_OUTPUT(extraHeader, SD_VARLINK_STRING, SD_VARLINK_NULLABLE|SD_VARLINK_ARRAY), + SD_VARLINK_FIELD_COMMENT("IPv4 address of IMDS server"), + SD_VARLINK_DEFINE_OUTPUT(addressIPv4, SD_VARLINK_INT, SD_VARLINK_NULLABLE|SD_VARLINK_ARRAY), + SD_VARLINK_FIELD_COMMENT("IPv6 address of IMDS server"), + SD_VARLINK_DEFINE_OUTPUT(addressIPv6, SD_VARLINK_INT, SD_VARLINK_NULLABLE|SD_VARLINK_ARRAY), + SD_VARLINK_FIELD_COMMENT("Well-known fields"), + SD_VARLINK_DEFINE_OUTPUT(wellKnown, SD_VARLINK_OBJECT, SD_VARLINK_NULLABLE)); + +static SD_VARLINK_DEFINE_ERROR( + KeyNotFound); + +static SD_VARLINK_DEFINE_ERROR( + WellKnownKeyUnset); + +static SD_VARLINK_DEFINE_ERROR( + NotAvailable); + +static SD_VARLINK_DEFINE_ERROR( + NotSupported); + +static SD_VARLINK_DEFINE_ERROR( + CommunicationFailure); + +static SD_VARLINK_DEFINE_ERROR( + Timeout); + +SD_VARLINK_DEFINE_INTERFACE( + io_systemd_InstanceMetadata, + "io.systemd.InstanceMetadata", + SD_VARLINK_INTERFACE_COMMENT("APIs for acquiring cloud IMDS information."), + SD_VARLINK_SYMBOL_COMMENT("Well known data fields"), + &vl_type_WellKnown, + SD_VARLINK_SYMBOL_COMMENT("Acquire data."), + &vl_method_Get, + SD_VARLINK_SYMBOL_COMMENT("Get information about cloud vendor and IMDS connectivity."), + &vl_method_GetVendorInfo, + SD_VARLINK_SYMBOL_COMMENT("The requested key is not found on the IMDS 
server."), + &vl_error_KeyNotFound, + SD_VARLINK_SYMBOL_COMMENT("IMDS is disabled or otherwise not available."), + &vl_error_NotAvailable, + SD_VARLINK_SYMBOL_COMMENT("IMDS is not supported."), + &vl_error_NotSupported, + SD_VARLINK_SYMBOL_COMMENT("Well-known key is not set."), + &vl_error_WellKnownKeyUnset, + SD_VARLINK_SYMBOL_COMMENT("Communication with IMDS failed."), + &vl_error_CommunicationFailure, + SD_VARLINK_SYMBOL_COMMENT("Timeout reached"), + &vl_error_Timeout); diff --git a/src/shared/varlink-io.systemd.InstanceMetadata.h b/src/shared/varlink-io.systemd.InstanceMetadata.h new file mode 100644 index 0000000000000..60920bd9c9f55 --- /dev/null +++ b/src/shared/varlink-io.systemd.InstanceMetadata.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-varlink-idl.h" + +extern const sd_varlink_interface vl_interface_io_systemd_InstanceMetadata; diff --git a/src/test/test-varlink-idl.c b/src/test/test-varlink-idl.c index 039d36a85e42d..07fd33d67afa7 100644 --- a/src/test/test-varlink-idl.c +++ b/src/test/test-varlink-idl.c @@ -24,6 +24,7 @@ #include "varlink-io.systemd.FactoryReset.h" #include "varlink-io.systemd.Hostname.h" #include "varlink-io.systemd.Import.h" +#include "varlink-io.systemd.InstanceMetadata.h" #include "varlink-io.systemd.Journal.h" #include "varlink-io.systemd.JournalAccess.h" #include "varlink-io.systemd.Login.h" @@ -190,6 +191,7 @@ TEST(parse_format) { &vl_interface_io_systemd_FactoryReset, &vl_interface_io_systemd_Hostname, &vl_interface_io_systemd_Import, + &vl_interface_io_systemd_InstanceMetadata, &vl_interface_io_systemd_Journal, &vl_interface_io_systemd_JournalAccess, &vl_interface_io_systemd_Login, diff --git a/sysusers.d/meson.build b/sysusers.d/meson.build index 84fadfe3f7020..3c2e450a183bb 100644 --- a/sysusers.d/meson.build +++ b/sysusers.d/meson.build @@ -15,7 +15,8 @@ in_files = [['basic.conf', true], ['systemd-journal.conf', true], ['systemd-network.conf', 
conf.get('ENABLE_NETWORKD') == 1], ['systemd-resolve.conf', conf.get('ENABLE_RESOLVE') == 1], - ['systemd-timesync.conf', conf.get('ENABLE_TIMESYNCD') == 1]] + ['systemd-timesync.conf', conf.get('ENABLE_TIMESYNCD') == 1], + ['systemd-imds.conf', conf.get('ENABLE_IMDS') == 1]] foreach tuple : in_files file = tuple[0] diff --git a/sysusers.d/systemd-imds.conf.in b/sysusers.d/systemd-imds.conf.in new file mode 100644 index 0000000000000..adb8d5b1fb1c6 --- /dev/null +++ b/sysusers.d/systemd-imds.conf.in @@ -0,0 +1,8 @@ +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +u! systemd-imds {{SYSTEMD_IMDS_UID}} "systemd Instance Metadata" diff --git a/units/meson.build b/units/meson.build index b2cf9bd8f39ce..782d1ecadfbe4 100644 --- a/units/meson.build +++ b/units/meson.build @@ -392,6 +392,18 @@ units = [ 'file' : 'systemd-hybrid-sleep.service.in', 'conditions' : ['ENABLE_HIBERNATE'], }, + { + 'file' : 'systemd-imdsd@.service.in', + 'conditions' : ['ENABLE_IMDS'], + }, + { + 'file' : 'systemd-imdsd.socket', + 'conditions' : ['ENABLE_IMDS'], + }, + { + 'file' : 'systemd-imds-early-network.service.in', + 'conditions' : ['ENABLE_IMDS'], + }, { 'file' : 'systemd-importd.service.in', 'conditions' : ['ENABLE_IMPORTD'], diff --git a/units/systemd-imds-early-network.service.in b/units/systemd-imds-early-network.service.in new file mode 100644 index 0000000000000..b4241237f0983 --- /dev/null +++ b/units/systemd-imds-early-network.service.in @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. 
+# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=Enable Pre-IMDS Networking +Documentation=man:systemd-imdsd@.service(8) +DefaultDependencies=no +Before=network-pre.target +Wants=network-pre.target +Conflicts=shutdown.target initrd-switch-root.target +Before=shutdown.target initrd-switch-root.target +After=sys-devices-virtual-dmi-id.device + +[Service] +ExecStart={{LIBEXECDIR}}/systemd-imdsd --setup-network +Type=oneshot +RemainAfterExit=yes diff --git a/units/systemd-imdsd.socket b/units/systemd-imdsd.socket new file mode 100644 index 0000000000000..daeb7840b3ec0 --- /dev/null +++ b/units/systemd-imdsd.socket @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=Cloud Instance Metadata Access (IMDS) +Documentation=man:systemd-imdsd@.service(8) +DefaultDependencies=no +Before=sockets.target + +[Socket] +ListenStream=/run/systemd/io.systemd.InstanceMetadata +Symlinks=/run/varlink/registry/io.systemd.InstanceMetadata +FileDescriptorName=varlink +SocketMode=0666 +Accept=yes +MaxConnectionsPerSource=16 +RemoveOnStop=yes + +# Note that this is typically pulled in automatically by +# systemd-imds-generator, but you can also enable it manually if you like. 
+[Install] +WantedBy=sockets.target diff --git a/units/systemd-imdsd@.service.in b/units/systemd-imdsd@.service.in new file mode 100644 index 0000000000000..2650e3b4de322 --- /dev/null +++ b/units/systemd-imdsd@.service.in @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=Cloud Instance Metadata Access (IMDS) +Documentation=man:systemd-imdsd@.service(8) +DefaultDependencies=no +Conflicts=shutdown.target initrd-switch-root.target +Before=shutdown.target initrd-switch-root.target +After=sys-devices-virtual-dmi-id.device + +[Service] +ExecStart=-{{LIBEXECDIR}}/systemd-imdsd +User=systemd-imds +RuntimeDirectory=systemd/imds +RuntimeDirectoryPreserve=yes +# CAP_NET_ADMIN is required to set SO_MARK and bypass the routing restrictions, and CAP_NET_BIND_SERVICE to bind to a low port +AmbientCapabilities=CAP_NET_ADMIN CAP_NET_BIND_SERVICE +LockPersonality=yes +MemoryDenyWriteExecute=yes +NoNewPrivileges=yes From edcb9fd9fadc22585485201dccebafe250648e8b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 4 Mar 2026 15:13:25 +0100 Subject: [PATCH 14/18] imds: add "systemd-imds" tool that is a simple client to "systemd-imdsd" --- man/rules/meson.build | 1 + man/systemd-imds.xml | 171 +++++ src/imds/imds-tool.c | 897 +++++++++++++++++++++++++++ src/imds/meson.build | 5 + src/shared/pcrextend-util.c | 68 ++ src/shared/pcrextend-util.h | 6 +- units/meson.build | 4 + units/systemd-imds-import.service.in | 25 + 8 files changed, 1176 insertions(+), 1 deletion(-) create mode 100644 man/systemd-imds.xml create mode 100644 src/imds/imds-tool.c create mode 100644 units/systemd-imds-import.service.in diff --git a/man/rules/meson.build 
b/man/rules/meson.build index ccf41743c672f..0886efaacb0e7 100644 --- a/man/rules/meson.build +++ b/man/rules/meson.build @@ -1025,6 +1025,7 @@ manpages = [ ['systemd-hostnamed.service', '8', ['systemd-hostnamed'], 'ENABLE_HOSTNAMED'], ['systemd-hwdb', '8', [], 'ENABLE_HWDB'], ['systemd-id128', '1', [], ''], + ['systemd-imds', '1', ['systemd-imds-import.service'], 'ENABLE_IMDS'], ['systemd-imdsd@.service', '8', ['systemd-imdsd', diff --git a/man/systemd-imds.xml b/man/systemd-imds.xml new file mode 100644 index 0000000000000..b0d1f5297ed12 --- /dev/null +++ b/man/systemd-imds.xml @@ -0,0 +1,171 @@ + + + + + + + + systemd-imds + systemd + + + + systemd-imds + 1 + + + + systemd-imds + systemd-imds-import.service + Cloud IMDS (Instance Metadata Service) tool + + + + systemd-imds-import.service + + systemd-imds OPTIONS KEY + + + + + Description + + systemd-imds is a tool for acquiring data from IMDS (Instance Metadata Service), + as provided in many cloud environments. It is a client to + systemd-imdsd@.service8, + and provides access to IMDS data from shell environments. + + The tool can operate in one of four modes: + + + Without positional arguments (and without the switch) + general IMDS service data and a few well known fields are displayed in human friendly + form. + + With a positional argument (and without ) the IMDS data + referenced by the specified key is acquired and written to standard output, in unprocessed form. IMDS + keys are the part of the IMDS acquisition URL that are suffixed to the base URL. IMDS keys must begin + with a slash (/). Note that IMDS keys are typically implementation + specific. + + With the option specified (see below), the indicated + well-known field is written to standard output, in unprocessed form. 
The concept of well-known fields + abstracts IMDS implementation differences to some level, exposing a unified interface for IMDS fields + that typically exist on many different implementations, but under implementation-specific + keys. + + With the option specified (see below) the "userdata" + provided via IMDS is written to standard output. Under the hood this is similar to + , or + . Each of the three is tried in turn (in this order), and + the first available data is returned. For the + systemd-userdata userdata item is requested. For + the returned data is automatically + Base64-decoded. + + + + + Options and Commands + + + + + + + Takes one of hostname, region, + zone, ipv4-public, ipv6-public, + ssh-key, userdata, userdata-base, + userdata-base64. Acquires a specific "well-known" field from IMDS. Many of + these fields are commonly supported by various IMDS implementations, but typically some fields are + not. Note that if is used an additional subkey must be + specified as positional argument, which encodes the specific userdata item to + acquire. + + + + + + + + Takes a time in seconds as argument, and indicates the required "freshness" of the + data, in case cached data is used. + + + + + + + + Takes a boolean. If set to false local caching of IMDS is disabled, and the data is + always acquired fresh from the IMDS endpoint. + + + + + + + + + Acquire this instance's IMDS user data, if available. See above for + details. + + + + + + + + Acquires IMDS data and writes relevant fields as credentials to + /run/credstore/. This currently covers: + + + If the IMDS user data is a valid JSON object containing a field + systemd.credentials (with a JSON array as value) it is processed, importing + arbitrary credentials listed in the array. Each array item must have a name + field indicating the credential name. It may have one text, + data or encrypted field, containing the credential data. 
If + text is used the value shall be a literal string of the credential value. If + data is used the value may be arbitrary binary data encoded in a Base64 + string. If encrypted is used the value shall be a Base64 encoded encrypted + credential. See + systemd.system-credentials7 + for information about credentials that may be imported this way. + + If the well-known ssh-key field is available, its value will be + imported into the ssh.authorized_keys.root credential. + + If the well-known hostname field is available, its value will be + imported into the firstboot.hostname credential. + + + This command is invoked by the systemd-imds-import.service run at + boot. + + + + + + + + + + + Exit status + + On success, 0 is returned, a non-zero failure code otherwise. + + + + See Also + + systemd1 + systemd-imdsd@.service8 + systemd-imds-generator8 + systemd.system-credentials7 + + + + diff --git a/src/imds/imds-tool.c b/src/imds/imds-tool.c new file mode 100644 index 0000000000000..e715578e1cbbf --- /dev/null +++ b/src/imds/imds-tool.c @@ -0,0 +1,897 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include + +#include "sd-varlink.h" + +#include "alloc-util.h" +#include "build.h" +#include "build-path.h" +#include "creds-util.h" +#include "dns-rr.h" +#include "errno-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-table.h" +#include "format-util.h" +#include "fs-util.h" +#include "hexdecoct.h" +#include "imds-util.h" +#include "in-addr-util.h" +#include "io-util.h" +#include "iovec-util.h" +#include "json-util.h" +#include "log.h" +#include "main-func.h" +#include "parse-argument.h" +#include "pcrextend-util.h" +#include "pretty-print.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "tmpfile-util.h" + +static enum { + ACTION_SUMMARY, + ACTION_GET, + ACTION_USERDATA, + ACTION_IMPORT, + _ACTION_INVALID = -EINVAL, +} arg_action = _ACTION_INVALID; +static char *arg_key = NULL; +static 
ImdsWellKnown arg_well_known = _IMDS_WELL_KNOWN_INVALID; +static int arg_cache = -1; +static usec_t arg_refresh_usec = 0; +static bool arg_refresh_usec_set = false; + +STATIC_DESTRUCTOR_REGISTER(arg_key, freep); + +static int help(void) { + _cleanup_free_ char *link = NULL; + int r; + + r = terminal_urlify_man("systemd-imds", "1", &link); + if (r < 0) + return log_oom(); + + printf("%s [OPTIONS...] [KEY...]\n" + "\n%sIMDS data acquisition.%s\n\n" + " -h --help Show this help\n" + " --version Show package version\n" + " -K --well-known=[hostname|region|zone|ipv4-public|ipv6-public|ssh-key|\n" + " userdata|userdata-base|userdata-base64]\n" + " Select well-known key/base\n" + " --refresh=SEC Set token refresh time\n" + " --cache=no Disable cache use\n" + " -u --userdata Dump user data\n" + " --import Import system credentials from IMDS userdata\n" + " and place them in /run/credstore/\n" + "\nSee the %s for details.\n", + program_invocation_short_name, + ansi_highlight(), + ansi_normal(), + link); + + return 0; +} + +static int parse_argv(int argc, char *argv[]) { + + enum { + ARG_VERSION = 0x100, + ARG_REFRESH, + ARG_CACHE, + ARG_IMPORT, + }; + + static const struct option options[] = { + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, ARG_VERSION }, + { "well-known", required_argument, NULL, 'K' }, + { "refresh", required_argument, NULL, ARG_REFRESH }, + { "cache", required_argument, NULL, ARG_CACHE }, + { "userdata", no_argument, NULL, 'u' }, + { "import", no_argument, NULL, ARG_IMPORT }, + {} + }; + + int c, r; + + assert(argc >= 0); + assert(argv); + + while ((c = getopt_long(argc, argv, "hK:u", options, NULL)) >= 0) { + + switch (c) { + + case 'h': + return help(); + + case ARG_VERSION: + return version(); + + case 'K': { + if (isempty(optarg)) { + arg_well_known = _IMDS_WELL_KNOWN_INVALID; + break; + } + + if (streq(optarg, "help")) + return DUMP_STRING_TABLE(imds_well_known, ImdsWellKnown, _IMDS_WELL_KNOWN_MAX); + + ImdsWellKnown wk = 
imds_well_known_from_string(optarg); + if (wk < 0) + return log_error_errno(wk, "Failed to parse --well-known= argument: %s", optarg); + + arg_well_known = wk; + break; + } + + case ARG_CACHE: + r = parse_tristate_argument_with_auto("--cache", optarg, &arg_cache); + if (r < 0) + return r; + + break; + + case ARG_REFRESH: { + if (isempty(optarg)) { + arg_refresh_usec_set = false; + break; + } + + usec_t t; + r = parse_sec(optarg, &t); + if (r < 0) + return log_error_errno(r, "Failed to parse refresh timeout: %s", optarg); + + arg_refresh_usec = t; + arg_refresh_usec_set = true; + break; + } + + case 'u': + arg_action = ACTION_USERDATA; + break; + + case ARG_IMPORT: + arg_action = ACTION_IMPORT; + break; + + case '?': + return -EINVAL; + + default: + assert_not_reached(); + } + } + + if (IN_SET(arg_action, ACTION_USERDATA, ACTION_IMPORT)) { + if (argc != optind) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No parameters expected."); + + } else { + assert(arg_action < 0); + + if (argc > optind + 1) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "None or one argument expected."); + + if (argc == optind && arg_well_known < 0) + arg_action = ACTION_SUMMARY; + else { + if (arg_well_known < 0) + arg_well_known = IMDS_BASE; + + if (argc > optind) { + if (!imds_key_is_valid(argv[optind])) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified IMDS key is not valid, refusing: %s", argv[optind]); + + if (!imds_well_known_can_suffix(arg_well_known)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Well known key '%s' does not take a key suffix, refusing.", imds_well_known_to_string(arg_well_known)); + + r = free_and_strdup_warn(&arg_key, argv[optind]); + if (r < 0) + return r; + } + + arg_action = ACTION_GET; + } + } + + return 1; +} + +static int acquire_imds_key( + sd_varlink *link, + ImdsWellKnown wk, + const char *key, + struct iovec *ret) { + + int r; + + assert(link); + assert(wk >= 0); + assert(wk < _IMDS_WELL_KNOWN_MAX); + assert(ret); + + const char 
*error_id = NULL; + sd_json_variant *reply = NULL; + r = sd_varlink_callbo( + link, + "io.systemd.InstanceMetadata.Get", + &reply, + &error_id, + SD_JSON_BUILD_PAIR_CONDITION(wk != IMDS_BASE, "wellKnown", JSON_BUILD_STRING_UNDERSCORIFY(imds_well_known_to_string(wk))), + JSON_BUILD_PAIR_STRING_NON_EMPTY("key", key), + SD_JSON_BUILD_PAIR_CONDITION(arg_refresh_usec_set, "refreshUSec", SD_JSON_BUILD_UNSIGNED(arg_refresh_usec)), + SD_JSON_BUILD_PAIR_CONDITION(arg_cache >= 0, "cache", SD_JSON_BUILD_BOOLEAN(arg_cache))); + if (r < 0) + return log_error_errno(r, "Failed to issue io.systemd.InstanceMetadata.Get(): %m"); + if (error_id) { + if (STR_IN_SET(error_id, "io.systemd.InstanceMetadata.KeyNotFound", "io.systemd.InstanceMetadata.WellKnownKeyUnset")) { + *ret = (struct iovec) {}; + return 0; + } + + return log_error_errno(sd_varlink_error_to_errno(error_id, reply), "Failed to issue io.systemd.InstanceMetadata.Get(): %s", error_id); + } + + _cleanup_(iovec_done) struct iovec data = {}; + static const sd_json_dispatch_field dispatch_table[] = { + { "data", SD_JSON_VARIANT_STRING, json_dispatch_unbase64_iovec, 0, SD_JSON_MANDATORY }, + {}, + }; + r = sd_json_dispatch(reply, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &data); + if (r < 0) + return r; + + *ret = TAKE_STRUCT(data); + return 1; +} + +static int acquire_imds_key_as_string( + sd_varlink *link, + ImdsWellKnown wk, + const char *key, + char **ret) { + + int r; + + assert(link); + assert(wk >= 0); + assert(wk < _IMDS_WELL_KNOWN_MAX); + assert(ret); + + _cleanup_(iovec_done) struct iovec data = {}; + r = acquire_imds_key(link, wk, key, &data); + if (r < 0) + return r; + if (r == 0) { + *ret = NULL; + return 0; + } + + _cleanup_free_ char *s = NULL; + r = make_cstring(data.iov_base, data.iov_len, MAKE_CSTRING_REFUSE_TRAILING_NUL, &s); + if (r < 0) + return r; + + *ret = TAKE_PTR(s); + return 1; +} + +static int acquire_imds_key_as_ip_address( + sd_varlink *link, + ImdsWellKnown wk, + const char *key, + int family, + 
union in_addr_union *ret) { + int r; + + assert(link); + assert(wk >= 0); + assert(wk < _IMDS_WELL_KNOWN_MAX); + assert(ret); + + _cleanup_free_ char *s = NULL; + r = acquire_imds_key_as_string(link, wk, key, &s); + if (r < 0) + return r; + if (r == 0 || isempty(s)) { + *ret = (union in_addr_union) {}; + return 0; + } + + r = in_addr_from_string(family, s, ret); + if (r < 0) + return r; + + return 1; +} + +static int action_summary(sd_varlink *link) { /* Collects the IMDS vendor name and the available well-known fields in a table for display */ + int r; + + assert(link); + + _cleanup_(table_unrefp) Table *table = table_new_vertical(); + if (!table) + return log_oom(); + + const char *error_id = NULL; + sd_json_variant *reply = NULL; + r = sd_varlink_call( + link, + "io.systemd.InstanceMetadata.GetVendorInfo", + /* parameters= */ NULL, + &reply, + &error_id); + if (r < 0) + return log_error_errno(r, "Failed to issue io.systemd.InstanceMetadata.GetVendorInfo(): %m"); + if (error_id) + return log_error_errno(sd_varlink_error_to_errno(error_id, reply), "Failed to issue io.systemd.InstanceMetadata.GetVendorInfo(): %s", error_id); + + const char *vendor = NULL; + static const sd_json_dispatch_field dispatch_table[] = { + { "vendor", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, 0, 0 }, + {} + }; + r = sd_json_dispatch(reply, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &vendor); + if (r < 0) + return r; + if (vendor) { + r = table_add_many(table, + TABLE_FIELD, "Vendor", + TABLE_SET_JSON_FIELD_NAME, "vendor", + TABLE_STRING, vendor); + if (r < 0) + return table_log_add_error(r); + } + + static const struct { + ImdsWellKnown well_known; + const char *field; + } wktable[] = { + { IMDS_HOSTNAME, "Hostname" }, + { IMDS_REGION, "Region" }, + { IMDS_ZONE, "Zone" }, + { IMDS_IPV4_PUBLIC, "Public IPv4 Address" }, + { IMDS_IPV6_PUBLIC, "Public IPv6 Address" }, + }; + FOREACH_ELEMENT(i, wktable) { + _cleanup_free_ char *text = NULL; + + r = acquire_imds_key_as_string(link, i->well_known, /* key= */ NULL, &text); + if (r < 0) + return r; + if (r == 0 || isempty(text)) + 
continue; + + r = table_add_many(table, + TABLE_FIELD, i->field, + TABLE_SET_JSON_FIELD_NAME, imds_well_known_to_string(i->well_known), + TABLE_STRING, text); + if (r < 0) + return table_log_add_error(r); + } + + if (table_isempty(table)) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "No well-known IMDS data available."); + + r = table_print(table, NULL); + if (r < 0) + return table_log_print_error(r); + + return 0; +} + +static const char *detect_json_object(const char *text) { + assert(text); + + /* Checks if the provided text looks like a JSON object. It checks if the first non-whitespace + * characters are {" or {}. */ + + text += strspn(text, WHITESPACE); + if (*text != '{') + return NULL; + + const char *e = text + 1; + e += strspn(e, WHITESPACE); + if (!IN_SET(*e, '"', '}')) + return NULL; + + return text; +} + +static int write_credential(const char *dir, const char *name, const struct iovec *data) { + int r; + + assert(dir); + assert(name); + + _cleanup_close_ int dfd = open_mkdir(dir, O_CLOEXEC|O_PATH, 0700); + if (dfd < 0) + return log_error_errno(dfd, "Failed to open credential directory '%s': %m", dir); + + if (faccessat(dfd, name, F_OK, AT_SYMLINK_NOFOLLOW) < 0) { + if (errno != ENOENT) + return log_error_errno(errno, "Failed to check if '%s' exists in credential directory '%s': %m", name, dir); + } else { + log_notice("Skipping importing of credential '%s', it already exists locally in '%s'.", name, dir); + return 0; + } + + _cleanup_free_ char *t = NULL; + _cleanup_close_ int fd = open_tmpfile_linkable_at(dfd, name, O_WRONLY|O_CLOEXEC, &t); + if (fd < 0) + return log_error_errno(fd, "Failed to create credential file '%s/%s': %m", dir, name); + + CLEANUP_TMPFILE_AT(dfd, t); + + r = loop_write(fd, data->iov_base, data->iov_len); + if (r < 0) + return log_error_errno(r, "Failed to write credential file '%s/%s': %m", dir, name); + + if (fchmod(fd, 0400) < 0) + return log_error_errno(errno, "Failed to set access mode on credential file '%s/%s': 
%m", dir, name); + + r = link_tmpfile_at(fd, dfd, t, name, /* flags= */ 0); + if (r < 0) + return log_error_errno(r, "Failed to move credential file '%s/%s' into place: %m", dir, name); + + t = mfree(t); /* Disarm auto-cleanup */ + return 1; +} + +typedef struct CredentialData { + const char *name; + const char *text; + struct iovec data, encrypted; +} CredentialData; + +static void credential_data_done(CredentialData *d) { + assert(d); + + iovec_done(&d->data); + iovec_done(&d->encrypted); +} + +static int import_credential_one(CredentialData *d) { + int r; + + assert(d); + assert(d->name); + + log_debug("Importing credential '%s' from IMDS.", d->name); + + const char *dir = "/run/credstore"; + struct iovec *v, _v; + if (d->text) { + _v = IOVEC_MAKE_STRING(d->text); + v = &_v; + } else if (iovec_is_set(&d->data)) + v = &d->data; + else if (iovec_is_set(&d->encrypted)) { + dir = "/run/credstore.encrypted"; + v = &d->encrypted; + } else + assert_not_reached(); + + r = write_credential(dir, d->name, v); + if (r <= 0) + return r; + + log_info("Imported credential '%s' from IMDS (%s).", d->name, FORMAT_BYTES(v->iov_len)); + return 1; +} + +static int import_credentials(const char *text) { + int r; + + assert(text); + + /* We cannot be sure if the data is actually intended for us. Hence let's be somewhat defensive, and + * accept data in two ways: either immediately as a JSON object, or alternatively marked with a first + * line of "#systemd-userdata". The latter mimics the markers cloud-init employs. 
*/ + + const char *e = startswith(text, "#systemd-userdata\n"); + if (!e) { + e = detect_json_object(text); + if (!e) { + log_info("IMDS user data does not look like JSON or systemd userdata, not processing."); + return 0; + } + } + + log_debug("Detected JSON userdata"); + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *j = NULL; + unsigned line = 0, column = 0; + r = sd_json_parse(e, /* flags= */ 0, &j, &line, &column); + if (r < 0) { + if (line > 0) + log_syntax(/* unit= */ NULL, LOG_WARNING, /* filename= */ NULL, line, r, "JSON parse failure."); + else + log_error_errno(r, "Failed to parse IMDS userdata JSON: %m"); + return 0; + } + + static const sd_json_dispatch_field top_table[] = { + { "systemd.credentials", SD_JSON_VARIANT_ARRAY, sd_json_dispatch_variant_noref, 0, 0 }, + {}, + }; + + sd_json_variant *creds = NULL; + r = sd_json_dispatch(j, top_table, SD_JSON_ALLOW_EXTENSIONS|SD_JSON_LOG, &creds); + if (r < 0) + return r; + + unsigned n_imported = 0; + int ret = 0; + if (creds) { + log_debug("Found 'systemd.credentials' field"); + + sd_json_variant *c; + JSON_VARIANT_ARRAY_FOREACH(c, creds) { + static const sd_json_dispatch_field credential_table[] = { + { "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(CredentialData, name), SD_JSON_MANDATORY }, + { "text", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(CredentialData, text), 0 }, + { "data", SD_JSON_VARIANT_STRING, json_dispatch_unbase64_iovec, offsetof(CredentialData, data), 0 }, + { "encrypted", SD_JSON_VARIANT_STRING, json_dispatch_unbase64_iovec, offsetof(CredentialData, encrypted), 0 }, + {}, + }; + + _cleanup_(credential_data_done) CredentialData d = {}; + r = sd_json_dispatch(c, credential_table, SD_JSON_LOG|SD_JSON_WARNING, &d); + if (r < 0) { + RET_GATHER(ret, r); + continue; + } + + if (!credential_name_valid(d.name)) { + RET_GATHER(ret, log_warning_errno(SYNTHETIC_ERRNO(EBADMSG), "Credential name '%s' is not valid, refusing.", d.name)); + continue; + 
} + + if ((!!d.text + !!iovec_is_set(&d.data) + !!iovec_is_set(&d.encrypted)) != 1) { + RET_GATHER(ret, log_warning_errno(SYNTHETIC_ERRNO(EBADMSG), "Exactly one of 'text', 'data', 'encrypted' must be set for credential '%s', refusing.", d.name)); + continue; + } + + r = import_credential_one(&d); + if (r < 0) + RET_GATHER(ret, r); + else if (r > 0) /* r == 0: credential already exists locally and was skipped, don't count it */ + n_imported++; + } + } + + log_full(n_imported == 0 ? LOG_DEBUG : LOG_INFO, "Imported %u credentials from IMDS.", n_imported); + return ret; +} + +static int add_public_address_to_json_array(sd_json_variant **array, int family, const union in_addr_union *addr) { /* Appends an address RR named "_public" for addr to *array; returns 0 if addr is the null address, 1 if an RR was appended */ + int r; + + assert(array); + assert(IN_SET(family, AF_INET, AF_INET6)); + assert(addr); + + if (in_addr_is_null(family, addr)) + return 0; + + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; + if (dns_resource_record_new_address(&rr, family, addr, "_public") < 0) + return log_oom(); + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *rrj = NULL; + r = dns_resource_record_to_json(rr, &rrj); + if (r < 0) + return log_error_errno(r, "Failed to convert address RR to JSON: %m"); + + r = sd_json_variant_append_array(array, rrj); + if (r < 0) + return log_error_errno(r, "Failed to append address RR to JSON array: %m"); + + log_debug("Writing IMDS RR for: %s", dns_resource_record_to_string(rr)); + return 1; +} + +static int import_imds_public_addresses(sd_varlink *link) { + int r, ret = 0; + + assert(link); + + /* Creates local RRs (honoured by systemd-resolved) for our public addresses. 
*/ + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *aj = NULL; + + union in_addr_union u = {}; + r = acquire_imds_key_as_ip_address(link, IMDS_IPV4_PUBLIC, /* key= */ NULL, AF_INET, &u); + if (r < 0) + RET_GATHER(ret, r); + else if (r > 0) { + r = add_public_address_to_json_array(&aj, AF_INET, &u); + if (r < 0) + return r; + } + + u = (union in_addr_union) {}; + r = acquire_imds_key_as_ip_address(link, IMDS_IPV6_PUBLIC, /* key= */ NULL, AF_INET6, &u); + if (r < 0) + RET_GATHER(ret, r); + else if (r > 0) { + r = add_public_address_to_json_array(&aj, AF_INET6, &u); + if (r < 0) + return r; + } + + if (sd_json_variant_elements(aj) == 0) { + log_debug("No IMDS public addresses known, not writing our RRs."); + return ret; /* 0, or a lookup error gathered above */ + } + + _cleanup_free_ char *text = NULL; + r = sd_json_variant_format(aj, SD_JSON_FORMAT_NEWLINE, &text); + if (r < 0) + return log_error_errno(r, "Failed to format JSON text: %m"); + + r = write_string_file("/run/systemd/resolve/static.d/imds-public.rr", text, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC|WRITE_STRING_FILE_MKDIR_0755); + if (r < 0) + return log_error_errno(r, "Failed to write IMDS RR data: %m"); + + log_debug("IMDS public addresses written out."); + return ret < 0 ? ret : 1; /* do not drop a lookup error gathered above */ +} + +static int import_imds_ssh_key(sd_varlink *link) { + int r; + + assert(link); + + _cleanup_(iovec_done) struct iovec data = {}; + r = acquire_imds_key(link, IMDS_SSH_KEY, /* key= */ NULL, &data); + if (r < 0) + return r; + if (r == 0 || !iovec_is_set(&data)) { + log_debug("No SSH key supplied via IMDS, not importing."); + return 0; + } + + r = write_credential("/run/credstore", "ssh.authorized_keys.root", &data); + if (r <= 0) + return r; + + log_info("Imported SSH key as credential 'ssh.authorized_keys.root'."); + return 0; +} + +static int import_imds_hostname(sd_varlink *link) { + int r; + + assert(link); + + _cleanup_(iovec_done) struct iovec data = {}; + r = acquire_imds_key(link, IMDS_HOSTNAME, /* key= */ NULL, &data); + if (r < 0) + return r; + if (r == 0 ||
!iovec_is_set(&data)) { + log_debug("No hostname supplied via IMDS, not importing."); + return 0; + } + + r = write_credential("/run/credstore", "firstboot.hostname", &data); + if (r <= 0) + return r; /* propagate write failures, like import_imds_ssh_key() does */ + + log_info("Imported hostname as credential 'firstboot.hostname'."); + return 0; +} + +static int acquire_imds_userdata(sd_varlink *link, struct iovec *ret) { + int r; + + assert(link); + assert(ret); + + /* First try our private namespace, if the concept exists, and then fall back to the singleton */ + _cleanup_(iovec_done) struct iovec data = {}; + r = acquire_imds_key(link, IMDS_USERDATA_BASE, "/systemd-userdata", &data); + if (r == 0) + r = acquire_imds_key(link, IMDS_USERDATA, /* key= */ NULL, &data); + if (r < 0) + return r; + if (r > 0) { + if (!iovec_is_set(&data)) { /* Treat empty user data like missing user data */ + *ret = (struct iovec) {}; + return 0; + } + + *ret = TAKE_STRUCT(data); + return 1; + } + + r = acquire_imds_key(link, IMDS_USERDATA_BASE64, /* key= */ NULL, &data); + if (r < 0) + return r; + _cleanup_(iovec_done) struct iovec decoded = {}; + if (r > 0) { + r = unbase64mem_full(data.iov_base, data.iov_len, /* secure= */ false, &decoded.iov_base, &decoded.iov_len); + if (r < 0) + return log_error_errno(r, "Failed to decode Base64 IMDS user data: %m"); + } + + if (!iovec_is_set(&decoded)) { /* Treat empty user data like missing user data */ + *ret = (struct iovec) {}; + return 0; + } + + *ret = TAKE_STRUCT(decoded); + return 1; +} + +static int action_get(sd_varlink *link) { + int r; + + assert(link); + + _cleanup_(iovec_done) struct iovec data = {}; + r = acquire_imds_key(link, arg_well_known, arg_key, &data); + if (r < 0) + return r; + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Key not available."); + + r = loop_write(STDOUT_FILENO, data.iov_base, data.iov_len); + if (r < 0) + return log_error_errno(r, "Failed to write data to standard output: %m"); + + return 0; +} + +static int action_userdata(sd_varlink *link) { + int r; + + assert(link); + + _cleanup_(iovec_done) struct iovec data = {}; + r =
acquire_imds_userdata(link, &data); + if (r < 0) + return r; + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "User data not available."); + + r = loop_write(STDOUT_FILENO, data.iov_base, data.iov_len); + if (r < 0) + return log_error_errno(r, "Failed to write data to standard output: %m"); + + return 0; +} + +static int remove_userdata(const char *path) { + assert(path); + + if (unlink(path) < 0) { + + if (errno != ENOENT) + log_debug_errno(errno, "Failed to remove '%s', ignoring: %m", path); + + return 0; + } + + log_debug("Removed '%s'.", path); + return 1; +} + +static int save_userdata(const struct iovec *data, const char *path) { + int r; + + assert(data); + assert(path); + + if (!iovec_is_set(data)) + return remove_userdata(path); + + r = write_data_file_atomic_at(AT_FDCWD, path, data, WRITE_DATA_FILE_MKDIR_0755); + if (r < 0) + return log_error_errno(r, "Failed to save userdata to '%s': %m", path); + + log_debug("Saved userdata to '%s'.", path); + return 1; +} + +static int action_import(sd_varlink *link) { + int r, ret = 0; + + assert(link); + + RET_GATHER(ret, import_imds_public_addresses(link)); /* returns 1 on success; only errors may reach our return value */ + RET_GATHER(ret, import_imds_hostname(link)); + RET_GATHER(ret, import_imds_ssh_key(link)); + + _cleanup_(iovec_done) struct iovec data = {}; + r = acquire_imds_userdata(link, &data); + if (r < 0) + return RET_GATHER(ret, r); + if (r == 0) { + log_info("No IMDS user data available, not importing credentials."); + (void) remove_userdata("/run/systemd/imds/userdata"); + return ret; + } + + /* Measure the userdata before we use it */ + (void) pcrextend_imds_userdata_now(&data); + + /* Keep a pristine copy of the userdata we actually applied. (Note that this data is typically also + * kept as cached item on systemd-imdsd, but that one is possibly subject to cache invalidation, + * while this one is supposed to pin the data actually in effect.)
*/ + (void) save_userdata(&data, "/run/systemd/imds/userdata"); + + /* Ensure no inner NUL byte */ + if (memchr(data.iov_base, 0, data.iov_len)) { + log_info("IMDS user data contains NUL byte, not processing."); + return ret; + } + + /* Turn this into a proper C string */ + if (!iovec_append(&data, &IOVEC_MAKE_BYTE(0))) + return log_oom(); + + return RET_GATHER(ret, import_credentials(data.iov_base)); +} + +static int run(int argc, char* argv[]) { + int r; + + log_setup(); + + r = parse_argv(argc, argv); + if (r <= 0) + return r; + + _cleanup_(sd_varlink_unrefp) sd_varlink *link = NULL; + r = sd_varlink_connect_address(&link, "/run/systemd/io.systemd.InstanceMetadata"); + if (r < 0) { + if (r != -ENOENT && !ERRNO_IS_NEG_DISCONNECT(r)) + return log_error_errno(r, "Failed to connect to systemd-imdsd: %m"); + + log_debug_errno(r, "Couldn't connect to /run/systemd/io.systemd.InstanceMetadata, will try to fork off systemd-imdsd as child now."); + + /* Try to fork off systemd-imdsd as a child as a fallback. If we have privileges and the + * SO_FWMARK trickery is not necessary, then this might just work. 
*/ + _cleanup_free_ char *p = NULL; + _cleanup_close_ int pin_fd = + pin_callout_binary("/usr/lib/systemd/systemd-imdsd", &p); + if (pin_fd < 0) + return log_error_errno(pin_fd, "Failed to pick up imdsd binary: %m"); + + r = sd_varlink_connect_exec(&link, p, /* argv[]= */ NULL); + if (r < 0) + return log_error_errno(r, "Failed to connect to imdsd service: %m"); + } + + switch (arg_action) { + + case ACTION_SUMMARY: + return action_summary(link); + + case ACTION_GET: + return action_get(link); + + case ACTION_USERDATA: + return action_userdata(link); + + case ACTION_IMPORT: + return action_import(link); + + default: + assert_not_reached(); + } + + return 0; +} + +DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run); diff --git a/src/imds/meson.build b/src/imds/meson.build index 65c5a7c7998b4..2f46d04df75a0 100644 --- a/src/imds/meson.build +++ b/src/imds/meson.build @@ -11,6 +11,11 @@ executables += [ 'sources' : files('imdsd.c', '../import/curl-util.c', 'imds-util.c'), 'dependencies' : [ libcurl ], }, + libexec_template + { + 'name' : 'systemd-imds', + 'public' : true, + 'sources' : files('imds-tool.c', 'imds-util.c'), + }, ] install_data( diff --git a/src/shared/pcrextend-util.c b/src/shared/pcrextend-util.c index 8586e85cbbd3f..eef159898d7ad 100644 --- a/src/shared/pcrextend-util.c +++ b/src/shared/pcrextend-util.c @@ -18,6 +18,7 @@ #include "mountpoint-util.h" #include "pcrextend-util.h" #include "pkcs7-util.h" +#include "sha256.h" #include "string-util.h" #include "strv.h" @@ -291,3 +292,70 @@ int pcrextend_verity_now( return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "TPM2 support disabled, not measuring Verity root hashes and signatures."); #endif } + +#define IMDS_USERDATA_TRUNCATED_MAX 256U + +int pcrextend_imds_userdata_word(const struct iovec *data, char **ret) { + assert(iovec_is_set(data)); + assert(ret); + + /* We include both a hash of the complete user data, and a truncated version of the data in the word + * we measure. 
The former protects the actual data, the latter is useful for debugging. */ + + _cleanup_free_ char *hash = hexmem(SHA256_DIRECT(data->iov_base, data->iov_len), SHA256_DIGEST_SIZE); + if (!hash) + return log_oom(); + + _cleanup_free_ char *data_encoded = NULL; + if (base64mem_full(data->iov_base, MIN(data->iov_len, IMDS_USERDATA_TRUNCATED_MAX), /* line_break= */ SIZE_MAX, &data_encoded) < 0) + return log_oom(); + + _cleanup_free_ char *word = strjoin("imds-userdata:", hash, ":", data_encoded); + if (!word) + return log_oom(); + + *ret = TAKE_PTR(word); + return 0; +} + +int pcrextend_imds_userdata_now(const struct iovec *data) { + +#if HAVE_TPM2 + int r; + + _cleanup_free_ char *word = NULL; + r = pcrextend_imds_userdata_word(data, &word); + if (r < 0) + return r; + + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.PCRExtend"); + if (r < 0) + return r; + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *reply = NULL; + const char *error_id = NULL; + r = sd_varlink_callbo( + vl, + "io.systemd.PCRExtend.Extend", + /* ret_reply= */ NULL, + &error_id, + SD_JSON_BUILD_PAIR_INTEGER("pcr", 12), + SD_JSON_BUILD_PAIR_STRING("text", word), + SD_JSON_BUILD_PAIR_STRING("eventType", "imds_userdata")); + if (r < 0) + return log_debug_errno(r, "Failed to issue io.systemd.PCRExtend.Extend() varlink call: %m"); + if (error_id) { + r = sd_varlink_error_to_errno(error_id, reply); + if (r != -EBADR) + return log_debug_errno(r, "Failed to issue io.systemd.PCRExtend.Extend() varlink call: %m"); + + return log_debug_errno(r, "Failed to issue io.systemd.PCRExtend.Extend() varlink call: %s", error_id); + } + + log_debug("Measurement of '%s' into PCR 12 completed.", word); + return 1; +#else + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "TPM2 support disabled, not measuring IMDS userdata."); +#endif +} diff --git a/src/shared/pcrextend-util.h b/src/shared/pcrextend-util.h index 00bc5b9b48dc7..eadc2d5cffc98 100644 
--- a/src/shared/pcrextend-util.h +++ b/src/shared/pcrextend-util.h @@ -1,9 +1,13 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #pragma once +#include + int pcrextend_file_system_word(const char *path, char **ret, char **ret_normalized_path); int pcrextend_machine_id_word(char **ret); int pcrextend_product_id_word(char **ret); int pcrextend_verity_word(const char *name, const struct iovec *root_hash, const struct iovec *root_hash_sig, char **ret); +int pcrextend_imds_userdata_word(const struct iovec *data, char **ret); -int pcrextend_verity_now(const char *name, const struct iovec *root_hash,const struct iovec *root_hash_sig); +int pcrextend_verity_now(const char *name, const struct iovec *root_hash, const struct iovec *root_hash_sig); +int pcrextend_imds_userdata_now(const struct iovec *data); diff --git a/units/meson.build b/units/meson.build index 782d1ecadfbe4..ca17237dd0b16 100644 --- a/units/meson.build +++ b/units/meson.build @@ -404,6 +404,10 @@ units = [ 'file' : 'systemd-imds-early-network.service.in', 'conditions' : ['ENABLE_IMDS'], }, + { + 'file' : 'systemd-imds-import.service.in', + 'conditions' : ['ENABLE_IMDS'], + }, { 'file' : 'systemd-importd.service.in', 'conditions' : ['ENABLE_IMPORTD'], diff --git a/units/systemd-imds-import.service.in b/units/systemd-imds-import.service.in new file mode 100644 index 0000000000000..9704557fbb5bf --- /dev/null +++ b/units/systemd-imds-import.service.in @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. 
+ +[Unit] +Description=Import System Credentials from IMDS +Documentation=man:systemd-imds(1) +Documentation=man:systemd.system-credentials(7) +DefaultDependencies=no +Wants=systemd-imdsd.socket network-online.target +After=systemd-imdsd.socket network-online.target +Before=sysinit.target systemd-firstboot.service +Conflicts=shutdown.target +Before=shutdown.target +ConditionPathExists=/etc/initrd-release + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart={{LIBEXECDIR}}/systemd-imds --import From a2fab8a3e460e59e57913faf3858a891d304611f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 4 Mar 2026 15:16:14 +0100 Subject: [PATCH 15/18] imds: add generator that hooks in IMDS logic on cloud guests --- man/rules/meson.build | 1 + man/systemd-imds-generator.xml | 104 +++++++++++++++++ src/imds/imds-generator.c | 197 +++++++++++++++++++++++++++++++++ src/imds/meson.build | 4 + 4 files changed, 306 insertions(+) create mode 100644 man/systemd-imds-generator.xml create mode 100644 src/imds/imds-generator.c diff --git a/man/rules/meson.build b/man/rules/meson.build index 0886efaacb0e7..376681709ca0a 100644 --- a/man/rules/meson.build +++ b/man/rules/meson.build @@ -1025,6 +1025,7 @@ manpages = [ ['systemd-hostnamed.service', '8', ['systemd-hostnamed'], 'ENABLE_HOSTNAMED'], ['systemd-hwdb', '8', [], 'ENABLE_HWDB'], ['systemd-id128', '1', [], ''], + ['systemd-imds-generator', '8', [], 'ENABLE_IMDS'], ['systemd-imds', '1', ['systemd-imds-import.service'], 'ENABLE_IMDS'], ['systemd-imdsd@.service', '8', diff --git a/man/systemd-imds-generator.xml b/man/systemd-imds-generator.xml new file mode 100644 index 0000000000000..6e1a3475caba5 --- /dev/null +++ b/man/systemd-imds-generator.xml @@ -0,0 +1,104 @@ + + + + + + + + systemd-imds-generator + systemd + + + + systemd-imds-generator + 8 + + + + systemd-imds-generator + Generator to automatically enable IMDS on supporting environments + + + + /usr/lib/systemd/system-generators/systemd-imds-generator + + + + 
Description + + systemd-imds-generator is a generator that enables IMDS (Instance Metadata + Service) functionality at boot on systems that support it. Specifically it does three things: + + + It pulls the systemd-imdsd@.service unit into the initial + transaction, which provides IMDS access to local applications via Varlink IPC. + + It pulls the systemd-imds-early-network.service unit into the + initial transaction, which generates a suitable + systemd.network5 + network configuration file that allows early-boot network access to the IMDS + functionality. + + It pulls the systemd-imds-import.service unit into the initial + transaction, which automatically imports various credentials from IMDS into the local system, storing + them in /run/credstore/. + + + By default, whether to pull in these services or not is decided based on + hwdb7 information, + that detects various IMDS environments automatically. However, this logic may be overridden via + systemd.imds=, see below. + + systemd-imds-generator implements + systemd.generator7. + + + + Kernel Command Line + + systemd-imds-generator understands the following kernel command line + parameters: + + + + + systemd.imds= + + Takes a boolean argument, and may be used to enable or disable the IMDS logic. Note that this + controls only whether the relevant services (as listed above) are automatically pulled into the + initial transaction, it has no effect if some other unit or the user explicitly activate the + relevant units. If this option is not used automatic detection of IMDS is used, see above. + + + + + + + + + systemd.imds.import= + + Takes a boolean argument. If false the systemd-imds-import.service (see + above) is not pulled into the initial transaction, i.e. no credentials are imported from + IMDS. Defaults to true. 
+ + + + + + + + + + See Also + + systemd1 + systemd-imds1 + systemd-imdsd@.service8 + systemd.system-credentials7 + + + + diff --git a/src/imds/imds-generator.c b/src/imds/imds-generator.c new file mode 100644 index 0000000000000..ea3c5b8c614f3 --- /dev/null +++ b/src/imds/imds-generator.c @@ -0,0 +1,197 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-hwdb.h" + +#include "dropin.h" +#include "fileio.h" +#include "generator.h" +#include "imds-util.h" +#include "log.h" +#include "parse-util.h" +#include "proc-cmdline.h" +#include "special.h" +#include "string-util.h" +#include "virt.h" + +static int arg_enabled = -1; /* Whether we shall offer local IMDS APIs */ +static bool arg_import = true; /* Whether we shall import IMDS credentials, SSH keys, … into the local system */ +static ImdsNetworkMode arg_network_mode = IMDS_NETWORK_LOCKED; + +static int parse_proc_cmdline_item(const char *key, const char *value, void *data) { + int r; + + assert(key); + + if (proc_cmdline_key_streq(key, "systemd.imds")) { + if (proc_cmdline_value_missing(key, value)) + return 0; + + r = parse_tristate_full(value, "auto", &arg_enabled); + if (r < 0) + return log_warning_errno(r, "Failed to parse systemd.imds= value: %m"); + + } else if (proc_cmdline_key_streq(key, "systemd.imds.import")) { + if (proc_cmdline_value_missing(key, value)) + return 0; + + r = parse_boolean(value); + if (r < 0) + return log_warning_errno(r, "Failed to parse systemd.imds.import= value: %m"); + + arg_import = r; + } else if (proc_cmdline_key_streq(key, "systemd.imds.network")) { + if (proc_cmdline_value_missing(key, value)) + return 0; + + ImdsNetworkMode m = imds_network_mode_from_string(value); + if (m < 0) + return log_warning_errno(m, "Failed to parse systemd.imds.network= value: %m"); + + arg_network_mode = m; + } + + return 0; +} + +static int smbios_get_modalias(char **ret) { + int r; + + assert(ret); + + _cleanup_free_ char *modalias = NULL; + r = 
read_virtual_file("/sys/devices/virtual/dmi/id/modalias", SIZE_MAX, &modalias, /* ret_size= */ NULL); + if (r < 0) + return r; + + truncate_nl(modalias); + + /* To detect Azure we need to check the chassis asset tag. Unfortunately the kernel does not include + * it in the modalias string right now. Let's hence append it manually. This matches similar logic in + * rules.d/60-dmi-id.rules. */ + _cleanup_free_ char *cat = NULL; + r = read_virtual_file("/sys/devices/virtual/dmi/id/chassis_asset_tag", SIZE_MAX, &cat, /* ret_size= */ NULL); + if (r < 0) + log_debug_errno(r, "Failed to read chassis asset tag, ignoring: %m"); + else { + truncate_nl(cat); + + if (!string_has_cc(cat, /* ok= */ NULL) && !isempty(cat) && !strextend(&modalias, "cat", cat, ":")) + return -ENOMEM; + } + + log_debug("Constructed SMBIOS modalias string: %s", modalias); + *ret = TAKE_PTR(modalias); + return 0; +} + +static int smbios_query(void) { + int r; + + /* Let's check whether the DMI device's hwdb data suggests IMDS support is available. Note, we cannot + * ask udev for this, because we typically run long before udev. Hence we'll do the hwdb lookup via + * sd-hwdb directly.
*/ + + _cleanup_free_ char *modalias = NULL; + r = smbios_get_modalias(&modalias); + if (r == -ENOENT) { + log_debug("No DMI device found, assuming IMDS is not available."); + return false; + } + if (r < 0) + return log_error_errno(r, "Failed to read DMI modalias: %m"); + + _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL; + r = sd_hwdb_new(&hwdb); + if (r < 0) + return log_error_errno(r, "Failed to open hwdb: %m"); + + r = sd_hwdb_seek(hwdb, modalias); + if (r == -ENOENT) { + log_debug("No hwdb data for DMI device found, assuming IMDS is off."); + return false; + } + if (r < 0) + return log_error_errno(r, "Failed to seek in hwdb for '%s': %m", modalias); + + for (;;) { + const char *key, *value; + r = sd_hwdb_enumerate(hwdb, &key, &value); + if (r < 0) + return log_error_errno(r, "Failed to enumerate hwdb entry for '%s': %m", modalias); + if (r == 0) + break; + + if (streq(key, "IMDS_VENDOR")) + return true; + } + + log_debug("IMDS_VENDOR= property for DMI device not set, assuming IMDS is not available."); + return false; +} + +static int run(const char *dest, const char *dest_early, const char *dest_late) { + int r; + + r = proc_cmdline_parse(parse_proc_cmdline_item, /* userdata= */ NULL, PROC_CMDLINE_STRIP_RD_PREFIX); + if (r < 0) + log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m"); + + if (arg_enabled < 0) { + Virtualization v = detect_container(); + if (v < 0) + log_debug_errno(v, "Container detection failed, ignoring: %m"); + if (v > 0) { + log_debug("Running in a container, disabling IMDS logic."); + arg_enabled = false; + } else { + r = smbios_query(); + if (r < 0) + return r; + + arg_enabled = r > 0; /* only the smbios_query() result decides; in the container case r is stale */ + } + } + + if (!arg_enabled) { + log_debug("IMDS not enabled, skipping generator."); + return 0; + } + + log_info("IMDS support enabled, pull in IMDS units."); + + /* Enable IMDS early networking, so that we can actually reach the IMDS server.
*/ + if (arg_network_mode != IMDS_NETWORK_OFF) { + r = generator_add_symlink(dest_early, SPECIAL_SYSINIT_TARGET, "wants", SYSTEM_DATA_UNIT_DIR "/systemd-imds-early-network.service"); + if (r < 0) + return log_error_errno(r, "Failed to hook in systemd-imds-early-network.service: %m"); + } + + /* Enable the IMDS service socket */ + r = generator_add_symlink(dest_early, SPECIAL_SOCKETS_TARGET, "wants", SYSTEM_DATA_UNIT_DIR "/systemd-imdsd.socket"); + if (r < 0) + return log_error_errno(r, "Failed to hook in systemd-imdsd.socket: %m"); + + /* We now know the SMBIOS device exists, hence it's safe now to order the IMDS service after it, so + * that it has all properties properly initialized. */ + r = write_drop_in( + dest_early, + "systemd-imdsd@.service", + 50, "dmi-id", + "# Automatically generated by systemd-imds-generator\n\n" + "[Unit]\n" + "Wants=sys-devices-virtual-dmi-id.device\n" + "After=sys-devices-virtual-dmi-id.device\n"); + if (r < 0) + return log_error_errno(r, "Failed to hook DMI id device before systemd-imdsd@.service: %m"); + + if (arg_import) { + /* Enable that we import IMDS data */ + r = generator_add_symlink(dest_early, SPECIAL_SYSINIT_TARGET, "wants", SYSTEM_DATA_UNIT_DIR "/systemd-imds-import.service"); + if (r < 0) + return log_error_errno(r, "Failed to hook in systemd-imds-import.service: %m"); + } + + return 0; +} + +DEFINE_MAIN_GENERATOR_FUNCTION(run); diff --git a/src/imds/meson.build b/src/imds/meson.build index 2f46d04df75a0..82f41a10cd314 100644 --- a/src/imds/meson.build +++ b/src/imds/meson.build @@ -16,6 +16,10 @@ executables += [ 'public' : true, 'sources' : files('imds-tool.c', 'imds-util.c'), }, + generator_template + { + 'name' : 'systemd-imds-generator', + 'sources' : files('imds-generator.c', 'imds-util.c'), + }, ] install_data( From bcee65e62bbc162f67b820d87fc89605f244c4e8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 6 Mar 2026 17:31:10 +0100 Subject: [PATCH 16/18] test: add simple integration test for 
systemd-imdsd --- mkosi/mkosi.sanitizers/mkosi.postinst | 1 + .../TEST-74-AUX-UTILS.units/fake-imds.py | 51 ++++++++++++++++ test/units/TEST-74-AUX-UTILS.imds.sh | 60 +++++++++++++++++++ 3 files changed, 112 insertions(+) create mode 100755 test/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-imds.py create mode 100755 test/units/TEST-74-AUX-UTILS.imds.sh diff --git a/mkosi/mkosi.sanitizers/mkosi.postinst b/mkosi/mkosi.sanitizers/mkosi.postinst index d4d00907ed07f..7e836e7ab7bf8 100755 --- a/mkosi/mkosi.sanitizers/mkosi.postinst +++ b/mkosi/mkosi.sanitizers/mkosi.postinst @@ -43,6 +43,7 @@ fi wrap=( /usr/lib/polkit-1/polkitd /usr/libexec/polkit-1/polkitd + /usr/lib/systemd/tests/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-imds.py agetty btrfs capsh diff --git a/test/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-imds.py b/test/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-imds.py new file mode 100755 index 0000000000000..1685becfbfd3a --- /dev/null +++ b/test/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-imds.py @@ -0,0 +1,51 @@ +#!/usr/bin/python3 +# SPDX-License-Identifier: LGPL-2.1-or-later + +import os, socket +from http.server import BaseHTTPRequestHandler, HTTPServer + +def sd_notify(state: str) -> bool: + notify_socket = os.environ.get("NOTIFY_SOCKET") + if not notify_socket: + return False + if notify_socket.startswith("@"): + notify_socket = "\0" + notify_socket[1:] + try: + with socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) as sock: + sock.sendto(state.encode(), notify_socket) + except OSError: + return False + + return True + +class Handler(BaseHTTPRequestHandler): + def do_GET(self): + if self.path == "/userdata": + body = b"{\"systemd.credentials\":[{\"name\":\"acredtest\",\"text\":\"avalue\"}]}" + self.send_response(200) + self.send_header("Content-Type", "text/plain") + self.send_header("Content-Length", len(body)) + self.end_headers() + 
self.wfile.write(body) + elif self.path == "/hostname": + body = b"piff" + self.send_response(200) + self.send_header("Content-Type", "text/plain") + self.send_header("Content-Length", len(body)) + self.end_headers() + self.wfile.write(body) + else: + self.send_error(404) + + def log_message(self, fmt, *args): + print(f"{self.address_string()} - {fmt % args}") + +PORT=8088 + +server = HTTPServer(("", PORT), Handler) +print(f"Serving on http://localhost:{PORT}/") +try: + sd_notify("READY=1") + server.serve_forever() +except KeyboardInterrupt: + print("\nStopped.") diff --git a/test/units/TEST-74-AUX-UTILS.imds.sh b/test/units/TEST-74-AUX-UTILS.imds.sh new file mode 100755 index 0000000000000..533501a8a77e8 --- /dev/null +++ b/test/units/TEST-74-AUX-UTILS.imds.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eux +set -o pipefail + +# shellcheck source=test/units/util.sh +. "$(dirname "$0")"/util.sh + + +if ! test -x /usr/lib/systemd/systemd-imdsd ; then + echo "No imdsd installed, skipping test." 
+ exit 0 +fi + +at_exit() { + set +e + systemctl stop fake-imds systemd-imdsd.socket ||: + ip link del dummy0 ||: + rm -f /tmp/fake-imds.py /run/credstore/firstboot.hostname /run/credstore/acredtest /run/systemd/system/systemd-imdsd@.service.d/50-env.conf +} + +trap at_exit EXIT + +systemd-run -p Type=notify --unit=fake-imds /usr/lib/systemd/tests/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-imds.py +systemctl status fake-imds + +# Add a fake network interface so that IMDS gets going +ip link add dummy0 type dummy +ip addr add 192.168.47.11/24 dev dummy0 + +USERDATA='{"systemd.credentials":[{"name":"acredtest","text":"avalue"}]}' + +# First try imdsd directly +IMDSD="/usr/lib/systemd/systemd-imdsd --vendor=test --data-url=http://192.168.47.11:8088 --well-known-key=userdata:/userdata --well-known-key=hostname:/hostname" +assert_eq "$($IMDSD --well-known=hostname)" "piff" +assert_eq "$($IMDSD --well-known=userdata)" "$USERDATA" +assert_eq "$($IMDSD /hostname)" "piff" +assert_eq "$($IMDSD /userdata)" "$USERDATA" + +# Then, try it as Varlink service +mkdir -p /run/systemd/system/systemd-imdsd@.service.d/ +cat >/run/systemd/system/systemd-imdsd@.service.d/50-env.conf < Date: Thu, 5 Mar 2026 11:36:03 +0100 Subject: [PATCH 17/18] update TODO --- TODO | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/TODO b/TODO index a069429636caf..0c28bced27106 100644 --- a/TODO +++ b/TODO @@ -123,6 +123,14 @@ Features: * start making use of the new --graceful switch to util-linux' umount command +* sysusers: allow specifying a path to an inode *and* a literal UID in the UID + column, so that if the inode exists it is used, and if not the literal UID is + used. Use this for services such as the imds one, which run under their own + UID in the initrd, and whose data should survive to the host, properly owned. + +* add service file setting to force the fwmark (a la SO_MARK) to some value, so + that we can allowlist certain services for imds this way. 
+ * make systemd work nicely without /bin/sh, logins and associated shell tools around - add a small unit that just prints "boot complete" which we can pull in wherever we pull in getty@1.service, but is conditioned on /bin/sh being @@ -131,6 +139,8 @@ Features: - make sure debug shell service (sushell) has a nice failure mode, prints a message and reboots - varlink interface for "systemctl start" and friends +* imds: maybe do smarter api version handling + * drop NV_ORDERLY flag from the product uuid nvpcr. Effect of the flag is that it pushes the thing into TPM RAM, but a TPM usually has very little of that, less than NVRAM. hence setting the flag amplifies space issues. Unsetting the From 03b83c69c3dceba1e70c2322c374dd357c5eca24 Mon Sep 17 00:00:00 2001 From: Ronan Pigott Date: Fri, 13 Mar 2026 16:41:42 -0700 Subject: [PATCH 18/18] hwdb: add Oracle Cloud OCI IMDS data --- hwdb.d/40-imds.hwdb | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hwdb.d/40-imds.hwdb b/hwdb.d/40-imds.hwdb index e098e20fbb8df..cd493678ad6c7 100644 --- a/hwdb.d/40-imds.hwdb +++ b/hwdb.d/40-imds.hwdb @@ -72,6 +72,19 @@ dmi:*:pnGoogleComputeEngine:* IMDS_KEY_IPV4_PUBLIC=/instance/network-interfaces/0/access-configs/0/external-ip IMDS_KEY_USERDATA_BASE=/instance/attributes +# https://docs.oracle.com/en-us/iaas/Content/Compute/Tasks/gettingmetadata.htm#metadata-keys +dmi:*:catOracleCloud.com:* + IMDS_VENDOR=oracle-cloud-oci + IMDS_DATA_URL=http://169.254.169.254/opc/v2 + IMDS_ADDRESS_IPV4=169.254.169.254 + IMDS_ADDRESS_IPV6=fd00:c1::a9fe:a9fe + IMDS_EXTRA_HEADER=Authorization: Bearer Oracle + IMDS_KEY_HOSTNAME=/instance/hostname + IMDS_KEY_REGION=/instance/region + IMDS_KEY_ZONE=/instance/availabilityDomain + IMDS_KEY_SSH_KEY=/instance/metadata/ssh_authorized_keys + IMDS_KEY_USERDATA_BASE64=/metadata/user_data + # https://docs.hetzner.cloud/reference/cloud#description/server-metadata dmi:bvnHetzner:* IMDS_VENDOR=hetzner