diff --git a/TODO b/TODO index a069429636caf..0c28bced27106 100644 --- a/TODO +++ b/TODO @@ -123,6 +123,14 @@ Features: * start making use of the new --graceful switch to util-linux' umount command +* sysusers: allow specifying a path to an inode *and* a literal UID in the UID + column, so that if the inode exists it is used, and if not the literal UID is + used. Use this for services such as the imds one, which run under their own + UID in the initrd, and whose data should survive to the host, properly owned. + +* add service file setting to force the fwmark (a la SO_MARK) to some value, so + that we can allowlist certain services for imds this way. + * make systemd work nicely without /bin/sh, logins and associated shell tools around - add a small unit that just prints "boot complete" which we can pull in wherever we pull in getty@1.service, but is conditioned on /bin/sh being @@ -131,6 +139,8 @@ Features: - make sure debug shell service (sushell) has a nice failure mode, prints a message and reboots - varlink interface for "systemctl start" and friends +* imds: maybe do smarter api version handling + * drop NV_ORDERLY flag from the product uuid nvpcr. Effect of the flag is that it pushes the thing into TPM RAM, but a TPM usually has very little of that, less than NVRAM. hence setting the flag amplifies space issues. Unsetting the diff --git a/hwdb.d/40-imds.hwdb b/hwdb.d/40-imds.hwdb new file mode 100644 index 0000000000000..cd493678ad6c7 --- /dev/null +++ b/hwdb.d/40-imds.hwdb @@ -0,0 +1,106 @@ +# This file is part of systemd + +# This provides various properties that declare if and +# how IMDS is available on the local system, i.e. we are running in a major +# cloud service that provides something resembling AWS' or Azure's Instance +# Metadata Service. +# +# General IMDS endpoint data: +# IMDS_VENDOR= → Indicates IMDS is available, and which vendor it is +# IMDS_TOKEN_URL= → The URL to request an API token from. If not set, no API token is requested. 
+# IMDS_REFRESH_HEADER_NAME= → The HTTP request header field (everything before the ":") that contains the refresh TTL when requesting a token. +# IMDS_DATA_URL= → The base URL to request actual IMDS data fields from +# IMDS_DATA_URL_SUFFIX= → Parameters to suffix the URLs with +# IMDS_TOKEN_HEADER_NAME= → The HTTP request header field (everything before the ":") used to pass the token +# IMDS_EXTRA_HEADER=, IMDS_EXTRA_HEADER2=, IMDS_EXTRA_HEADER3=, … +# → Additional HTTP headers to pass when requesting a data field (full header, including ":") +# IMDS_ADDRESS_IPV4= → IPv4 address of the IMDS server +# IMDS_ADDRESS_IPV6= → IPv6 address of the IMDS server +# +# Well-known IMDS keys: +# IMDS_KEY_HOSTNAME= → IMDS key for the hostname +# IMDS_KEY_REGION= → IMDS key for the region, if that concept applies +# IMDS_KEY_ZONE= → IMDS key for the zone, if that concept applies +# IMDS_KEY_IPV4_PUBLIC= → IMDS key for the primary public IPv4 address if there is any +# IMDS_KEY_IPV6_PUBLIC= → IMDS key for the primary public IPv6 address if there is any +# IMDS_KEY_SSH_KEY= → IMDS key for an SSH public key to install in the root account +# IMDS_KEY_USERDATA= → IMDS key for arbitrary userdata (if there's only one) +# IMDS_KEY_USERDATA_BASE= → IMDS key for arbitrary userdata (if there are multiple, this is the common prefix) +# IMDS_KEY_USERDATA_BASE64= → IMDS key for arbitrary userdata (if there's only one, but it is base64 encoded) + +# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html +dmi:bvnAmazonEC2:* + IMDS_VENDOR=amazon-ec2 + IMDS_TOKEN_URL=http://169.254.169.254/latest/api/token + IMDS_REFRESH_HEADER_NAME=X-aws-ec2-metadata-token-ttl-seconds + IMDS_DATA_URL=http://169.254.169.254/latest + IMDS_TOKEN_HEADER_NAME=X-aws-ec2-metadata-token + IMDS_ADDRESS_IPV4=169.254.169.254 + IMDS_ADDRESS_IPV6=fd00:ec2::254 + IMDS_KEY_HOSTNAME=/meta-data/hostname + IMDS_KEY_REGION=/meta-data/placement/region + IMDS_KEY_ZONE=/meta-data/placement/availability-zone +
IMDS_KEY_IPV4_PUBLIC=/meta-data/public-ipv4 + IMDS_KEY_IPV6_PUBLIC=/meta-data/ipv6 + IMDS_KEY_SSH_KEY=/meta-data/public-keys/0/openssh-key + IMDS_KEY_USERDATA=/user-data + +# https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service#instance-metadata +dmi:*:cat7783-7084-3265-9085-8269-3286-77:* + IMDS_VENDOR=microsoft-azure + IMDS_DATA_URL=http://169.254.169.254/metadata + IMDS_DATA_URL_SUFFIX=?api-version=2025-04-07&format=text + IMDS_EXTRA_HEADER=Metadata: true + IMDS_ADDRESS_IPV4=169.254.169.254 + IMDS_KEY_HOSTNAME=/instance/compute/osProfile/computerName + IMDS_KEY_REGION=/instance/compute/location + IMDS_KEY_ZONE=/instance/compute/physicalZone + IMDS_KEY_IPV4_PUBLIC=/instance/network/interface/0/ipv4/ipAddress/0/publicIpAddress + IMDS_KEY_IPV6_PUBLIC=/instance/network/interface/0/ipv6/ipAddress/0/publicIpAddress + IMDS_KEY_SSH_KEY=/instance/compute/publicKeys/0/keyData + IMDS_KEY_USERDATA_BASE64=/instance/compute/userData + +# https://docs.cloud.google.com/compute/docs/metadata/predefined-metadata-keys +dmi:*:pnGoogleComputeEngine:* + IMDS_VENDOR=google-gcp + IMDS_DATA_URL=http://169.254.169.254/computeMetadata/v1 + IMDS_EXTRA_HEADER=Metadata-Flavor: Google + IMDS_ADDRESS_IPV4=169.254.169.254 + IMDS_KEY_HOSTNAME=/instance/hostname + IMDS_KEY_REGION=/instance/region + IMDS_KEY_ZONE=/instance/zone + IMDS_KEY_IPV4_PUBLIC=/instance/network-interfaces/0/access-configs/0/external-ip + IMDS_KEY_USERDATA_BASE=/instance/attributes + +# https://docs.oracle.com/en-us/iaas/Content/Compute/Tasks/gettingmetadata.htm#metadata-keys +dmi:*:catOracleCloud.com:* + IMDS_VENDOR=oracle-cloud-oci + IMDS_DATA_URL=http://169.254.169.254/opc/v2 + IMDS_ADDRESS_IPV4=169.254.169.254 + IMDS_ADDRESS_IPV6=fd00:c1::a9fe:a9fe + IMDS_EXTRA_HEADER=Authorization: Bearer Oracle + IMDS_KEY_HOSTNAME=/instance/hostname + IMDS_KEY_REGION=/instance/region + IMDS_KEY_ZONE=/instance/availabilityDomain + IMDS_KEY_SSH_KEY=/instance/metadata/ssh_authorized_keys + 
IMDS_KEY_USERDATA_BASE64=/metadata/user_data + +# https://docs.hetzner.cloud/reference/cloud#description/server-metadata +dmi:bvnHetzner:* + IMDS_VENDOR=hetzner + IMDS_DATA_URL=http://169.254.169.254/hetzner/v1/metadata + IMDS_ADDRESS_IPV4=169.254.169.254 + IMDS_KEY_HOSTNAME=/hostname + IMDS_KEY_REGION=/region + IMDS_KEY_ZONE=/availability-zone + IMDS_KEY_IPV4_PUBLIC=/public-ipv4 + IMDS_KEY_SSH_KEY=/public-keys/0 + IMDS_KEY_USERDATA=/userdata + +# https://www.scaleway.com/en/docs/instances/how-to/use-cloud-init/ +dmi:*:svnScaleway:* + IMDS_VENDOR=scaleway + IMDS_DATA_URL=http://169.254.42.42 + IMDS_ADDRESS_IPV4=169.254.42.42 + IMDS_ADDRESS_IPV6=fd00:42::42 + IMDS_KEY_USERDATA=/user_data diff --git a/hwdb.d/meson.build b/hwdb.d/meson.build index 9ba73b21d6393..3299eaf8a75bf 100644 --- a/hwdb.d/meson.build +++ b/hwdb.d/meson.build @@ -19,6 +19,7 @@ hwdb_files_notest = files( hwdb_files_test = files( '20-dmi-id.hwdb', '20-net-ifname.hwdb', + '40-imds.hwdb', '60-autosuspend.hwdb', '60-autosuspend-fingerprint-reader.hwdb', '60-evdev.hwdb', diff --git a/hwdb.d/parse_hwdb.py b/hwdb.d/parse_hwdb.py index e98510839b73f..e70b0ff04e94e 100755 --- a/hwdb.d/parse_hwdb.py +++ b/hwdb.d/parse_hwdb.py @@ -125,7 +125,7 @@ def hwdb_grammar(): matchline = (matchline_typed | matchline_general) + EOL propertyline = (White(' ', exact=1).suppress() + - Combine(UDEV_TAG - '=' - Optional(Word(alphanums + '_=:@*.!-;, "/')) + Combine(UDEV_TAG - '=' - Optional(Word(alphanums + '_=:@*.!-;, "/?&')) - Optional(pythonStyleComment)) + EOL) propertycomment = White(' ', exact=1) + pythonStyleComment + EOL @@ -215,6 +215,24 @@ def property_grammar(): ('ID_NET_NAME_FROM_DATABASE', name_literal), ('ID_NET_NAME_INCLUDE_DOMAIN', zero_one), ('TPM2_BROKEN_NVPCR', zero_one), + ('IMDS_VENDOR', name_literal), + ('IMDS_TOKEN_URL', name_literal), + ('IMDS_REFRESH_HEADER_NAME', name_literal), + ('IMDS_DATA_URL', name_literal), + ('IMDS_DATA_URL_SUFFIX', name_literal), + ('IMDS_TOKEN_HEADER_NAME', name_literal), + 
('IMDS_EXTRA_HEADER', name_literal), + ('IMDS_ADDRESS_IPV4', name_literal), + ('IMDS_ADDRESS_IPV6', name_literal), + ('IMDS_KEY_HOSTNAME', name_literal), + ('IMDS_KEY_REGION', name_literal), + ('IMDS_KEY_ZONE', name_literal), + ('IMDS_KEY_IPV4_PUBLIC', name_literal), + ('IMDS_KEY_IPV6_PUBLIC', name_literal), + ('IMDS_KEY_SSH_KEY', name_literal), + ('IMDS_KEY_USERDATA', name_literal), + ('IMDS_KEY_USERDATA_BASE', name_literal), + ('IMDS_KEY_USERDATA_BASE64', name_literal), ) fixed_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE') for name, val in props] diff --git a/man/kernel-command-line.xml b/man/kernel-command-line.xml index 4da1796a97ca2..98673e0a51674 100644 --- a/man/kernel-command-line.xml +++ b/man/kernel-command-line.xml @@ -793,6 +793,19 @@ + + systemd.imds= + systemd.imds.*= + + Controls various Instance Metadata Service (IMDS) cloud aspects, see + systemd-imdsd@.service8 + and + systemd-imds-generator8 + for details. + + + + diff --git a/man/rules/meson.build b/man/rules/meson.build index d69793150be97..376681709ca0a 100644 --- a/man/rules/meson.build +++ b/man/rules/meson.build @@ -1025,6 +1025,14 @@ manpages = [ ['systemd-hostnamed.service', '8', ['systemd-hostnamed'], 'ENABLE_HOSTNAMED'], ['systemd-hwdb', '8', [], 'ENABLE_HWDB'], ['systemd-id128', '1', [], ''], + ['systemd-imds-generator', '8', [], 'ENABLE_IMDS'], + ['systemd-imds', '1', ['systemd-imds-import.service'], 'ENABLE_IMDS'], + ['systemd-imdsd@.service', + '8', + ['systemd-imdsd', + 'systemd-imdsd-early-network.service', + 'systemd-imdsd.socket'], + 'ENABLE_IMDS'], ['systemd-import-generator', '8', [], ''], ['systemd-importd.service', '8', ['systemd-importd'], 'ENABLE_IMPORTD'], ['systemd-inhibit', '1', [], ''], @@ -1260,6 +1268,7 @@ manpages = [ ['systemd.pcrlock', '5', ['systemd.pcrlock.d'], ''], ['systemd.preset', '5', [], ''], ['systemd.resource-control', '5', [], ''], + ['systemd.rr', '5', [], 'ENABLE_RESOLVE'], ['systemd.scope', '5', [], ''], ['systemd.service', '5', [], 
''], ['systemd.slice', '5', [], ''], diff --git a/man/systemd-firstboot.xml b/man/systemd-firstboot.xml index 86a85f0bf2855..8d5e9a724d4dd 100644 --- a/man/systemd-firstboot.xml +++ b/man/systemd-firstboot.xml @@ -447,6 +447,16 @@ + + + firstboot.hostname + + This credential specifies the static system hostname setting to set during first + boot, in place of prompting the user. Note that this controls the static hostname, not the transient + hostname, and only has an effect on first boot, unlike system.hostname. + + + Note that by default the systemd-firstboot.service unit file is set up to diff --git a/man/systemd-imds-generator.xml b/man/systemd-imds-generator.xml new file mode 100644 index 0000000000000..6e1a3475caba5 --- /dev/null +++ b/man/systemd-imds-generator.xml @@ -0,0 +1,104 @@ + + + + + + + + systemd-imds-generator + systemd + + + + systemd-imds-generator + 8 + + + + systemd-imds-generator + Generator to automatically enable IMDS on supporting environments + + + + /usr/lib/systemd/system-generators/systemd-imds-generator + + + + Description + + systemd-imds-generator is a generator that enables IMDS (Instance Metadata + Service) functionality at boot on systems that support it. Specifically it does three things: + + + It pulls the systemd-imdsd@.service unit into the initial + transaction, which provides IMDS access to local applications via Varlink IPC. + + It pulls the systemd-imdsd-early-network.service unit into the + initial transaction, which generates a suitable + systemd.network5 + network configuration file that allows early-boot network access to the IMDS + functionality. + + It pulls the systemd-imds-import.service unit into the initial + transaction, which automatically imports various credentials from IMDS into the local system, storing + them in /run/credstore/. + + + By default, whether to pull in these services or not is decided based on + hwdb7 information, + that detects various IMDS environments automatically.
However, this logic may be overridden via + systemd.imds=, see below. + + systemd-imds-generator implements + systemd.generator7. + + + + Kernel Command Line + + systemd-imds-generator understands the following kernel command line + parameters: + + + + + systemd.imds= + + Takes a boolean argument, and may be used to enable or disable the IMDS logic. Note that this + controls only whether the relevant services (as listed above) are automatically pulled into the + initial transaction, it has no effect if some other unit or the user explicitly activate the + relevant units. If this option is not used automatic detection of IMDS is used, see above. + + + + + + + + + systemd.imds.import= + + Takes a boolean argument. If false the systemd-imds-import.service (see + above) is not pulled into the initial transaction, i.e. no credentials are imported from + IMDS. Defaults to true. + + + + + + + + + + See Also + + systemd1 + systemd-imds1 + systemd-imdsd@.service8 + systemd.system-credentials7 + + + + diff --git a/man/systemd-imds.xml b/man/systemd-imds.xml new file mode 100644 index 0000000000000..b0d1f5297ed12 --- /dev/null +++ b/man/systemd-imds.xml @@ -0,0 +1,171 @@ + + + + + + + + systemd-imds + systemd + + + + systemd-imds + 1 + + + + systemd-imds + systemd-imds-import.service + Cloud IMDS (Instance Metadata Service) tool + + + + systemd-imds-import.service + + systemd-imds OPTIONS KEY + + + + + Description + + systemd-imds is a tool for acquiring data from IMDS (Instance Metadata Service), + as provided in many cloud environments. It is a client to + systemd-imdsd@.service8, + and provides access to IMDS data from shell environments. + + The tool can operate in one of four modes: + + + Without positional arguments (and without the switch) + general IMDS service data and a few well known fields are displayed in human friendly + form. 
+ + With a positional argument (and without ) the IMDS data + referenced by the specified key is acquired and written to standard output, in unprocessed form. IMDS + keys are the part of the IMDS acquisition URL that are suffixed to the base URL. IMDS keys must begin + with a slash (/). Note that IMDS keys are typically implementation + specific. + + With the option specified (see below), the indicated + well-known field is written to standard output, in unprocessed form. The concept of well-known fields + abstracts IMDS implementation differences to some level, exposing a unified interface for IMDS fields + that typically exist on many different implementations, but under implementation-specific + keys. + + With the option specified (see below) the "userdata" + provided via IMDS is written to standard output. Under the hood this is similar to + , or + . Each of the three is tried in turn (in this order), and + the first available data is returned. For the + systemd-userdata userdata item is requested. For + the returned data is automatically + Base64-decoded. + + + + + Options and Commands + + + + + + + Takes one of hostname, region, + zone, ipv4-public, ipv6-public, + ssh-key, userdata, userdata-base, + userdata-base64. Acquires a specific "well-known" field from IMDS. Many of + these fields are commonly supported by various IMDS implementations, but typically some fields are + not. Note that if is used an additional subkey must be + specified as positional argument, which encodes the specific userdata item to + acquire. + + + + + + + + Takes a time in seconds as argument, and indicates the required "freshness" of the + data, in case cached data is used. + + + + + + + + Takes a boolean. If set to false local caching of IMDS is disabled, and the data is + always acquired fresh from the IMDS endpoint. + + + + + + + + + Acquire this instance's IMDS user data, if available. See above for + details. 
+ + + + + + + + Acquires IMDS data and writes relevant fields as credentials to + /run/credstore/. This currently covers: + + + If the IMDS user data is a valid JSON object containing a field + systemd.credentials (with a JSON array as value) it is processed, importing + arbitrary credentials listed in the array. Each array item must have a name + field indicating the credential name. It may have one text, + data or encrypted field, containing the credential data. If + text is used the value shall be a literal string of the credential value. If + data is used the value may be arbitrary binary data encoded in a Base64 + string. If encrypted is used the value shall be a Base64 encoded encrypted + credential. See + systemd.system-credentials7 + for information about credentials that may be imported this way. + + If the well-known ssh-key field is available, its value will be + imported into the ssh.authorized_keys credential. + + If the well-known hostname field is available, its value will be + imported into the firstboot.hostname credential. + + + This command is invoked by the systemd-imds-import.service run at + boot. + + + + + + + + + + + Exit status + + On success, 0 is returned, a non-zero failure code otherwise. 
+ + + + See Also + + systemd1 + systemd-imdsd@.service8 + systemd-imds-generator8 + systemd.system-credentials7 + + + + diff --git a/man/systemd-imdsd@.service.xml b/man/systemd-imdsd@.service.xml new file mode 100644 index 0000000000000..a92b0ef98ac92 --- /dev/null +++ b/man/systemd-imdsd@.service.xml @@ -0,0 +1,226 @@ + + + + + + + + systemd-imdsd@.service + systemd + + + + systemd-imdsd@.service + 8 + + + + systemd-imdsd@.service + systemd-imdsd + systemd-imdsd.socket + systemd-imdsd-early-network.service + Cloud IMDS (Instance Metadata Service) client + + + + systemd-imdsd@.service + systemd-imdsd.socket + systemd-imdsd-early-network.service + /usr/lib/systemd/systemd-imdsd + + + + Description + + systemd-imdsd@.service is a system service that provides local access to IMDS + (Instance Metadata Service; or equivalent) functionality, as provided by many public clouds. + + The + systemd-imds1 tool may + be used to query information from IMDS from shell environments. + + The service provides a Varlink IPC interface via + /run/systemd/io.systemd.InstanceMetadata to query IMDS fields. + + systemd-imdsd-early-network.service is a system service that generates a + systemd-networkd.service8 + compatible + systemd.network5 file + for configuring the early-boot network in order to be able to contact the IMDS endpoint. + + + + Kernel Command Line Options + + The IMDS endpoint is typically determined automatically via + hwdb7 records, but can + also be configured explicitly via the kernel command line, via the following options: + + + + systemd.imds.network= + + Takes one of off, locked, + unlocked. Controls whether and how to set up networking for IMDS endpoint + access. Unless set to off early boot networking is enabled, ensuring that the + IMDS endpoint can be reached. 
If set to locked (the default, if not configured + otherwise) direct access to the IMDS endpoint by regular unprivileged processes is disabled via a + "prohibit" route, so that any access must be done through + systemd-imdsd@.service or its associated tools. If set to + unlocked this "prohibit" route is not created, and regular unprivileged + processes can directly contact IMDS. + + + + + + + systemd.imds.vendor= + + A short string identifying the cloud vendor. + + Example: systemd.imds.vendor=foobarcloud + + + + + + + systemd.imds.token_url= + + If a bearer token must be acquired to talk to the IMDS service, this is the URL to acquire it + from. + + + + + + + systemd.imds.refresh_header_name= + + Takes a HTTP header field name (excluding the :) that declares the header + field for passing the TTL value (in seconds) to the HTTP server when acquiring a token. Only + applies if systemd.imds.token_url= is set too. + + + + + + + systemd.imds.data_url= + + Takes the base URL to acquire the IMDS data from (the IMDS "endpoint"). All data fields are + acquired from below this URL. This URL should typically not end in /. + + The data URLs are concatenated from this base URL, the IMDS "key" and the suffix configured + via systemd.imds.data_url_suffix= below. Well-known IMDS "keys" can be + configured via the systemd.imds.key.*= options below. + + Example: systemd.imds.data_url=http://169.254.169.254/metadata + + + + + + + systemd.imds.data_url_suffix= + + If specified, this field is appended to the end of the data URL (after appending the IMDS + "key" to the data base URL), see above. + + Example: systemd.imds.data_url_suffix=?api-version=2025-04-07&format=text + + + + + + + systemd.imds.token_header_name= + + Takes a HTTP header field name (excluding the :) that declares the header + field to pass the bearer token acquired from the token URL (see above) in. Only applies if + systemd.imds.token_url= is set too.
+ + + + + + + systemd.imds.extra_header= + + Takes a full HTTP header expression (both field name and value, separated by a colon + :) to pass to the HTTP server when requesting data. May be used multiple times + to set multiple headers. + + Example: systemd.imds.extra_header=Metadata:true + + + + + + + systemd.imds.ipv4_address= + + Configures the IPv4 address the IMDS endpoint is contacted on. This should typically be the + IP address also configured via systemd.imds.data_url= (if IPv4 is used) and is + used to set up IP routing. + + Example: systemd.imds.ipv4_address=169.254.169.254 + + + + + + + systemd.imds.ipv6_address= + + Configures the IPv6 address the IMDS endpoint is contacted on. This should typically be the + IP address also configured via systemd.imds.data_url= (if IPv6 is used) and is + used to set up IP routing. + + + + + + + systemd.imds.key.hostname= + systemd.imds.key.region= + systemd.imds.key.zone= + systemd.imds.key.ipv4_public= + systemd.imds.key.ipv6_public= + systemd.imds.key.ssh_key= + systemd.imds.key.userdata= + systemd.imds.key.userdata_base= + systemd.imds.key.userdata_base64= + + Configures strings to concatenate to the data base URL (see above) to acquire data for + various "well-known" fields. These strings must begin with a /. They should + return the relevant data in plain text. + + A special case are the three "userdata" keys: the option + systemd.imds.key.userdata_base= should be used if the IMDS service knows a + concept of multiple userdata fields, and a field identifier thus still needs to be appended to the + userdata base URL. The option systemd.imds.key.userdata= should be used if only + a single userdata field is supported. The option systemd.imds.key.userdata_base64= + should be used in the same case, but only if the userdata field is encoded in Base64.
+ + Example: systemd.imds.key.hostname=/instance/compute/osProfile/computerName + + + + + + + + + See Also + + systemd1 + systemd-imds1 + systemd-imds-generator8 + systemd-networkd.service8 + + + + diff --git a/man/systemd-resolved.service.xml b/man/systemd-resolved.service.xml index a5ab48d2fa05c..f4a542162a53c 100644 --- a/man/systemd-resolved.service.xml +++ b/man/systemd-resolved.service.xml @@ -510,6 +510,7 @@ search foobar.com barbar.com systemd1 resolved.conf5 systemd.dns-delegate5 + systemd.rr5 systemd.dnssd5 dnssec-trust-anchors.d5 nss-resolve8 diff --git a/man/systemd.rr.xml b/man/systemd.rr.xml new file mode 100644 index 0000000000000..58a6f76bd5b9e --- /dev/null +++ b/man/systemd.rr.xml @@ -0,0 +1,92 @@ + + + + + + + + systemd.rr + systemd + + + + systemd.rr + 5 + + + + systemd.rr + Local static DNS resource record definitions + + + + + /etc/systemd/resolve/static.d/*.rr + /run/systemd/resolve/static.d/*.rr + /usr/lib/systemd/resolve/static.d/*.rr + + + + + Description + + *.rr files may be used to define resource record sets ("RRsets") that shall be + resolvable locally, similar in style to address records defined by /etc/hosts (see + hosts5 for + details). These files are read by + systemd-resolved.service8, + and are used to synthesize local responses to local queries matching the defined resource record set. + + These drop-in files are in JSON format. Each file either may contain a single top-level DNS RR + object, or an array of one or more DNS RR objects. Each RR object has at least a key + subobject consisting of name string field and a type integer field + (which contains the RR type in numeric form). Depending on the chosen type the RR object also has the + following fields: + + + For A/AAAA RRs, the RR object should have an address + address field set to an array consisting of 4 or 16 8bit unsigned integers for the IP + address. + + For PTR/NS/CNAME/DNAME RRs, the RR object should have a name field + set to the name the record shall point to. 
+ + + This JSON serialization of DNS RRs matches the one returned by resolvectl. + + Currently no other RR types are supported. + + + + Examples + + Consider a file /run/systemd/resolve/static.d/foobar.rr: + + +{ + "key" : { + "type" : 1, + "name" : "foobar.example.com" + }, + "address" : [ 192, 168, 100, 1 ] +} + + This ensures that local address lookups for foobar.example.com will resolve to the + 192.168.100.1 IPv4 address. + + + + + See Also + + systemd1 + systemd-resolved.service8 + hosts5 + resolvectl1 + + + + diff --git a/man/systemd.system-credentials.xml b/man/systemd.system-credentials.xml index e3e2887207784..4ea8c6a14c9ef 100644 --- a/man/systemd.system-credentials.xml +++ b/man/systemd.system-credentials.xml @@ -44,7 +44,7 @@ firstboot.keymap - The console key mapping to set (e.g. de). Read by + The console key mapping to set (e.g. de). Read by systemd-firstboot1, and only honoured if no console keymap has been configured before. @@ -52,6 +52,20 @@ + + firstboot.hostname + + This credential specifies the static system hostname setting to set during first + boot, in place of prompting the user. Note that this controls the static hostname, not the transient + hostname, and only has an effect on first boot, unlike system.hostname (see + below). Read by + systemd-firstboot1 + and only honoured if no static hostname has been configured before. + + + + + firstboot.locale firstboot.locale-messages @@ -398,9 +412,10 @@ system.hostname Accepts a (transient) hostname to configure during early boot. The static hostname specified - in /etc/hostname, if configured, takes precedence over this setting. - Interpreted by the service manager (PID 1). For details see - systemd1. + in /etc/hostname, if configured, takes precedence over this setting. + Interpreted by the service manager (PID 1). For details see + systemd1. Also + see firstboot.hostname above. 
diff --git a/meson.build b/meson.build index 3672005d75b17..612aff951991d 100644 --- a/meson.build +++ b/meson.build @@ -895,6 +895,7 @@ foreach option : ['adm-gid', 'video-gid', 'wheel-gid', 'systemd-journal-gid', + 'systemd-imds-uid', 'systemd-network-uid', 'systemd-resolve-uid', 'systemd-timesync-uid'] @@ -1528,6 +1529,11 @@ conf.set('DEFAULT_DNSSEC_MODE', 'DNSSEC_' + default_dnssec.underscorify().to_upper()) conf.set_quoted('DEFAULT_DNSSEC_MODE_STR', default_dnssec) +have = get_option('imds').require( + conf.get('HAVE_LIBCURL') == 1, + error_message : 'curl required').allowed() +conf.set10('ENABLE_IMDS', have) + have = get_option('importd').require( conf.get('HAVE_LIBCURL') == 1 and conf.get('HAVE_OPENSSL') == 1 and @@ -2363,6 +2369,7 @@ subdir('src/home') subdir('src/hostname') subdir('src/hwdb') subdir('src/id128') +subdir('src/imds') subdir('src/import') subdir('src/integritysetup') subdir('src/journal') @@ -3134,6 +3141,7 @@ foreach tuple : [ ['homed'], ['hostnamed'], ['hwdb'], + ['imds'], ['importd'], ['initrd'], ['kernel-install'], diff --git a/meson_options.txt b/meson_options.txt index c1af7ce237492..7835f716662d9 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -142,6 +142,8 @@ option('timedated', type : 'boolean', description : 'install the systemd-timedated daemon') option('timesyncd', type : 'boolean', description : 'install the systemd-timesyncd daemon') +option('imds', type : 'feature', + description : 'install the systemd-imds stack') option('journal-storage-default', type : 'combo', choices : ['persistent', 'auto', 'volatile', 'none'], description : 'default storage mode for journald (main namespace)') option('remote', type : 'feature', deprecated : { 'true' : 'enabled', 'false' : 'disabled' }, @@ -334,6 +336,8 @@ option('systemd-resolve-uid', type : 'integer', value : 0, description : 'soft-static allocation for the systemd-resolve user') option('systemd-timesync-uid', type : 'integer', value : 0, description : 'soft-static allocation 
for the systemd-timesync user') +option('systemd-imds-uid', type : 'integer', value : 0, + description : 'soft-static allocation for the systemd-imds user') option('dev-kvm-mode', type : 'string', value : '0666', description : '/dev/kvm access mode') diff --git a/mkosi/mkosi.sanitizers/mkosi.postinst b/mkosi/mkosi.sanitizers/mkosi.postinst index d4d00907ed07f..7e836e7ab7bf8 100755 --- a/mkosi/mkosi.sanitizers/mkosi.postinst +++ b/mkosi/mkosi.sanitizers/mkosi.postinst @@ -43,6 +43,7 @@ fi wrap=( /usr/lib/polkit-1/polkitd /usr/libexec/polkit-1/polkitd + /usr/lib/systemd/tests/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-imds.py agetty btrfs capsh diff --git a/rules.d/60-dmi-id.rules b/rules.d/60-dmi-id.rules index 10b1fe000ca18..ecea74ec60d1c 100644 --- a/rules.d/60-dmi-id.rules +++ b/rules.d/60-dmi-id.rules @@ -2,24 +2,28 @@ ACTION=="remove", GOTO="dmi_end" SUBSYSTEM!="dmi", GOTO="dmi_end" +KERNEL!="id", GOTO="dmi_end" ENV{ID_SYS_VENDOR_IS_RUBBISH}!="1", ENV{ID_VENDOR}="$attr{sys_vendor}" ENV{ID_SYSFS_ATTRIBUTE_MODEL}=="", ENV{ID_PRODUCT_NAME_IS_RUBBISH}!="1", ENV{ID_MODEL}="$attr{product_name}" ENV{ID_SYSFS_ATTRIBUTE_MODEL}=="product_name", ENV{ID_MODEL}="$attr{product_name}" ENV{ID_SYSFS_ATTRIBUTE_MODEL}=="product_version", ENV{ID_MODEL}="$attr{product_version}" -# fallback to board information +# Fallback to board information ENV{ID_VENDOR}=="", ENV{ID_VENDOR}="$attr{board_vendor}" ENV{ID_MODEL}=="", ENV{ID_MODEL}="$attr{board_name}" -# stock keeping unit +# Stock keeping unit ENV{ID_PRODUCT_SKU_IS_RUBBISH}!="1", ENV{ID_SKU}="$attr{product_sku}" -# hardware version +# Hardware version ENV{ID_PRODUCT_VERSION_IS_RUBBISH}!="1", ENV{ID_HARDWARE_VERSION}="$attr{product_version}" ENV{ID_HARDWARE_VERSION}=="", ENV{ID_BOARD_VERSION_IS_RUBBISH}!="1", ENV{ID_HARDWARE_VERSION}="$attr{board_version}" -# chassis asset tag +# Chassis asset tag ENV{MODALIAS}!="", ATTR{chassis_asset_tag}!="", IMPORT{builtin}="hwdb 
'$attr{modalias}cat$attr{chassis_asset_tag}:'" ENV{ID_CHASSIS_ASSET_TAG_IS_RUBBISH}!="1", ENV{ID_CHASSIS_ASSET_TAG}="$attr{chassis_asset_tag}" +# Allow units to be ordered after the DMI device +TAG+="systemd" + LABEL="dmi_end" diff --git a/src/basic/defer-util.h b/src/basic/defer-util.h new file mode 100644 index 0000000000000..320301a068ade --- /dev/null +++ b/src/basic/defer-util.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "assert-fundamental.h" +#include "macro.h" + +typedef void (*void_func_t)(void); + +static inline void dispatch_void_func(void_func_t *f) { + assert(f); + assert(*f); + (*f)(); +} + +/* Inspired by Go's "defer" construct, but much more basic. This basically just calls a void function when + * the current scope is left. Doesn't do function parameters (i.e. no closures). */ +#define DEFER_VOID_CALL(x) _DEFER_VOID_CALL(UNIQ, x) +#define _DEFER_VOID_CALL(uniq, x) _unused_ _cleanup_(dispatch_void_func) void_func_t UNIQ_T(defer, uniq) = (x) diff --git a/src/basic/fileio.c b/src/basic/fileio.c index 90436f6ecf820..66d06484dc981 100644 --- a/src/basic/fileio.c +++ b/src/basic/fileio.c @@ -7,12 +7,15 @@ #include #include "alloc-util.h" +#include "chase.h" #include "errno-util.h" #include "extract-word.h" #include "fd-util.h" #include "fileio.h" #include "fs-util.h" #include "hexdecoct.h" +#include "io-util.h" +#include "iovec-util.h" #include "label.h" #include "log.h" #include "mkdir.h" @@ -1655,3 +1658,62 @@ int warn_file_is_world_accessible(const char *filename, struct stat *st, const c filename, st->st_mode & 07777); return 0; } + +int write_data_file_atomic_at( + int dir_fd, + const char *path, + const struct iovec *iovec, + WriteDataFileFlags flags) { + + int r; + + assert(dir_fd >= 0 || IN_SET(dir_fd, AT_FDCWD, XAT_FDROOT)); + + /* This is a cousin of write_string_file_atomic(), but operates with arbitrary struct iovec binary + * data (rather than strings), works without FILE* streams, and does 
direct syscalls instead. */ + + _cleanup_free_ char *dn = NULL, *fn = NULL; + r = path_split_prefix_filename(path, &dn, &fn); + if (IN_SET(r, -EADDRNOTAVAIL, O_DIRECTORY)) + return -EISDIR; /* path refers to "." or "/" (which are dirs, which we cannot write), or is suffixed with "/" */ + if (r < 0) + return r; + + _cleanup_close_ int mfd = -EBADF; + if (dn) { + /* If there's a directory component, readjust our position */ + r = chaseat(dir_fd, + dn, + FLAGS_SET(flags, WRITE_DATA_FILE_MKDIR_0755) ? CHASE_MKDIR_0755 : 0, + /* ret_path= */ NULL, + &mfd); + if (r < 0) + return r; + + dir_fd = mfd; + } + + _cleanup_free_ char *t = NULL; + _cleanup_close_ int fd = open_tmpfile_linkable_at(dir_fd, fn, O_WRONLY|O_CLOEXEC, &t); + if (fd < 0) + return fd; + + CLEANUP_TMPFILE_AT(dir_fd, t); + + if (iovec_is_set(iovec)) { + r = loop_write(fd, iovec->iov_base, iovec->iov_len); + if (r < 0) + return r; + } + + r = fchmod_umask(fd, 0644); + if (r < 0) + return r; + + r = link_tmpfile_at(fd, dir_fd, t, fn, LINK_TMPFILE_REPLACE); + if (r < 0) + return r; + + t = mfree(t); /* disarm CLEANUP_TMPFILE_AT */ + return 0; +} diff --git a/src/basic/fileio.h b/src/basic/fileio.h index 578c16c0ee394..3e2372c4dddbc 100644 --- a/src/basic/fileio.h +++ b/src/basic/fileio.h @@ -163,3 +163,9 @@ int safe_fgetc(FILE *f, char *ret); int warn_file_is_world_accessible(const char *filename, struct stat *st, const char *unit, unsigned line); int fopen_mode_to_flags(const char *mode); + +typedef enum WriteDataFileFlags { + WRITE_DATA_FILE_MKDIR_0755 = 1 << 0, +} WriteDataFileFlags; + +int write_data_file_atomic_at(int dir_fd, const char *path, const struct iovec *iovec, WriteDataFileFlags flags); diff --git a/src/basic/iovec-util.h b/src/basic/iovec-util.h index 0d1d4a7a94d86..00cbb89a7790b 100644 --- a/src/basic/iovec-util.h +++ b/src/basic/iovec-util.h @@ -24,6 +24,12 @@ struct iovec* iovec_make_string(struct iovec *iovec, const char *s); .iov_len = STRLEN(s), \ } +#define IOVEC_MAKE_BYTE(c) \ + (const 
struct iovec) { \ + .iov_base = (char*) ((const char[]) { c }), \ + .iov_len = 1, \ + } + void iovec_done_erase(struct iovec *iovec); char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value); diff --git a/src/basic/macro.h b/src/basic/macro.h index 7001c331399d6..390a9fab38ca3 100644 --- a/src/basic/macro.h +++ b/src/basic/macro.h @@ -205,16 +205,3 @@ static inline size_t size_add(size_t x, size_t y) { for (typeof(entry) _va_sentinel_[1] = {}, _entries_[] = { __VA_ARGS__ __VA_OPT__(,) _va_sentinel_[0] }, *_current_ = _entries_; \ ((long)(_current_ - _entries_) < (long)(ELEMENTSOF(_entries_) - 1)) && ({ entry = *_current_; true; }); \ _current_++) - -typedef void (*void_func_t)(void); - -static inline void dispatch_void_func(void_func_t *f) { - assert(f); - assert(*f); - (*f)(); -} - -/* Inspired by Go's "defer" construct, but much more basic. This basically just calls a void function when - * the current scope is left. Doesn't do function parameters (i.e. no closures). 
*/ -#define DEFER_VOID_CALL(x) _DEFER_VOID_CALL(UNIQ, x) -#define _DEFER_VOID_CALL(uniq, x) _unused_ _cleanup_(dispatch_void_func) void_func_t UNIQ_T(defer, uniq) = (x) diff --git a/src/firstboot/firstboot.c b/src/firstboot/firstboot.c index 38e3adaed6eca..769c78e0430cc 100644 --- a/src/firstboot/firstboot.c +++ b/src/firstboot/firstboot.c @@ -19,6 +19,7 @@ #include "chase.h" #include "copy.h" #include "creds-util.h" +#include "defer-util.h" #include "dissect-image.h" #include "env-file.h" #include "errno-util.h" @@ -412,13 +413,16 @@ static int prompt_keymap(int rfd, sd_varlink **mute_console_link) { if (arg_keymap) return 0; - r = read_credential("firstboot.keymap", (void**) &arg_keymap, NULL); + _cleanup_free_ char *km = NULL; + r = read_credential("firstboot.keymap", (void**) &km, NULL); if (r < 0) log_debug_errno(r, "Failed to read credential firstboot.keymap, ignoring: %m"); - else { + else if (keymap_is_valid(km)) { log_debug("Acquired keymap from credential."); + arg_keymap = TAKE_PTR(km); return 0; - } + } else + log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Keymap '%s' supplied via credential is not valid, ignoring.", km); bool b; if (arg_prompt_keymap_auto) { @@ -540,13 +544,16 @@ static int prompt_timezone(int rfd, sd_varlink **mute_console_link) { if (arg_timezone) return 0; - r = read_credential("firstboot.timezone", (void**) &arg_timezone, NULL); + _cleanup_free_ char *tz = NULL; + r = read_credential("firstboot.timezone", (void**) &tz, NULL); if (r < 0) log_debug_errno(r, "Failed to read credential firstboot.timezone, ignoring: %m"); - else { + else if (timezone_is_valid(tz, LOG_DEBUG)) { log_debug("Acquired timezone from credential."); + arg_timezone = TAKE_PTR(tz); return 0; - } + } else + log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Timezone '%s' supplied via credential is not valid, ignoring.", tz); if (!arg_prompt_timezone) { log_debug("Prompting for timezone was not requested."); @@ -647,6 +654,17 @@ static int prompt_hostname(int rfd, sd_varlink 
**mute_console_link) { if (arg_hostname) return 0; + _cleanup_free_ char *hn = NULL; + r = read_credential("firstboot.hostname", (void**) &hn, NULL); + if (r < 0) + log_debug_errno(r, "Failed to read credential firstboot.hostname, ignoring: %m"); + else if (hostname_is_valid(hn, VALID_HOSTNAME_TRAILING_DOT|VALID_HOSTNAME_QUESTION_MARK)) { + log_debug("Acquired hostname from credentials."); + arg_hostname = TAKE_PTR(hn); + return 0; + } else + log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Hostname '%s' supplied via credential is not valid, ignoring.", hn); + if (!arg_prompt_hostname) { log_debug("Prompting for hostname was not requested."); return 0; diff --git a/src/home/homectl.c b/src/home/homectl.c index 2b92ab6481eae..bc75b3451f99c 100644 --- a/src/home/homectl.c +++ b/src/home/homectl.c @@ -18,6 +18,7 @@ #include "cgroup-util.h" #include "chase.h" #include "creds-util.h" +#include "defer-util.h" #include "dirent-util.h" #include "dns-domain.h" #include "env-util.h" diff --git a/src/imds/imds-generator.c b/src/imds/imds-generator.c new file mode 100644 index 0000000000000..ea3c5b8c614f3 --- /dev/null +++ b/src/imds/imds-generator.c @@ -0,0 +1,197 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-hwdb.h" + +#include "dropin.h" +#include "fileio.h" +#include "generator.h" +#include "imds-util.h" +#include "log.h" +#include "parse-util.h" +#include "proc-cmdline.h" +#include "special.h" +#include "string-util.h" +#include "virt.h" + +static int arg_enabled = -1; /* Whether we shall offer local IMDS APIs */ +static bool arg_import = true; /* Whether we shall import IMDS credentials, SSH keys, … into the local system */ +static ImdsNetworkMode arg_network_mode = IMDS_NETWORK_LOCKED; + +static int parse_proc_cmdline_item(const char *key, const char *value, void *data) { + int r; + + assert(key); + + if (proc_cmdline_key_streq(key, "systemd.imds")) { + if (proc_cmdline_value_missing(key, value)) + return 0; + + r = parse_tristate_full(value, "auto", 
&arg_enabled); + if (r < 0) + return log_warning_errno(r, "Failed to parse systemd.imds= value: %m"); + + } else if (proc_cmdline_key_streq(key, "systemd.imds.import")) { + if (proc_cmdline_value_missing(key, value)) + return 0; + + r = parse_boolean(value); + if (r < 0) + return log_warning_errno(r, "Failed to parse systemd.imds.import= value: %m"); + + arg_import = r; + } else if (proc_cmdline_key_streq(key, "systemd.imds.network")) { + if (proc_cmdline_value_missing(key, value)) + return 0; + + ImdsNetworkMode m = imds_network_mode_from_string(value); + if (m < 0) + return log_warning_errno(m, "Failed to parse systemd.imds.network= value: %m"); + + arg_network_mode = m; + } + + return 0; +} + +static int smbios_get_modalias(char **ret) { + int r; + + assert(ret); + + _cleanup_free_ char *modalias = NULL; + r = read_virtual_file("/sys/devices/virtual/dmi/id/modalias", SIZE_MAX, &modalias, /* ret_size= */ NULL); + if (r < 0) + return r; + + truncate_nl(modalias); + + /* To detect Azure we need to check the chassis asset tag. Unfortunately the kernel does not include + * it in the modalias string right now. Let's hence append it manually. This matches similar logic in + * rules.d/60-dmi-id.rules. */ + _cleanup_free_ char *cat = NULL; + r = read_virtual_file("/sys/devices/virtual/dmi/id/chassis_asset_tag", SIZE_MAX, &cat, /* ret_size= */ NULL); + if (r < 0) + log_debug_errno(r, "Failed to read chassis asset tag, ignoring: %m"); + else { + truncate_nl(cat); + + if (!string_has_cc(cat, /* ok= */ NULL) && !isempty(cat) && !strextend(&modalias, "cat", cat, ":")) + return -ENOMEM; + } + + log_debug("Constructed SMBIOS modalias string: %s", modalias); + *ret = TAKE_PTR(modalias); + return 0; +} + +static int smbios_query(void) { + int r; + + /* Let's check whether the DMI device's hwdb data suggests IMDS support is available. Note, we cannot + * ask udev for this, because we typically run long before udev. Hence we'll do the hwdb lookup via + * sd-hwdb directly.
*/ + + _cleanup_free_ char *modalias = NULL; + r = smbios_get_modalias(&modalias); + if (r == -ENOENT) { + log_debug("No DMI device found, assuming IMDS is not available."); + return false; + } + if (r < 0) + return log_error_errno(r, "Failed to read DMI modalias: %m"); + + _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL; + r = sd_hwdb_new(&hwdb); + if (r < 0) + return log_error_errno(r, "Failed to open hwdb: %m"); + + r = sd_hwdb_seek(hwdb, modalias); + if (r == -ENOENT) { + log_debug("No hwdb data for DMI device found, assuming IMDS is off."); + return false; + } + if (r < 0) + return log_error_errno(r, "Failed to seek in hwdb for '%s': %m", modalias); + + for (;;) { + const char *key, *value; + r = sd_hwdb_enumerate(hwdb, &key, &value); + if (r < 0) + return log_error_errno(r, "Failed to enumerate hwdb entry for '%s': %m", modalias); + if (r == 0) + break; + + if (streq(key, "IMDS_VENDOR")) + return true; + } + + log_debug("IMDS_VENDOR= property for DMI device not set, assuming IMDS is not available."); + return false; +} + +static int run(const char *dest, const char *dest_early, const char *dest_late) { + int r; + + r = proc_cmdline_parse(parse_proc_cmdline_item, /* userdata= */ NULL, PROC_CMDLINE_STRIP_RD_PREFIX); + if (r < 0) + log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m"); + + if (arg_enabled < 0) { + Virtualization v = detect_container(); + if (v < 0) + log_debug_errno(v, "Container detection failed, ignoring: %m"); + if (v > 0) { + log_debug("Running in a container, disabling IMDS logic."); + arg_enabled = false; + } else { + r = smbios_query(); + if (r < 0) + return r; + + arg_enabled = r > 0; /* only derive the setting from r here, where r is the smbios_query() result */ + } + } + + if (!arg_enabled) { + log_debug("IMDS not enabled, skipping generator."); + return 0; + } + + log_info("IMDS support enabled, pull in IMDS units."); + + /* Enable IMDS early networking, so that we can actually reach the IMDS server.
*/ + if (arg_network_mode != IMDS_NETWORK_OFF) { + r = generator_add_symlink(dest_early, SPECIAL_SYSINIT_TARGET, "wants", SYSTEM_DATA_UNIT_DIR "/systemd-imds-early-network.service"); + if (r < 0) + return log_error_errno(r, "Failed to hook in systemd-imds-early-network.service: %m"); + } + + /* Enable the IMDS service socket */ + r = generator_add_symlink(dest_early, SPECIAL_SOCKETS_TARGET, "wants", SYSTEM_DATA_UNIT_DIR "/systemd-imdsd.socket"); + if (r < 0) + return log_error_errno(r, "Failed to hook in systemd-imdsd.socket: %m"); + + /* We now know the SMBIOS device exists, hence it's safe now to order the IMDS service after it, so + * that it has all properties properly initialized. */ + r = write_drop_in( + dest_early, + "systemd-imdsd@.service", + 50, "dmi-id", + "# Automatically generated by systemd-imds-generator\n\n" + "[Unit]\n" + "Wants=sys-devices-virtual-dmi-id.device\n" + "After=sys-devices-virtual-dmi-id.device\n"); + if (r < 0) + return log_error_errno(r, "Failed to hook DMI id device before systemd-imdsd@.service: %m"); + + if (arg_import) { + /* Enable that we import IMDS data */ + r = generator_add_symlink(dest_early, SPECIAL_SYSINIT_TARGET, "wants", SYSTEM_DATA_UNIT_DIR "/systemd-imds-import.service"); + if (r < 0) + return log_error_errno(r, "Failed to hook in systemd-imds-import.service: %m"); + } + + return 0; +} + +DEFINE_MAIN_GENERATOR_FUNCTION(run); diff --git a/src/imds/imds-tool.c b/src/imds/imds-tool.c new file mode 100644 index 0000000000000..e715578e1cbbf --- /dev/null +++ b/src/imds/imds-tool.c @@ -0,0 +1,897 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include + +#include "sd-varlink.h" + +#include "alloc-util.h" +#include "build.h" +#include "build-path.h" +#include "creds-util.h" +#include "dns-rr.h" +#include "errno-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-table.h" +#include "format-util.h" +#include "fs-util.h" +#include "hexdecoct.h" +#include "imds-util.h" 
+#include "in-addr-util.h" +#include "io-util.h" +#include "iovec-util.h" +#include "json-util.h" +#include "log.h" +#include "main-func.h" +#include "parse-argument.h" +#include "pcrextend-util.h" +#include "pretty-print.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "tmpfile-util.h" + +static enum { + ACTION_SUMMARY, + ACTION_GET, + ACTION_USERDATA, + ACTION_IMPORT, + _ACTION_INVALID = -EINVAL, +} arg_action = _ACTION_INVALID; +static char *arg_key = NULL; +static ImdsWellKnown arg_well_known = _IMDS_WELL_KNOWN_INVALID; +static int arg_cache = -1; +static usec_t arg_refresh_usec = 0; +static bool arg_refresh_usec_set = false; + +STATIC_DESTRUCTOR_REGISTER(arg_key, freep); + +static int help(void) { + _cleanup_free_ char *link = NULL; + int r; + + r = terminal_urlify_man("systemd-imds", "1", &link); + if (r < 0) + return log_oom(); + + printf("%s [OPTIONS...] [KEY...]\n" + "\n%sIMDS data acquisition.%s\n\n" + " -h --help Show this help\n" + " --version Show package version\n" + " -K --well-known=[hostname|region|zone|ipv4-public|ipv6-public|ssh-key|\n" + " userdata|userdata-base|userdata-base64]\n" + " Select well-known key/base\n" + " --refresh=SEC Set token refresh time\n" + " --cache=no Disable cache use\n" + " -u --userdata Dump user data\n" + " --import Import system credentials from IMDS userdata\n" + " and place them in /run/credstore/\n" + "\nSee the %s for details.\n", + program_invocation_short_name, + ansi_highlight(), + ansi_normal(), + link); + + return 0; +} + +static int parse_argv(int argc, char *argv[]) { + + enum { + ARG_VERSION = 0x100, + ARG_REFRESH, + ARG_CACHE, + ARG_IMPORT, + }; + + static const struct option options[] = { + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, ARG_VERSION }, + { "well-known", required_argument, NULL, 'K' }, + { "refresh", required_argument, NULL, ARG_REFRESH }, + { "cache", required_argument, NULL, ARG_CACHE }, + { "userdata", no_argument, NULL, 'u' }, + { 
"import", no_argument, NULL, ARG_IMPORT }, + {} + }; + + int c, r; + + assert(argc >= 0); + assert(argv); + + while ((c = getopt_long(argc, argv, "hK:u", options, NULL)) >= 0) { + + switch (c) { + + case 'h': + return help(); + + case ARG_VERSION: + return version(); + + case 'K': { + if (isempty(optarg)) { + arg_well_known = _IMDS_WELL_KNOWN_INVALID; + break; + } + + if (streq(optarg, "help")) + return DUMP_STRING_TABLE(imds_well_known, ImdsWellKnown, _IMDS_WELL_KNOWN_MAX); + + ImdsWellKnown wk = imds_well_known_from_string(optarg); + if (wk < 0) + return log_error_errno(wk, "Failed to parse --well-known= argument: %s", optarg); + + arg_well_known = wk; + break; + } + + case ARG_CACHE: + r = parse_tristate_argument_with_auto("--cache", optarg, &arg_cache); + if (r < 0) + return r; + + break; + + case ARG_REFRESH: { + if (isempty(optarg)) { + arg_refresh_usec_set = false; + break; + } + + usec_t t; + r = parse_sec(optarg, &t); + if (r < 0) + return log_error_errno(r, "Failed to parse refresh timeout: %s", optarg); + + arg_refresh_usec = t; + arg_refresh_usec_set = true; + break; + } + + case 'u': + arg_action = ACTION_USERDATA; + break; + + case ARG_IMPORT: + arg_action = ACTION_IMPORT; + break; + + case '?': + return -EINVAL; + + default: + assert_not_reached(); + } + } + + if (IN_SET(arg_action, ACTION_USERDATA, ACTION_IMPORT)) { + if (argc != optind) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No parameters expected."); + + } else { + assert(arg_action < 0); + + if (argc > optind + 1) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "None or one argument expected."); + + if (argc == optind && arg_well_known < 0) + arg_action = ACTION_SUMMARY; + else { + if (arg_well_known < 0) + arg_well_known = IMDS_BASE; + + if (argc > optind) { + if (!imds_key_is_valid(argv[optind])) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified IMDS key is not valid, refusing: %s", argv[optind]); + + if (!imds_well_known_can_suffix(arg_well_known)) + return 
log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Well known key '%s' does not take a key suffix, refusing.", imds_well_known_to_string(arg_well_known)); + + r = free_and_strdup_warn(&arg_key, argv[optind]); + if (r < 0) + return r; + } + + arg_action = ACTION_GET; + } + } + + return 1; +} + +static int acquire_imds_key( + sd_varlink *link, + ImdsWellKnown wk, + const char *key, + struct iovec *ret) { + + int r; + + assert(link); + assert(wk >= 0); + assert(wk < _IMDS_WELL_KNOWN_MAX); + assert(ret); + + const char *error_id = NULL; + sd_json_variant *reply = NULL; + r = sd_varlink_callbo( + link, + "io.systemd.InstanceMetadata.Get", + &reply, + &error_id, + SD_JSON_BUILD_PAIR_CONDITION(wk != IMDS_BASE, "wellKnown", JSON_BUILD_STRING_UNDERSCORIFY(imds_well_known_to_string(wk))), + JSON_BUILD_PAIR_STRING_NON_EMPTY("key", key), + SD_JSON_BUILD_PAIR_CONDITION(arg_refresh_usec_set, "refreshUSec", SD_JSON_BUILD_UNSIGNED(arg_refresh_usec)), + SD_JSON_BUILD_PAIR_CONDITION(arg_cache >= 0, "cache", SD_JSON_BUILD_BOOLEAN(arg_cache))); + if (r < 0) + return log_error_errno(r, "Failed to issue io.systemd.InstanceMetadata.Get(): %m"); + if (error_id) { + if (STR_IN_SET(error_id, "io.systemd.InstanceMetadata.KeyNotFound", "io.systemd.InstanceMetadata.WellKnownKeyUnset")) { + *ret = (struct iovec) {}; + return 0; + } + + return log_error_errno(sd_varlink_error_to_errno(error_id, reply), "Failed to issue io.systemd.InstanceMetadata.Get(): %s", error_id); + } + + _cleanup_(iovec_done) struct iovec data = {}; + static const sd_json_dispatch_field dispatch_table[] = { + { "data", SD_JSON_VARIANT_STRING, json_dispatch_unbase64_iovec, 0, SD_JSON_MANDATORY }, + {}, + }; + r = sd_json_dispatch(reply, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &data); + if (r < 0) + return r; + + *ret = TAKE_STRUCT(data); + return 1; +} + +static int acquire_imds_key_as_string( + sd_varlink *link, + ImdsWellKnown wk, + const char *key, + char **ret) { + + int r; + + assert(link); + assert(wk >= 0); + assert(wk < 
_IMDS_WELL_KNOWN_MAX); + assert(ret); + + _cleanup_(iovec_done) struct iovec data = {}; + r = acquire_imds_key(link, wk, key, &data); + if (r < 0) + return r; + if (r == 0) { + *ret = NULL; + return 0; + } + + _cleanup_free_ char *s = NULL; + r = make_cstring(data.iov_base, data.iov_len, MAKE_CSTRING_REFUSE_TRAILING_NUL, &s); + if (r < 0) + return r; + + *ret = TAKE_PTR(s); + return 1; +} + +static int acquire_imds_key_as_ip_address( + sd_varlink *link, + ImdsWellKnown wk, + const char *key, + int family, + union in_addr_union *ret) { + int r; + + assert(link); + assert(wk >= 0); + assert(wk < _IMDS_WELL_KNOWN_MAX); + assert(ret); + + _cleanup_free_ char *s = NULL; + r = acquire_imds_key_as_string(link, wk, key, &s); + if (r < 0) + return r; + if (r == 0 || isempty(s)) { + *ret = (union in_addr_union) {}; + return 0; + } + + r = in_addr_from_string(family, s, ret); + if (r < 0) + return r; + + return 1; +} + +static int action_summary(sd_varlink *link) { + int r; + + assert(link); + + _cleanup_(table_unrefp) Table *table = table_new_vertical(); + if (!table) + return log_oom(); + + const char *error_id = NULL; + sd_json_variant *reply = NULL; + r = sd_varlink_call( + link, + "io.systemd.InstanceMetadata.GetVendorInfo", + /* parameters= */ NULL, + &reply, + &error_id); + if (r < 0) + return log_error_errno(r, "Failed to issue io.systemd.InstanceMetadata.GetVendorInfo(): %m"); + if (error_id) + return log_error_errno(sd_varlink_error_to_errno(error_id, reply), "Failed to issue io.systemd.InstanceMetadata.GetVendorInfo(): %s", error_id); + + const char *vendor = NULL; + static const sd_json_dispatch_field dispatch_table[] = { + { "vendor", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, 0, 0 }, + {} + }; + r = sd_json_dispatch(reply, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &vendor); + if (r < 0) + return r; + if (vendor) { + r = table_add_many(table, + TABLE_FIELD, "Vendor", + TABLE_SET_JSON_FIELD_NAME, "vendor", + TABLE_STRING, vendor); + if (r < 0) + return
table_log_add_error(r); + } + + static const struct { + ImdsWellKnown well_known; + const char *field; + } wktable[] = { + { IMDS_HOSTNAME, "Hostname" }, + { IMDS_REGION, "Region" }, + { IMDS_ZONE, "Zone" }, + { IMDS_IPV4_PUBLIC, "Public IPv4 Address" }, + { IMDS_IPV6_PUBLIC, "Public IPv6 Address" }, + }; + FOREACH_ELEMENT(i, wktable) { + _cleanup_free_ char *text = NULL; + + r = acquire_imds_key_as_string(link, i->well_known, /* key= */ NULL, &text); + if (r < 0) + return r; + if (r == 0 || isempty(text)) + continue; + + r = table_add_many(table, + TABLE_FIELD, i->field, + TABLE_SET_JSON_FIELD_NAME, imds_well_known_to_string(i->well_known), + TABLE_STRING, text); + if (r < 0) + return table_log_add_error(r); + } + + if (table_isempty(table)) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "No well-known IMDS data available."); + + r = table_print(table, NULL); + if (r < 0) + return table_log_print_error(r); + + return 0; +} + +static const char *detect_json_object(const char *text) { + assert(text); + + /* Checks if the provided text looks like a JSON object. It checks if the first non-whitespace + * characters are {" or {}. 
*/ + + text += strspn(text, WHITESPACE); + if (*text != '{') + return NULL; + + const char *e = text + 1; + e += strspn(e, WHITESPACE); + if (!IN_SET(*e, '"', '}')) + return NULL; + + return text; +} + +static int write_credential(const char *dir, const char *name, const struct iovec *data) { + int r; + + assert(dir); + assert(name); + + _cleanup_close_ int dfd = open_mkdir(dir, O_CLOEXEC|O_PATH, 0700); + if (dfd < 0) + return log_error_errno(dfd, "Failed to open credential directory '%s': %m", dir); + + if (faccessat(dfd, name, F_OK, AT_SYMLINK_NOFOLLOW) < 0) { + if (errno != ENOENT) + return log_error_errno(errno, "Failed to check if '%s' exists in credential directory '%s': %m", name, dir); + } else { + log_notice("Skipping importing of credential '%s', it already exists locally in '%s'.", name, dir); + return 0; + } + + _cleanup_free_ char *t = NULL; + _cleanup_close_ int fd = open_tmpfile_linkable_at(dfd, name, O_WRONLY|O_CLOEXEC, &t); + if (fd < 0) + return log_error_errno(fd, "Failed to create credential file '%s/%s': %m", dir, name); + + CLEANUP_TMPFILE_AT(dfd, t); + + r = loop_write(fd, data->iov_base, data->iov_len); + if (r < 0) + return log_error_errno(r, "Failed to write credential file '%s/%s': %m", dir, name); + + if (fchmod(fd, 0400) < 0) + return log_error_errno(errno, "Failed to set access mode on credential file '%s/%s': %m", dir, name); + + r = link_tmpfile_at(fd, dfd, t, name, /* flags= */ 0); + if (r < 0) + return log_error_errno(r, "Failed to move credential file '%s/%s' into place: %m", dir, name); + + t = mfree(t); /* Disarm auto-cleanup */ + return 1; +} + +typedef struct CredentialData { + const char *name; + const char *text; + struct iovec data, encrypted; +} CredentialData; + +static void credential_data_done(CredentialData *d) { + assert(d); + + iovec_done(&d->data); + iovec_done(&d->encrypted); +} + +static int import_credential_one(CredentialData *d) { + int r; + + assert(d); + assert(d->name); + + log_debug("Importing credential 
'%s' from IMDS.", d->name); + + const char *dir = "/run/credstore"; + struct iovec *v, _v; + if (d->text) { + _v = IOVEC_MAKE_STRING(d->text); + v = &_v; + } else if (iovec_is_set(&d->data)) + v = &d->data; + else if (iovec_is_set(&d->encrypted)) { + dir = "/run/credstore.encrypted"; + v = &d->encrypted; + } else + assert_not_reached(); + + r = write_credential(dir, d->name, v); + if (r <= 0) + return r; + + log_info("Imported credential '%s' from IMDS (%s).", d->name, FORMAT_BYTES(v->iov_len)); + return 1; +} + +static int import_credentials(const char *text) { + int r; + + assert(text); + + /* We cannot be sure if the data is actually intended for us. Hence let's be somewhat defensive, and + * accept data in two ways: either immediately as a JSON object, or alternatively marked with a first + * line of "#systemd-userdata". The latter mimics the markers cloud-init employs. */ + + const char *e = startswith(text, "#systemd-userdata\n"); + if (!e) { + e = detect_json_object(text); + if (!e) { + log_info("IMDS user data does not look like JSON or systemd userdata, not processing."); + return 0; + } + } + + log_debug("Detected JSON userdata"); + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *j = NULL; + unsigned line = 0, column = 0; + r = sd_json_parse(e, /* flags= */ 0, &j, &line, &column); + if (r < 0) { + if (line > 0) + log_syntax(/* unit= */ NULL, LOG_WARNING, /* filename= */ NULL, line, r, "JSON parse failure."); + else + log_error_errno(r, "Failed to parse IMDS userdata JSON: %m"); + return 0; + } + + static const sd_json_dispatch_field top_table[] = { + { "systemd.credentials", SD_JSON_VARIANT_ARRAY, sd_json_dispatch_variant_noref, 0, 0 }, + {}, + }; + + sd_json_variant *creds = NULL; + r = sd_json_dispatch(j, top_table, SD_JSON_ALLOW_EXTENSIONS|SD_JSON_LOG, &creds); + if (r < 0) + return r; + + unsigned n_imported = 0; + int ret = 0; + if (creds) { + log_debug("Found 'systemd.credentials' field"); + + sd_json_variant *c; + JSON_VARIANT_ARRAY_FOREACH(c, 
creds) { + static const sd_json_dispatch_field credential_table[] = { + { "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(CredentialData, name), SD_JSON_MANDATORY }, + { "text", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(CredentialData, text), 0 }, + { "data", SD_JSON_VARIANT_STRING, json_dispatch_unbase64_iovec, offsetof(CredentialData, data), 0 }, + { "encrypted", SD_JSON_VARIANT_STRING, json_dispatch_unbase64_iovec, offsetof(CredentialData, encrypted), 0 }, + {}, + }; + + _cleanup_(credential_data_done) CredentialData d = {}; + r = sd_json_dispatch(c, credential_table, SD_JSON_LOG|SD_JSON_WARNING, &d); + if (r < 0) { + RET_GATHER(ret, r); + continue; + } + + if (!credential_name_valid(d.name)) { + RET_GATHER(ret, log_warning_errno(SYNTHETIC_ERRNO(EBADMSG), "Credential name '%s' is not valid, refusing.", d.name)); + continue; + } + + if ((!!d.text + !!iovec_is_set(&d.data) + !!iovec_is_set(&d.encrypted)) != 1) { + RET_GATHER(ret, log_warning_errno(SYNTHETIC_ERRNO(EBADMSG), "Exactly one of 'text', 'data', 'encrypted' must be set for credential '%s', refusing.", d.name)); + continue; + } + + r = import_credential_one(&d); + if (r < 0) + RET_GATHER(ret, r); + else + n_imported++; + } + } + + log_full(n_imported == 0 ? 
LOG_DEBUG : LOG_INFO, "Imported %u credentials from IMDS.", n_imported); + return ret; +} + +static int add_public_address_to_json_array(sd_json_variant **array, int family, const union in_addr_union *addr) { + int r; + + assert(array); + assert(IN_SET(family, AF_INET, AF_INET6)); + assert(addr); + + if (in_addr_is_null(family, addr)) + return 0; + + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; + if (dns_resource_record_new_address(&rr, family, addr, "_public") < 0) + return log_oom(); + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *rrj = NULL; + r = dns_resource_record_to_json(rr, &rrj); + if (r < 0) + return log_error_errno(r, "Failed to convert A RR to JSON: %m"); + + r = sd_json_variant_append_array(array, rrj); + if (r < 0) + return log_error_errno(r, "Failed to append A RR to JSON array: %m"); + + log_debug("Writing IMDS RR for: %s", dns_resource_record_to_string(rr)); + return 1; +} + +static int import_imds_public_addresses(sd_varlink *link) { + int r, ret = 0; + + assert(link); + + /* Creates local RRs (honoured by systemd-resolved) for our public addresses. 
*/ + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *aj = NULL; + + union in_addr_union u = {}; + r = acquire_imds_key_as_ip_address(link, IMDS_IPV4_PUBLIC, /* key= */ NULL, AF_INET, &u); + if (r < 0) + RET_GATHER(ret, r); + else if (r > 0) { + r = add_public_address_to_json_array(&aj, AF_INET, &u); + if (r < 0) + return r; + } + + u = (union in_addr_union) {}; + r = acquire_imds_key_as_ip_address(link, IMDS_IPV6_PUBLIC, /* key= */ NULL, AF_INET6, &u); + if (r < 0) + RET_GATHER(ret, r); + else if (r > 0) { + r = add_public_address_to_json_array(&aj, AF_INET6, &u); + if (r < 0) + return r; + } + + if (sd_json_variant_elements(aj) == 0) { + log_debug("No IMDS public addresses known, not writing our RRs."); + return 0; + } + + _cleanup_free_ char *text = NULL; + r = sd_json_variant_format(aj, SD_JSON_FORMAT_NEWLINE, &text); + if (r < 0) + return log_error_errno(r, "Failed to format JSON text: %m"); + + r = write_string_file("/run/systemd/resolve/static.d/imds-public.rr", text, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC|WRITE_STRING_FILE_MKDIR_0755); + if (r < 0) + return log_error_errno(r, "Failed to write IMDS RR data: %m"); + + log_debug("IMDS public addresses written out."); + return 1; +} + +static int import_imds_ssh_key(sd_varlink *link) { + int r; + + assert(link); + + _cleanup_(iovec_done) struct iovec data = {}; + r = acquire_imds_key(link, IMDS_SSH_KEY, /* key= */ NULL, &data); + if (r < 0) + return r; + if (r == 0 || !iovec_is_set(&data)) { + log_debug("No SSH key supplied via IMDS, not importing."); + return 0; + } + + r = write_credential("/run/credstore", "ssh.authorized_keys.root", &data); + if (r <= 0) + return r; + + log_info("Imported SSH key as credential 'ssh.authorized_keys.root'."); + return 0; +} + +static int import_imds_hostname(sd_varlink *link) { + int r; + + assert(link); + + _cleanup_(iovec_done) struct iovec data = {}; + r = acquire_imds_key(link, IMDS_HOSTNAME, /* key= */ NULL, &data); + if (r < 0) + return r; + if (r == 0 || 
!iovec_is_set(&data)) { + log_debug("No hostname supplied via IMDS, not importing."); + return 0; + } + + r = write_credential("/run/credstore", "firstboot.hostname", &data); + if (r <= 0) + return r; /* propagate write failures, same as import_imds_ssh_key() */ + + log_info("Imported hostname as credential 'firstboot.hostname'."); + return 0; +} + +static int acquire_imds_userdata(sd_varlink *link, struct iovec *ret) { + int r; + + assert(link); + assert(ret); + + /* First try our private namespace, if the concept exists, and then fall back to the singleton */ + _cleanup_(iovec_done) struct iovec data = {}; + r = acquire_imds_key(link, IMDS_USERDATA_BASE, "/systemd-userdata", &data); + if (r == 0) + r = acquire_imds_key(link, IMDS_USERDATA, /* key= */ NULL, &data); + if (r < 0) + return r; + if (r > 0) { + if (!iovec_is_set(&data)) { /* Treat empty user data like missing */ + *ret = (struct iovec) {}; + return 0; + } + + *ret = TAKE_STRUCT(data); + return 1; + } + + r = acquire_imds_key(link, IMDS_USERDATA_BASE64, /* key= */ NULL, &data); + if (r < 0) + return r; + _cleanup_(iovec_done) struct iovec decoded = {}; + if (r > 0) { + r = unbase64mem_full(data.iov_base, data.iov_len, /* secure= */ false, &decoded.iov_base, &decoded.iov_len); + if (r < 0) + return r; + } + + if (!iovec_is_set(&decoded)) { /* Treat empty user data like missing */ + *ret = (struct iovec) {}; + return 0; + } + + *ret = TAKE_STRUCT(decoded); + return 1; +} + +static int action_get(sd_varlink *link) { + int r; + + assert(link); + + _cleanup_(iovec_done) struct iovec data = {}; + r = acquire_imds_key(link, arg_well_known, arg_key, &data); + if (r < 0) + return r; + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Key not available."); + + r = loop_write(STDOUT_FILENO, data.iov_base, data.iov_len); + if (r < 0) + return log_error_errno(r, "Failed to write data to standard output: %m"); + + return 0; +} + +static int action_userdata(sd_varlink *link) { + int r; + + assert(link); + + _cleanup_(iovec_done) struct iovec data = {}; + r =
acquire_imds_userdata(link, &data); + if (r < 0) + return r; + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "User data not available."); + + r = loop_write(STDOUT_FILENO, data.iov_base, data.iov_len); + if (r < 0) + return log_error_errno(r, "Failed to write data to standard output: %m"); + + return 0; +} + +static int remove_userdata(const char *path) { + assert(path); + + if (unlink(path) < 0) { + + if (errno != ENOENT) + log_debug_errno(errno, "Failed to remove '%s', ignoring: %m", path); + + return 0; + } + + log_debug("Removed '%s'.", path); + return 1; +} + +static int save_userdata(const struct iovec *data, const char *path) { + int r; + + assert(data); + assert(path); + + if (!iovec_is_set(data)) + return remove_userdata(path); + + r = write_data_file_atomic_at(AT_FDCWD, path, data, WRITE_DATA_FILE_MKDIR_0755); + if (r < 0) + return log_error_errno(r, "Failed to save userdata to '%s': %m", path); + + log_debug("Saved userdata to '%s'.", path); + return 1; +} + +static int action_import(sd_varlink *link) { + int r; + + assert(link); + + int ret = import_imds_public_addresses(link); + RET_GATHER(ret, import_imds_hostname(link)); + RET_GATHER(ret, import_imds_ssh_key(link)); + + _cleanup_(iovec_done) struct iovec data = {}; + r = acquire_imds_userdata(link, &data); + if (r < 0) + return RET_GATHER(ret, r); + if (r == 0) { + log_info("No IMDS user data available, not importing credentials."); + (void) remove_userdata("/run/systemd/imds/userdata"); + return ret; + } + + /* Measure the userdata before we use it */ + (void) pcrextend_imds_userdata_now(&data); + + /* Keep a pristine copy of the userdata we actually applied. (Note that this data is typically also + * kept as cached item on systemd-imdsd, but that one is possibly subject to cache invalidation, + * while this one is supposed to pin the data actually in effect.)
*/ + (void) save_userdata(&data, "/run/systemd/imds/userdata"); + + /* Ensure no inner NUL byte */ + if (memchr(data.iov_base, 0, data.iov_len)) { + log_info("IMDS user data contains NUL byte, not processing."); + return ret; + } + + /* Turn this into a proper C string */ + if (!iovec_append(&data, &IOVEC_MAKE_BYTE(0))) + return log_oom(); + + return RET_GATHER(ret, import_credentials(data.iov_base)); +} + +static int run(int argc, char* argv[]) { + int r; + + log_setup(); + + r = parse_argv(argc, argv); + if (r <= 0) + return r; + + _cleanup_(sd_varlink_unrefp) sd_varlink *link = NULL; + r = sd_varlink_connect_address(&link, "/run/systemd/io.systemd.InstanceMetadata"); + if (r < 0) { + if (r != -ENOENT && !ERRNO_IS_NEG_DISCONNECT(r)) + return log_error_errno(r, "Failed to connect to systemd-imdsd: %m"); + + log_debug_errno(r, "Couldn't connect to /run/systemd/io.systemd.InstanceMetadata, will try to fork off systemd-imdsd as child now."); + + /* Try to fork off systemd-imdsd as a child as a fallback. If we have privileges and the + * SO_FWMARK trickery is not necessary, then this might just work. 
*/ + _cleanup_free_ char *p = NULL; + _cleanup_close_ int pin_fd = + pin_callout_binary("/usr/lib/systemd/systemd-imdsd", &p); + if (pin_fd < 0) + return log_error_errno(pin_fd, "Failed to pick up imdsd binary: %m"); + + r = sd_varlink_connect_exec(&link, p, /* argv[]= */ NULL); + if (r < 0) + return log_error_errno(r, "Failed to connect to imdsd service: %m"); + } + + switch (arg_action) { + + case ACTION_SUMMARY: + return action_summary(link); + + case ACTION_GET: + return action_get(link); + + case ACTION_USERDATA: + return action_userdata(link); + + case ACTION_IMPORT: + return action_import(link); + + default: + assert_not_reached(); + } + + return 0; +} + +DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run); diff --git a/src/imds/imds-util.c b/src/imds/imds-util.c new file mode 100644 index 0000000000000..3c67417e4ba5f --- /dev/null +++ b/src/imds/imds-util.c @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "imds-util.h" +#include "string-table.h" +#include "string-util.h" +#include "utf8.h" + +bool imds_key_is_valid(const char *key) { + /* Just some pretty superficial validation. 
*/ + + if (!key) + return false; + + if (!startswith(key, "/")) + return false; + + if (!ascii_is_valid(key)) + return false; + + if (string_has_cc(key, /* ok= */ NULL)) + return false; + + return true; +} + +static const char* const imds_well_known_table[_IMDS_WELL_KNOWN_MAX] = { + [IMDS_BASE] = "base", + [IMDS_HOSTNAME] = "hostname", + [IMDS_REGION] = "region", + [IMDS_ZONE] = "zone", + [IMDS_IPV4_PUBLIC] = "ipv4-public", + [IMDS_IPV6_PUBLIC] = "ipv6-public", + [IMDS_SSH_KEY] = "ssh-key", + [IMDS_USERDATA] = "userdata", + [IMDS_USERDATA_BASE] = "userdata-base", + [IMDS_USERDATA_BASE64] = "userdata-base64", +}; + +DEFINE_STRING_TABLE_LOOKUP(imds_well_known, ImdsWellKnown); + + +static const char* const imds_network_mode_table[_IMDS_NETWORK_MODE_MAX] = { + [IMDS_NETWORK_OFF] = "off", + [IMDS_NETWORK_LOCKED] = "locked", + [IMDS_NETWORK_UNLOCKED] = "unlocked", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(imds_network_mode, ImdsNetworkMode, IMDS_NETWORK_LOCKED); diff --git a/src/imds/imds-util.h b/src/imds/imds-util.h new file mode 100644 index 0000000000000..55ab79510f44e --- /dev/null +++ b/src/imds/imds-util.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "macro.h" +#include "string-table.h" /* IWYU pragma: keep */ + +typedef enum ImdsNetworkMode { + IMDS_NETWORK_OFF, /* No automatic pre-IMDS network configuration, something else has to do this. 
(Also: no "prohibit" route) */ + IMDS_NETWORK_LOCKED, /* "Prohibit" route for the IMDS server, unless you have SO_MARK set to 0x7FFF0815 */ + IMDS_NETWORK_UNLOCKED, /* No "prohibit" route for the IMDS server */ + _IMDS_NETWORK_MODE_MAX, + _IMDS_NETWORK_MODE_INVALID = -EINVAL, +} ImdsNetworkMode; + +/* Various well-known keys */ +typedef enum ImdsWellKnown { + IMDS_BASE, /* The same as "/", typically suffixed */ + IMDS_HOSTNAME, + IMDS_REGION, + IMDS_ZONE, + IMDS_IPV4_PUBLIC, + IMDS_IPV6_PUBLIC, + IMDS_SSH_KEY, + IMDS_USERDATA, + IMDS_USERDATA_BASE, /* typically suffixed */ + IMDS_USERDATA_BASE64, + _IMDS_WELL_KNOWN_MAX, + _IMDS_WELL_KNOWN_INVALID = -EINVAL, +} ImdsWellKnown; + +static inline bool imds_well_known_can_suffix(ImdsWellKnown wk) { + return IN_SET(wk, IMDS_BASE, IMDS_USERDATA_BASE); +} + +bool imds_key_is_valid(const char *key); + +DECLARE_STRING_TABLE_LOOKUP(imds_well_known, ImdsWellKnown); +DECLARE_STRING_TABLE_LOOKUP(imds_network_mode, ImdsNetworkMode); diff --git a/src/imds/imdsd.c b/src/imds/imdsd.c new file mode 100644 index 0000000000000..3caf8d9a00263 --- /dev/null +++ b/src/imds/imdsd.c @@ -0,0 +1,3024 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include + +#include "sd-bus.h" +#include "sd-device.h" +#include "sd-event.h" +#include "sd-json.h" +#include "sd-netlink.h" + +#include "alloc-util.h" +#include "build-path.h" +#include "build.h" +#include "bus-polkit.h" +#include "chase.h" +#include "copy.h" +#include "device-private.h" +#include "dns-rr.h" +#include "errno-util.h" +#include "escape.h" +#include "event-util.h" +#include "fd-util.h" +#include "format-ifname.h" +#include "hash-funcs.h" +#include "hashmap.h" +#include "imds-util.h" +#include "in-addr-util.h" +#include "io-util.h" +#include "iovec-util.h" +#include "json-util.h" +#include "log.h" +#include "main-func.h" +#include "netlink-util.h" +#include "parse-argument.h" +#include "parse-util.h" +#include "path-util.h" +#include 
"pretty-print.h" +#include "proc-cmdline.h" +#include "socket-util.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "tmpfile-util.h" +#include "utf8.h" +#include "varlink-io.systemd.InstanceMetadata.h" +#include "varlink-util.h" +#include "web-util.h" +#include "xattr-util.h" + +#include "../import/curl-util.h" + +/* This implements a client to the AWS' and Azure's "Instance Metadata Service", as well as GCP's "VM + * Metadata", i.e.: + * + * https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html + * https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service + * https://docs.cloud.google.com/compute/docs/metadata/overview + * https://docs.hetzner.cloud/reference/cloud#description/server-metadata + * + * Some notes: + * - IMDS service are heavily rate limited, and hence we want to centralize requests in one place and cache + * - In order to isolate IMDS access this expects that traffic to the IMDS address 169.254.169.254 is + * generally prohibited (via a prohibit route), but our service uses fwmark 0x7FFF0815, which (via source + * routing) can bypass this route. + * - To be robust to situations with multiple interfaces, if we have no hint which interface we shall use, + * we'll fork our own binary off, once for each interface, and communicate to it via Varlink. + * - This is supposed to run under its own UID, but with CAP_NET_ADMIN held (since we want to use + * SO_BINDTODEVICE + SO_MARK) + * - This daemon either be invoked manually from the command line, to do a single request, mostly for + * debugging purposes. Or it can be invoked as a Varlink service, which is the primary intended mode of + * operation. 
+ */ + +#define TOKEN_SIZE_MAX (4096U) +#define DATA_SIZE_MAX (4*1024*1024U) +#define FWMARK_DEFAULT UINT32_C(0x7FFF0815) +#define REFRESH_USEC_DEFAULT (15U * USEC_PER_MINUTE) +#define REFRESH_USEC_MIN (1U * USEC_PER_SEC) +#define DIRECT_OVERALL_TIMEOUT_SEC (40U * USEC_PER_SEC) /* a bit shorter than the default D-Bus/Varlink method call time-out) */ +#define INDIRECT_OVERALL_TIMEOUT_SEC (DIRECT_OVERALL_TIMEOUT_SEC + 5U * USEC_PER_SEC) +#define RETRY_MIN_USEC (20U * USEC_PER_MSEC) +#define RETRY_MAX_USEC (3U * USEC_PER_SEC) +#define RETRY_MAX 10U + +/* Which endpoint configuration source has been used, in order of preference */ +typedef enum EndpointSource { + ENDPOINT_USER, /* Explicit command line options */ + ENDPOINT_ENVIRONMENT, /* Fallback environment variables */ + ENDPOINT_PROC_CMDLINE, /* Acquired via kernel command line */ + ENDPOINT_UDEV, /* Acquired via udev SMBIOS object */ + _ENDPOINT_SOURCE_MAX, + _ENDPOINT_SOURCE_INVALID = -EINVAL, +} EndpointSource; + +static char *arg_ifname = NULL; +static usec_t arg_refresh_usec = REFRESH_USEC_DEFAULT; +static uint32_t arg_fwmark = FWMARK_DEFAULT; +static bool arg_fwmark_set = true; +static ImdsWellKnown arg_well_known = _IMDS_WELL_KNOWN_INVALID; +static char* arg_key = NULL; +static bool arg_cache = true; +static bool arg_wait = false; +static bool arg_varlink = false; +static ImdsNetworkMode arg_network_mode = _IMDS_NETWORK_MODE_INVALID; +static bool arg_setup_network = false; + +/* The follow configure the IMDS service endpoint details */ +static EndpointSource arg_endpoint_source = _ENDPOINT_SOURCE_INVALID; +static char *arg_vendor = NULL; +static char *arg_token_url = NULL; +static char *arg_refresh_header_name = NULL; +static char *arg_data_url = NULL; +static char *arg_data_url_suffix = NULL; +static char *arg_token_header_name = NULL; +static char **arg_extra_header = NULL; +static struct in_addr arg_address_ipv4 = {}; +static struct in6_addr arg_address_ipv6 = {}; +static char 
*arg_well_known_key[_IMDS_WELL_KNOWN_MAX] = {}; + +static void imds_well_known_key_free(typeof(arg_well_known_key) *array) { + FOREACH_ARRAY(i, *array, _IMDS_WELL_KNOWN_MAX) + free(*i); +} + +STATIC_DESTRUCTOR_REGISTER(arg_ifname, freep); +STATIC_DESTRUCTOR_REGISTER(arg_key, freep); +STATIC_DESTRUCTOR_REGISTER(arg_vendor, freep); +STATIC_DESTRUCTOR_REGISTER(arg_token_url, freep); +STATIC_DESTRUCTOR_REGISTER(arg_refresh_header_name, freep); +STATIC_DESTRUCTOR_REGISTER(arg_data_url, freep); +STATIC_DESTRUCTOR_REGISTER(arg_data_url_suffix, freep); +STATIC_DESTRUCTOR_REGISTER(arg_token_header_name, freep); +STATIC_DESTRUCTOR_REGISTER(arg_extra_header, strv_freep); +STATIC_DESTRUCTOR_REGISTER(arg_well_known_key, imds_well_known_key_free); + +typedef struct Context Context; + +typedef struct ChildData { + /* If there are multiple network interfaces, and we are not sure where to look for things, we'll fork + * additional instances of ourselves, one for each interface. */ + Context *context; + int ifindex; + sd_varlink *link; /* outing varlink connection towards the child */ + bool retry; /* If true then new information came to light and we should restart the request */ +} ChildData; + +struct Context { + /* Fields shared between requests (these remain allocated between Varlink requests) */ + sd_event *event; + sd_netlink *rtnl; + bool rtnl_attached; + sd_bus *system_bus; /* for polkit */ + CurlGlue *glue; + struct iovec token; /* token in binary */ + char *token_string; /* token as string, once complete and validated */ + int cache_dir_fd; + Hashmap *polkit_registry; + + /* Request-specific fields (these get reset whenever we start processing a new Varlink call) */ + int ifindex; + usec_t timestamp; /* CLOCK_BOOTTIME */ + int cache_fd; + char *cache_filename, *cache_temporary_filename; + uint64_t data_size; + usec_t refresh_usec; + char *key; + ImdsWellKnown well_known; + bool write_stdout; + struct iovec write_iovec; + bool cache; + bool wait; + sd_varlink *current_link; 
/* incoming varlink connection we are processing */ + uint32_t fwmark; + bool fwmark_set; + sd_event_source *overall_timeout_source; + + /* Mode 1 "direct": we go directly to the network (this is done if we know the interface index to + * use) */ + CURL *curl_token; + CURL *curl_data; + struct curl_slist *request_header_token, *request_header_data; + sd_event_source *retry_source; + unsigned n_retry; + usec_t retry_interval_usec; + + /* Mode 2 "indirect": we fork off a number of children which go to the network on behalf of us, + * because we have multiple network interfaces to deal with. */ + Hashmap *child_data; + sd_netlink_slot *address_change_slot; +}; + +#define CONTEXT_NULL \ + (Context) { \ + .cache_dir_fd = -EBADF, \ + .cache_fd = -EBADF, \ + .well_known = _IMDS_WELL_KNOWN_INVALID, \ + } + +/* Log helpers that cap at debug logging if we are operating on behalf of a Varlink client. The + * context_log_oom() expansion is fully parenthesized so that the ternary cannot bind to operators of a + * surrounding expression. */ +#define context_log_errno(c, level, r, fmt, ...) \ + log_full_errno((c)->current_link ? LOG_DEBUG : (level), r, fmt, ##__VA_ARGS__) +#define context_log(c, level, fmt, ...) \ + log_full((c)->current_link ? LOG_DEBUG : (level), fmt, ##__VA_ARGS__) +#define context_log_oom(c) \ + ((c)->current_link ? 
log_oom_debug() : log_oom()) + +static int context_acquire_data(Context *c); +static int context_acquire_token(Context *c); +static int context_spawn_child(Context *c, int ifindex, sd_varlink **ret); + +static ChildData* child_data_free(ChildData *cd) { + if (!cd) + return NULL; + + if (cd->context) + hashmap_remove(cd->context->child_data, INT_TO_PTR(cd->ifindex)); + + sd_varlink_close_unref(cd->link); + return mfree(cd); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(ChildData*, child_data_free); + +static void context_reset_token(Context *c) { + assert(c); + + iovec_done(&c->token); + c->token_string = mfree(c->token_string); +} + +static void context_flush_token(Context *c) { + assert(c); + + /* Drops the token both from the cache directory (if one is open) and from memory */ + + if (c->cache_dir_fd >= 0) + (void) unlinkat(c->cache_dir_fd, "token", /* flags= */ 0); + + context_reset_token(c); +} + +static void context_reset_for_refresh(Context *c) { + assert(c); + + /* Flush out all fields, up to the point we can restart the current request */ + + if (c->curl_token) { + curl_glue_remove_and_free(c->glue, c->curl_token); + c->curl_token = NULL; + } + + if (c->curl_data) { + curl_glue_remove_and_free(c->glue, c->curl_data); + c->curl_data = NULL; + } + + curl_slist_free_all(c->request_header_token); + c->request_header_token = NULL; + curl_slist_free_all(c->request_header_data); + c->request_header_data = NULL; + + c->cache_fd = safe_close(c->cache_fd); + c->cache_filename = mfree(c->cache_filename); + + if (c->cache_temporary_filename && c->cache_dir_fd >= 0) + (void) unlinkat(c->cache_dir_fd, c->cache_temporary_filename, /* flags= */ 0); + + c->cache_temporary_filename = mfree(c->cache_temporary_filename); + + iovec_done(&c->write_iovec); + + c->child_data = hashmap_free(c->child_data); + c->data_size = 0; + + sd_event_source_set_enabled(c->retry_source, SD_EVENT_OFF); +} + +static void context_reset_full(Context *c) { + assert(c); + + /* Flush out all fields relevant to the current request, comprehensively */ + + context_reset_for_refresh(c); + c->key = mfree(c->key); + 
c->well_known = _IMDS_WELL_KNOWN_INVALID; + c->current_link = sd_varlink_unref(c->current_link); + c->address_change_slot = sd_netlink_slot_unref(c->address_change_slot); + c->retry_source = sd_event_source_unref(c->retry_source); + c->overall_timeout_source = sd_event_source_unref(c->overall_timeout_source); + c->cache_dir_fd = safe_close(c->cache_dir_fd); +} + +static void context_new_request(Context *c) { + assert(c); + + /* Flush everything out from the previous request */ + context_reset_full(c); + + /* Reinitialize settings from defaults. */ + c->ifindex = 0; + c->timestamp = now(CLOCK_BOOTTIME); + c->refresh_usec = arg_refresh_usec; + c->cache = arg_cache; + c->wait = arg_wait; + c->fwmark = arg_fwmark; + c->fwmark_set = arg_fwmark_set; + c->n_retry = 0; +} + +static void context_done(Context *c) { + assert(c); + + /* Flush out everything specific to the current request first */ + context_reset_full(c); + context_reset_token(c); + + /* And then also flush out everything shared between requests */ + c->glue = curl_glue_unref(c->glue); + c->rtnl = sd_netlink_unref(c->rtnl); + c->event = sd_event_unref(c->event); + c->polkit_registry = hashmap_free(c->polkit_registry); + c->system_bus = sd_bus_flush_close_unref(c->system_bus); +} + +static void context_fail_full(Context *c, int r, const char *varlink_error) { + assert(c); + assert(r != 0); + + /* Called whenever the current retrieval fails asynchronously */ + + r = -abs(r); + + if (varlink_error) + context_log_errno(c, LOG_ERR, r, "Operation failed (%s).", varlink_error); + else + context_log_errno(c, LOG_ERR, r, "Operation failed (%m)."); + + /* If we are running in Varlink mode, return the error on the connection */ + if (c->current_link) { + if (varlink_error) + (void) sd_varlink_error(c->current_link, varlink_error, NULL); + else + (void) sd_varlink_error_errno(c->current_link, r); + } else + /* Otherwise terminate the whole process. 
*/ + sd_event_exit(c->event, r); + + context_reset_full(c); +} + +static void context_fail(Context *c, int r) { + context_fail_full(c, r, /* varlink_error= */ NULL); +} + +static void context_success(Context *c) { + int r; + + assert(c); + + /* Called whenever the current retrieval succeeds asynchronously */ + + context_log(c, LOG_DEBUG, "Operation succeeded."); + + if (c->current_link) { + r = sd_varlink_replybo( + c->current_link, + JSON_BUILD_PAIR_IOVEC_BASE64("data", &c->write_iovec), + SD_JSON_BUILD_PAIR_CONDITION(c->ifindex > 0, "interface", SD_JSON_BUILD_INTEGER(c->ifindex))); + if (r < 0) + context_log_errno(c, LOG_WARNING, r, "Failed to reply to Varlink call, ignoring: %m"); + } else + sd_event_exit(c->event, 0); + + context_reset_full(c); +} + +static int setsockopt_callback(void *userdata, curl_socket_t curlfd, curlsocktype purpose) { + Context *c = ASSERT_PTR(userdata); + int r; + + assert(curlfd >= 0); + + if (purpose != CURLSOCKTYPE_IPCXN) + return CURL_SOCKOPT_OK; + + r = socket_set_unicast_if(curlfd, AF_UNSPEC, c->ifindex); + if (r < 0) { + context_fail(c, context_log_errno(c, LOG_ERR, r, "Failed to bind HTTP socket to interface: %m")); + return CURL_SOCKOPT_ERROR; + } + + if (c->fwmark_set && + setsockopt(curlfd, SOL_SOCKET, SO_MARK, &c->fwmark, sizeof(c->fwmark)) < 0) { + context_fail(c, context_log_errno(c, LOG_ERR, errno, "Failed to set firewall mark on HTTP socket: %m")); + return CURL_SOCKOPT_ERROR; + } + + return CURL_SOCKOPT_OK; +} + +static int context_combine_key(Context *c, char **ret) { + assert(ret); + + /* Combines the well known key with the explicitly configured key */ + + char *s; + if (c->well_known < 0 || c->well_known == IMDS_BASE) { + if (!c->key) + return -ENODATA; + + s = strdup(c->key); + } else { + const char *wk = arg_well_known_key[c->well_known]; + if (!wk) + return -ENODATA; + if (c->key) + s = strjoin(wk, c->key); + else + s = strdup(wk); + } + if (!s) + return -ENOMEM; + + *ret = TAKE_PTR(s); + return 0; +} + +static 
const char *context_get_runtime_directory(Context *c) { + assert(c); + + /* Returns the discovered runtime directory, but only if caching is enabled. */ + + if (!c->cache) { + context_log(c, LOG_DEBUG, "Cache disabled."); + return NULL; + } + + const char *e = secure_getenv("RUNTIME_DIRECTORY"); + if (!e) { + context_log(c, LOG_DEBUG, "Not using cache as $RUNTIME_DIRECTORY is not set."); + return NULL; + } + + return e; +} + +static int context_save_ifname(Context *c) { + int r; + + assert(c); + + /* Saves the used interface name for later retrievals, so that we don't have to wildcard search on + * all interfaces anymore. */ + + if (c->ifindex <= 0) + return 0; + + const char *d = context_get_runtime_directory(c); + if (!d) + return 0; + + _cleanup_close_ int dirfd = open(d, O_PATH|O_CLOEXEC); + if (dirfd < 0) + return context_log_errno(c, LOG_ERR, errno, "Failed to open runtime directory: %m"); + + _cleanup_free_ char *ifname = NULL; + r = rtnl_get_ifname_full(&c->rtnl, c->ifindex, &ifname, /* ret_altnames= */ NULL); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to resolve interface index %i: %m", c->ifindex); + + r = write_string_file_at(dirfd, "ifname", ifname, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to write 'ifname' file: %m"); + + return 1; +} + +typedef enum CacheResult { + CACHE_RESULT_DISABLED, /* caching is disabled */ + CACHE_RESULT_HIT, /* found a positive entry */ + CACHE_RESULT_MISS, /* did not find an entry */ + CACHE_RESULT_KEY_NOT_FOUND, /* found a negative entry */ + CACHE_RESULT_NOT_CACHEABLE, /* not suitable for caching */ + _CACHE_RESULT_MAX, + _CACHE_RESULT_INVALID = -EINVAL, + _CACHE_RESULT_ERRNO_MAX = -ERRNO_MAX, +} CacheResult; + +static CacheResult context_process_cache(Context *c) { + int r; + + assert(c); + + assert(c->key || c->well_known >= 0); + assert(c->cache_fd < 0); + assert(c->cache_dir_fd < 0); + assert(!c->cache_filename); + 
assert(!c->cache_temporary_filename); + + /* Checks the local cache – if we have one – for the current request */ + + const char *e = context_get_runtime_directory(c); + if (!e) + return CACHE_RESULT_DISABLED; + + char ifname[IF_NAMESIZE]; + r = format_ifname(c->ifindex, ifname); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to format interface name: %m"); + + if (!filename_is_valid(ifname)) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EINVAL), "Network interface name '%s' is not a valid filename, refusing.", ifname); + + _cleanup_free_ char *cache_dir = path_join("cache", ifname); + if (!cache_dir) + return context_log_oom(c); + + r = chase(cache_dir, + e, + CHASE_PROHIBIT_SYMLINKS|CHASE_MKDIR_0755|CHASE_MUST_BE_DIRECTORY|CHASE_PREFIX_ROOT, + /* ret_path= */ NULL, + &c->cache_dir_fd); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to open cache directory: %m"); + + _cleanup_free_ char *k = NULL; + r = context_combine_key(c, &k); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to combine IMDS key: %m"); + + _cleanup_free_ char *escaped = xescape(k, "/."); + if (!escaped) + return context_log_oom(c); + + _cleanup_free_ char *fn = strjoin("key-", escaped); + if (!fn) + return context_log_oom(c); + + if (!filename_is_valid(fn)) { + context_log(c, LOG_WARNING, "Cache filename for '%s' is not valid, not caching.", fn); + return CACHE_RESULT_NOT_CACHEABLE; + } + + c->cache_filename = TAKE_PTR(fn); + + _cleanup_close_ int fd = openat(c->cache_dir_fd, c->cache_filename, O_RDONLY|O_CLOEXEC); + if (fd < 0) { + if (errno != ENOENT) + return context_log_errno(c, LOG_ERR, errno, "Failed to open cache file '%s': %m", c->cache_filename); + } else { + _cleanup_free_ char *d = NULL; + size_t l; + + context_log(c, LOG_DEBUG, "Found cached file '%s'.", c->cache_filename); + + r = fgetxattr_malloc(fd, "user.imds.timestamp", &d, &l); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to read timestamp from cache 
file: %m"); + if (l != sizeof(usec_t)) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EBADMSG), "Invalid timestamp xattr on cache file '%s': %m", c->cache_filename); + + usec_t *u = (usec_t*) d; + if (usec_add(*u, c->refresh_usec) > c->timestamp) { + _cleanup_free_ char *result = NULL; + r = fgetxattr_malloc(fd, "user.imds.result", &result, /* ret_size= */ NULL); + if (r == -ENODATA) { + /* No user.imds.result xattr means: hit! */ + if (c->write_stdout) { + r = copy_bytes(fd, STDOUT_FILENO, /* max_bytes= */ UINT64_MAX, /* copy_flags= */ 0); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to write cached data to standard output: %m"); + } else { + assert(!iovec_is_set(&c->write_iovec)); + r = read_full_file_at(fd, /* filename= */ NULL, (char**) &c->write_iovec.iov_base, &c->write_iovec.iov_len); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to read cache data: %m"); + } + + return CACHE_RESULT_HIT; + } + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to read 'user.imds.result' extended attribute: %m"); + + if (streq(result, "key-not-found")) + return CACHE_RESULT_KEY_NOT_FOUND; + + context_log(c, LOG_WARNING, "Unexpected 'user.imds.result' extended attribute value, ignoring: %s", result); + (void) unlinkat(c->cache_dir_fd, c->cache_filename, /* flags= */ 0); + } else { + context_log(c, LOG_DEBUG, "Cached data is older than '%s', ignoring.", FORMAT_TIMESPAN(c->refresh_usec, 0)); + (void) unlinkat(c->cache_dir_fd, c->cache_filename, /* flags= */ 0); + } + } + + /* So the above was not conclusive, let's then at least try to reuse the token */ + _cleanup_(sd_json_variant_unrefp) sd_json_variant *j = NULL; + r = sd_json_parse_file_at(/* f= */ NULL, c->cache_dir_fd, "token", /* flags= */ 0, &j, /* reterr_line= */ NULL, /* reterr_column= */ NULL); + if (r == -ENOENT) { + context_log_errno(c, LOG_DEBUG, r, "No cached token"); + return CACHE_RESULT_MISS; + } + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed 
to read cached token: %m"); + + struct { + const char *token; + uint64_t until; + } d = {}; + + static const sd_json_dispatch_field table[] = { + { "token", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(d, token), SD_JSON_MANDATORY }, + { "validUntilUSec", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, voffsetof(d, until), SD_JSON_MANDATORY }, + {} + }; + + r = sd_json_dispatch(j, table, SD_JSON_ALLOW_EXTENSIONS, &d); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to decode cached token data: %m"); + + if (d.until > c->timestamp) { + /* The token string is kept across requests and retries, hence release any previous one + * instead of overwriting (and thus leaking) it. */ + r = free_and_strdup(&c->token_string, d.token); + if (r < 0) + return context_log_oom(c); + + context_log(c, LOG_INFO, "Reusing cached token."); + } else + context_log(c, LOG_DEBUG, "Cached token is stale, not using."); + + return CACHE_RESULT_MISS; +} + +static int on_retry(sd_event_source *s, uint64_t usec, void *userdata) { + Context *c = ASSERT_PTR(userdata); + int r; + + assert(s); + + /* Invoked whenever the retry timer event elapses and we need to retry again */ + + context_log(c, LOG_DEBUG, "Retrying..."); + + /* context_process_cache() asserts that no cache directory fd is open and opens it itself, but the + * refresh-level reset done when the retry was scheduled keeps the fd open. Close it first. */ + c->cache_dir_fd = safe_close(c->cache_dir_fd); + + /* Maybe some other instance was successful in the meantime and already found something? 
*/ + CacheResult cr = context_process_cache(c); + if (cr < 0) { + context_fail(c, cr); + return 0; + } + if (cr == CACHE_RESULT_HIT) { + context_success(c); + return 0; + } + if (cr == CACHE_RESULT_KEY_NOT_FOUND) { + context_fail(c, context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(ENOENT), "Cache reports: key not found")); + return 0; + } + + r = context_acquire_token(c); + if (r < 0) { + context_fail(c, r); + return 0; + } + + r = context_acquire_data(c); + if (r < 0) + context_fail(c, r); + + return 0; +} + +static int context_schedule_retry(Context *c) { + int r; + + assert(c); + + /* Schedules a new retry via a timer event */ + + if (c->n_retry >= RETRY_MAX) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EUCLEAN), "Retry limits reached, refusing."); + + if (c->n_retry == 0) + c->retry_interval_usec = RETRY_MIN_USEC; + else if (c->retry_interval_usec < RETRY_MAX_USEC / 2) + c->retry_interval_usec *= 2; + else + c->retry_interval_usec = RETRY_MAX_USEC; + + c->n_retry++; + context_log(c, LOG_DEBUG, "Retry attempt #%u in %s...", c->n_retry, FORMAT_TIMESPAN(c->retry_interval_usec, USEC_PER_MSEC)); + + context_reset_for_refresh(c); + + r = event_reset_time_relative( + c->event, + &c->retry_source, + CLOCK_BOOTTIME, + c->retry_interval_usec, + /* accuracy= */ 0, + on_retry, + c, + /* priority= */ 0, + "imds-retry", + /* force_reset= */ true); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to reset retry timer event source: %m"); + + return 0; +} + +static int context_acquire_http_status(Context *c, CURL *curl, long *ret_status) { + assert(c); + assert(ret_status); + + /* Acquires the HTTP status code, and does some generic validation that applies to both the token and + * the data transfer. 
+ * + * Error handling as per: + * https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html#instance-metadata-returns + * https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service#rate-limiting + */ + + long status; + CURLcode code = curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status); + if (code != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to retrieve response code: %s", curl_easy_strerror(code)); + + context_log(c, LOG_DEBUG, "Got HTTP error code %li.", status); + + if (status == 403) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EADDRNOTAVAIL), "IMDS is not available"); + + /* Automatically retry on some transient errors from HTTP */ + if (IN_SET(status, + 503, /* AWS + GCP */ + 429 /* Azure + GCP */)) + return context_schedule_retry(c); + + if (status < 200 || status > 600) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "HTTP request finished with unexpected code %li.", status); + + *ret_status = status; + return 0; +} + +static int context_validate_token_http_status(Context *c, long status) { + assert(c); + + /* Specific HTTP status checks for the token transfer */ + + if (status >= 300) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "HTTP request for token finished with unexpected code %li.", status); + + return 0; +} + +static int context_validate_data_http_status(Context *c, long status) { + int r; + + assert(c); + + /* Specific HTTP status checks for the data transfer */ + + if (status == 401 && arg_token_url) { + /* We need a new a new token */ + context_log(c, LOG_DEBUG, "Server requested a new token..."); + + /* Count token requests as a retry */ + if (c->n_retry >= RETRY_MAX) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EUCLEAN), "Retry limits reached, refusing."); + c->n_retry++; + + context_flush_token(c); + context_reset_for_refresh(c); + + r = context_acquire_token(c); + if (r < 0) + return r; + + return 
context_acquire_data(c); + } + + if (status == 404) { + _cleanup_free_ char *key = NULL; + r = context_combine_key(c, &key); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to combine IMDS key: %m"); + + /* Do negative caching for not found */ + if (c->cache_fd >= 0) { + if (fsetxattr(c->cache_fd, "user.imds.result", "key-not-found", STRLEN("key-not-found"), /* flags= */ 0) < 0) + context_log_errno(c, LOG_DEBUG, errno, "Failed to set result xattr on '%s', ignoring: %m", c->cache_filename); + else { + r = link_tmpfile_at(c->cache_fd, c->cache_dir_fd, c->cache_temporary_filename, c->cache_filename, LINK_TMPFILE_REPLACE); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to move cache file into place: %m"); + + c->cache_fd = safe_close(c->cache_fd); + c->cache_temporary_filename = mfree(c->cache_temporary_filename); + + context_log(c, LOG_DEBUG, "Cached negative entry for '%s'.", key); + } + } + + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(ENOENT), "Key '%s' not found.", key); + } + + if (status >= 300) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "HTTP request for data finished with unexpected code %li.", status); + + return 0; +} + +static int context_validate_token(Context *c) { + int r; + + assert(c); + + /* Validates that the downloaded token data actually forms a valid string */ + + _cleanup_free_ char *t = NULL; + r = make_cstring( + c->token.iov_base, + c->token.iov_len, + MAKE_CSTRING_REFUSE_TRAILING_NUL, + &t); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to convert token into C string: %m"); + + if (string_has_cc(t, NULL) || + !utf8_is_valid(t)) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EINVAL), "Token not valid UTF-8 or contains control characters, refusing."); + + free_and_replace(c->token_string, t); + return 0; +} + +static int context_save_token(Context *c) { + int r; + + assert(c); + assert(c->token_string); + + /* Save the acquired token in the cache, so that we can 
reuse it later */ + + if (c->cache_dir_fd < 0) + return 0; + + /* Only store half the valid time, to make sure we have ample time to use it */ + usec_t until = usec_add(c->timestamp, c->refresh_usec/2); + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *j = NULL; + r = sd_json_buildo( + &j, + SD_JSON_BUILD_PAIR_STRING("token", c->token_string), + SD_JSON_BUILD_PAIR_UNSIGNED("validUntilUSec", until)); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to build token JSON: %m"); + + _cleanup_free_ char *t = NULL; + r = sd_json_variant_format(j, SD_JSON_FORMAT_NEWLINE, &t); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to format JSON: %m"); + + r = write_string_file_at(c->cache_dir_fd, "token", t, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC|WRITE_STRING_FILE_MODE_0600); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to write token cache file: %m"); + + return 0; +} + +static int context_save_data(Context *c) { + int r; + + assert(c); + + /* Finalize saving of the acquired data in the cache */ + + if (c->cache_fd < 0) + return 0; + + r = link_tmpfile_at(c->cache_fd, c->cache_dir_fd, c->cache_temporary_filename, c->cache_filename, LINK_TMPFILE_REPLACE); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to move cache file into place: %m"); + + c->cache_fd = safe_close(c->cache_fd); + c->cache_temporary_filename = mfree(c->cache_temporary_filename); + + context_log(c, LOG_DEBUG, "Cached data."); + return 0; +} + +static void curl_glue_on_finished(CurlGlue *g, CURL *curl, CURLcode result) { + int r; + + assert(g); + + /* Called whenever libcurl did its thing and reports a download being complete or having failed */ + + Context *c = NULL; + if (curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char**) &c) != CURLE_OK) + return; + + switch (result) { + + case CURLE_OK: /* yay! */ + break; + + case CURLE_WRITE_ERROR: + /* CURLE_WRITE_ERROR we'll see if the data callbacks failed already. 
But if they did, they + * already cancelled the whole operation, and we don't have to do anything here */ + return; + + case CURLE_COULDNT_CONNECT: + case CURLE_OPERATION_TIMEDOUT: + case CURLE_GOT_NOTHING: + case CURLE_SEND_ERROR: + case CURLE_RECV_ERROR: + context_log(c, LOG_INFO, "Connection error from curl: %s", curl_easy_strerror(result)); + + /* Automatically retry on some transient errors from curl itself */ + r = context_schedule_retry(c); + if (r < 0) + return context_fail(c, r); + + return; + + default: + return context_fail_full( + c, + context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EHOSTDOWN), "Transfer failed: %s", curl_easy_strerror(result)), + "io.systemd.InstanceMetadata.CommunicationFailure"); + } + + /* If we managed to get a HTTP reply, this is good enough, let's pin the interface now for later calls */ + (void) context_save_ifname(c); + + long status; + r = context_acquire_http_status(c, curl, &status); + if (r == -EADDRNOTAVAIL) + return context_fail_full(c, r, "io.systemd.InstanceMetadata.NotAvailable"); + if (r < 0) + return context_fail(c, r); + + if (curl == c->curl_token) { + r = context_validate_token_http_status(c, status); + if (r < 0) + return context_fail(c, r); + + r = context_validate_token(c); + if (r < 0) + return context_fail(c, r); + + context_log(c, LOG_DEBUG, "Token successfully acquired: %s", c->token_string); + + r = context_save_token(c); + if (r < 0) + return context_fail(c, r); + + r = context_acquire_data(c); + if (r < 0) + return context_fail(c, r); + + } else if (curl == c->curl_data) { + + r = context_validate_data_http_status(c, status); + if (r == -ENOENT) + return context_fail_full(c, r, "io.systemd.InstanceMetadata.KeyNotFound"); + if (r < 0) + return context_fail(c, r); + + context_log(c, LOG_DEBUG, "Data download successful."); + + r = context_save_data(c); + if (r < 0) + return context_fail(c, r); + + context_success(c); + } else + assert_not_reached(); +} + +static int context_acquire_glue(Context *c) { + int 
r; + + assert(c); + + /* Allocates a curl object if we don't have one yet */ + + if (c->glue) + return 0; + + r = curl_glue_new(&c->glue, c->event); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to allocate curl glue: %m"); + + c->glue->on_finished = curl_glue_on_finished; + c->glue->userdata = c; + + return 0; +} + +static size_t data_write_callback(void *contents, size_t size, size_t nmemb, void *userdata) { + Context *c = ASSERT_PTR(userdata); + size_t sz = size * nmemb; + int r; + + /* Called whenever we receive new payload from the server */ + assert(contents); + + /* If we managed to get a HTTP reply, this is good enough, let's pin the interface now for later calls */ + (void) context_save_ifname(c); + + /* Before we use acquired data, let's verify the HTTP status */ + long status; + r = context_acquire_http_status(c, c->curl_data, &status); + if (r == -EADDRNOTAVAIL) { + context_fail_full(c, r, "io.systemd.InstanceMetadata.NotAvailable"); + return 0; + } + if (r < 0) { + context_fail(c, r); + return 0; + } + + r = context_validate_data_http_status(c, status); + if (r == -ENOENT) { + context_fail_full(c, r, "io.systemd.InstanceMetadata.KeyNotFound"); + return 0; + } + if (r < 0) { + context_fail(c, r); + return 0; + } + + if (sz > UINT64_MAX - c->data_size || + c->data_size + sz > DATA_SIZE_MAX) { + context_fail(c, context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(E2BIG), "Data too large, refusing.")); + return 0; + } + + c->data_size += sz; + + if (c->write_stdout) + fwrite(contents, sz, nmemb, stdout); + else if (!iovec_append(&c->write_iovec, &IOVEC_MAKE(contents, sz))) { + context_fail(c, context_log_oom(c)); + return 0; + } + + if (c->cache_fd >= 0) { + r = loop_write(c->cache_fd, contents, sz); + if (r < 0) { + context_fail(c, context_log_errno(c, LOG_ERR, r, "Failed to write data to cache: %m")); + return 0; + } + } + + return sz; +} + +static int context_acquire_data(Context *c) { + int r; + + assert(c); + assert(c->key || c->well_known 
>= 0); + + /* Called to initiate getting the actual IMDS key payload */ + + if (arg_token_url && !c->token_string) + return 0; /* If we need a token first, let's not do anything */ + + _cleanup_free_ char *k = NULL; + r = context_combine_key(c, &k); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to combine key: %m"); + + context_log(c, LOG_INFO, "Requesting data for key '%s'.", k); + + if (c->cache_dir_fd >= 0 && + c->cache_filename && + c->cache_fd < 0) { + c->cache_fd = open_tmpfile_linkable_at(c->cache_dir_fd, c->cache_filename, O_WRONLY|O_CLOEXEC, &c->cache_temporary_filename); + if (c->cache_fd < 0) + return context_log_errno(c, LOG_ERR, c->cache_fd, "Failed to create cache file '%s': %m", c->cache_filename); + + if (fchmod(c->cache_fd, 0600) < 0) + return context_log_errno(c, LOG_ERR, errno, "Failed to adjust cache node access mode: %m"); + + if (fsetxattr(c->cache_fd, "user.imds.timestamp", &c->timestamp, sizeof(c->timestamp), /* flags= */ 0) < 0) + return context_log_errno(c, LOG_ERR, errno, "Failed to set timestamp xattr on '%s': %m", c->cache_filename); + } + + r = context_acquire_glue(c); + if (r < 0) + return r; + + _cleanup_free_ char *url = strjoin(arg_data_url, k, arg_data_url_suffix); + if (!url) + return context_log_oom(c); + + r = curl_glue_make(&c->curl_data, url, c); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to create CURL request for data: %m"); + + if (c->token_string) { + _cleanup_free_ char *token_header = strjoin(arg_token_header_name, ": ", c->token_string); + if (!token_header) + return context_log_oom(c); + + struct curl_slist *n = curl_slist_append(c->request_header_data, token_header); + if (!n) + return context_log_oom(c); + + c->request_header_data = n; + } + + STRV_FOREACH(i, arg_extra_header) { + struct curl_slist *n = curl_slist_append(c->request_header_data, *i); + if (!n) + return context_log_oom(c); + + c->request_header_data = n; + } + + if (c->request_header_data) + if 
(curl_easy_setopt(c->curl_data, CURLOPT_HTTPHEADER, c->request_header_data) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set HTTP request header."); + + if (curl_easy_setopt(c->curl_data, CURLOPT_WRITEFUNCTION, data_write_callback) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL write function."); + + if (curl_easy_setopt(c->curl_data, CURLOPT_WRITEDATA, c) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL write function userdata."); + + if (curl_easy_setopt(c->curl_data, CURLOPT_SOCKOPTFUNCTION, setsockopt_callback) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt funcion."); + + if (curl_easy_setopt(c->curl_data, CURLOPT_SOCKOPTDATA, c) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt funcion userdata."); + + if (curl_easy_setopt(c->curl_data, CURLOPT_LOCALPORT, 1L) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt local port"); + + if (curl_easy_setopt(c->curl_data, CURLOPT_LOCALPORTRANGE, 1023L) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt local port range"); + + r = curl_glue_add(c->glue, c->curl_data); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to add CURL request to glue: %m"); + + return 0; +} + +static size_t token_write_callback(void *contents, size_t size, size_t nmemb, void *userdata) { + Context *c = ASSERT_PTR(userdata); + size_t sz = size * nmemb; + int r; + + /* Called whenever we get data from the token download */ + assert(contents); + + /* If we managed to get a HTTP reply, this is good enough, let's pin the interface now for later calls */ + (void) context_save_ifname(c); + + /* Before we use acquired data, let's verify the HTTP status */ + long status; + r = 
context_acquire_http_status(c, c->curl_token, &status); + if (r == -EADDRNOTAVAIL) { + context_fail_full(c, r, "io.systemd.InstanceMetadata.NotAvailable"); + return 0; + } + if (r < 0) { + context_fail(c, r); + return 0; + } + + r = context_validate_token_http_status(c, status); + if (r < 0) { + context_fail(c, r); + return 0; + } + + if (sz > SIZE_MAX - c->token.iov_len || + c->token.iov_len + sz > TOKEN_SIZE_MAX) { + context_fail(c, context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(E2BIG), "IMDS token too large.")); + return 0; + } + + if (!iovec_append(&c->token, &IOVEC_MAKE(contents, sz))) { + context_fail(c, context_log_oom(c)); + return 0; + } + + return sz; +} + +static int context_acquire_token(Context *c) { + int r; + + assert(c); + + /* Called to initiate getting the token if we need one. */ + + if (c->token_string || !arg_token_url) + return 0; + + context_log(c, LOG_INFO, "Requesting token."); + + r = context_acquire_glue(c); + if (r < 0) + return r; + + r = curl_glue_make(&c->curl_token, arg_token_url, c); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to create CURL request for API token: %m"); + + if (arg_refresh_header_name) { + _cleanup_free_ char *ttl_header = NULL; + if (asprintf(&ttl_header, + "%s: %" PRIu64, + arg_refresh_header_name, + DIV_ROUND_UP(c->refresh_usec, USEC_PER_SEC)) < 0) + return context_log_oom(c); + + c->request_header_token = curl_slist_new(ttl_header, NULL); + if (!c->request_header_token) + return context_log_oom(c); + } + + if (curl_easy_setopt(c->curl_token, CURLOPT_HTTPHEADER, c->request_header_token) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set HTTP request header."); + + if (curl_easy_setopt(c->curl_token, CURLOPT_CUSTOMREQUEST, "PUT") != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set HTTP request method."); + + if (curl_easy_setopt(c->curl_token, CURLOPT_WRITEFUNCTION, token_write_callback) != CURLE_OK) + return 
context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL write function."); + + if (curl_easy_setopt(c->curl_token, CURLOPT_WRITEDATA, c) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL write function userdata."); + + if (curl_easy_setopt(c->curl_token, CURLOPT_SOCKOPTFUNCTION, setsockopt_callback) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt funcion."); + + if (curl_easy_setopt(c->curl_token, CURLOPT_SOCKOPTDATA, c) != CURLE_OK) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt funcion userdata."); + + r = curl_glue_add(c->glue, c->curl_token); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to add CURL request to glue: %m"); + + return 0; +} + +static int vl_on_reply(sd_varlink *link, sd_json_variant *m, const char *error_id, sd_varlink_reply_flags_t flags, void *userdata) { + ChildData *cd = ASSERT_PTR(userdata); + Context *c = ASSERT_PTR(cd->context); + int r; + + assert(link); + assert(m); + + /* When we spawned off worker instances of ourselves (one for each local network interface), then + * we'll get a response from them via a Varlink reply. Handle it. 
*/ + + if (error_id) { + r = sd_varlink_error_to_errno(error_id, m); + if (r == -EBADR) + context_log_errno(c, LOG_WARNING, r, "Varlink error from interface %i: %s", cd->ifindex, error_id); + else + context_log_errno(c, LOG_WARNING, r, "Varlink error from interface %i: %m", cd->ifindex); + + /* Propagate these errors immediately */ + if (streq(error_id, "io.systemd.InstanceMetadata.KeyNotFound")) { + context_fail_full(c, -ENOENT, error_id); + return 0; + } + if (streq(error_id, "io.systemd.InstanceMetadata.WellKnownKeyUnset")) { + context_fail_full(c, -ENODATA, error_id); + return 0; + } + if (streq(error_id, "io.systemd.InstanceMetadata.NotAvailable")) { + context_fail_full(c, -EADDRNOTAVAIL, error_id); + return 0; + } + + /* The other errors we consider transient. Let's see if we shall immediately restart the request. */ + if (cd->retry) { + context_log(c, LOG_DEBUG, "Child for network interface %i was scheduled for immediate retry, executing now.", cd->ifindex); + cd->link = sd_varlink_close_unref(cd->link); + cd->retry = false; + + r = context_spawn_child(c, cd->ifindex, &cd->link); + if (r < 0) { + context_fail(c, r); + return 0; + } + + sd_varlink_set_userdata(cd->link, cd); + return 0; + } + + /* We shall no retry immediately. In that case, we give up on the child, and propagate the + * error if it was the last child, otherwise we continue until the last one dies too. 
*/ + cd = child_data_free(cd); + + if (hashmap_isempty(c->child_data) && !c->wait) { + /* This is the last child, propagate the error */ + context_log(c, LOG_DEBUG, "Last child failed, propagating error."); + + if (streq(error_id, "io.systemd.InstanceMetadata.CommunicationFailure")) + context_fail_full(c, -EHOSTDOWN, error_id); + else if (streq(error_id, "io.systemd.InstanceMetadata.Timeout")) + context_fail_full(c, -ETIMEDOUT, error_id); + else + context_fail_full(c, r, error_id); + + return 0; + } + + context_log(c, LOG_DEBUG, "Pending children remaining, continuing to wait."); + return 0; + } + + assert(!iovec_is_set(&c->write_iovec)); + + static const sd_json_dispatch_field table[] = { + { "data", SD_JSON_VARIANT_STRING, json_dispatch_unbase64_iovec, offsetof(Context, write_iovec), SD_JSON_MANDATORY }, + { "ifindex", _SD_JSON_VARIANT_TYPE_INVALID, json_dispatch_ifindex, offsetof(Context, ifindex), 0 }, + {} + }; + + r = sd_json_dispatch(m, table, SD_JSON_ALLOW_EXTENSIONS, c); + if (r < 0) { + context_fail(c, context_log_errno(c, LOG_ERR, r, "Failed to decode reply data: %m")); + return 0; + } + + if (c->write_stdout) { + r = loop_write(STDOUT_FILENO, c->write_iovec.iov_base, c->write_iovec.iov_len); + if (r < 0) { + context_fail(c, context_log_errno(c, LOG_ERR, r, "Failed to output data: %m")); + return 0; + } + } + + context_success(c); + return 0; +} + +static int context_load_ifname(Context *c) { + int r; + + assert(c); + + /* Tries to load the previously used interface name, so that we don't have to wildcard search on all + * interfaces. 
*/ + + const char *e = context_get_runtime_directory(c); + if (!e) + return 0; + + _cleanup_close_ int dirfd = open(e, O_PATH|O_CLOEXEC); + if (dirfd < 0) + return context_log_errno(c, LOG_ERR, errno, "Failed to open runtime directory: %m"); + + _cleanup_free_ char *ifname = NULL; + r = read_one_line_file_at(dirfd, "ifname", &ifname); + if (r == -ENOENT) + return 0; + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to load 'ifname' file from runtime directory: %m"); + + if (!ifname_valid(ifname)) + return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EINVAL), "Loaded interface name not valid, refusing: %s", ifname); + + c->ifindex = rtnl_resolve_interface(&c->rtnl, ifname); + if (c->ifindex < 0) { + (void) unlinkat(dirfd, "ifname", /* flags= */ 0); + context_log_errno(c, LOG_ERR, c->ifindex, "Failed to resolve saved interface name '%s', assuming interface disappeared, ignoring: %m", ifname); + return c->ifindex; + } + + log_debug("Using previously pinned interface '%s' (ifindex: %i).", ifname, c->ifindex); + return 1; +} + +DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR( + child_data_hash_ops, + void, + trivial_hash_func, + trivial_compare_func, + ChildData, + child_data_free); + +static int context_spawn_child(Context *c, int ifindex, sd_varlink **ret) { + int r; + + assert(c); + assert(ifindex > 0); + assert(ret); + + /* If we don't know yet on which network interface the IMDS server can be found, let's spawn separate + * instances of ourselves, one for earch interface, and collect the results. We communicate with + * each one via Varlink, the same way as clients talk to us. 
*/ + + context_log(c, LOG_DEBUG, "Spawning child for interface '%i'.", ifindex); + + _cleanup_free_ char *p = NULL; + _cleanup_close_ int fd = pin_callout_binary(LIBEXECDIR "/systemd-imdsd", &p); + if (fd < 0) + return context_log_errno(c, LOG_ERR, fd, "Failed to find imdsd binary: %m"); + + _cleanup_strv_free_ char **argv = strv_new( + p, + "--vendor", strempty(arg_vendor), + "--token-url", strempty(arg_token_url), + "--refresh-header-name", strempty(arg_refresh_header_name), + "--data-url", strempty(arg_data_url), + "--data-url-suffix", strempty(arg_data_url_suffix), + "--token-header-name", strempty(arg_token_header_name), + "--address-ipv4", in4_addr_is_null(&arg_address_ipv4) ? "" : IN4_ADDR_TO_STRING(&arg_address_ipv4), + "--address-ipv6", in6_addr_is_null(&arg_address_ipv6) ? "" : IN6_ADDR_TO_STRING(&arg_address_ipv6)); + if (!argv) + return log_oom(); + + STRV_FOREACH(i, arg_extra_header) + if (strv_extend_strv(&argv, STRV_MAKE("--extra-header", *i), /* filter_duplicates= */ false) < 0) + return log_oom(); + + for (ImdsWellKnown wk = 0; wk < _IMDS_WELL_KNOWN_MAX; wk++) { + if (!arg_well_known_key[wk]) + continue; + + if (strv_extendf(&argv, "--well-known-key=%s:%s", imds_well_known_to_string(wk), arg_well_known_key[wk]) < 0) + return log_oom(); + } + + if (DEBUG_LOGGING) { + _cleanup_free_ char *cmdline = quote_command_line(argv, SHELL_ESCAPE_EMPTY); + log_debug("About to fork off: %s", strnull(cmdline)); + } + + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + r = sd_varlink_connect_exec(&vl, p, argv); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to fork off imdsd binary for interface %i: %m", ifindex); + + r = sd_varlink_attach_event( + vl, + c->event, + SD_EVENT_PRIORITY_NORMAL); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to attach Varlink connection to event loop: %m"); + + r = sd_varlink_bind_reply(vl, vl_on_reply); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to bind reply callback: %m"); + 
+ r = sd_varlink_invokebo( + vl, + "io.systemd.InstanceMetadata.Get", + JSON_BUILD_PAIR_STRING_NON_EMPTY("key", c->key), + SD_JSON_BUILD_PAIR_CONDITION(c->well_known >= 0, "wellKnown", JSON_BUILD_STRING_UNDERSCORIFY(imds_well_known_to_string(c->well_known))), + SD_JSON_BUILD_PAIR_INTEGER("interface", ifindex), + SD_JSON_BUILD_PAIR_INTEGER("refreshUSec", c->refresh_usec), + SD_JSON_BUILD_PAIR_BOOLEAN("cache", c->cache), + SD_JSON_BUILD_PAIR_CONDITION(c->fwmark_set, "firewallMark", SD_JSON_BUILD_UNSIGNED(c->fwmark)), + SD_JSON_BUILD_PAIR_CONDITION(!c->fwmark_set, "firewallMark", SD_JSON_BUILD_NULL)); /* explicitly turn of fwmark, if not set */ + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to issue Get() command to Varlink child: %m"); + + *ret = TAKE_PTR(vl); + return 0; +} + +static int context_spawn_new_child(Context *c, int ifindex) { + int r; + + assert(c); + + /* Spawn a child, and keep track of it */ + + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + r = context_spawn_child(c, ifindex, &vl); + if (r < 0) + return r; + + _cleanup_(child_data_freep) ChildData *cd = new(ChildData, 1); + if (!cd) + return context_log_oom(c); + + *cd = (ChildData) { + .ifindex = ifindex, + .link = sd_varlink_ref(vl), + }; + + sd_varlink_set_userdata(vl, cd); + + if (hashmap_ensure_put(&c->child_data, &child_data_hash_ops, INT_TO_PTR(ifindex), cd) < 0) + return context_log_oom(c); + + cd->context = c; + TAKE_PTR(cd); + + return 0; +} + +static int on_address_change(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) { + Context *c = ASSERT_PTR(userdata); + int ifindex, r; + + assert(rtnl); + assert(m); + + /* Called whenever an address appears on the network stack. 
We use that as hint that it is worth to + * invoke a child processing that interface (either for the first time, or again) */ + + r = sd_rtnl_message_addr_get_ifindex(m, &ifindex); + if (r < 0) { + context_log_errno(c, LOG_WARNING, r, "rtnl: could not get ifindex from message, ignoring: %m"); + return 0; + } + if (ifindex <= 0) { + context_log(c, LOG_WARNING, "rtnl: received address message with invalid ifindex %d, ignoring.", ifindex); + return 0; + } + + if (ifindex == LOOPBACK_IFINDEX) { + context_log(c, LOG_DEBUG, "Ignoring loopback device."); + return 0; + } + + if (!c->key && c->well_known < 0) + return 0; + + ChildData *existing = hashmap_get(c->child_data, INT_TO_PTR(ifindex)); + if (existing) { + /* We already have an attempt ongoing for this one? Remember there's a reason now to retry + * this, because new connectivity appeared. */ + context_log(c, LOG_DEBUG, "Child for network interface %i already spawned off, scheduling for immediate retry.", ifindex); + existing->retry = true; + return 0; + } + + return context_spawn_new_child(c, ifindex); +} + +static int context_acquire_rtnl_with_match(Context *c) { + int r; + + assert(c); + assert(c->event); + + /* Acquire a netlink connection and a match if we don't have one yet */ + + if (!c->rtnl) { + r = sd_netlink_open(&c->rtnl); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to connect to netlink: %m"); + } + + if (!c->rtnl_attached) { + /* The netlink connection might have created previously via rtnl_resolve_interface() – which + * however didn't attach it to our event loop. Do so now. 
*/ + r = sd_netlink_attach_event(c->rtnl, c->event, SD_EVENT_PRIORITY_NORMAL); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to attach netlink socket to event loop: %m"); + + c->rtnl_attached = true; + } + + if (!c->address_change_slot) { + r = sd_netlink_add_match(c->rtnl, &c->address_change_slot, RTM_NEWADDR, on_address_change, /* destroy_callback= */ NULL, c, "newaddr"); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to subscribe to RTM_NEWADDR events: %m"); + } + + return 0; +} + +static int context_spawn_children(Context *c) { + int r; + + assert(c); + assert(c->key || c->well_known >= 0); + + /* If we we don't know yet on which interface to query, let's see which interfaces there are and + * spawwn ourselves, once on each */ + + r = context_acquire_rtnl_with_match(c); + if (r < 0) + return r; + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL; + r = sd_rtnl_message_new_addr(c->rtnl, &req, RTM_GETADDR, /* ifindex= */ 0, AF_INET); + if (r < 0) + return r; + + r = sd_netlink_message_set_request_dump(req, true); + if (r < 0) + return r; + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *reply = NULL; + r = sd_netlink_call(c->rtnl, req, 0, &reply); + if (r < 0) + return r; + + for (sd_netlink_message *i = reply; i; i = sd_netlink_message_next(i)) { + r = on_address_change(c->rtnl, i, c); + if (r < 0) + return r; + } + + return 0; +} + +static int imds_configured(int level) { + /* Checks if we have enough endpoint information to operate */ + + if (arg_endpoint_source < 0) + return log_full_errno(level, SYNTHETIC_ERRNO(EOPNOTSUPP), "No IMDS endpoint information provided or detected, cannot operate."); + + if (!arg_data_url) + return log_full_errno(level, SYNTHETIC_ERRNO(EOPNOTSUPP), "No data base URL provided."); + + if (!!arg_token_url != !!arg_token_header_name) + return log_full_errno(level, SYNTHETIC_ERRNO(EOPNOTSUPP), "Incomplete token parameters configured for endpoint."); + + return 0; +} + 
+static int setup_network(void) { + int r; + + /* Generates a .network file based on the IMDS endpoint information we have */ + + if (arg_network_mode == IMDS_NETWORK_OFF) { + log_debug("IMDS networking turned off, not generating .network file."); + return 0; + } + + _cleanup_close_ int network_dir_fd = -EBADF; + r = chase("/run/systemd/network", + /* root= */ NULL, + CHASE_MKDIR_0755|CHASE_MUST_BE_DIRECTORY, + /* ret_path= */ NULL, + &network_dir_fd); + if (r < 0) + return log_error_errno(r, "Failed to open .network directory: %m"); + + _cleanup_free_ char *t = NULL; + _cleanup_fclose_ FILE *f = NULL; + r = fopen_tmpfile_linkable_at(network_dir_fd, "85-imds-early.network", O_WRONLY|O_CLOEXEC, &t, &f); + if (r < 0) + return log_error_errno(r, "Failed to create 85-imds-early.network file: %m"); + + CLEANUP_TMPFILE_AT(network_dir_fd, t); + + fputs("# Generated by systemd-imdsd, do not edit.\n" + "#\n" + "# This configures Ethernet devices on cloud hosts that support IMDS, given that\n" + "# before doing IMDS we need to activate the network.\n", f); + + if (arg_network_mode != IMDS_NETWORK_UNLOCKED && + (in4_addr_is_set(&arg_address_ipv4) || in6_addr_is_set(&arg_address_ipv6))) + fputs("#\n" + "# Note: this will create a 'prohibit' route to the IMDS endpoint,\n" + "# blocking direct access to IMDS. 
Direct IMDS access is then only\n" + "# available to traffic marked with fwmark 0x7FFF0815, which can be\n" + "# set via SO_MARK and various other methods, which require\n" + "# privileges.\n", + f); + + fputs("\n" + "[Match]\n" + "Type=ether\n" + "Kind=!*\n" + "\n" + "[Network]\n" + "DHCP=yes\n" + "LinkLocalAddressing=ipv6\n" + "\n" + "[DHCP]\n" + "UseTimezone=yes\n" + "UseHostname=yes\n" + "UseMTU=yes\n", f); + + if (in4_addr_is_set(&arg_address_ipv4)) + fputs("\n" + "[Link]\n" + "RequiredFamilyForOnline=ipv4\n", f); + else if (in6_addr_is_set(&arg_address_ipv6)) + fputs("\n" + "[Link]\n" + "RequiredFamilyForOnline=ipv6\n", f); + + if (arg_network_mode != IMDS_NETWORK_UNLOCKED) { + if (in4_addr_is_set(&arg_address_ipv4)) + fprintf(f, + "\n" + "# Prohibit regular access to IMDS (IPv4)\n" + "[Route]\n" + "Destination=%s\n" + "Type=prohibit\n", + IN4_ADDR_TO_STRING(&arg_address_ipv4)); + + if (in6_addr_is_set(&arg_address_ipv6)) + fprintf(f, + "\n" + "# Prohibit regular access to IMDS (IPv6)\n" + "[Route]\n" + "Destination=%s\n" + "Type=prohibit\n", + IN6_ADDR_TO_STRING(&arg_address_ipv6)); + } + + if (in4_addr_is_set(&arg_address_ipv4)) + fprintf(f, + "\n" + "# Always allow IMDS access via a special routing table (IPv4)\n" + "[Route]\n" + "Destination=%s\n" + "Scope=link\n" + "Table=0x7FFF0815\n" + "\n" + "# Sockets marked with firewall mark 0x7FFF0815 get access to the IMDS route by\n" + "# using the 0x7FFF0815 table populated above.\n" + "[RoutingPolicyRule]\n" + "Family=ipv4\n" + "FirewallMark=0x7FFF0815\n" + "Table=0x7FFF0815\n", + IN4_ADDR_TO_STRING(&arg_address_ipv4)); + + if (in6_addr_is_set(&arg_address_ipv6)) + fprintf(f, + "\n" + "# Always allow IMDS access via a special routing table (IPv6)\n" + "[Route]\n" + "Destination=%s\n" + "Table=0x7FFF0815\n" + "\n" + "# Sockets marked with firewall mark 0x7FFF0815 get access to the IMDS route by\n" + "# using the 0x7FFF0815 table populated above.\n" + "[RoutingPolicyRule]\n" + "Family=ipv6\n" + 
"FirewallMark=0x7FFF0815\n" + "Table=0x7FFF0815\n", + IN6_ADDR_TO_STRING(&arg_address_ipv6)); + + if (fchmod(fileno(f), 0644) < 0) + return log_error_errno(errno, "Failed set access mode for 85-imds-early.network: %m"); + + r = flink_tmpfile_at(f, network_dir_fd, t, "85-imds-early.network", LINK_TMPFILE_REPLACE); + if (r < 0) + return log_error_errno(r, "Failed to move 85-imds-early.network into place: %m"); + + log_info("Created 85-imds-early.network."); + return 0; +} + +static int add_address_to_json_array(sd_json_variant **array, int family, const union in_addr_union *addr) { + int r; + + assert(array); + assert(IN_SET(family, AF_INET, AF_INET6)); + assert(addr); + + /* Appends the specified IP address, turned into A/AAAA RRs to the specified JSON array */ + + if (in_addr_is_null(family, addr)) + return 0; + + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; + if (dns_resource_record_new_address(&rr, family, addr, "_imds") < 0) + return log_oom(); + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *rrj = NULL; + r = dns_resource_record_to_json(rr, &rrj); + if (r < 0) + return log_error_errno(r, "Failed to convert A RR to JSON: %m"); + + r = sd_json_variant_append_array(array, rrj); + if (r < 0) + return log_error_errno(r, "Failed to append A RR to JSON array: %m"); + + log_debug("Writing IMDS RR for: %s", dns_resource_record_to_string(rr)); + return 1; +} + +static int setup_address_rrs(void) { + int r; + + /* Creates local RRs (honoured by systemd-resolved) for the IMDS endpoint addresses. 
*/ + + if (arg_network_mode == IMDS_NETWORK_OFF) { + log_debug("IMDS networking turned off, not generating .rr file."); + return 0; + } + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *aj = NULL; + + union in_addr_union u = { .in = arg_address_ipv4 }; + r = add_address_to_json_array(&aj, AF_INET, &u); + if (r < 0) + return r; + + u = (union in_addr_union) { .in6 = arg_address_ipv6 }; + r = add_address_to_json_array(&aj, AF_INET6, &u); + if (r < 0) + return r; + + if (sd_json_variant_elements(aj) == 0) { + log_debug("No IMDS endpoint addresses known, not writing out RRs."); + return 0; + } + + _cleanup_free_ char *text = NULL; + r = sd_json_variant_format(aj, SD_JSON_FORMAT_NEWLINE, &text); + if (r < 0) + return log_error_errno(r, "Failed to format JSON text: %m"); + + r = write_string_file("/run/systemd/resolve/static.d/imds-endpoint.rr", text, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC|WRITE_STRING_FILE_MKDIR_0755); + if (r < 0) + return log_error_errno(r, "Failed to write IMDS RR data: %m"); + + log_info("Created imds-endpoint.rr."); + return 0; +} + +static int on_overall_timeout(sd_event_source *s, uint64_t usec, void *userdata) { + Context *c = ASSERT_PTR(userdata); + + assert(s); + + /* Invoked whenever the overall time-out event elapses, and we just give up */ + + context_fail_full(c, context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(ETIMEDOUT), "Overall timeout reached."), "io.systemd.InstanceMetadata.Timeout"); + return 0; +} + +static int context_start_overall_timeout(Context *c, usec_t usec) { + int r; + + assert(c); + + r = event_reset_time_relative( + c->event, + &c->overall_timeout_source, + CLOCK_BOOTTIME, + usec, + /* accuracy= */ 0, + on_overall_timeout, + c, + /* priority= */ 0, + "imds-overall-timeout", + /* force_reset= */ true); + if (r < 0) + return context_log_errno(c, LOG_ERR, r, "Failed to reset retry timer event source: %m"); + + return 0; +} + +static int cmdline_run(void) { + int r; + + /* Process the request when invoked via 
the command line (i.e. not via Varlink) */ + + r = imds_configured(LOG_ERR); + if (r < 0) + return r; + + if (arg_setup_network) { + r = setup_network(); + return RET_GATHER(r, setup_address_rrs()); + } + + assert(arg_key || arg_well_known >= 0); + + _cleanup_(context_done) Context c = CONTEXT_NULL; + c.write_stdout = true; + context_new_request(&c); + + c.well_known = arg_well_known; + if (arg_key) { + c.key = strdup(arg_key); + if (!c.key) + return context_log_oom(&c); + } + + if (arg_ifname) { + c.ifindex = rtnl_resolve_interface_or_warn(&c.rtnl, arg_ifname); + if (c.ifindex < 0) + return c.ifindex; + } else { + /* Try to load the previously cached interface */ + r = context_load_ifname(&c); + if (r < 0) + return r; + } + + r = sd_event_default(&c.event); + if (r < 0) + return context_log_errno(&c, LOG_ERR, r, "Failed to allocate event loop: %m"); + + if (c.ifindex > 0) { + CacheResult cr = context_process_cache(&c); + if (cr < 0) + return cr; + if (cr == CACHE_RESULT_HIT) + return 0; + if (cr == CACHE_RESULT_KEY_NOT_FOUND) + return context_log_errno(&c, LOG_ERR, SYNTHETIC_ERRNO(ENOENT), "Cache reports: key not found"); + + r = context_acquire_token(&c); + if (r < 0) + return r; + + r = context_acquire_data(&c); + if (r < 0) + return r; + + r = context_start_overall_timeout(&c, DIRECT_OVERALL_TIMEOUT_SEC); + if (r < 0) + return r; + } else { + /* Couldn't find anything, let's spawn off parallel clients for all interfaces */ + r = context_spawn_children(&c); + if (r < 0) + return r; + + r = context_start_overall_timeout(&c, INDIRECT_OVERALL_TIMEOUT_SEC); + if (r < 0) + return r; + } + + r = sd_event_loop(c.event); + if (r < 0) + return r; + + return 0; +} + +static int context_acquire_system_bus(Context *c) { + int r; + + assert(c); + + /* Connect to the bus if we haven't yet */ + + if (c->system_bus) + return 0; + + r = sd_bus_default_system(&c->system_bus); + if (r < 0) + return r; + + r = sd_bus_attach_event(c->system_bus, c->event, SD_EVENT_PRIORITY_NORMAL); 
+ if (r < 0) + return r; + + return 0; +} + +static JSON_DISPATCH_ENUM_DEFINE(dispatch_well_known, ImdsWellKnown, imds_well_known_from_string); + +static int dispatch_fwmark(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata) { + Context *c = ASSERT_PTR(userdata); + int r; + + /* Parses a firewall mark passed via Varlink/JSON. Note that any 32bit fwmark is valid, hence we keep + * track if it is set or not in a separate boolean. */ + + if (sd_json_variant_is_null(variant)) { + c->fwmark_set = false; + return 0; + } + + r = sd_json_dispatch_uint32(name, variant, flags, &c->fwmark); + if (r < 0) + return r; + + c->fwmark_set = true; + return 0; +} + +static int vl_method_get(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { + Context *c = ASSERT_PTR(userdata); + int r; + + assert(link); + + if (!c->event) + c->event = sd_event_ref(sd_varlink_get_event(link)); + + context_new_request(c); + + static const sd_json_dispatch_field dispatch_table[] = { + { "wellKnown", SD_JSON_VARIANT_STRING, dispatch_well_known, offsetof(Context, well_known), 0 }, + { "key", SD_JSON_VARIANT_STRING, sd_json_dispatch_string, offsetof(Context, key), 0 }, + { "interface", _SD_JSON_VARIANT_TYPE_INVALID, json_dispatch_ifindex, offsetof(Context, ifindex), 0 }, + { "refreshUSec", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(Context, refresh_usec), 0 }, + { "firewallMark", _SD_JSON_VARIANT_TYPE_INVALID, dispatch_fwmark, 0, 0 }, + { "cache", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(Context, cache), 0 }, + { "wait", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(Context, wait), 0 }, + VARLINK_DISPATCH_POLKIT_FIELD, + {} + }; + + r = sd_varlink_dispatch(link, parameters, dispatch_table, c); + if (r != 0) + return r; + + if (c->key) { + if (!imds_key_is_valid(c->key)) + return sd_varlink_error_invalid_parameter_name(link, "key"); + + if (c->well_known < 0) + 
c->well_known = IMDS_BASE; + else if (!imds_well_known_can_suffix(c->well_known)) + return sd_varlink_error_invalid_parameter_name(link, "key"); + } else if (c->well_known < 0) + return sd_varlink_error_invalid_parameter_name(link, "key"); + + if (c->refresh_usec < REFRESH_USEC_MIN) + c->refresh_usec = REFRESH_USEC_MIN; + + uid_t peer_uid; + r = sd_varlink_get_peer_uid(link, &peer_uid); + if (r < 0) + return r; + + if (peer_uid != 0 && peer_uid != getuid()) { + /* Ask polkit if client is not privileged */ + + r = context_acquire_system_bus(c); + if (r < 0) + return r; + + const char* l[5]; + size_t k = 0; + if (c->well_known >= 0) { + l[k++] = "wellKnown"; + l[k++] = imds_well_known_to_string(c->well_known); + } + if (c->key) { + l[k++] = "key"; + l[k++] = c->key; + } + l[k] = NULL; + + r = varlink_verify_polkit_async( + link, + c->system_bus, + "io.freedesktop.imds.get", + l, + &c->polkit_registry); + if (r <= 0) + return r; + } + + if (imds_configured(LOG_DEBUG) < 0) + return sd_varlink_error(link, "io.systemd.InstanceMetadata.NotSupported", NULL); + + /* Up to this point we only validated/parsed stuff. Now we actually execute stuff, hence from now on + * we need to go through context_fail() when failing (context_success() if we succeed early), to + * release resources we might have allocated. 
*/ + assert(!c->current_link); + c->current_link = sd_varlink_ref(link); + + _cleanup_free_ char *k = NULL; /* initialize here, to avoid that this remains uninitialized due to the gotos below */ + + if (c->ifindex <= 0) { + /* Try to load the previously used network interface */ + r = context_load_ifname(c); + if (r < 0) + goto fail; + } + + r = context_combine_key(c, &k); + if (r == -ENODATA) { + context_fail_full(c, r, "io.systemd.InstanceMetadata.WellKnownKeyUnset"); + return r; + } + if (r < 0) + goto fail; + + context_log(c, LOG_DEBUG, "Will request '%s' now.", k); + + if (c->ifindex > 0) { + CacheResult cr = context_process_cache(c); + if (cr < 0) { + r = cr; + goto fail; + } + if (cr == CACHE_RESULT_HIT) { + context_success(c); + return 0; + } + if (cr == CACHE_RESULT_KEY_NOT_FOUND) { + r = context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(ENOENT), "Cache reports: key not found"); + context_fail_full(c, r, "io.systemd.InstanceMetadata.KeyNotFound"); + return r; + } + + r = context_acquire_token(c); + if (r < 0) + goto fail; + + r = context_acquire_data(c); + if (r < 0) + goto fail; + + r = context_start_overall_timeout(c, DIRECT_OVERALL_TIMEOUT_SEC); + if (r < 0) + goto fail; + } else { + r = context_spawn_children(c); + if (r < 0) + goto fail; + + r = context_start_overall_timeout(c, INDIRECT_OVERALL_TIMEOUT_SEC); + if (r < 0) + goto fail; + } + + context_log(c, LOG_DEBUG, "Incoming method call is now pending"); + return 1; + +fail: + context_fail(c, r); + return r; +} + +static int vl_method_get_vendor_info(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { + Context *c = ASSERT_PTR(userdata); + int r; + + assert(link); + + r = sd_varlink_dispatch(link, parameters, /* dispatch_table= */ NULL, c); + if (r != 0) + return r; + + /* NB! 
We allow access to this call without Polkit */ + + if (imds_configured(LOG_DEBUG) < 0) + return sd_varlink_error(link, "io.systemd.InstanceMetadata.NotSupported", NULL); + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *wkj = NULL; + for (ImdsWellKnown i = 0; i < _IMDS_WELL_KNOWN_MAX; i++) { + if (!arg_well_known_key[i]) + continue; + + r = sd_json_variant_set_field_string(&wkj, imds_well_known_to_string(i), arg_well_known_key[i]); + if (r < 0) + return r; + } + + return sd_varlink_replybo( + link, + JSON_BUILD_PAIR_STRING_NON_EMPTY("vendor", arg_vendor), + JSON_BUILD_PAIR_STRING_NON_EMPTY("tokenUrl", arg_token_url), + JSON_BUILD_PAIR_STRING_NON_EMPTY("refreshHeaderName", arg_refresh_header_name), + JSON_BUILD_PAIR_STRING_NON_EMPTY("dataUrl", arg_data_url), + JSON_BUILD_PAIR_STRING_NON_EMPTY("dataUrlSuffix", arg_data_url_suffix), + JSON_BUILD_PAIR_STRING_NON_EMPTY("tokenHeaderName", arg_token_header_name), + JSON_BUILD_PAIR_STRV_NON_EMPTY("extraHeader", arg_extra_header), + JSON_BUILD_PAIR_IN4_ADDR_NON_NULL("addressIPv4", &arg_address_ipv4), + JSON_BUILD_PAIR_IN6_ADDR_NON_NULL("addressIPv6", &arg_address_ipv6), + JSON_BUILD_PAIR_VARIANT_NON_EMPTY("wellKnown", wkj)); +} + +static int vl_server(void) { + _cleanup_(context_done) Context c = CONTEXT_NULL; + int r; + + /* Invocation as Varlink service */ + + _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *varlink_server = NULL; + r = varlink_server_new( + &varlink_server, + SD_VARLINK_SERVER_INHERIT_USERDATA, + &c); + if (r < 0) + return log_error_errno(r, "Failed to allocate Varlink server: %m"); + + r = sd_varlink_server_add_interface(varlink_server, &vl_interface_io_systemd_InstanceMetadata); + if (r < 0) + return log_error_errno(r, "Failed to add Varlink interface: %m"); + + r = sd_varlink_server_bind_method_many( + varlink_server, + "io.systemd.InstanceMetadata.Get", vl_method_get, + "io.systemd.InstanceMetadata.GetVendorInfo", vl_method_get_vendor_info); + if (r < 0) + return log_error_errno(r, "Failed 
to bind Varlink methods: %m"); + + r = sd_varlink_server_loop_auto(varlink_server); + if (r < 0) + return log_error_errno(r, "Failed to run Varlink event loop: %m"); + + return 0; +} + +static int help(void) { + _cleanup_free_ char *link = NULL; + int r; + + r = terminal_urlify_man("systemd-imdsd", "1", &link); + if (r < 0) + return log_oom(); + + printf("%1$s [OPTIONS...] [KEY]\n" + "\n%5$sLow-level IMDS data acquisition.%6$s\n" + "\n%3$sOptions:%4$s\n" + " -h --help Show this help\n" + " --version Show package version\n" + " -i --interface=INTERFACE\n" + " Use the specified interface\n" + " --refresh=SEC Set token refresh time\n" + " --fwmark=INTEGER Choose firewall mark for HTTP traffic\n" + " --cache=no Disable cache use\n" + " -w --wait=yes Wait for connectivity\n" + " -K --well-known= Select well-known key\n" + " --setup-network Generate .network and .rr files\n" + "\n%3$sManual Endpoint Configuration:%4$s\n" + " --vendor=VENDOR Specify IMDS vendor literally\n" + " --token-url=URL URL for acquiring token\n" + " --refresh-header-name=NAME\n" + " Header name for passing refresh time\n" + " --data-url=URL Base URL for acquiring data\n" + " --data-url-suffix=STRING\n" + " Suffix to append to data URL\n" + " --token-header-name=NAME\n" + " Header name for passing token string\n" + " --extra-header='NAME: VALUE'\n" + " Additional header to pass to data transfer\n" + " --address-ipv4=ADDRESS\n" + " --address-ipv6=ADDRESS\n" + " Configure the IPv4 and IPv6 address of the IMDS server\n" + " --well-known-key=NAME:KEY\n" + " Configure the location of well-known keys\n" + "\nSee the %2$s for details.\n", + program_invocation_short_name, + link, + ansi_underline(), + ansi_normal(), + ansi_highlight(), + ansi_normal()); + + return 0; +} + +static bool http_header_name_valid(const char *a) { + return a && ascii_is_valid(a) && !string_has_cc(a, /* ok= */ NULL) && !strchr(a, ':'); +} + +static bool http_header_valid(const char *a) { + return a && ascii_is_valid(a) && 
!string_has_cc(a, /* ok= */ NULL) && strchr(a, ':'); +} + +static int parse_argv(int argc, char *argv[]) { + + enum { + ARG_VERSION = 0x100, + ARG_REFRESH, + ARG_FWMARK, + ARG_CACHE, + ARG_WAIT, + ARG_VENDOR, + ARG_TOKEN_URL, + ARG_REFRESH_HEADER_NAME, + ARG_DATA_URL, + ARG_DATA_URL_SUFFIX, + ARG_TOKEN_HEADER_NAME, + ARG_EXTRA_HEADER, + ARG_ADDRESS_IPV4, + ARG_ADDRESS_IPV6, + ARG_WELL_KNOWN_KEY, + ARG_SETUP_NETWORK, + }; + + static const struct option options[] = { + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, ARG_VERSION }, + { "interface", required_argument, NULL, 'i' }, + { "refresh", required_argument, NULL, ARG_REFRESH }, + { "fwmark", required_argument, NULL, ARG_FWMARK }, + { "cache", required_argument, NULL, ARG_CACHE }, + { "wait", required_argument, NULL, ARG_WAIT }, + { "well-known", required_argument, NULL, 'K' }, + { "setup-network", no_argument, NULL, ARG_SETUP_NETWORK }, + + /* The following all configure endpoint information explicitly */ + { "vendor", required_argument, NULL, ARG_VENDOR }, + { "token-url", required_argument, NULL, ARG_TOKEN_URL }, + { "refresh-header-name", required_argument, NULL, ARG_REFRESH_HEADER_NAME }, + { "data-url", required_argument, NULL, ARG_DATA_URL }, + { "data-url-suffix", required_argument, NULL, ARG_DATA_URL_SUFFIX }, + { "token-header-name", required_argument, NULL, ARG_TOKEN_HEADER_NAME }, + { "extra-header", required_argument, NULL, ARG_EXTRA_HEADER }, + { "address-ipv4", required_argument, NULL, ARG_ADDRESS_IPV4 }, + { "address-ipv6", required_argument, NULL, ARG_ADDRESS_IPV6 }, + { "well-known-key", required_argument, NULL, ARG_WELL_KNOWN_KEY }, + {} + }; + + int c, r; + + assert(argc >= 0); + assert(argv); + + while ((c = getopt_long(argc, argv, "hi:wK:", options, NULL)) >= 0) { + + switch (c) { + + case 'h': + return help(); + + case ARG_VERSION: + return version(); + + case 'i': + if (isempty(optarg)) { + arg_ifname = mfree(arg_ifname); + break; + } + + if 
(!ifname_valid_full(optarg, IFNAME_VALID_ALTERNATIVE|IFNAME_VALID_NUMERIC)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Interface name '%s' is not valid.", optarg); + + r = free_and_strdup_warn(&arg_ifname, optarg); + if (r < 0) + return r; + + break; + + case ARG_REFRESH: { + if (isempty(optarg)) { + arg_refresh_usec = REFRESH_USEC_DEFAULT; + break; + } + + usec_t t; + r = parse_sec(optarg, &t); + if (r < 0) + return log_error_errno(r, "Failed to parse refresh timeout: %s", optarg); + if (t < REFRESH_USEC_MIN) { + log_warning("Increasing specified refresh time to %s, lower values are not supported.", FORMAT_TIMESPAN(REFRESH_USEC_MIN, 0)); + arg_refresh_usec = REFRESH_USEC_MIN; + } else + arg_refresh_usec = t; + break; + } + + case ARG_FWMARK: + if (isempty(optarg)) { + arg_fwmark_set = false; + break; + } + + if (streq(optarg, "default")) { + arg_fwmark = FWMARK_DEFAULT; + arg_fwmark_set = true; + break; + } + + r = safe_atou32(optarg, &arg_fwmark); + if (r < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse --fwmark= parameter: %s", optarg); + + arg_fwmark_set = true; + break; + + case ARG_CACHE: + r = parse_boolean_argument("--cache", optarg, &arg_cache); + if (r < 0) + return r; + + break; + + case ARG_WAIT: + r = parse_boolean_argument("--wait", optarg, &arg_wait); + if (r < 0) + return r; + + break; + + case 'w': + arg_wait = true; + break; + + case 'K': { + if (isempty(optarg)) { + arg_well_known = _IMDS_WELL_KNOWN_INVALID; + break; + } + + ImdsWellKnown wk = imds_well_known_from_string(optarg); + if (wk < 0) + return log_error_errno(wk, "Failed to parse --well-known= parameter: %m"); + + arg_well_known = wk; + break; + } + + case ARG_VENDOR: + if (isempty(optarg)) { + arg_vendor = mfree(arg_vendor); + break; + } + + r = free_and_strdup_warn(&arg_vendor, optarg); + if (r < 0) + return r; + break; + + case ARG_TOKEN_URL: + if (isempty(optarg)) { + arg_token_url = mfree(arg_token_url); + break; + } + + if (!http_url_is_valid(optarg)) 
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid URL: %s", optarg); + + r = free_and_strdup_warn(&arg_token_url, optarg); + if (r < 0) + return r; + + break; + + case ARG_REFRESH_HEADER_NAME: + if (isempty(optarg)) { + arg_refresh_header_name = mfree(arg_refresh_header_name); + break; + } + + if (!http_header_name_valid(optarg)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid HTTP header name: %s", optarg); + + r = free_and_strdup_warn(&arg_refresh_header_name, optarg); + if (r < 0) + return r; + + break; + + case ARG_DATA_URL: + if (isempty(optarg)) { + arg_data_url = mfree(arg_data_url); + break; + } + + if (!http_url_is_valid(optarg)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid URL: %s", optarg); + + r = free_and_strdup_warn(&arg_data_url, optarg); + if (r < 0) + return r; + + break; + + case ARG_DATA_URL_SUFFIX: + if (isempty(optarg)) { + arg_data_url_suffix = mfree(arg_data_url_suffix); + break; + } + + if (!ascii_is_valid(optarg) || string_has_cc(optarg, /* ok= */ NULL)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid URL suffix: %s", optarg); + + r = free_and_strdup_warn(&arg_data_url_suffix, optarg); + if (r < 0) + return r; + + break; + + case ARG_TOKEN_HEADER_NAME: + if (isempty(optarg)) { + arg_token_header_name = mfree(arg_token_header_name); + break; + } + + if (!http_header_name_valid(optarg)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid HTTP header name: %s", optarg); + + r = free_and_strdup_warn(&arg_token_header_name, optarg); + if (r < 0) + return r; + + break; + + case ARG_EXTRA_HEADER: + if (isempty(optarg)) { + arg_extra_header = strv_free(arg_extra_header); + break; + } + + if (!http_header_valid(optarg)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid HTTP header: %s", optarg); + + if (strv_extend(&arg_extra_header, optarg) < 0) + return log_oom(); + + break; + + case ARG_ADDRESS_IPV4: { + if (isempty(optarg)) { + arg_address_ipv4 = (struct in_addr) {}; + break; + } + + 
union in_addr_union u; + r = in_addr_from_string(AF_INET, optarg, &u); + if (r < 0) + return log_error_errno(r, "Failed to parse IPv4 address: %s", optarg); + arg_address_ipv4 = u.in; + break; + } + + case ARG_ADDRESS_IPV6: { + if (isempty(optarg)) { + arg_address_ipv6 = (struct in6_addr) {}; + break; + } + + union in_addr_union u; + r = in_addr_from_string(AF_INET6, optarg, &u); + if (r < 0) + return log_error_errno(r, "Failed to parse IPv6 address: %s", optarg); + arg_address_ipv6 = u.in6; + break; + } + + case ARG_WELL_KNOWN_KEY: { + if (isempty(optarg)) { + for (ImdsWellKnown wk = 0; wk < _IMDS_WELL_KNOWN_MAX; wk++) + arg_well_known_key[wk] = mfree(arg_well_known_key[wk]); + break; + } + + const char *e = strchr(optarg, ':'); + if (!e) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--well-known-key= expects colon separate name and key pairs."); + + _cleanup_free_ char *name = strndup(optarg, e - optarg); + if (!name) + return log_oom(); + + ImdsWellKnown wk = imds_well_known_from_string(name); + if (wk < 0) + return log_error_errno(wk, "Failed to parse --well-known-key= argument: %m"); + + e++; + if (!imds_key_is_valid(e)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Well known key '%s' is not valid.", e); + + r = free_and_strdup_warn(arg_well_known_key + wk, e); + if (r < 0) + return r; + + break; + } + + case ARG_SETUP_NETWORK: + arg_setup_network = true; + break; + + case '?': + return -EINVAL; + + default: + assert_not_reached(); + } + } + + if (arg_vendor || arg_token_url || arg_refresh_header_name || arg_data_url || arg_data_url_suffix || arg_token_header_name || arg_extra_header) + arg_endpoint_source = ENDPOINT_USER; + + r = sd_varlink_invocation(SD_VARLINK_ALLOW_ACCEPT); + if (r < 0) + return log_error_errno(r, "Failed to check if invoked in Varlink mode: %m"); + + arg_varlink = r; + + if (!arg_varlink) { + + if (arg_setup_network) { + if (optind != argc) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No argument expected."); + } else { + 
if (arg_well_known < 0) {
+                                /* if no --well-known= parameter was specified we require an argument */
+                                if (argc != optind+1)
+                                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "A single argument expected.");
+                        } else if (argc > optind+1) /* if not, then the additional parameter is optional */
+                                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "At most a single argument expected.");
+
+                        if (argc > optind) {
+                                if (!imds_key_is_valid(argv[optind]))
+                                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified IMDS key is not valid, refusing: %s", argv[optind]);
+
+                                r = free_and_strdup_warn(&arg_key, argv[optind]);
+                                if (r < 0)
+                                        return r;
+                        }
+                }
+        }
+
+        return 1;
+}
+
+static int device_get_property_ip_address(
+                sd_device *d,
+                const char *name,
+                int family,
+                union in_addr_union *ret) {
+
+        int r;
+
+        /* Parses an IP address stored in the udev database for a device.
+         *
+         * Looks up property 'name' on device 'd' and parses its value as an address of the given family
+         * (AF_INET or AF_INET6) into 'ret'. Returns the error of sd_device_get_property_value() unmodified
+         * if the lookup fails (callers in this file check for -ENOENT to detect an unset property),
+         * otherwise the result of in_addr_from_string(). */
+
+        assert(d);
+        assert(name);
+        assert(IN_SET(family, AF_INET, AF_INET6));
+
+        const char *v = NULL;
+        r = sd_device_get_property_value(d, name, &v);
+        if (r < 0)
+                return r;
+
+        return in_addr_from_string(family, v, ret);
+}
+
+/* Maps each well-known IMDS key to the udev/hwdb property that carries its location */
+static const char * const imds_well_known_udev_table[_IMDS_WELL_KNOWN_MAX] = {
+        [IMDS_HOSTNAME] = "IMDS_KEY_HOSTNAME",
+        [IMDS_REGION] = "IMDS_KEY_REGION",
+        [IMDS_ZONE] = "IMDS_KEY_ZONE",
+        [IMDS_IPV4_PUBLIC] = "IMDS_KEY_IPV4_PUBLIC",
+        [IMDS_IPV6_PUBLIC] = "IMDS_KEY_IPV6_PUBLIC",
+        [IMDS_SSH_KEY] = "IMDS_KEY_SSH_KEY",
+        [IMDS_USERDATA] = "IMDS_KEY_USERDATA",
+        [IMDS_USERDATA_BASE] = "IMDS_KEY_USERDATA_BASE",
+        [IMDS_USERDATA_BASE64] = "IMDS_KEY_USERDATA_BASE64",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(imds_well_known_udev, ImdsWellKnown);
+
+static int smbios_server_info(void) {
+        int r;
+
+        /* Acquires IMDS server information from udev/hwdb.
+         *
+         * Does nothing if some other source already configured the endpoint (arg_endpoint_source >= 0),
+         * if the DMI device is absent, or if it carries no IMDS_VENDOR= property. Otherwise copies the
+         * IMDS_* properties of the /sys/class/dmi/id/ device into the arg_* globals and marks the
+         * endpoint as ENDPOINT_UDEV. Returns 0 on success (including the "nothing to do" cases), negative
+         * errno on failure. */
+
+        if (arg_endpoint_source >= 0)
+                return 0;
+
+        _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+        r = sd_device_new_from_syspath(&d, "/sys/class/dmi/id/");
+        if (ERRNO_IS_NEG_DEVICE_ABSENT(r)) {
+                log_debug_errno(r, "Failed to open /sys/class/dmi/id/ device, ignoring: %m");
+                return 0;
+        }
+        if (r < 0)
+                return log_error_errno(r, "Failed to open /sys/class/dmi/id/ device: %m");
+
+        const char *vendor;
+        r = sd_device_get_property_value(d, "IMDS_VENDOR", &vendor);
+        if (r == -ENOENT) {
+                log_debug_errno(r, "IMDS_VENDOR= property not set on DMI device, skipping.");
+                return 0;
+        }
+        if (r < 0) /* The property we just tried to read is IMDS_VENDOR=, name it correctly in the error */
+                return log_error_errno(r, "Failed to read IMDS_VENDOR= property of DMI device: %m");
+
+        log_debug("Detected IMDS vendor support '%s'.", vendor);
+
+        r = free_and_strdup_warn(&arg_vendor, vendor);
+        if (r < 0)
+                return r;
+
+        struct {
+                const char *property;
+                char **variable;
+        } table[] = {
+                { "IMDS_TOKEN_URL", &arg_token_url },
+                { "IMDS_REFRESH_HEADER_NAME", &arg_refresh_header_name },
+                { "IMDS_DATA_URL", &arg_data_url },
+                { "IMDS_DATA_URL_SUFFIX", &arg_data_url_suffix },
+                { "IMDS_TOKEN_HEADER_NAME", &arg_token_header_name },
+        };
+
+        FOREACH_ELEMENT(i, table) {
+                const char *v = NULL;
+
+                r = sd_device_get_property_value(d, i->property, &v);
+                if (r < 0 && r != -ENOENT)
+                        return log_error_errno(r, "Failed to read property '%s' of DMI: %m", i->property);
+
+                /* On -ENOENT v is still NULL here, which resets the variable */
+                r = free_and_strdup_warn(i->variable, v);
+                if (r < 0)
+                        return r;
+        }
+
+        /* Extra headers are numbered IMDS_EXTRA_HEADER, IMDS_EXTRA_HEADER2, …, IMDS_EXTRA_HEADER64. Gaps
+         * in the numbering are tolerated, i.e. we always probe all 64 slots. */
+        for (size_t i = 0; i < 64U; i++) {
+                _cleanup_free_ char *property = NULL;
+                const char *p = NULL;
+                if (i > 0) {
+                        if (asprintf(&property, "IMDS_EXTRA_HEADER%zu", i + 1) < 0)
+                                return log_oom();
+                        p = property;
+                } else
+                        p = "IMDS_EXTRA_HEADER";
+
+                const char *v = NULL;
+                r = sd_device_get_property_value(d, p, &v);
+                if (r < 0 && r != -ENOENT)
+                        return log_error_errno(r, "Failed to read property '%s' of DMI: %m", p);
+
+                if (v)
+                        if (strv_extend(&arg_extra_header, v) < 0)
+                                return log_oom();
+        }
+
+        union in_addr_union u;
+        r = device_get_property_ip_address(d, "IMDS_ADDRESS_IPV4", AF_INET, &u);
+        if (r < 0 && r != -ENOENT)
+                return log_error_errno(r, "Failed to read property 'IMDS_ADDRESS_IPV4' of DMI: %m");
+        else if (r >= 0)
+                arg_address_ipv4 = u.in;
+
+        r = device_get_property_ip_address(d, "IMDS_ADDRESS_IPV6", AF_INET6, &u);
+        if (r < 0 && r != -ENOENT)
+                return log_error_errno(r, "Failed to read property 'IMDS_ADDRESS_IPV6' of DMI: %m");
+        else if (r >= 0)
+                arg_address_ipv6 = u.in6;
+
+        for (ImdsWellKnown k = 0; k < _IMDS_WELL_KNOWN_MAX; k++) {
+                const char *p = imds_well_known_udev_to_string(k);
+                if (!p)
+                        continue;
+
+                const char *v = NULL;
+                r = sd_device_get_property_value(d, p, &v);
+                if (r < 0 && r != -ENOENT)
+                        return log_error_errno(r, "Failed to read property '%s' of DMI: %m", p);
+
+                /* As above: an unset property leaves v == NULL and thus resets the key */
+                r = free_and_strdup_warn(arg_well_known_key + k, v);
+                if (r < 0)
+                        return r;
+        }
+
+        log_debug("IMDS endpoint data set from SMBIOS device.");
+        arg_endpoint_source = ENDPOINT_UDEV;
+        return 0;
+}
+
+static int secure_getenv_ip_address(
+                const char *name,
+                int family,
+                union in_addr_union *ret) {
+
+        assert(name);
+        assert(IN_SET(family, AF_INET, AF_INET6));
+
+        /* Parses an IP address specified in an environment variable.
+         *
+         * Returns -ENXIO if the variable is not set (or secure_getenv() refuses to return it), otherwise
+         * the result of in_addr_from_string() for the given family. */
+
+        const char *e = secure_getenv(name);
+        if (!e)
+                return -ENXIO;
+
+        return in_addr_from_string(family, e, ret);
+}
+
+/* Maps each well-known IMDS key to the environment variable that carries its location */
+static const char * const imds_well_known_environment_table[_IMDS_WELL_KNOWN_MAX] = {
+        [IMDS_HOSTNAME] = "SYSTEMD_IMDS_KEY_HOSTNAME",
+        [IMDS_REGION] = "SYSTEMD_IMDS_KEY_REGION",
+        [IMDS_ZONE] = "SYSTEMD_IMDS_KEY_ZONE",
+        [IMDS_IPV4_PUBLIC] = "SYSTEMD_IMDS_KEY_IPV4_PUBLIC",
+        [IMDS_IPV6_PUBLIC] = "SYSTEMD_IMDS_KEY_IPV6_PUBLIC",
+        [IMDS_SSH_KEY] = "SYSTEMD_IMDS_KEY_SSH_KEY",
+        [IMDS_USERDATA] = "SYSTEMD_IMDS_KEY_USERDATA",
+        [IMDS_USERDATA_BASE] = "SYSTEMD_IMDS_KEY_USERDATA_BASE",
+        [IMDS_USERDATA_BASE64] = "SYSTEMD_IMDS_KEY_USERDATA_BASE64",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(imds_well_known_environment, ImdsWellKnown);
+
+static int environment_server_info(void) {
+        int r;
+
+        /* Acquires IMDS endpoint info from environment variables.
+         *
+         * Does nothing if the endpoint was already configured by another source. Every endpoint variable
+         * that is found (strings, extra headers, addresses) marks the endpoint as ENDPOINT_ENVIRONMENT;
+         * the well-known key variables are taken over too, but on their own do not change
+         * arg_endpoint_source. Returns 0 on success, negative errno on failure. */
+
+        if (arg_endpoint_source >= 0)
+                return 0;
+
+        static const struct {
+                const char *name;
+                char **variable;
+        } table[] = {
+                { "SYSTEMD_IMDS_VENDOR", &arg_vendor },
+                { "SYSTEMD_IMDS_TOKEN_URL", &arg_token_url },
+                { "SYSTEMD_IMDS_REFRESH_HEADER_NAME", &arg_refresh_header_name },
+                { "SYSTEMD_IMDS_DATA_URL", &arg_data_url },
+                { "SYSTEMD_IMDS_DATA_URL_SUFFIX", &arg_data_url_suffix },
+                { "SYSTEMD_IMDS_TOKEN_HEADER_NAME", &arg_token_header_name },
+        };
+
+        FOREACH_ELEMENT(i, table) {
+                const char *e = secure_getenv(i->name);
+                if (!e)
+                        continue;
+
+                r = free_and_strdup_warn(i->variable, e);
+                if (r < 0)
+                        return r;
+
+                arg_endpoint_source = ENDPOINT_ENVIRONMENT;
+        }
+
+        /* Extra headers are numbered SYSTEMD_IMDS_EXTRA_HEADER, SYSTEMD_IMDS_EXTRA_HEADER2, … up to
+         * SYSTEMD_IMDS_EXTRA_HEADER64, i.e. the same 64 slots the hwdb logic supports. Unlike there we
+         * stop at the first unset variable. */
+        for (unsigned u = 1; u <= 64; u++) {
+                _cleanup_free_ char *name = NULL;
+
+                if (u > 1 && asprintf(&name, "SYSTEMD_IMDS_EXTRA_HEADER%u", u) < 0)
+                        return log_oom();
+
+                const char *e = secure_getenv(name ?: "SYSTEMD_IMDS_EXTRA_HEADER");
+                if (!e)
+                        break;
+
+                if (strv_extend(&arg_extra_header, e) < 0)
+                        return log_oom();
+
+                arg_endpoint_source = ENDPOINT_ENVIRONMENT;
+        }
+
+        union in_addr_union u;
+        r = secure_getenv_ip_address("SYSTEMD_IMDS_ADDRESS_IPV4", AF_INET, &u);
+        if (r < 0 && r != -ENXIO)
+                return log_error_errno(r, "Failed to read IPv4 address from environment variable 'SYSTEMD_IMDS_ADDRESS_IPV4': %m");
+        if (r >= 0) {
+                arg_address_ipv4 = u.in;
+                arg_endpoint_source = ENDPOINT_ENVIRONMENT;
+        }
+
+        r = secure_getenv_ip_address("SYSTEMD_IMDS_ADDRESS_IPV6", AF_INET6, &u);
+        if (r < 0 && r != -ENXIO)
+                return log_error_errno(r, "Failed to read IPv6 address from environment variable 'SYSTEMD_IMDS_ADDRESS_IPV6': %m");
+        if (r >= 0) {
+                arg_address_ipv6 = u.in6;
+                arg_endpoint_source = ENDPOINT_ENVIRONMENT;
+        }
+
+        for (ImdsWellKnown k = 0; k < _IMDS_WELL_KNOWN_MAX; k++) {
+                const char *n = imds_well_known_environment_to_string(k);
+                if (!n)
+                        continue;
+
+                const char *e = secure_getenv(n);
+                if (!e)
+                        continue;
+
+                r = free_and_strdup_warn(arg_well_known_key + k, e);
+                if (r < 0)
+                        return r;
+        }
+
+        if (arg_endpoint_source >= 0)
+                log_debug("IMDS endpoint data set from environment.");
+
+        return 0;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+        int r;
+
+        assert(key);
+
+        /* Called for each kernel command line option.
+         *
+         * Handles systemd.imds.network= unconditionally. All other systemd.imds.* options configure IMDS
+         * endpoint data and are only honoured if no other source (command line switches, environment)
+         * configured the endpoint before; the first such option we accept marks the endpoint as
+         * ENDPOINT_PROC_CMDLINE. Options that lack a value are ignored. Returns 0 if the option was
+         * processed or is not ours, negative errno on failure. */
+
+        if (proc_cmdline_key_streq(key, "systemd.imds.network")) {
+                if (proc_cmdline_value_missing(key, value))
+                        return 0;
+
+                ImdsNetworkMode m = imds_network_mode_from_string(value);
+                if (m < 0)
+                        return log_warning_errno(m, "Failed to parse systemd.imds.network= value: %m");
+
+                arg_network_mode = m;
+                return 0;
+        }
+
+        /* The other kernel command line options configure IMDS endpoint data. We'll only check them if no
+         * other configuration source for it has been used */
+        if (arg_endpoint_source >= 0 && arg_endpoint_source != ENDPOINT_PROC_CMDLINE)
+                return 0;
+
+        static const struct {
+                const char *key;
+                char **variable;
+        } table[] = {
+                { "systemd.imds.vendor", &arg_vendor },
+                { "systemd.imds.token_url", &arg_token_url },
+                { "systemd.imds.refresh_header_name", &arg_refresh_header_name },
+                { "systemd.imds.data_url", &arg_data_url },
+                { "systemd.imds.data_url_suffix", &arg_data_url_suffix },
+                { "systemd.imds.token_header_name", &arg_token_header_name },
+        };
+
+        FOREACH_ELEMENT(i, table) {
+                if (!proc_cmdline_key_streq(key, i->key))
+                        continue;
+
+                if (proc_cmdline_value_missing(key, value))
+                        return 0;
+
+                r = free_and_strdup_warn(i->variable, value);
+                if (r < 0)
+                        return r;
+
+                arg_endpoint_source = ENDPOINT_PROC_CMDLINE;
+                return 0;
+        }
+
+        if (proc_cmdline_key_streq(key, "systemd.imds.extra_header")) {
+                if (proc_cmdline_value_missing(key, value))
+                        return 0;
+
+                /* An empty assignment resets the list, anything else is appended */
+                if (isempty(value))
+                        arg_extra_header = strv_free(arg_extra_header);
+                else if (strv_extend(&arg_extra_header, value) < 0)
+                        return log_oom();
+
+                arg_endpoint_source = ENDPOINT_PROC_CMDLINE;
+                return 0;
+        }
+
+        if (proc_cmdline_key_streq(key, "systemd.imds.address_ipv4")) {
+                if (proc_cmdline_value_missing(key, value))
+                        return 0;
+
+                union in_addr_union u;
+                r = in_addr_from_string(AF_INET, value, &u);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to parse 'systemd.imds.address_ipv4=' parameter: %s", value);
+
+                arg_address_ipv4 = u.in;
+                arg_endpoint_source = ENDPOINT_PROC_CMDLINE;
+                return 0;
+        }
+
+        if (proc_cmdline_key_streq(key, "systemd.imds.address_ipv6")) {
+                if (proc_cmdline_value_missing(key, value))
+                        return 0;
+
+                union in_addr_union u;
+                r = in_addr_from_string(AF_INET6, value, &u);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to parse 'systemd.imds.address_ipv6=' parameter: %s", value);
+
+                arg_address_ipv6 = u.in6;
+                arg_endpoint_source = ENDPOINT_PROC_CMDLINE;
+                return 0;
+        }
+
+        static const char * const well_known_table[_IMDS_WELL_KNOWN_MAX] = {
+                [IMDS_HOSTNAME] = "systemd.imds.key.hostname",
+                [IMDS_REGION] = "systemd.imds.key.region",
+                [IMDS_ZONE] = "systemd.imds.key.zone",
+                [IMDS_IPV4_PUBLIC] = "systemd.imds.key.ipv4_public",
+                [IMDS_IPV6_PUBLIC] = "systemd.imds.key.ipv6_public",
+                [IMDS_SSH_KEY] = "systemd.imds.key.ssh_key",
+                [IMDS_USERDATA] = "systemd.imds.key.userdata",
+                [IMDS_USERDATA_BASE] = "systemd.imds.key.userdata_base",
+                [IMDS_USERDATA_BASE64] = "systemd.imds.key.userdata_base64",
+        };
+
+        for (ImdsWellKnown wk = 0; wk < _IMDS_WELL_KNOWN_MAX; wk++) {
+                const char *k = well_known_table[wk];
+                if (!k)
+                        continue;
+
+                if (!proc_cmdline_key_streq(key, k))
+                        continue;
+
+                /* Like for all options above: ignore the option if it carries no value. Otherwise a bare
+                 * "systemd.imds.key.hostname" would drop the key and claim the endpoint configuration
+                 * for the kernel command line, which disables the hwdb based auto-detection. */
+                if (proc_cmdline_value_missing(key, value))
+                        return 0;
+
+                r = free_and_strdup_warn(arg_well_known_key + wk, value);
+                if (r < 0)
+                        return r;
+
+                arg_endpoint_source = ENDPOINT_PROC_CMDLINE;
+                return 0;
+        }
+
+        return 0;
+}
+
+static int run(int argc, char* argv[]) {
+        int r;
+
+        /* Program entry point: parses the command line, then collects IMDS endpoint data from the
+         * environment, the kernel command line and finally udev/hwdb (each of these steps backs off if
+         * an earlier source already configured the endpoint), and eventually either serves Varlink or
+         * executes the request given on the command line. */
+
+        log_setup();
+
+        r = parse_argv(argc, argv);
+        if (r <= 0) /* error, or --help/--version was handled and there is nothing more to do */
+                return r;
+
+        r = environment_server_info();
+        if (r < 0)
+                return r;
+
+        /* A broken kernel command line is not fatal, we continue with what we have */
+        r = proc_cmdline_parse(parse_proc_cmdline_item, /* userdata= */ NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+        if (r < 0)
+                log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+        r = smbios_server_info();
+        if (r < 0)
+                return r;
+
+        if (arg_varlink)
+                return vl_server();
+
+        return cmdline_run();
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run); diff --git a/src/imds/io.systemd.imds.policy b/src/imds/io.systemd.imds.policy new file mode 100644 index 0000000000000..e844f60b600bc --- /dev/null +++ b/src/imds/io.systemd.imds.policy @@ -0,0 +1,30 @@ + + + + + + + + The systemd Project + https://systemd.io + + + Acquire IMDS instance metadata. + Authentication is required for an application to acquire IMDS instance metadata. + + auth_admin_keep + auth_admin_keep + auth_admin_keep + + + diff --git a/src/imds/meson.build b/src/imds/meson.build new file mode 100644 index 0000000000000..82f41a10cd314 --- /dev/null +++ b/src/imds/meson.build @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +if conf.get('ENABLE_IMDS') != 1 + subdir_done() +endif + +executables += [ + libexec_template + { + 'name' : 'systemd-imdsd', + 'public' : true, + 'sources' : files('imdsd.c', '../import/curl-util.c', 'imds-util.c'), + 'dependencies' : [ libcurl ], + }, + libexec_template + { + 'name' : 'systemd-imds', + 'public' : true, + 'sources' : files('imds-tool.c', 'imds-util.c'), + }, + generator_template + { + 'name' : 'systemd-imds-generator', + 'sources' : files('imds-generator.c', 'imds-util.c'), + }, +] + +install_data( + 'io.systemd.imds.policy', + install_dir : polkitpolicydir) diff --git a/src/libsystemd/sd-json/json-util.c b/src/libsystemd/sd-json/json-util.c index 7f90b7fc7930c..d9167d9e83baa 100644 --- a/src/libsystemd/sd-json/json-util.c +++ b/src/libsystemd/sd-json/json-util.c @@ -194,7 +194,28 @@ int json_dispatch_in_addr(const char *name, sd_json_variant *variant, sd_json_di return r; if (iov.iov_len != sizeof(struct in_addr)) - return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is array of unexpected size.", strna(name)); + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Expected JSON field '%s' to be an array of %zu bytes.", strna(name), sizeof(struct in_addr)); + + memcpy(address, iov.iov_base, iov.iov_len); + return 0; +} 
+ +int json_dispatch_in6_addr(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata) { + struct in6_addr *address = ASSERT_PTR(userdata); + _cleanup_(iovec_done) struct iovec iov = {}; + int r; + + if (sd_json_variant_is_null(variant)) { + *address = (struct in6_addr) {}; + return 0; + } + + r = json_dispatch_byte_array_iovec(name, variant, flags, &iov); + if (r < 0) + return r; + + if (iov.iov_len != sizeof(struct in6_addr)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Expected JSON field '%s' to be an array of %zu bytes.", strna(name), sizeof(struct in6_addr)); memcpy(address, iov.iov_base, iov.iov_len); return 0; diff --git a/src/libsystemd/sd-json/json-util.h b/src/libsystemd/sd-json/json-util.h index 847725a41e292..478d2a2a2122b 100644 --- a/src/libsystemd/sd-json/json-util.h +++ b/src/libsystemd/sd-json/json-util.h @@ -115,6 +115,7 @@ int json_dispatch_user_group_name(const char *name, sd_json_variant *variant, sd int json_dispatch_const_user_group_name(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); int json_dispatch_const_unit_name(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); int json_dispatch_in_addr(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); +int json_dispatch_in6_addr(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); int json_dispatch_path(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); int json_dispatch_const_path(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); int json_dispatch_strv_path(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); diff --git a/src/resolve/meson.build b/src/resolve/meson.build index be2979343f3f0..b9b2e24b18123 100644 --- a/src/resolve/meson.build +++ 
b/src/resolve/meson.build @@ -36,6 +36,7 @@ systemd_resolved_extract_sources = files( 'resolved-mdns.c', 'resolved-resolv-conf.c', 'resolved-socket-graveyard.c', + 'resolved-static-records.c', 'resolved-util.c', 'resolved-varlink.c', ) diff --git a/src/resolve/resolved-dns-query.c b/src/resolve/resolved-dns-query.c index a0ef750447179..6ec6569ae7639 100644 --- a/src/resolve/resolved-dns-query.c +++ b/src/resolve/resolved-dns-query.c @@ -21,6 +21,7 @@ #include "resolved-etc-hosts.h" #include "resolved-hook.h" #include "resolved-manager.h" +#include "resolved-static-records.h" #include "resolved-timeouts.h" #include "set.h" #include "string-util.h" @@ -910,6 +911,33 @@ static int dns_query_try_etc_hosts(DnsQuery *q) { return 1; } +static int dns_query_try_static_records(DnsQuery *q) { + int r; + + assert(q); + + if (FLAGS_SET(q->flags, SD_RESOLVED_NO_SYNTHESIZE)) + return 0; + + _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL; + r = manager_static_records_lookup( + q->manager, + q->question_bypass ? 
q->question_bypass->question : q->question_utf8, + &answer); + if (r <= 0) + return r; + + dns_query_reset_answer(q); + + q->answer = TAKE_PTR(answer); + q->answer_rcode = DNS_RCODE_SUCCESS; + q->answer_protocol = dns_synthesize_protocol(q->flags); + q->answer_family = dns_synthesize_family(q->flags); + q->answer_query_flags = SD_RESOLVED_AUTHENTICATED|SD_RESOLVED_CONFIDENTIAL|SD_RESOLVED_SYNTHETIC; + + return 1; +} + static int dns_query_go_scopes(DnsQuery *q) { int r; @@ -1038,6 +1066,14 @@ int dns_query_go(DnsQuery *q) { q->state != DNS_TRANSACTION_NULL) return 0; + r = dns_query_try_static_records(q); + if (r < 0) + return r; + if (r > 0) { + dns_query_complete(q, DNS_TRANSACTION_SUCCESS); + return 1; + } + r = dns_query_try_etc_hosts(q); if (r < 0) return r; diff --git a/src/resolve/resolved-etc-hosts.c b/src/resolve/resolved-etc-hosts.c index e9100de5229d0..00c76a9977f85 100644 --- a/src/resolve/resolved-etc-hosts.c +++ b/src/resolve/resolved-etc-hosts.c @@ -72,6 +72,7 @@ void etc_hosts_clear(EtcHosts *hosts) { void manager_etc_hosts_flush(Manager *m) { etc_hosts_clear(&m->etc_hosts); m->etc_hosts_stat = (struct stat) {}; + m->etc_hosts_last = USEC_INFINITY; } static int parse_line(EtcHosts *hosts, unsigned nr, const char *line) { diff --git a/src/resolve/resolved-gperf.gperf b/src/resolve/resolved-gperf.gperf index c548320449b6f..8b8a66d0369bf 100644 --- a/src/resolve/resolved-gperf.gperf +++ b/src/resolve/resolved-gperf.gperf @@ -31,6 +31,7 @@ Resolve.DNSOverTLS, config_parse_dns_over_tls_mode, 0, Resolve.Cache, config_parse_dns_cache_mode, DNS_CACHE_MODE_YES, offsetof(Manager, enable_cache) Resolve.DNSStubListener, config_parse_dns_stub_listener_mode, 0, offsetof(Manager, dns_stub_listener_mode) Resolve.ReadEtcHosts, config_parse_bool, 0, offsetof(Manager, read_etc_hosts) +Resolve.ReadStaticRecords, config_parse_bool, 0, offsetof(Manager, read_static_records) Resolve.ResolveUnicastSingleLabel, config_parse_bool, 0, offsetof(Manager, 
resolve_unicast_single_label) Resolve.DNSStubListenerExtra, config_parse_dns_stub_listener_extra, 0, offsetof(Manager, dns_extra_stub_listeners) Resolve.CacheFromLocalhost, config_parse_bool, 0, offsetof(Manager, cache_from_localhost) diff --git a/src/resolve/resolved-manager.c b/src/resolve/resolved-manager.c index a0fb74ec3567a..25a51ed02b042 100644 --- a/src/resolve/resolved-manager.c +++ b/src/resolve/resolved-manager.c @@ -49,6 +49,7 @@ #include "resolved-mdns.h" #include "resolved-resolv-conf.h" #include "resolved-socket-graveyard.h" +#include "resolved-static-records.h" #include "resolved-util.h" #include "resolved-varlink.h" #include "set.h" @@ -637,6 +638,7 @@ static void manager_set_defaults(Manager *m) { m->enable_cache = DNS_CACHE_MODE_YES; m->dns_stub_listener_mode = DNS_STUB_LISTENER_YES; m->read_etc_hosts = true; + m->read_static_records = true; m->resolve_unicast_single_label = false; m->cache_from_localhost = false; m->stale_retention_usec = 0; @@ -659,6 +661,8 @@ static int manager_dispatch_reload_signal(sd_event_source *s, const struct signa m->unicast_scope = dns_scope_free(m->unicast_scope); m->delegates = hashmap_free(m->delegates); dns_trust_anchor_flush(&m->trust_anchor); + manager_etc_hosts_flush(m); + manager_static_records_flush(m); manager_set_defaults(m); @@ -729,6 +733,7 @@ int manager_new(Manager **ret) { .read_resolv_conf = true, .need_builtin_fallbacks = true, .etc_hosts_last = USEC_INFINITY, + .static_records_last = USEC_INFINITY, .sigrtmin18_info.memory_pressure_handler = manager_memory_pressure, .sigrtmin18_info.memory_pressure_userdata = m, @@ -917,6 +922,7 @@ Manager* manager_free(Manager *m) { dns_trust_anchor_flush(&m->trust_anchor); manager_etc_hosts_flush(m); + manager_static_records_flush(m); while ((sb = hashmap_first(m->dns_service_browsers))) dns_service_browser_free(sb); diff --git a/src/resolve/resolved-manager.h b/src/resolve/resolved-manager.h index 4f595e6d04c24..d6afe10c7c0a1 100644 --- 
a/src/resolve/resolved-manager.h +++ b/src/resolve/resolved-manager.h @@ -123,6 +123,12 @@ typedef struct Manager { struct stat etc_hosts_stat; bool read_etc_hosts; + /* Data from {/etc,/run,/usr/lib}/systemd/resolve/static.d/ */ + Hashmap *static_records; + usec_t static_records_last; + Set *static_records_stat; + bool read_static_records; + /* List of refused DNS Record Types */ Set *refuse_record_types; diff --git a/src/resolve/resolved-static-records.c b/src/resolve/resolved-static-records.c new file mode 100644 index 0000000000000..205159417fa9d --- /dev/null +++ b/src/resolve/resolved-static-records.c @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-json.h" + +#include "alloc-util.h" +#include "conf-files.h" +#include "constants.h" +#include "dns-answer.h" +#include "dns-domain.h" +#include "dns-question.h" +#include "dns-rr.h" +#include "fd-util.h" +#include "fileio.h" +#include "hashmap.h" +#include "json-util.h" +#include "log.h" +#include "resolved-manager.h" +#include "resolved-static-records.h" +#include "set.h" +#include "stat-util.h" + +/* This implements a mechanism to extend what systemd-resolved resolves locally, via .rr drop-ins in + * {/etc,/run,/usr/lib}/systemd/resolve/static.d/. These files are in JSON format, and are RR serializations, + * that match the usual way we serialize RRs to JSON. + * + * Note that this deliberately doesn't user the (probably more user-friendly) classic DNS zone file format, + * to keep things a bit simpler, and symmetric to the places we currently already generate JSON + * serializations of DNS RRs. Also note the semantics are different from DNS zone file format, for example + * regarding delegation (i.e. the RRs defined here have no effect on subdomains), which is probably nicer for + * one-off mappings of domains to specific resources. Or in other words, this is supposed to be a drop-in + * based alternative to /etc/hosts, not a one to DNS zone files. 
(The JSON format is also a lot more + * extensible to us, for example we could teach it to map certain lookups to specific DNS errors, or extend + * it so that subdomains always get NXDOMAIN or similar). + * + * (That said, if there's a good reason, we can also support *.zone files too one day). + */ + +/* Recheck static records at most once every 2s */ +#define STATIC_RECORDS_RECHECK_USEC (2*USEC_PER_SEC) + +DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR( + answer_by_name_hash_ops, + char, + dns_name_hash_func, + dns_name_compare_func, + DnsAnswer, + dns_answer_unref); + +static int load_static_record_file_item(sd_json_variant *rj, Hashmap **records) { + int r; + + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; + r = dns_resource_record_from_json(rj, &rr); + if (r < 0) + return log_error_errno(r, "Failed to parse DNS record from JSON: %m"); + + _cleanup_(dns_answer_unrefp) DnsAnswer *a = + hashmap_remove(*records, dns_resource_key_name(rr->key)); + + r = dns_answer_add_extend_full(&a, rr, /* ifindex= */ 0, DNS_ANSWER_AUTHENTICATED, /* rrsig= */ NULL, /* until= */ USEC_INFINITY); + if (r < 0) + return log_error_errno(r, "Failed append RR to DNS answer: %m"); + + DnsAnswerItem *item = ASSERT_PTR(ordered_set_first(a->items)); + + r = hashmap_ensure_put(records, &answer_by_name_hash_ops, dns_resource_key_name(item->rr->key), a); + if (r < 0) + return log_error_errno(r, "Failed to add RR to static record set: %m"); + + TAKE_PTR(a); + + log_debug("Added static resource record: %s", dns_resource_record_to_string(rr)); + return 1; +} + +static int load_static_record_file(const ConfFile *cf, Hashmap **records, Set **stats) { + int r; + + assert(cf); + assert(records); + + if (set_contains(*stats, &cf->st)) + return 0; + + _cleanup_free_ struct stat *st_copy = memdup(&cf->st, sizeof(cf->st)); + if (!st_copy) + return log_oom(); + + if (set_ensure_consume(stats, &inode_hash_ops, TAKE_PTR(st_copy)) < 0) + return log_oom(); + + _cleanup_fclose_ FILE *f = 
NULL; + r = xfopenat(cf->fd, /* path= */ NULL, "re", /* open_flags= */ 0, &f); + if (r < 0) { + log_warning_errno(r, "Failed to open '%s', skipping: %m", cf->result); + return 0; + } + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *j = NULL; + unsigned line = 0, column = 0; + r = sd_json_parse_file(f, cf->result, /* flags= */ 0, &j, &line, &column); + if (r < 0) { + if (line > 0) + log_syntax(/* unit= */ NULL, LOG_WARNING, cf->result, line, r, "Failed to parse JSON, skipping: %m"); + else + log_warning_errno(r, "Failed to parse JSON file '%s', skipping: %m", cf->result); + return 0; + } + + if (sd_json_variant_is_array(j)) { + sd_json_variant *i; + JSON_VARIANT_ARRAY_FOREACH(i, j) { + r = load_static_record_file_item(i, records); + if (r < 0) + return r; + } + } else if (sd_json_variant_is_object(j)) { + r = load_static_record_file_item(j, records); + if (r < 0) + return r; + } else { + log_warning("JSON file '%s' contains neither array nor object, skipping.", cf->result); + return 0; + } + + return 1; +} + +static int manager_static_records_read(Manager *m) { + int r; + + usec_t ts; + assert_se(sd_event_now(m->event, CLOCK_BOOTTIME, &ts) >= 0); + + /* See if we check the static records db recently already */ + if (m->static_records_last != USEC_INFINITY && usec_add(m->static_records_last, STATIC_RECORDS_RECHECK_USEC) > ts) + return 0; + + m->static_records_last = ts; + + ConfFile **files = NULL; + size_t n_files = 0; + CLEANUP_ARRAY(files, n_files, conf_file_free_many); + + r = conf_files_list_nulstr_full( + ".rr", + /* root= */ NULL, + CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED|CONF_FILES_WARN, + CONF_PATHS_NULSTR("systemd/resolve/static.d/"), + &files, + &n_files); + if (r < 0) + return log_error_errno(r, "Failed to enumerate static record drop-ins: %m"); + + bool reload; + if (set_size(m->static_records_stat) != n_files) + reload = true; + else { + reload = false; + FOREACH_ARRAY(f, files, n_files) + if (!set_contains(m->static_records_stat, &(*f)->st)) 
{ + reload = true; + break; + } + } + + if (!reload) { + log_debug("No static record files changed, not re-reading."); + return 0; + } + + _cleanup_(hashmap_freep) Hashmap *records = NULL; + _cleanup_(set_freep) Set *stats = NULL; + FOREACH_ARRAY(f, files, n_files) + (void) load_static_record_file(*f, &records, &stats); + + hashmap_free(m->static_records); + m->static_records = TAKE_PTR(records); + + set_free(m->static_records_stat); + m->static_records_stat = TAKE_PTR(stats); + + return 0; +} + +int manager_static_records_lookup(Manager *m, DnsQuestion *q, DnsAnswer **answer) { + int r; + + assert(m); + assert(q); + assert(answer); + + if (!m->read_static_records) + return 0; + + (void) manager_static_records_read(m); + + DnsAnswer *f = hashmap_get(m->static_records, dns_question_first_name(q)); + if (!f) + return 0; + + r = dns_answer_extend(answer, f); + if (r < 0) + return r; + + return 1; +} + +void manager_static_records_flush(Manager *m) { + assert(m); + + m->static_records = hashmap_free(m->static_records); + m->static_records_stat = set_free(m->static_records_stat); + m->static_records_last = USEC_INFINITY; +} diff --git a/src/resolve/resolved-static-records.h b/src/resolve/resolved-static-records.h new file mode 100644 index 0000000000000..f50c70ef459a6 --- /dev/null +++ b/src/resolve/resolved-static-records.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "resolved-forward.h" + +void manager_static_records_flush(Manager *m); +int manager_static_records_lookup(Manager *m, DnsQuestion* q, DnsAnswer **answer); diff --git a/src/resolve/test-dns-rr.c b/src/resolve/test-dns-rr.c index e45f1d34238b0..b75604251f5a3 100644 --- a/src/resolve/test-dns-rr.c +++ b/src/resolve/test-dns-rr.c @@ -7,6 +7,21 @@ #include "dns-type.h" #include "tests.h" +static void test_to_json_from_json(DnsResourceRecord *rr) { + int r; + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *j = NULL; + ASSERT_OK(dns_resource_record_to_json(rr, 
&j)); + + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr2 = NULL; + r = dns_resource_record_from_json(j, &rr2); + if (r == -EOPNOTSUPP) + return; + ASSERT_OK(r); + + ASSERT_TRUE(dns_resource_record_equal(rr, rr2)); +} + /* ================================================================ * DNS_RESOURCE_RECORD_RDATA() * ================================================================ */ @@ -802,6 +817,8 @@ TEST(dns_resource_record_new_address_ipv4) { ASSERT_EQ(rr->key->type, DNS_TYPE_A); ASSERT_STREQ(dns_resource_key_name(rr->key), "www.example.com"); ASSERT_EQ(rr->a.in_addr.s_addr, addr.in.s_addr); + + test_to_json_from_json(rr); } TEST(dns_resource_record_new_address_ipv6) { @@ -818,6 +835,8 @@ TEST(dns_resource_record_new_address_ipv6) { ASSERT_EQ(rr->key->type, DNS_TYPE_AAAA); ASSERT_STREQ(dns_resource_key_name(rr->key), "www.example.com"); ASSERT_EQ(memcmp(&rr->aaaa.in6_addr, &addr.in6, sizeof(struct in6_addr)), 0); + + test_to_json_from_json(rr); } /* ================================================================ @@ -1003,11 +1022,13 @@ TEST(dns_resource_record_equal_cname_copy) { a = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_CNAME, "www.example.com"); ASSERT_NOT_NULL(a); - a->cname.name = strdup("example.com"); + a->cname.name = ASSERT_PTR(strdup("example.com")); b = dns_resource_record_copy(a); ASSERT_NOT_NULL(b); ASSERT_TRUE(dns_resource_record_equal(a, b)); + + test_to_json_from_json(a); } TEST(dns_resource_record_equal_cname_fail) { @@ -1220,11 +1241,13 @@ TEST(dns_resource_record_equal_ptr_copy) { a = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_PTR, "127.1.168.192.in-addr-arpa"); ASSERT_NOT_NULL(a); - a->ptr.name = strdup("example.com"); + a->ptr.name = ASSERT_PTR(strdup("example.com")); b = dns_resource_record_copy(a); ASSERT_NOT_NULL(b); ASSERT_TRUE(dns_resource_record_equal(a, b)); + + test_to_json_from_json(a); } TEST(dns_resource_record_equal_ptr_fail) { diff --git a/src/shared/dns-rr.c b/src/shared/dns-rr.c index 
0fa730c13baa2..aa7e5ce8434c1 100644 --- a/src/shared/dns-rr.c +++ b/src/shared/dns-rr.c @@ -2215,6 +2215,12 @@ int dns_resource_key_from_json(sd_json_variant *v, DnsResourceKey **ret) { if (r < 0) return r; + r = dns_name_is_valid(p.name); + if (r < 0) + return r; + if (!r) + return -EBADMSG; + key = dns_resource_key_new(p.class, p.type, p.name); if (!key) return -ENOMEM; @@ -2513,6 +2519,97 @@ int dns_resource_record_to_json(DnsResourceRecord *rr, sd_json_variant **ret) { } } +int dns_resource_record_from_json(sd_json_variant *v, DnsResourceRecord **ret) { + int r; + + assert(v); + assert(ret); + + sd_json_variant *k = sd_json_variant_by_key(v, "key"); + if (!k) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Resource record entry lacks key field, refusing."); + + _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL; + r = dns_resource_key_from_json(k, &key); + if (r < 0) + return r; + + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; + rr = dns_resource_record_new(key); + if (!rr) + return log_oom_debug(); + + /* Note, for now we only support the most common subset of RRs for decoding here. Please send patches for more. 
*/ + switch (key->type) { + + case DNS_TYPE_PTR: + case DNS_TYPE_NS: + case DNS_TYPE_CNAME: + case DNS_TYPE_DNAME: { + _cleanup_free_ char *name = NULL; + + static const struct sd_json_dispatch_field table[] = { + { "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_string, 0, SD_JSON_MANDATORY }, + { "key", SD_JSON_VARIANT_OBJECT, NULL, 0, SD_JSON_MANDATORY }, + {} + }; + + r = sd_json_dispatch(v, table, /* flags= */ 0, &name); + if (r < 0) + return r; + + r = dns_name_is_valid(name); + if (r < 0) + return r; + if (!r) + return -EBADMSG; + + rr->ptr.name = TAKE_PTR(name); + break; + } + + case DNS_TYPE_A: { + struct in_addr addr = {}; + + static const struct sd_json_dispatch_field table[] = { + { "address", SD_JSON_VARIANT_ARRAY, json_dispatch_in_addr, 0, SD_JSON_MANDATORY }, + { "key", SD_JSON_VARIANT_OBJECT, NULL, 0, SD_JSON_MANDATORY }, + {} + }; + + r = sd_json_dispatch(v, table, /* flags= */ 0, &addr); + if (r < 0) + return r; + + rr->a.in_addr = addr; + break; + } + + case DNS_TYPE_AAAA: { + struct in6_addr addr = {}; + + static const struct sd_json_dispatch_field table[] = { + { "address", SD_JSON_VARIANT_ARRAY, json_dispatch_in6_addr, 0, SD_JSON_MANDATORY }, + { "key", SD_JSON_VARIANT_OBJECT, NULL, 0, SD_JSON_MANDATORY }, + {} + }; + + r = sd_json_dispatch(v, table, /* flags= */ 0, &addr); + if (r < 0) + return r; + + rr->aaaa.in6_addr = addr; + break; + } + + default: + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Decoding DNS type %i is currently not supported.", key->type); + } + + *ret = TAKE_PTR(rr); + return 0; +} + static const char* const dnssec_algorithm_table[_DNSSEC_ALGORITHM_MAX_DEFINED] = { /* Mnemonics as listed on https://www.iana.org/assignments/dns-sec-alg-numbers/dns-sec-alg-numbers.xhtml */ [DNSSEC_ALGORITHM_RSAMD5] = "RSAMD5", diff --git a/src/shared/dns-rr.h b/src/shared/dns-rr.h index c30cd71cfa5c7..d747083aa8a81 100644 --- a/src/shared/dns-rr.h +++ b/src/shared/dns-rr.h @@ -419,6 +419,7 @@ int 
dns_resource_record_new_from_raw(DnsResourceRecord **ret, const void *data, int dns_resource_key_to_json(DnsResourceKey *key, sd_json_variant **ret); int dns_resource_key_from_json(sd_json_variant *v, DnsResourceKey **ret); int dns_resource_record_to_json(DnsResourceRecord *rr, sd_json_variant **ret); +int dns_resource_record_from_json(sd_json_variant *v, DnsResourceRecord **ret); void dns_resource_key_hash_func(const DnsResourceKey *k, struct siphash *state); int dns_resource_key_compare_func(const DnsResourceKey *x, const DnsResourceKey *y); diff --git a/src/shared/meson.build b/src/shared/meson.build index bbc0307999324..f24b28efe651f 100644 --- a/src/shared/meson.build +++ b/src/shared/meson.build @@ -204,6 +204,7 @@ shared_sources = files( 'varlink-io.systemd.FactoryReset.c', 'varlink-io.systemd.Hostname.c', 'varlink-io.systemd.Import.c', + 'varlink-io.systemd.InstanceMetadata.c', 'varlink-io.systemd.Journal.c', 'varlink-io.systemd.JournalAccess.c', 'varlink-io.systemd.Login.c', diff --git a/src/shared/pcrextend-util.c b/src/shared/pcrextend-util.c index 8586e85cbbd3f..eef159898d7ad 100644 --- a/src/shared/pcrextend-util.c +++ b/src/shared/pcrextend-util.c @@ -18,6 +18,7 @@ #include "mountpoint-util.h" #include "pcrextend-util.h" #include "pkcs7-util.h" +#include "sha256.h" #include "string-util.h" #include "strv.h" @@ -291,3 +292,70 @@ int pcrextend_verity_now( return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "TPM2 support disabled, not measuring Verity root hashes and signatures."); #endif } + +#define IMDS_USERDATA_TRUNCATED_MAX 256U + +int pcrextend_imds_userdata_word(const struct iovec *data, char **ret) { + assert(iovec_is_set(data)); + assert(ret); + + /* We include both a hash of the complete user data, and a truncated version of the data in the word + * we measure. The former protects the actual data, the latter is useful for debugging. 
*/ + + _cleanup_free_ char *hash = hexmem(SHA256_DIRECT(data->iov_base, data->iov_len), SHA256_DIGEST_SIZE); + if (!hash) + return log_oom(); + + _cleanup_free_ char *data_encoded = NULL; + if (base64mem_full(data->iov_base, MIN(data->iov_len, IMDS_USERDATA_TRUNCATED_MAX), /* line_break= */ SIZE_MAX, &data_encoded) < 0) + return log_oom(); + + _cleanup_free_ char *word = strjoin("imds-userdata:", hash, ":", data_encoded); + if (!word) + return log_oom(); + + *ret = TAKE_PTR(word); + return 0; +} + +int pcrextend_imds_userdata_now(const struct iovec *data) { + +#if HAVE_TPM2 + int r; + + _cleanup_free_ char *word = NULL; + r = pcrextend_imds_userdata_word(data, &word); + if (r < 0) + return r; + + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + r = sd_varlink_connect_address(&vl, "/run/systemd/io.systemd.PCRExtend"); + if (r < 0) + return r; + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *reply = NULL; + const char *error_id = NULL; + r = sd_varlink_callbo( + vl, + "io.systemd.PCRExtend.Extend", + /* ret_reply= */ NULL, + &error_id, + SD_JSON_BUILD_PAIR_INTEGER("pcr", 12), + SD_JSON_BUILD_PAIR_STRING("text", word), + SD_JSON_BUILD_PAIR_STRING("eventType", "imds_userdata")); + if (r < 0) + return log_debug_errno(r, "Failed to issue io.systemd.PCRExtend.Extend() varlink call: %m"); + if (error_id) { + r = sd_varlink_error_to_errno(error_id, reply); + if (r != -EBADR) + return log_debug_errno(r, "Failed to issue io.systemd.PCRExtend.Extend() varlink call: %m"); + + return log_debug_errno(r, "Failed to issue io.systemd.PCRExtend.Extend() varlink call: %s", error_id); + } + + log_debug("Measurement of '%s' into PCR 12 completed.", word); + return 1; +#else + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "TPM2 support disabled, not measuring IMDS userdata."); +#endif +} diff --git a/src/shared/pcrextend-util.h b/src/shared/pcrextend-util.h index 00bc5b9b48dc7..eadc2d5cffc98 100644 --- a/src/shared/pcrextend-util.h +++ b/src/shared/pcrextend-util.h @@ 
-1,9 +1,13 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #pragma once +#include + int pcrextend_file_system_word(const char *path, char **ret, char **ret_normalized_path); int pcrextend_machine_id_word(char **ret); int pcrextend_product_id_word(char **ret); int pcrextend_verity_word(const char *name, const struct iovec *root_hash, const struct iovec *root_hash_sig, char **ret); +int pcrextend_imds_userdata_word(const struct iovec *data, char **ret); -int pcrextend_verity_now(const char *name, const struct iovec *root_hash,const struct iovec *root_hash_sig); +int pcrextend_verity_now(const char *name, const struct iovec *root_hash, const struct iovec *root_hash_sig); +int pcrextend_imds_userdata_now(const struct iovec *data); diff --git a/src/shared/varlink-io.systemd.InstanceMetadata.c b/src/shared/varlink-io.systemd.InstanceMetadata.c new file mode 100644 index 0000000000000..eb5992dbffa95 --- /dev/null +++ b/src/shared/varlink-io.systemd.InstanceMetadata.c @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-polkit.h" +#include "varlink-io.systemd.InstanceMetadata.h" + +static SD_VARLINK_DEFINE_ENUM_TYPE( + WellKnown, + SD_VARLINK_DEFINE_ENUM_VALUE(base), + SD_VARLINK_DEFINE_ENUM_VALUE(hostname), + SD_VARLINK_DEFINE_ENUM_VALUE(region), + SD_VARLINK_DEFINE_ENUM_VALUE(zone), + SD_VARLINK_DEFINE_ENUM_VALUE(ipv4_public), + SD_VARLINK_DEFINE_ENUM_VALUE(ipv6_public), + SD_VARLINK_DEFINE_ENUM_VALUE(ssh_key), + SD_VARLINK_DEFINE_ENUM_VALUE(userdata), + SD_VARLINK_DEFINE_ENUM_VALUE(userdata_base), + SD_VARLINK_DEFINE_ENUM_VALUE(userdata_base64)); + +static SD_VARLINK_DEFINE_METHOD( + Get, + SD_VARLINK_FIELD_COMMENT("The key to retrieve"), + SD_VARLINK_DEFINE_INPUT(key, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Start with a well-known key"), + SD_VARLINK_DEFINE_INPUT_BY_TYPE(wellKnown, WellKnown, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The network interface to use"), + 
SD_VARLINK_DEFINE_INPUT(interface, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Refresh cached data if older (CLOCK_BOOTTIME, µs)"), + SD_VARLINK_DEFINE_INPUT(refreshUSec, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Whether to accept cached data"), + SD_VARLINK_DEFINE_INPUT(cache, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The firewall mark value to use"), + SD_VARLINK_DEFINE_INPUT(firewallMark, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + VARLINK_DEFINE_POLKIT_INPUT, + SD_VARLINK_FIELD_COMMENT("The data in Base64 encoding."), + SD_VARLINK_DEFINE_OUTPUT(data, SD_VARLINK_STRING, 0), + SD_VARLINK_FIELD_COMMENT("The interface the data was found on."), + SD_VARLINK_DEFINE_OUTPUT(interface, SD_VARLINK_INT, SD_VARLINK_NULLABLE)); + +static SD_VARLINK_DEFINE_METHOD( + GetVendorInfo, + SD_VARLINK_FIELD_COMMENT("The detected cloud vendor"), + SD_VARLINK_DEFINE_OUTPUT(vendor, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The URL to acquire the token from"), + SD_VARLINK_DEFINE_OUTPUT(tokenUrl, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The HTTP header to configure the refresh timeout for the token in"), + SD_VARLINK_DEFINE_OUTPUT(refreshHeaderName, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The base URL to acquire the data from"), + SD_VARLINK_DEFINE_OUTPUT(dataUrl, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("A suffix to append to the data URL"), + SD_VARLINK_DEFINE_OUTPUT(dataUrlSuffix, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The HTTP header to pass the token in when requesting data"), + SD_VARLINK_DEFINE_OUTPUT(tokenHeaderName, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Additional HTTP headers to pass when acquiring data"), + SD_VARLINK_DEFINE_OUTPUT(extraHeader, SD_VARLINK_STRING, SD_VARLINK_NULLABLE|SD_VARLINK_ARRAY), + SD_VARLINK_FIELD_COMMENT("IPv4 address 
of IMDS server"), + SD_VARLINK_DEFINE_OUTPUT(addressIPv4, SD_VARLINK_INT, SD_VARLINK_NULLABLE|SD_VARLINK_ARRAY), + SD_VARLINK_FIELD_COMMENT("IPv6 address of IMDS server"), + SD_VARLINK_DEFINE_OUTPUT(addressIPv6, SD_VARLINK_INT, SD_VARLINK_NULLABLE|SD_VARLINK_ARRAY), + SD_VARLINK_FIELD_COMMENT("Well-known fields"), + SD_VARLINK_DEFINE_OUTPUT(wellKnown, SD_VARLINK_OBJECT, SD_VARLINK_NULLABLE)); + +static SD_VARLINK_DEFINE_ERROR( + KeyNotFound); + +static SD_VARLINK_DEFINE_ERROR( + WellKnownKeyUnset); + +static SD_VARLINK_DEFINE_ERROR( + NotAvailable); + +static SD_VARLINK_DEFINE_ERROR( + NotSupported); + +static SD_VARLINK_DEFINE_ERROR( + CommunicationFailure); + +static SD_VARLINK_DEFINE_ERROR( + Timeout); + +SD_VARLINK_DEFINE_INTERFACE( + io_systemd_InstanceMetadata, + "io.systemd.InstanceMetadata", + SD_VARLINK_INTERFACE_COMMENT("APIs for acquiring cloud IMDS information."), + SD_VARLINK_SYMBOL_COMMENT("Well known data fields"), + &vl_type_WellKnown, + SD_VARLINK_SYMBOL_COMMENT("Acquire data."), + &vl_method_Get, + SD_VARLINK_SYMBOL_COMMENT("Get information about cloud vendor and IMDS connectivity."), + &vl_method_GetVendorInfo, + SD_VARLINK_SYMBOL_COMMENT("The requested key is not found on the IMDS server."), + &vl_error_KeyNotFound, + SD_VARLINK_SYMBOL_COMMENT("IMDS is disabled or otherwise not available."), + &vl_error_NotAvailable, + SD_VARLINK_SYMBOL_COMMENT("IMDS is not supported."), + &vl_error_NotSupported, + SD_VARLINK_SYMBOL_COMMENT("Well-known key is not set."), + &vl_error_WellKnownKeyUnset, + SD_VARLINK_SYMBOL_COMMENT("Communication with IMDS failed."), + &vl_error_CommunicationFailure, + SD_VARLINK_SYMBOL_COMMENT("Timeout reached"), + &vl_error_Timeout); diff --git a/src/shared/varlink-io.systemd.InstanceMetadata.h b/src/shared/varlink-io.systemd.InstanceMetadata.h new file mode 100644 index 0000000000000..60920bd9c9f55 --- /dev/null +++ b/src/shared/varlink-io.systemd.InstanceMetadata.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later 
/* Exercises write_data_file_atomic_at() with absolute and relative paths, with AT_FDCWD and XAT_FDROOT as
 * directory fd, with invalid/directory targets, and with WRITE_DATA_FILE_MKDIR_0755. The steps depend on
 * each other through the files they create and remove below /tmp, hence the order matters. */
TEST(write_data_file_atomic_at) {
        struct iovec a = IOVEC_MAKE_STRING("hallo");

        /* Absolute path, relative to the current working directory fd */
        ASSERT_OK(write_data_file_atomic_at(AT_FDCWD, "/tmp/wdfa", &a, /* flags= */ 0));

        /* Read the file back and verify the contents match what we wrote */
        _cleanup_(iovec_done) struct iovec ra = {};
        ASSERT_OK(read_full_file("/tmp/wdfa", (char**) &ra.iov_base, &ra.iov_len));
        ASSERT_EQ(iovec_memcmp(&a, &ra), 0);
        ASSERT_OK_ERRNO(unlink("/tmp/wdfa"));

        /* Relative path with XAT_FDROOT: the file is expected to show up as /tmp/wdfa */
        ASSERT_OK(write_data_file_atomic_at(XAT_FDROOT, "tmp/wdfa", &a, /* flags= */ 0));
        iovec_done(&ra);
        ASSERT_OK(read_full_file("/tmp/wdfa", (char**) &ra.iov_base, &ra.iov_len));
        ASSERT_EQ(iovec_memcmp(&a, &ra), 0);
        ASSERT_OK_ERRNO(unlink("/tmp/wdfa"));

        /* Missing or empty paths are refused with EINVAL, paths referring to a directory with EISDIR */
        ASSERT_ERROR(write_data_file_atomic_at(AT_FDCWD, NULL, &a, /* flags= */ 0), EINVAL);
        ASSERT_ERROR(write_data_file_atomic_at(AT_FDCWD, "", &a, /* flags= */ 0), EINVAL);
        ASSERT_ERROR(write_data_file_atomic_at(AT_FDCWD, "/", &a, /* flags= */ 0), EISDIR);
        ASSERT_ERROR(write_data_file_atomic_at(AT_FDCWD, ".", &a, /* flags= */ 0), EISDIR);
        ASSERT_ERROR(write_data_file_atomic_at(AT_FDCWD, "/tmp/", &a, /* flags= */ 0), EISDIR);

        /* Remember the working directory, so that it can be restored after the chdir() below */
        _cleanup_free_ char *cwd = NULL;
        ASSERT_OK(safe_getcwd(&cwd));
        ASSERT_OK_ERRNO(chdir("/tmp"));

        /* With /tmp as working directory, a bare file name relative to AT_FDCWD lands in /tmp */
        ASSERT_OK(write_data_file_atomic_at(AT_FDCWD, "wdfa", &a, /* flags= */ 0));
        iovec_done(&ra);
        ASSERT_OK(read_full_file("/tmp/wdfa", (char**) &ra.iov_base, &ra.iov_len));
        ASSERT_EQ(iovec_memcmp(&a, &ra), 0);
        ASSERT_OK_ERRNO(unlink("/tmp/wdfa"));

        /* XAT_FDROOT is expected to give the same result as before, regardless of the working directory */
        ASSERT_OK(write_data_file_atomic_at(XAT_FDROOT, "tmp/wdfa", &a, /* flags= */ 0));
        iovec_done(&ra);
        ASSERT_OK(read_full_file("/tmp/wdfa", (char**) &ra.iov_base, &ra.iov_len));
        ASSERT_EQ(iovec_memcmp(&a, &ra), 0);
        ASSERT_OK_ERRNO(unlink("/tmp/wdfa"));

        ASSERT_OK_ERRNO(chdir(cwd));

        /* Without the mkdir flag a missing parent directory is an error, with it the write succeeds */
        ASSERT_ERROR(write_data_file_atomic_at(XAT_FDROOT, "tmp/zzz/wdfa", &a, /* flags= */ 0), ENOENT);
        ASSERT_OK(write_data_file_atomic_at(XAT_FDROOT, "tmp/zzz/wdfa", &a, WRITE_DATA_FILE_MKDIR_0755));
        iovec_done(&ra);
        ASSERT_OK(read_full_file("/tmp/zzz/wdfa", (char**) &ra.iov_base, &ra.iov_len));
        ASSERT_EQ(iovec_memcmp(&a, &ra), 0);
        ASSERT_OK_ERRNO(unlink("/tmp/zzz/wdfa"));

        /* /tmp/zzz still exists as a directory at this point, writing a file over it must fail */
        ASSERT_ERROR(write_data_file_atomic_at(AT_FDCWD, "/tmp/zzz", &a, /* flags= */ 0), EEXIST);

        /* Clean up the directory created via WRITE_DATA_FILE_MKDIR_0755 above */
        ASSERT_OK_ERRNO(rmdir("/tmp/zzz"));
}
def sd_notify(state: str) -> bool:
    """Send *state* as one datagram to the socket named by ``$NOTIFY_SOCKET``.

    A leading ``@`` in the variable is replaced by a NUL byte before the
    address is used. Returns ``True`` if the datagram was sent, ``False`` if
    the variable is unset or empty, or if creating the socket or sending
    raised ``OSError``.
    """
    address = os.environ.get("NOTIFY_SOCKET", "")
    if not address:
        return False

    # "@name" is rewritten to "\0name" before being passed to sendto().
    target = "\0" + address[1:] if address[0] == "@" else address

    try:
        with socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) as notifier:
            notifier.sendto(state.encode(), target)
    except OSError:
        return False
    else:
        return True
self.wfile.write(body) + elif self.path == "/hostname": + body = b"piff" + self.send_response(200) + self.send_header("Content-Type", "text/plain") + self.send_header("Content-Length", len(body)) + self.end_headers() + self.wfile.write(body) + else: + self.send_error(404) + + def log_message(self, fmt, *args): + print(f"{self.address_string()} - {fmt % args}") + +PORT=8088 + +server = HTTPServer(("", PORT), Handler) +print(f"Serving on http://localhost:{PORT}/") +try: + sd_notify("READY=1") + server.serve_forever() +except KeyboardInterrupt: + print("\nStopped.") diff --git a/test/units/TEST-74-AUX-UTILS.imds.sh b/test/units/TEST-74-AUX-UTILS.imds.sh new file mode 100755 index 0000000000000..533501a8a77e8 --- /dev/null +++ b/test/units/TEST-74-AUX-UTILS.imds.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eux +set -o pipefail + +# shellcheck source=test/units/util.sh +. "$(dirname "$0")"/util.sh + + +if ! test -x /usr/lib/systemd/systemd-imdsd ; then + echo "No imdsd installed, skipping test." 
+ exit 0 +fi + +at_exit() { + set +e + systemctl stop fake-imds systemd-imdsd.socket ||: + ip link del dummy0 ||: + rm -f /tmp/fake-imds.py /run/credstore/firstboot.hostname /run/credstore/acredtest /run/systemd/system/systemd-imdsd@.service.d/50-env.conf +} + +trap at_exit EXIT + +systemd-run -p Type=notify --unit=fake-imds /usr/lib/systemd/tests/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-imds.py +systemctl status fake-imds + +# Add a fake network interface so that IMDS gets going +ip link add dummy0 type dummy +ip addr add 192.168.47.11/24 dev dummy0 + +USERDATA='{"systemd.credentials":[{"name":"acredtest","text":"avalue"}]}' + +# First try imdsd directly +IMDSD="/usr/lib/systemd/systemd-imdsd --vendor=test --data-url=http://192.168.47.11:8088 --well-known-key=userdata:/userdata --well-known-key=hostname:/hostname" +assert_eq "$($IMDSD --well-known=hostname)" "piff" +assert_eq "$($IMDSD --well-known=userdata)" "$USERDATA" +assert_eq "$($IMDSD /hostname)" "piff" +assert_eq "$($IMDSD /userdata)" "$USERDATA" + +# Then, try it as a Varlink service +mkdir -p /run/systemd/system/systemd-imdsd@.service.d/ +cat >/run/systemd/system/systemd-imdsd@.service.d/50-env.conf </run/systemd/resolve/static.d/statictest.rr </run/systemd/resolve/static.d/statictest2.rr <