From a16d8679352cadc4ddf8c8553e7b5c4fe81b78fe Mon Sep 17 00:00:00 2001 From: Aimee Ukasick Date: Wed, 7 Oct 2020 15:56:00 -0500 Subject: [PATCH 1/3] initial commit --- Dockerfile | 38 ++- Makefile | 61 +++++ content/en/docs/smoke-test.md | 6 +- netlify.toml | 36 ++- scripts/README.md | 92 ++++++++ scripts/check-headers-file.sh | 16 ++ scripts/diff_l10n_branches.py | 137 +++++++++++ scripts/find_pr.py | 89 +++++++ scripts/hash-files.sh | 10 + scripts/linkchecker.py | 425 ++++++++++++++++++++++++++++++++++ scripts/lsync.sh | 31 +++ scripts/replace-capture.sh | 100 ++++++++ scripts/requirements.txt | 3 + scripts/test_examples.sh | 54 +++++ scripts/upstream_changes.py | 78 +++++++ 15 files changed, 1155 insertions(+), 21 deletions(-) mode change 100755 => 100644 Dockerfile create mode 100644 Makefile create mode 100644 scripts/README.md create mode 100644 scripts/check-headers-file.sh create mode 100644 scripts/diff_l10n_branches.py create mode 100644 scripts/find_pr.py create mode 100644 scripts/hash-files.sh create mode 100644 scripts/linkchecker.py create mode 100644 scripts/lsync.sh create mode 100644 scripts/replace-capture.sh create mode 100644 scripts/requirements.txt create mode 100644 scripts/test_examples.sh create mode 100644 scripts/upstream_changes.py diff --git a/Dockerfile b/Dockerfile old mode 100755 new mode 100644 index df0c388a6d..1ec44478b5 --- a/Dockerfile +++ b/Dockerfile @@ -1,15 +1,35 @@ -FROM jekyll/jekyll:3.8 +# Modified from github.com/kubernetes/website/Dockerfile +# Credit to Julien Guyomard (https://github.com/jguyomard). This Dockerfile +# is essentially based on his Dockerfile at +# https://github.com/jguyomard/docker-hugo/blob/master/Dockerfile. The only significant +# change is that the Hugo version is now an overridable argument rather than a fixed +# environment variable. 
-EXPOSE 4000 +FROM alpine:latest -# USER jekyll +LABEL maintainer="Armory Docs Team " -RUN gem install bundler +RUN apk add --no-cache \ + curl \ + git \ + openssh-client \ + rsync \ + build-base \ + libc6-compat \ + npm && \ + npm install -G autoprefixer postcss-cli -WORKDIR /srv/jekyll +ARG HUGO_VERSION -COPY Gemfile* /srv/jekyll/ +RUN mkdir -p /usr/local/src && \ + cd /usr/local/src && \ + curl -L https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSION}/hugo_extended_${HUGO_VERSION}_Linux-64bit.tar.gz | tar -xz && \ + mv hugo /usr/local/bin/hugo && \ + addgroup -Sg 1000 hugo && \ + adduser -Sg hugo -u 1000 -h /src hugo -RUN touch Gemfile.lock \ - && chmod a+w Gemfile.lock \ - && bundle install +WORKDIR /src + +USER hugo:hugo + +EXPOSE 1313 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..c5bfd47df6 --- /dev/null +++ b/Makefile @@ -0,0 +1,61 @@ +## stolen from kuberntes/website +HUGO_VERSION = $(shell grep ^HUGO_VERSION netlify.toml | tail -n 1 | cut -d '=' -f 2 | tr -d " \"\n") +NODE_BIN = node_modules/.bin +NETLIFY_FUNC = $(NODE_BIN)/netlify-lambda + +# The CONTAINER_ENGINE variable is used for specifying the container engine. By default 'docker' is used +# but this can be overridden when calling make, e.g. +# CONTAINER_ENGINE=podman make container-image +CONTAINER_ENGINE ?= docker +IMAGE_VERSION=$(shell scripts/hash-files.sh Dockerfile Makefile | cut -c 1-12) +CONTAINER_IMAGE = armorydocs-hugo:v$(HUGO_VERSION)-$(IMAGE_VERSION) +CONTAINER_RUN = $(CONTAINER_ENGINE) run --rm --interactive --tty --volume $(CURDIR):/src + +CCRED=\033[0;31m +CCEND=\033[0m + +.PHONY: all build build-preview help serve + +help: ## Show this help. 
+ @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {sub("\\\\n",sprintf("\n%22c"," "), $$2);printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) + +module-check: + @git submodule status --recursive | awk '/^[+-]/ {printf "\033[31mWARNING\033[0m Submodule not initialized: \033[34m%s\033[0m\n",$$2}' 1>&2 + +all: build ## Build site with production settings and put deliverables in ./public + +build: module-check ## Build site with production settings and put deliverables in ./public + hugo --minify + +build-preview: module-check ## Build site with drafts and future posts enabled + hugo --buildDrafts --buildFuture + +deploy-preview: ## Deploy preview site via netlify + hugo --enableGitInfo --buildFuture --buildDrafts -b $(DEPLOY_PRIME_URL) + +functions-build: + $(NETLIFY_FUNC) build functions-src + +check-headers-file: + scripts/check-headers-file.sh + +production-build: build check-headers-file ## Build the production site and ensure that noindex headers aren't added + +non-production-build: ## Build the non-production site, which adds noindex headers to prevent indexing + hugo --enableGitInfo + +serve: module-check ## Boot the development server. + hugo server --buildFuture --buildDrafts + +container-image: + $(CONTAINER_ENGINE) build . 
\ + --network=host \ + --tag $(CONTAINER_IMAGE) \ + --build-arg HUGO_VERSION=$(HUGO_VERSION) + +container-build: module-check + $(CONTAINER_RUN) $(CONTAINER_IMAGE) hugo --minify + +container-serve: module-check + $(CONTAINER_RUN) --mount type=tmpfs,destination=/src/resources,tmpfs-mode=0777 -p 1313:1313 $(CONTAINER_IMAGE) hugo server --buildFuture --buildDrafts --bind 0.0.0.0 + diff --git a/content/en/docs/smoke-test.md b/content/en/docs/smoke-test.md index f52820c6ad..057f378fb8 100644 --- a/content/en/docs/smoke-test.md +++ b/content/en/docs/smoke-test.md @@ -3,14 +3,10 @@ title: "Smoke Test" linkTitle: "Smoke Test" weight: 999 draft: true -description: > - Markdown stuff - change to draft before publication --- -Images links to test link checker - -![Architecture Diagram](/images/install_admin_guides_SpinnakerArchitecture.png) +>THIS IS A DRAFT AND SHOULD ONLY APPEAR IN LOCAL BUILDS AND DEPLOY PREVIEWS {{% pageinfo %}} diff --git a/netlify.toml b/netlify.toml index c5d5130ca0..0c8ce399a2 100755 --- a/netlify.toml +++ b/netlify.toml @@ -1,16 +1,38 @@ # Hugo build configuration for Netlify # (https://gohugo.io/hosting-and-deployment/hosting-on-netlify/#configure-hugo-version-in-netlify) # Default build settings -[build] - publish = "public" - command = "cd themes/docsy && git submodule update -f --init && cd ../.. && hugo" +[build] +# This default build command adds the robots noindex directive to the site headers. 
+# It is turned off for only for the production site by using [context.master] below +# DO NOT REMOVE THIS +publish = "public" +functions = "functions" +command = "git submodule update --init --recursive --depth 1 && make non-production-build" -# "production" environment specific build settings [build.environment] - HUGO_VERSION = "0.71.1" - HUGO_THEME = "docsy" - HUGO_ENV = "production" +HUGO_VERSION = "0.74.3" +NODE_VERSION= "10.20.0" +RUBY_VERSION = "2.7.1" +HUGO_THEME = "docsy" + +[context.production.environment] +HUGO_BASEURL = "https://docs.armory.io/" +HUGO_ENV = "production" +HUGO_ENABLEGITINFO = "true" + +[context.deploy-preview] +command = "git submodule update --init --recursive --depth 1 && make deploy-preview" + +[context.branch-deploy] +command = "git submodule update --init --recursive --depth 1 && make deploy-preview" + +[context.master] +# This context is triggered by the `master` branch and allows search indexing +# DO NOT REMOVE THIS (contact @kubernetes/sig-docs-leads) +publish = "public" +command = "git submodule update --init --recursive --depth 1 && make production-build" + # section redirects [[redirects]] diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000000..c21cef8c29 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,92 @@ +# Development scripts for Armory documentation + +Modified from github.com/kubernetes/website + +| Script | Description | +|-------------------------|---------------------------------------------------------------------------------------------------------------------------------------| +| `find_pr.py` | Find what GitHub pull requests touch a given file. | +| `upstream_changes.py` | Find what changes occurred between two versions. | +| `test_examples.sh` | This script tests whether a change affects example files bundled in the website. | +| `check-headers-file.sh` | This script checks the headers if you are in a production environment. 
| +| `diff_l10n_branches.py` | This script generates a report of outdated contents in `content/` directory by comparing two l10n team milestone branches. | + + + +## Requirements + +Some of those scripts have external requirements. You can install them with the following commands: + +``` +python3 -m pip install -r requirements.txt +``` + +## find_pr.py + +``` +$ ./find_pr.py --help +Usage: find_pr.py [OPTIONS] PATH + + Find what GitHub pull requests touch a given file. + + ex: ./find_pr.py --tags "language/fr" "content/fr/_index.html" + +Options: + --tags TEXT Tags of PullRequest (Can be passed multiple times) + --token TEXT GitHub API token. (Default env variable GITHUB_TOKEN) + --last-n-pr INTEGER Last n-th PullRequests + --help Show this message and exit. +``` + +## upstream_changes.py + +``` +$ ./upstream_changes.py --help +Usage: upstream_changes.py [OPTIONS] PATH + + Find what changes occurred between two versions + + ex: ./upstream_changes.py content/fr/_index.html + +Options: + --reference TEXT Specify the reference version of the file. Default to the + English one. + --git-path TEXT Specify git path + --help Show this message and exit. +``` + +## test_examples.sh + +This script tests whether a change affects example files bundled in the website. + +To install the dependencies: + + $ ./scripts/test_examples.sh install + +To run the examples: + + $ ./scripts/test_examples.sh run + +## check-headers-file.sh + +This script checks the headers if you are in a production environment. + + ./scripts/check-headers-file.sh + +## diff_l10n_branches.py + +``` +$ scripts/diff_l10n_branches.py --help +Usage: diff_l10n_branches.py [OPTIONS] L10N_LANG L_COMMIT R_COMMIT + + This script generates a report of outdated contents in `content/` directory by comparing two l10n team milestone branches. + + L10n team owners can open a GitHub issue with the report generated by this + script when they start a new team milestone. 
+ + ex: `scripts/diff_l10n_branches.py ko dev-1.15-ko.3 dev-1.15-ko.4` + +Options: + --src-lang TEXT Source language + --help Show this message and exit. +``` diff --git a/scripts/check-headers-file.sh b/scripts/check-headers-file.sh new file mode 100644 index 0000000000..c4b5a9da42 --- /dev/null +++ b/scripts/check-headers-file.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +if [ "$HUGO_ENV" == "production" ]; then + echo "INFO: Production environment. Checking the _headers file for noindex headers." + + if grep -q "noindex" public/_headers; then + echo "PANIC: noindex headers were found in the _headers file. This build has failed." + exit 1 + else + echo "INFO: noindex headers were not found in the _headers file. All clear." + exit 0 + fi +else + echo "Non-production environment. Skipping the _headers file check." + exit 0 +fi \ No newline at end of file diff --git a/scripts/diff_l10n_branches.py b/scripts/diff_l10n_branches.py new file mode 100644 index 0000000000..5da4cabcc5 --- /dev/null +++ b/scripts/diff_l10n_branches.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python + +import os +import subprocess +import jinja2 +import click + +DEVNULL = open(os.devnull, 'w') +ISSUE_TEMPLATE = """\ +# This is a Bug Report +## Problem +Outdated files in the {{ r_commit }} branch. + +### {{ files_to_be_modified | count }} files to be modified +{% for m_file in files_to_be_modified -%} + 1. [ ] {{ m_file.filepath }} {{ m_file.shortstat }} +{% endfor %} + +### {{ files_to_be_renamed | count }} files to be renamed +{% for r_file in files_to_be_renamed -%} + 1. [ ] {{ r_file.diff_status_letter }} {{ r_file.src_filepath }} -> {{ r_file.dest_filepath }} +{% endfor %} + +### {{ files_to_be_deleted | count }} files to be deleted +{% for d_file in files_to_be_deleted -%} + 1. [ ] {{ d_file }} +{% endfor %} + +## Proposed Solution + +{% if files_to_be_modified %} + +Use `git diff` to check what is changed in the upstream. 
And apply the upstream changes manually +to the `{{ l10n_lang_path }}` of `{{ r_commit }}` branch. + +For example: +``` +# checkout `{{ r_commit }}` +... +# check what is updated in the upstream +git diff {{ l_commit }} {{ r_commit }} -- {{ files_to_be_modified.0.filepath }} +# apply changes to {{ l10n_lang_path }} +vi {{ files_to_be_modified.0.filepath | replace(src_lang_path, l10n_lang_path) }} +... +# commit and push +... +# make PR to `{{ r_commit }}` +``` + +{% endif %} + +## Pages to Update + +""" + +files_to_be_deleted = [] +files_to_be_renamed = [] +files_to_be_modified = [] + + +def git_diff(filepath, l_commit, r_commit, shortstat=False): + cmd = ["git", "diff", l_commit, r_commit, "--", filepath] + + if shortstat: + cmd = ["git", "diff", l_commit, r_commit, "--shortstat", "--", filepath] + + return subprocess.check_output(cmd).decode("UTF-8").strip() + + +def git_exists(path, filepath): + cmd = ["git", "cat-file", "-e", "{}:{}".format(path, filepath)] + ret_code = subprocess.call(cmd, stderr=DEVNULL) + return ret_code == 0 + + +def process_diff_status(diff_status, l_commit, r_commit, src_lang_path, + l10n_lang_path): + status_letter = diff_status[0] + filepath = diff_status[1] + + if git_exists(r_commit, filepath.replace(src_lang_path, l10n_lang_path)): + if status_letter == 'D': + files_to_be_deleted.append(filepath) + elif status_letter.startswith('R'): + replaced = {"diff_status_letter": diff_status[0], + "src_filepath": diff_status[1], + "dest_filepath": diff_status[2]} + files_to_be_renamed.append(replaced) + elif status_letter == 'M': + modified = {"filepath": filepath, + "shortstat": git_diff(filepath, l_commit, r_commit, + shortstat=True), + "diff": git_diff(filepath, l_commit, r_commit)} + files_to_be_modified.append(modified) + + +def git_diff_name_status(l_commit, r_commit, src_lang_path, l10n_lang_path): + cmd = ["git", "diff", l_commit, r_commit, "--name-status", "--", + src_lang_path] + name_status_output = 
subprocess.check_output(cmd).strip() + for line in name_status_output.decode('utf-8').splitlines(): + diff_status = line.split() + process_diff_status(diff_status, l_commit, r_commit, src_lang_path, + l10n_lang_path) + + +@click.command() +@click.argument("l10n-lang") +@click.argument("l-commit") +@click.argument("r-commit") +@click.option("--src-lang", help="Source language", default="en") +def main(l10n_lang, src_lang, l_commit, r_commit): + """ + This script generates a report of outdated contents in `content/` + directory by comparing two l10n team milestone branches. + + L10n team owners can open a GitHub issue with the report generated by this + script when they start a new team milestone. + + ex: `scripts/diff_l10n_branches.py ko dev-1.15-ko.3 dev-1.15-ko.4` + """ + l10n_lang_path = "content/" + l10n_lang + src_lang_path = "content/" + src_lang + git_diff_name_status(l_commit, r_commit, src_lang_path, + l10n_lang_path) + issue_template = jinja2.Template(ISSUE_TEMPLATE) + ret = issue_template.render(l_commit=l_commit, r_commit=r_commit, + src_lang_path=src_lang_path, + l10n_lang_path=l10n_lang_path, + files_to_be_deleted=files_to_be_deleted, + files_to_be_modified=files_to_be_modified, + files_to_be_renamed=files_to_be_renamed) + print(ret) + + +if __name__ == "__main__": + main() diff --git a/scripts/find_pr.py b/scripts/find_pr.py new file mode 100644 index 0000000000..f05a790d75 --- /dev/null +++ b/scripts/find_pr.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python + +import os +import json + +import click +import requests +from jinja2 import Template + + +@click.command() +@click.argument("path") +@click.option("--tags", + multiple=True, + help="Tags of PullRequest (Can be passed multiple times)") +@click.option("--token", + help="GitHub API token. 
(Default env variable GITHUB_TOKEN)", + default=os.environ.get("GITHUB_TOKEN", "")) +@click.option("--last-n-pr", + help="Last n-th PullRequests", + default=100) +def main(tags, token, path, last_n_pr): + """ + Find what GitHub pull requests touch a given file. + + ex: + ./find_pr.py --tags "language/fr" "content/fr/_index.html" + """ + + if not token: + print("GitHub token not provided (required)") + exit(1) + + query = Template(""" + query { + repository(name: "website", owner: "kubernetes") { + pullRequests({% if tags %}labels: [{% for tag in tags %}"{{ tag }}", {% endfor %}], {% endif %}last: {{ last_n_pr }}) { + edges { + node { + title + state + url + files (last: 100) { + edges { + node { + path + } + } + } + } + } + } + } + } + """).render(tags=tags, last_n_pr=last_n_pr) + + try: + r = requests.post("https://api.github.com/graphql", + json={"query": query}, + headers={ + "Authorization": "token %s" % token, + "Accept": "application/vnd.github.ocelot-preview+json", + "Accept-Encoding": "gzip" + }) + r.raise_for_status() + + reply = r.json() + prs = reply['data']['repository']['pullRequests']['edges'] + + for pr in prs: + files = pr["node"]["files"]["edges"] + for f in files: + if path == f["node"]["path"]: + print("%s (%s)" % (pr["node"]["title"], pr["node"]["state"])) + print(pr["node"]["url"]) + print("----------------") + + except requests.exceptions.HTTPError as err: + gh_err_response = json.loads(err.response.text) + print("HTTP Error: %d %s" % (err.response.status_code, gh_err_response['message'])) + except requests.exceptions.ConnectionError as err: + print("Error Connecting: %s" % err) + except requests.exceptions.Timeout as err: + print("Timeout Error: %s" % err) + except requests.exceptions.RequestException as err: + print("Oops, another error occurred: %s" % err) + +if __name__ == '__main__': + main() diff --git a/scripts/hash-files.sh b/scripts/hash-files.sh new file mode 100644 index 0000000000..0040f4355b --- /dev/null +++ 
b/scripts/hash-files.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# this script emits as hash for the files listed in $@ +if command -v shasum >/dev/null 2>&1; then + cat "$@" | shasum -a 256 | cut -d' ' -f1 +elif command -v sha256sum >/dev/null 2>&1; then + cat "$@" | sha256sum | cut -d' ' -f1 +else + echo "missing shasum tool" 1>&2 + exit 1 +fi diff --git a/scripts/linkchecker.py b/scripts/linkchecker.py new file mode 100644 index 0000000000..71f40ac22b --- /dev/null +++ b/scripts/linkchecker.py @@ -0,0 +1,425 @@ +#!/usr/bin/env python3 +# +# This a link checker for Kubernetes documentation website. +# - We cover the following cases for the language you provide via `-l`, which +# defaults to 'en'. +# - If the language specified is not English (`en`), we check if you are +# actually using the localized links. For example, if you specify `zh` as +# the language, and for link target `/docs/foo/bar`, we check if the English +# version exists AND if the Chinese version exists as well. A checking record +# is produced if the link can use the localized version. +# +# Usage: linkchecker.py -h +# +# Cases handled: +# +# - [foo](#bar) : ignored currently +# + [foo](http://bar) : insecure links to external site +# + [foo](https://k8s.io/website/...) : hardcoded site domain name +# +# + [foo](//docs/bar/...) : where is not 'en' +# + //docs/bar : contains shortcode, so ignore, or +# + //docs/bar : is a image link (ignore currently), or +# + //docs/bar : points to shared (non-localized) page, or +# + //docs/bar.md : exists for current lang, or +# + //docs/bar/_index.md : exists for current lang, or +# + //docs/bar/ : is a redirect entry, or +# + //docs/bar : is something we don't understand, then ERR +# +# + [foo](/docs/bar/...) 
+# + /docs/bar : contains shortcode, so ignore, or +# + /docs/bar : is a image link (ignore currently), or +# + /docs/bar : points to a shared (non-localized) page, or +# + /docs/bar.md : exists for current lang, or +# + /docs/bar/_index.md : exists for current lang, or +# + /docs/bar : is a redirect entry, or +# + /docs/bar : is something we don't understand +# + +import argparse +import glob +import os +import re +import sys + +# These are the bad links that doesn't hurt, though good to fix +BAD_LINK_TYPES = { + "B01": { + "reason": "Using bad protocol", + "level": "WARNING", + }, + "B02": { + "reason": "Link target is a redirect entry", + "level": "WARNING", + }, + "B03": { + "reason": "Intra-site linkes should use relative path", + "level": "WARNING", + }, +} + +# Constants for colored printing +C_RED = "\033[31m" +C_GREEN = "\033[32m" +C_YELLOW = "\033[33m" +C_GRAY = "\033[90m" +C_CYAN = "\033[36m" +C_END = "\033[0m" + +# Command line arguments shared across functions +ARGS = None +# Global result dictionary keyed by page examined +RESULT = {} +# Cached redirect entries +REDIRECTS = {} + + +def new_record(level, message, target): + """Create new checking record. + + :param level: Record severity level, one of 'INFO', 'WARNING' and 'ERROR' + :param message: Error message string + :param target: The link target in question + :returns: A string representation the checking result, may contain ASCII + coded terminal colors, or None if the record is suppressed. 
+ """ + global ARGS + + # Skip info when verbose + if ARGS.verbose == False and level == "INFO": + return None + + result = None + if ARGS.no_color: + result = target + ": " + message + else: + target = C_GRAY + target + C_END + if level == "INFO": + result = target + ": " + C_GREEN + message + C_END + elif level == "WARNING": + result = target + ": " + C_YELLOW+ message + C_END + else: # default to error + result = target + ": " + C_RED + message + C_END + + return result + + +def dump_result(): + """Dump result to stdout.""" + global RESULT, ARGS + + for path, path_output in RESULT.items(): + norm_path = os.path.normpath(path) + if ARGS.no_color: + print("File: " + norm_path) + else: + print(C_CYAN + "File: " + norm_path + C_END) + for p in path_output: + print(" "*4 + p) + return + + +def strip_comments(content): + """Manual striping of comments from file content. + + Many localized content pages contain original English content in comments. + These comments have to be stripped out before analyzing the links. + Doing this using regular expression is difficult. Even the grep tool is + not suitable for this use case. + + NOTE: We strived to preserve line numbers when producing the resulted + text. This can be useful in future if we want to print out the line + numbers for bad links. + """ + result = [] + in_comment = False + for line in content: + idx1 = line.find("") + if not in_comment: + # only care if new comment started + if idx1 < 0: + result.append(line) + continue + + # single line comment + if idx2 > 0: + result.append(line[:idx1] + line[idx2+4:]) + continue + result.append(line[:idx1]) + in_comment = True + continue + + # already in comment block + if idx2 < 0: # ignore whole line + result.append("") + continue + result.append(line[idx2+4:]) + in_comment = False + + return result + + +def normalize_filename(name, ftype="markdown"): + """Guess the filename based on a link target. + + This function only deals with regular files. 
+ """ + if name.endswith("/"): + name = name[:-1] + if ftype == "markdown": + name += ".md" + else: + name += ".html" + return name + + +def check_file_exists(base, path, ftype="markdown"): + """Check if the target file exists. + + NOTE: We build a normalized path using 'base' and 'path' values. Suppose + the resulted path string is 'foo/bar', we check if 'foo/bar.md' exists, + AND we check if 'foo/bar/_index.md' exists. + + :param base: The base directory to begin with + :param path: The link target which is a relative path string + :returns: A boolean indicating whether the target file exists. + """ + # NOTE: anchor is ignored, can be a todo item + parts = path.split("#") + + fn = normalize_filename(parts[0], ftype=ftype) + target = base + fn + + if os.path.isfile(target): + return True + + dir_name = base + parts[0] + if os.path.isdir(dir_name): + if os.path.isfile(dir_name + "/_index.md"): + return True + if os.path.isfile(dir_name + "/_index.html"): + return True + # /docs/contribute/style/hugo-shortcodes/ has this + if os.path.isfile(dir_name + "/index.md"): + return True + return False + + +def get_redirect(path): + """Check if the path exists in the redirect database. + + NOTE: We do NOT check if the redirect target is there or not. We do an + **exact** matching for redirection entries. + :returns: The redirect target if any, or None if not found. + """ + global REDIRECTS + + def _check_redirect(t): + for key, value in REDIRECTS.items(): + if key == t: # EXACT MATCH + return value + return None + + # NOTE: anchor is ignored, can be a future todo + parts = path.split("#") + target = parts[0] + if not target.endswith("/"): + target += "/" + + new_target = _check_redirect(target) + last_target = new_target + while new_target: + new_target = _check_redirect(new_target) + if new_target is None: + break + last_target = new_target + + return last_target + + +def check_target(page, anchor, target): + """Check a link from anchor to target on provided page. 
+ + :param page: Currently not used. Passed here in case we want to check the + in-page links in the future. + :param anchor: Anchor string from the content page. This is provided to + help handle cases where target is empty. + :param target: The link target string to check + :returns: A checking record (string) if errors found, or None if we can + find the target link. + """ + target = target.strip() + # B01: bad protocol + if target.startswith("http://"): + return new_record("WARNING", "Use HTTPS rather than HTTP", target) + + # full link + if target.startswith("https://"): + # B03: self link, should revise to relative path + if (target.startswith("https://k8s.io/docs") or + target.startswith("https://kubernetes.io/docs")): + return new_record("ERROR", "Should use relative paths", target) + # external link, skip + return new_record("INFO", "External link, skipped", target) + + # in-page link + # TODO: check if the target anchor does exists + if target.startswith("#"): + return new_record("INFO", "In-page link, skipped", target) + + # Link has shortcode + if target.find("{{") > 0: + return new_record("INFO", "Link has shortcode, skipped", target) + + # TODO: check links to examples + if target.startswith("/examples/"): + return new_record("WARNING", "Examples link, skipped", target) + + # it is an embedded image + # TODO: an image might get translated as well + if target.endswith(".png") or target.endswith(".svg"): + return new_record("INFO", "Link to image, skipped", target) + + # link to English or localized page + if (target.startswith("/docs/") or + target.startswith("/" + ARGS.lang + "/docs/")): + + # target is shared reference (kubectl or kubernetes-api? 
+ if (target.find("/docs/reference/generated/kubectl/") >= 0 or + target.find("/docs/reference/generated/kubernetes-api/") >= 0): + if check_file_exists(ROOT + "/static", target, "html"): + return None + return new_record("ERROR", "Missing shared reference", target) + + # target is a markdown (.md) or a "/_index.md"? + if target.startswith("/docs/"): + base = os.path.join(ROOT, "content", "en") + else: + # localized target + base = os.path.join(ROOT, "content") + ok = check_file_exists(base, target) + if ok: + # We do't do additional checks for English site even if it has + # links to a non-English page + if ARGS.lang == "en": + return None + + # If we are already checking localized link, fine + if target.startswith("/" + ARGS.lang + "/docs/"): + return None + + # additional check for localization even if English target exists + base = os.path.join(ROOT, "content", ARGS.lang) + found = check_file_exists(base, target) + if not found: + # Still to be translated + return None + msg = ("Localized page detected, please append '/%s' to the target" + % ARGS.lang) + return new_record("ERROR", "Link not using localized page", target) + + # taget might be a redirect entry + real_target = get_redirect(target) + if real_target: + msg = ("Link using redirect records, should use %s instead" % + real_target) + return new_record("WARNING", msg, target) + return new_record("ERROR", "Missing link for [%s]" % anchor, target) + + msg = "Link may be wrong for the anchor [%s]" % anchor + return new_record("WARNING", msg, target) + + +def validate_links(page): + """Find and validate links on a content page. + + The checking records are consolidated into the global variable RESULT. 
+ """ + try: + with open(page, "r") as f: + data = f.readlines() + except Exception as ex: + print("[Error] failed in reading markdown file: " + str(ex)) + return + + content = "\n".join(strip_comments(data)) + + # Single results: searches for pattern: []() + link_pattern = r"\[([`/\w\s\n]*)\]\(([^\)]*)\)" + regex = re.compile(link_pattern) + + matches = regex.findall(content) + records = [] + for m in matches: + r = check_target(page, m[0], m[1]) + if r: + records.append(r) + if len(records): + RESULT[page] = records + + +def parse_arguments(): + """Argument parser. + + Result is returned and saved into global variable ARGS. + """ + parser = argparse.ArgumentParser(description="Links checker for docs.") + parser.add_argument("-l", dest="lang", default="en", metavar="", + help=("two letter language code, e.g. 'zh'. " + "(default='en')")) + parser.add_argument("-v", dest="verbose", action="store_true", + help="switch on verbose level") + parser.add_argument("-f", dest="filter", default="/docs/**/*.md", + metavar="", + help=("File pattern to scan, e.g. '/docs/foo.md'. 
" + "(default='/docs/foo/*.md')")) + parser.add_argument("-n", "--no-color", action="store_true", + help="Suppress colored printing.") + + return parser.parse_args() + + +def main(): + """The main entry of the program.""" + global ARGS, ROOT, REDIRECTS + + ARGS = parse_arguments() + print("Language: " + ARGS.lang) + ROOT = os.path.join(os.path.dirname(__file__), '..') + content_dir = os.path.join(ROOT, 'content') + lang_dir = os.path.join(content_dir, ARGS.lang) + + # read redirects data + redirects_fn = os.path.join(ROOT, "static", "_redirects") + try: + with open(redirects_fn, "r") as f: + data = f.readlines() + for item in data: + parts = item.split() + # There are entries without 301 specified + if len(parts) < 2: + continue + entry = parts[0] + # There are some entries not ended with "/" + if entry.endswith("/"): + REDIRECTS[entry] = parts[1] + else: + REDIRECTS[entry + "/"] = parts[1] + + except Exception as ex: + print("[Error] failed in reading redirects file: " + str(ex)) + return + + folders = [f for f in glob.glob(lang_dir + ARGS.filter, recursive=True)] + for page in folders: + validate_links(page) + + dump_result() + + # Done + print("Completed link validation.") + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/scripts/lsync.sh b/scripts/lsync.sh new file mode 100644 index 0000000000..740b390be1 --- /dev/null +++ b/scripts/lsync.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# +# This script checks if the English version of a page has changed since a localized +# page has been committed. + +if [ "$#" -ne 1 ] || ! [ -f "$1" ]; then + echo -e "\nThis script checks if the English version of a page has changed since a " + echo -e "localized page has been committed.\n" + echo -e "Usage:\n\t$0 \n" >&2 + echo -e "Example:\n\t$0 content/zh/docs/concepts/_index.md\n" >&2 + exit 1 +fi + +LOCALIZED="$1" + +# Try get the English version +EN_VERSION=`echo $LOCALIZED | sed "s/content\/..\//content\/en\//g"` +if ! 
[ -f $EN_VERSION ]; then + echo "$EN_VERSION has been removed." + exit 2 +fi + +# Last commit for the localized file +LASTCOMMIT=`git log -n 1 --pretty=format:%h -- $LOCALIZED` + +git diff $LASTCOMMIT...HEAD $EN_VERSION + +if [ "$?" -eq 0 ]; then + echo "$LOCALIZED is still in sync" + exit 3 +fi diff --git a/scripts/replace-capture.sh b/scripts/replace-capture.sh new file mode 100644 index 0000000000..4221c48e37 --- /dev/null +++ b/scripts/replace-capture.sh @@ -0,0 +1,100 @@ +#!/bin/bash + +# set K8S_WEBSITE in your env to your docs website root +# or rely on this script to determine it automatically +# You must run the script inside the repository for that to work +# +# Note: website/content//docs + +find_content_dir() { + local self + local top + if command git rev-parse --is-inside-work-tree > /dev/null 2>&1 ; then + self="$0" + top="$(command git rev-parse --show-toplevel)" + while ( cd "${top}/.." && command git rev-parse --is-inside-work-tree> /dev/null 2>&1 ); do + top="$( cd "${top}/.." && "${self}" )" + done + printf "%s/content" "${top}" + else + printf "Could not autodetect CONTENT_DIR\n" 1>&2 + exit 1 + fi +} + +if [ -z ${K8S_WEBSITE+x} ]; then + CONTENT_DIR="$( find_content_dir )" +else + CONTENT_DIR=${K8S_WEBSITE}/content +fi + +if ! 
[ -d "${CONTENT_DIR}" ]; then + printf "Directory %s not found\n" "${CONTENT_DIR}" 1>&2 + exit 1 +fi + +# 16 langs +# de en es fr hi id it ja ko no pl pt ru uk vi zh + +declare -a DIRS=("concepts" "contribute" "home" "reference" "setup" "tasks" "tutorials") +declare -a EMPTY_STMTS=("body" "discussion" "lessoncontent" "overview" "steps") +declare -a REPLACE_STMTS=("cleanup" "objectives" "options" "prerequisites" "seealso" "synopsis" "whatsnext") +declare -a CONTENT_TYPES=("concept" "task" "tutorial" "tool-reference") +END_CAPTURE="{{% \/capture %}}" +CONTENT_TEMPLATE="content_template:" + +# replace or remove capture statements +function replace_capture_stmts { + echo "i:""$i" + if [ -d "$1" ] ; then + for i in `ls $1`; do + replace_capture_stmts "${1}/${i}" + done + else + if [ -f "$1" ] ; then + ls -f $1 | while read -r file; do + for stmt in "${EMPTY_STMTS[@]}" ; do + CAPTURE_STMT="{{% capture ""$stmt"" %}}" + COMMENT_REPLACE="" + sed -i -e "s/${CAPTURE_STMT}/${COMMENT_REPLACE}/g" $1 + done + + for stmt in "${REPLACE_STMTS[@]}" ; do + CAPTURE_STMT="{{% capture ""$stmt"" %}}" + HEADING_STMT="## {{% heading \"""$stmt""\" %}}\n" + echo "HEADING STMT TO ADD:""$HEADING_STMT" + sed -i -e "s/${CAPTURE_STMT}/${HEADING_STMT}/g" $1 + done + + sed -i -e "s/${END_CAPTURE}//g" $1 + + # replace content_template: templates/ with + # content_template: + #sed -i -e "s/^${CONTENT_TEMPLATE}/# ${CONTENT_TEMPLATE}/g" $1 + for t in "${CONTENT_TYPES[@]}" ; do + sed -i -e "s/content_template:[[:space:]]*templates\/$t/content_type: $t/g" $1 + done + done + else + exit 1 + fi + fi +} + +# change to docs content dir +cd $CONTENT_DIR + +for langdir in `ls $CONTENT_DIR`; do + # Testing with a couple of langs to start + if [ $langdir = "en" ] ; then + LANGDIR="$CONTENT_DIR""/""$langdir""/docs" + + for d in "${DIRS[@]}"; do + ROOTDIR="${LANGDIR}""/""$d" + cd ${ROOTDIR} + for i in `ls ${ROOTDIR}`; do + replace_capture_stmts "${ROOTDIR}""/""$i" + done + done + fi +done diff --git 
a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000000..50ad2619ec --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1,3 @@ +requests==2.20.1 +click==6.7 +jinja2==2.10.1 diff --git a/scripts/test_examples.sh b/scripts/test_examples.sh new file mode 100644 index 0000000000..16ee428777 --- /dev/null +++ b/scripts/test_examples.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +set -e + +# List files changed in the commit to check +FILES=($( git diff "$( git merge-base --fork-point master )" --name-only )) + +TEST_EXAMPLES=No + +# Check if examples folders (all locales) change in this branch +if printf -- '%s\n' "${FILES[@]}" | grep -qE '^"?content/[^/]+/examples/'; then + TEST_EXAMPLES=Yes +fi + +function install() { + if ! [[ $TEST_EXAMPLES == Yes ]]; then + echo "PR not touching examples, skipping example tests install" 1>&2 + exit 0 + fi + + export PATH=$GOPATH/bin:$PATH + mkdir -p $HOME/gopath/src/k8s.io + mv $TRAVIS_BUILD_DIR $HOME/gopath/src/k8s.io/website && cd $HOME/gopath/src/k8s.io/website + + # Make sure we are testing against the correct branch + wget https://github.com/kubernetes/kubernetes/archive/v${KUBE_VERSION}.0.tar.gz -P $GOPATH/src/k8s.io + + pushd $GOPATH/src/k8s.io + tar xzf v${KUBE_VERSION}.0.tar.gz + mv kubernetes-${KUBE_VERSION}.0 kubernetes + cd kubernetes + make generated_files + cp -L -R vendor $GOPATH/src/ + rm -r vendor + popd + + # Fetch additional dependencies to run the tests in examples/examples_test.go + go get -t -v k8s.io/website/content/en/examples +} + +function run_test() { + if ! 
[[ $TEST_EXAMPLES == Yes ]]; then + echo "PR not touching examples, skipping example tests execution" 1>&2 + exit 0 + fi + go test -v k8s.io/website/content/en/examples +} + +if [[ $1 == install ]]; then + install + exit 0 +elif [[ $1 == "run" ]]; then + run_test +fi diff --git a/scripts/upstream_changes.py b/scripts/upstream_changes.py new file mode 100644 index 0000000000..02b472e054 --- /dev/null +++ b/scripts/upstream_changes.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python + +import re +from subprocess import check_output + +import click + + +def last_commit(path, git): + """ + Find the hash of the last commit that touched a file. + """ + cmd = [git, "log", "-n", "1", "--pretty=format:%H", "--", path] + try: + return check_output(cmd) + except Exception as exc: + raise exc + + +def diff(reference_commit_hash, translation_commit_hash, reference_path, git): + """ + Returns the diff between two hashes on a specific file + """ + cmd = [git, "diff", + "%s...%s" % (translation_commit_hash, reference_commit_hash), + "--", + reference_path] + try: + return check_output(cmd) + except Exception as exc: + raise exc + + +def find_full_path(path, git): + cmd = [git, "ls-tree", + "--name-only", "--full-name", "HEAD", + path] + try: + return check_output(cmd).strip() + except Exception as exc: + raise exc + + +def find_reference(path, git): + abs_path = find_full_path(path, git=git) + return re.sub('content/(\w{2})/', 'content/en/', abs_path) + + +@click.command() +@click.argument("path") +@click.option("--reference", "reference", + help="Specify the reference version of the file. 
Default to the English one.", + default=None) +@click.option("--git-path", + "git", + help="Specify git path", + default="git") +def main(path, reference, git): + """ + Find what changes occurred between two versions + + ex: + ./upstream_changes.py content/fr/_index.html + """ + if reference is None: + reference = find_reference(path, git=git) + reference_commit_hash = last_commit(path=reference, git=git) + translation_commit_hash = last_commit(path=path, git=git) + + print(diff( + reference_commit_hash=reference_commit_hash, + translation_commit_hash=translation_commit_hash, + reference_path=reference, + git=git + )) + +if __name__ == '__main__': + main() From 4b1527b832873485ad2ba7468c584e7f44beb817 Mon Sep 17 00:00:00 2001 From: Aimee Ukasick Date: Wed, 7 Oct 2020 16:23:11 -0500 Subject: [PATCH 2/3] update readme with run locally instructions --- README.md | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a1f4e916ce..96653b6245 100755 --- a/README.md +++ b/README.md @@ -1,11 +1,25 @@ +[![Netlify Status](https://api.netlify.com/api/v1/badges/d0be032e-c23d-48e5-8fbc-f7d5f2388fce/deploy-status)](https://app.netlify.com/sites/armory-docs/deploys) + # Overview This is the repo for Armory documentation (https://docs.armory.io). We welcome contributions from people outside of Armory. -The site is hosted by [Netlify](https://www.netlify.com/), which generates a preview build for every pull request. Install [Hugo](https://gohugo.io/) if you want to compile and run the project locally. The Hugo extended version is specified in `netlify.toml` (currently 0.71.1). +The site is hosted by [Netlify](https://www.netlify.com/), which generates a preview build for every pull request. Install [Hugo](https://gohugo.io/) if you want to compile and run the project locally. The Hugo extended version is specified in `netlify.toml`. 
The latest version of the docs website is the `master` branch. Previous releases point to branches that start with `release-`. +## Prerequisites + +To compile and run locally, install the following: + +- [yarn](https://yarnpkg.com/) +- [npm](https://www.npmjs.com/) +- [Go](https://golang.org/) +- [Hugo](https://gohugo.io/) +- A container runtime, like [Docker](https://www.docker.com/). + +GitHub is configured to generate a deploy preview when you create a pull request, so you do not have to build the site locally. + ## Cloning the project If you work for Armory, see the internal docs for how to contribute content. @@ -86,12 +100,67 @@ Content is in `content/en/docs`. Make your changes to the desired file. Use the `git status` command at any time to see what files you've changed. -If you have installed [Hugo](https://gohugo.io/getting-started/installing/) and want to preview your changes locally, run from the repo root: +## Running the website locally +### Using a container + +To build the site in a container, run the following to build the container image and run it: + +``` +make container-image +make container-serve +``` + +Open up your browser to http://localhost:1313 to view the website. As you make changes to the source files, Hugo updates the website and forces a browser refresh. + +### Using Hugo + +Make sure to install the Hugo extended version specified by the `HUGO_VERSION` environment variable in the [`netlify.toml`](netlify.toml#L10) file. + +To build and test the site locally, run: + +```bash +make serve +``` + +This will start the local Hugo server on port 1313. Open up your browser to http://localhost:1313 to view the website. As you make changes to the source files, Hugo updates the website and forces a browser refresh. 
+ +### Troubleshooting macOS for too many open files + +If you run `make serve` on macOS and receive the following error: + +``` +ERROR 2020/08/01 19:09:18 Error: listen tcp 127.0.0.1:1313: socket: too many open files +make: *** [serve] Error 1 +``` + +Try checking the current limit for open files: + +`launchctl limit maxfiles` + +Then run the following commands (adapted from https://gist.github.com/tombigel/d503800a282fcadbee14b537735d202c): ``` -hugo server +#!/bin/sh + +# These are the original gist links, linking to my gists now. +# curl -O https://gist.githubusercontent.com/a2ikm/761c2ab02b7b3935679e55af5d81786a/raw/ab644cb92f216c019a2f032bbf25e258b01d87f9/limit.maxfiles.plist +# curl -O https://gist.githubusercontent.com/a2ikm/761c2ab02b7b3935679e55af5d81786a/raw/ab644cb92f216c019a2f032bbf25e258b01d87f9/limit.maxproc.plist + +curl -O https://gist.githubusercontent.com/tombigel/d503800a282fcadbee14b537735d202c/raw/ed73cacf82906fdde59976a0c8248cce8b44f906/limit.maxfiles.plist +curl -O https://gist.githubusercontent.com/tombigel/d503800a282fcadbee14b537735d202c/raw/ed73cacf82906fdde59976a0c8248cce8b44f906/limit.maxproc.plist + +sudo mv limit.maxfiles.plist /Library/LaunchDaemons +sudo mv limit.maxproc.plist /Library/LaunchDaemons + +sudo chown root:wheel /Library/LaunchDaemons/limit.maxfiles.plist +sudo chown root:wheel /Library/LaunchDaemons/limit.maxproc.plist + +sudo launchctl load -w /Library/LaunchDaemons/limit.maxfiles.plist ``` +This works for Catalina as well as Mojave macOS. 
+ + ## Commit your changes Check which files you need to commit: From 9580edbd55c283d9e57943a30fbb231bebd38321 Mon Sep 17 00:00:00 2001 From: Aimee Ukasick Date: Wed, 7 Oct 2020 16:32:49 -0500 Subject: [PATCH 3/3] clean up content --- README.md | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 96653b6245..454b10a21f 100755 --- a/README.md +++ b/README.md @@ -4,13 +4,11 @@ This is the repo for Armory documentation (https://docs.armory.io). We welcome contributions from people outside of Armory. -The site is hosted by [Netlify](https://www.netlify.com/), which generates a preview build for every pull request. Install [Hugo](https://gohugo.io/) if you want to compile and run the project locally. The Hugo extended version is specified in `netlify.toml`. - The latest version of the docs website is the `master` branch. Previous releases point to branches that start with `release-`. ## Prerequisites -To compile and run locally, install the following: +The site is hosted by [Netlify](https://www.netlify.com/), which generates a preview build for every pull request. Install the following if you want to compile and run the project locally. Make sure to install the Hugo extended version specified by the `HUGO_VERSION` environment variable in the [`netlify.toml`](netlify.toml#L14) file. - [yarn](https://yarnpkg.com/) - [npm](https://www.npmjs.com/) @@ -18,12 +16,9 @@ To compile and run locally, install the following: - [Hugo](https://gohugo.io/) - A container runtime, like [Docker](https://www.docker.com/). -GitHub is configured to generate a deploy preview when you create a pull request, so you do not have to build the site locally. ## Cloning the project -If you work for Armory, see the internal docs for how to contribute content. - People who are not part of the Armory organization need to create a fork of this repo. 
See the GitHub.com help [docs](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-forks) for how to fork a repo. Clone your forked repo: @@ -32,10 +27,11 @@ Clone your forked repo: git clone git@github.com:/docs.git ``` -Armory docs uses the [Docsy]() theme as a submodule. You have to update the submodule if you want to build locally. +Armory docs uses the [Docsy Hugo theme](https://github.com/google/docsy#readme) as a submodule. You have to update the submodule if you want to build locally. Even if you plan to run the website in a container, we strongly recommend pulling in the submodule and other development dependencies by running the following: ```bash cd docs +yarn # install Yarn dependencies git submodule update --init --recursive ``` @@ -101,6 +97,7 @@ Content is in `content/en/docs`. Make your changes to the desired file. Use the `git status` command at any time to see what files you've changed. ## Running the website locally + ### Using a container To build the site in a container, run the following to build the container image and run it: @@ -114,8 +111,6 @@ Open up your browser to http://localhost:1313 to view the website. As you make c ### Using Hugo -Make sure to install the Hugo extended version specified by the `HUGO_VERSION` environment variable in the [`netlify.toml`](netlify.toml#L10) file. - To build and test the site locally, run: ```bash