diff --git a/.gitignore b/.gitignore index 3d4646fae..7ee6f3701 100644 --- a/.gitignore +++ b/.gitignore @@ -77,6 +77,10 @@ dist allure-results/* allure-report/* -# Performance test ref data +# Performance test ref data & output tests/performance/reference-data.json tests/performance/producer/expanded_pointer_distributions.json +producer-internal-*.json +producer-public-*.json +consumer-internal-*.json +consumer-public-*.json diff --git a/Makefile b/Makefile index 5119abb72..a97f18ca2 100644 --- a/Makefile +++ b/Makefile @@ -314,7 +314,7 @@ perftest-prepare: ## Prepare input files for producer & consumer perf tests perftest-producer-internal: ## Run producer perf tests @echo "Running producer performance tests with HOST=$(PERFTEST_HOST) and ENV_TYPE=$(ENV_TYPE) and DIST_PATH=$(DIST_PATH)" - k6 run tests/performance/producer/perftest.js -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH) + k6 run tests/performance/producer/perftest.js --summary-mode=full --out json=$(DIST_PATH)/producer-internal-$$(date +%Y%m%d%H%M%S).json -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH) perftest-producer-public: check-warn ## Run the producer perftests for the external access points @echo "Starting token refresher in background with ENV=$(ENV) PERFTEST_TOKEN_REFRESH_PORT=$(PERFTEST_TOKEN_REFRESH_PORT)" @@ -329,12 +329,12 @@ perftest-producer-public: check-warn ## Run the producer perftests for the exter TEST_CONNECT_MODE=public \ TEST_PUBLIC_BASE_URL=$$PUBLIC_BASE_URL \ TEST_CONFIG_FILE=$$CONFIG_FILE \ - k6 run tests/performance/producer/perftest.js -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH) + k6 run tests/performance/producer/perftest.js --summary-mode=full --out json=$(DIST_PATH)/producer-public-$$(date +%Y%m%d%H%M%S).json -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH) kill $$(lsof -t -i :$(PERFTEST_TOKEN_REFRESH_PORT)) perftest-consumer-internal: @echo "Running consumer performance tests with HOST=$(PERFTEST_HOST) and 
ENV_TYPE=$(ENV_TYPE) and DIST_PATH=$(DIST_PATH)" - k6 run tests/performance/consumer/perftest.js -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH) + k6 run tests/performance/consumer/perftest.js --summary-mode=full --out json=$(DIST_PATH)/consumer-internal-$$(date +%Y%m%d%H%M%S).json -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH) perftest-consumer-public: check-warn ## Run the consumer perftests for the external access points @echo "Starting token refresher in background with ENV=$(ENV) PERFTEST_TOKEN_REFRESH_PORT=$(PERFTEST_TOKEN_REFRESH_PORT)" @@ -349,22 +349,14 @@ perftest-consumer-public: check-warn ## Run the consumer perftests for the exter TEST_CONNECT_MODE=public \ TEST_PUBLIC_BASE_URL=$$PUBLIC_BASE_URL \ TEST_CONFIG_FILE=$$CONFIG_FILE \ - k6 run tests/performance/consumer/perftest.js -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH) + k6 run tests/performance/consumer/perftest.js --summary-mode=full --out json=$(DIST_PATH)/consumer-public-$$(date +%Y%m%d%H%M%S).json -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH) kill $$(lsof -t -i :$(PERFTEST_TOKEN_REFRESH_PORT)) perftest-generate-pointer-table-extract: - @echo "Generating pointer table extract with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and DIST_PATH=$(DIST_PATH)" + @echo "Generating pointer table extract with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and ENV=$(ENV) and DIST_PATH=$(DIST_PATH)" rm -rf "${DIST_PATH}/nft" mkdir -p "${DIST_PATH}/nft" - PYTHONPATH=. poetry run python tests/performance/perftest_environment.py generate_pointer_table_extract --output_dir="${DIST_PATH}/nft" + PYTHONPATH=. 
poetry run python tests/performance/perftest_environment.py generate_pointer_table_extract --output_dir="${DIST_PATH}/nft" --extract-size=1500000 ./scripts/get-current-info.sh > "${DIST_PATH}/nft/info.json" zip -r "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "${DIST_PATH}/nft" aws s3 cp "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "s3://nhsd-nrlf--${ENV}-metadata/performance/seed-pointers-extract-${PERFTEST_TABLE_NAME}.zip" - -perftest-run-token-refresher: - @echo "Starting token refresher in background with ENV=$(ENV) PERFTEST_TOKEN_REFRESH_PORT=$(PERFTEST_TOKEN_REFRESH_PORT)" - ENV=$(ENV) TOKEN_REFRESH_PORT=$(PERFTEST_TOKEN_REFRESH_PORT) PYTHONPATH=. poetry run python ./tests/performance/token_refresher.py & - trap "kill $$(lsof -t -i :$(PERFTEST_TOKEN_REFRESH_PORT)) 2>/dev/null" EXIT - - make perftest-consumer-public - kill $$(lsof -t -i :$(PERFTEST_TOKEN_REFRESH_PORT)) diff --git a/scripts/seed_nft_tables.py b/scripts/seed_nft_tables.py index e6c19f7e5..286d62b58 100644 --- a/scripts/seed_nft_tables.py +++ b/scripts/seed_nft_tables.py @@ -102,6 +102,22 @@ def _write_pointer_extract_to_file(table_name, pointer_data): create_extract_metadata_file(table_name, nft_dist_path) +# To avoid sonarcube maintainability warning +def get_pointer_processor(unprocessed_items): + def pointer_is_processed(pointer): + pointer_id = pointer[0] + matches = [ + unprocessed_item + for unprocessed_item in unprocessed_items + if unprocessed_item["PutRequest"]["Item"].get("id") == pointer_id + ] + # print("unprocessed matches:", matches) + + return len(matches) == 0 + + return pointer_is_processed + + def _populate_seed_table( table_name: str, patients_with_pointers: int, @@ -143,6 +159,7 @@ def _populate_seed_table( unprocessed_count = 0 pointer_data: list[list[str]] = [] + batch_pointer_data: list[list[str]] = [] start_time = datetime.now(tz=timezone.utc) batch_upsert_items: list[dict[str, Any]] = [] @@ -158,11 +175,20 @@ def _populate_seed_table( 
RequestItems={table_name: batch_upsert_items} ) + processed_pointers = batch_pointer_data + if response.get("UnprocessedItems"): - unprocessed_count += len( - response.get("UnprocessedItems").get(table_name, []) + unprocessed_items = response.get("UnprocessedItems").get(table_name, []) + unprocessed_count += len(unprocessed_items) + pointer_is_processed = get_pointer_processor(unprocessed_items) + + processed_pointers = list( + filter(pointer_is_processed, batch_pointer_data) ) + pointer_data.extend(processed_pointers) + + batch_pointer_data = [] batch_upsert_items = [] batch_counter = 0 @@ -178,7 +204,7 @@ def _populate_seed_table( ) put_req = {"PutRequest": {"Item": pointer.model_dump()}} batch_upsert_items.append(put_req) - pointer_data.append( + batch_pointer_data.append( [ pointer.id, new_type, # not full type url diff --git a/terraform/bastion/README.md b/terraform/bastion/README.md index 27c503155..d4882682b 100644 --- a/terraform/bastion/README.md +++ b/terraform/bastion/README.md @@ -11,7 +11,7 @@ Before deploying a bastion, you will need: ## Deploying a bastion -The bastions are emphemeral resources that should be deploy when you need them. +The bastions are ephemeral resources that should be deployed when you need them. To deploy a bastion, you will first need to login to the AWS mgmt account on the CLI. @@ -44,7 +44,7 @@ terraform apply ./bastion.tfplan Once the bastion is deployed, you can connect to it via SSH with: ``` -make ssh-connection +make ssh-connection ENV=perftest ``` Once connected successfully, you will be at the SSM `$` prompt. To switch to the `nrlf_ops` user, run this command: @@ -102,7 +102,7 @@ If you're trying to access an AWS resource from the bastion and are getting an a You can check the role in the AWS console to work out if things are missing and can edit it there too for immediate access to the resources you need. 
-If you want to permenantly grant new access to the bastion, you can add a policy and attach it to the EC2 instance in [iam.tf](iam.tf) +If you want to permanently grant new access to the bastion, you can add a policy and attach it to the EC2 instance in [iam.tf](iam.tf) #### A tool I need is missing diff --git a/terraform/infrastructure/etc/perftest.tfvars b/terraform/infrastructure/etc/perftest.tfvars index 50bfd5841..f2aa7c0b4 100644 --- a/terraform/infrastructure/etc/perftest.tfvars +++ b/terraform/infrastructure/etc/perftest.tfvars @@ -1,7 +1,7 @@ account_name = "perftest" aws_account_name = "test" -dynamodb_pointers_table_name = "nhsd-nrlf--perftest-baseline-pointers-table" +dynamodb_pointers_table_name = "nhsd-nrlf--perftest-15m-pointers-table" domain = "perftest.record-locator.national.nhs.uk" public_domain = "perftest.api.service.nhs.uk" diff --git a/tests/performance/README.md b/tests/performance/README.md index 7eda3541b..0cf4fc644 100644 --- a/tests/performance/README.md +++ b/tests/performance/README.md @@ -6,7 +6,7 @@ We have performance tests which give us a benchmark of how NRLF performs under l ### Prep the environment -Perf tests are generally conducted in the perftest env. There's a selection of tables in the perftest env representing different pointer volume scenarios e.g. perftest-baseline vs perftest-1million (todo: update with real names!). +Perf tests are generally conducted in the perftest env. There's a selection of tables in the perftest env representing different pointer volume scenarios e.g. perftest-baseline vs perftest-15m vs perftest-55m #### Pull certs for perftest @@ -17,37 +17,37 @@ make truststore-pull-all ENV=perftest #### Point perftest at a different pointers table -We (will) have multiple tables representing different states of NRLF in the future e.g. all patients receiving an IPS (International Patient Summary), onboarding particular high-volume suppliers. 
- -In order to run performance tests to get figures for these different states, we can point the perftest environment at one of these tables. - -Currently, this requires tearing down the existing environment and restoring from scratch: - -1. Follow instructions in terraform/infrastructure/readme.md to tear down the perf test environment. - - Do **not** tear down shared account-wide infrastructure -2. Update `perftest-pointers-table.name_prefix` in `terraform/account-wide-infrastructure/test/dynamodb__pointers-table.tf` to be the table name you want, minus "-pointers-table" - - e.g. to use the baseline table `nhsd-nrlf--perftest-baseline-pointers-table`, set `name_prefix = "nhsd-nrlf--perftest-baseline"` -3. Update `dynamodb_pointers_table_prefix` in `terraform/infrastructure/etc/perftest.tfvars` same as above. - - e.g. to use the baseline table `dynamodb_pointers_table_prefix = "nhsd-nrlf--perftest-baseline"` -4. Commit changes to a branch & push -5. Run the [Deploy Account-wide infrastructure](https://github.com/NHSDigital/NRLF/actions/workflows/deploy-account-wide-infra.yml) workflow against your branch & `account-test`. - - If you get a terraform failure like "tried to create table but it already exists", you will need to do some fanangaling: - 1. make sure there is a backup of your chosen table or create one if not. In the AWS console: dynamodb > tables > your perftest table > backups > create backup > Create on-demand backup > leave all settings as defaults > create backup. This might take up to an hour to complete. - 2. once backed up, delete your table. In the AWS console: dynamodb > tables > your perftest table > actions > delete table - 3. Rerun the Deploy Account-wide infrastructure action. - 4. Terraform will create an empty table with the correct name & (most importantly!) read/write IAM policies. - 5. 
Delete the empty table created by terraform and restore from the backup, specifying the same table name you've defined in code & selecting the matching customer managed encryption key. -6. Run the [Persistent Environment Deploy](https://github.com/NHSDigital/NRLF/actions/workflows/persistent-environment.yml) workflow against your branch & `perftest` to restore the environment with lambdas pointed at your chosen table. -7. You can check this has been successful by checking the table name in the lambdas. - - In the AWS console: Lambda > functions > pick any perftest-1 lambda > Configuration > Environment variables > `TABLE_NAME` should be your desired pointer table e.g. `nhsd-nrlf--perftest-baseline-pointers-table` +We have multiple tables representing different states of NRLF in the future e.g. all patients receiving an IPS (International Patient Summary), onboarding particular high-volume suppliers. + +In order to run performance tests to get figures for these different volumes, we can point the perftest environment at one of these tables. + +To do this, we change an environment variable which defines which table our lambdas talk to and deploy changes. + +1. Update `dynamodb_pointers_table_name` to be the desired table name in [terraform/infrastructure/etc/perftest.tfvars](terraform/infrastructure/etc/perftest.tfvars) e.g. + +```sh +dynamodb_pointers_table_name = "nhsd-nrlf--perftest-baseline-pointers-table" +``` -If you've followed these steps, you will also need to [generate permissions](#generate-permissions) as the organisation permissions will have been lost when the environment was torn down. +2. To avoid erasing the test permissions when you deploy these changes, make sure to run through the steps to [generate permissions](#generate-permissions) +3. Apply your changes + +```sh +cd ./terraform/infrastructure +make init TF_WORKSPACE_NAME=perftest-1 ENV=perftest +make ENV=perftest USE_SHARED_RESOURCES=true apply +``` + +4. 
You can verify this has been successful by checking the table name in the lambdas. - In the AWS console: Lambda > functions > pick any perftest-1 lambda > Configuration > Environment variables > `TABLE_NAME` should be your desired pointer table e.g. `nhsd-nrlf--perftest-baseline-pointers-table` #### Generate permissions You will need to generate pointer permissions the first time performance tests are run in an environment e.g. if the perftest environment is destroyed & recreated. ```sh +assume nhsd-nrlf-mgmt + # In project root make perftest-generate-permissions # makes a bunch of json permission files for test organisations make get-s3-perms ENV=perftest # will take all permissions & create nrlf_permissions.zip file @@ -55,7 +55,6 @@ make build # apply this new permissions zip file to your environment cd ./terraform/infrastructure -assume nhsd-nrlf-mgmt make init TF_WORKSPACE_NAME=perftest-1 ENV=perftest make ENV=perftest USE_SHARED_RESOURCES=true apply ``` @@ -111,9 +110,11 @@ Regenerates the input files from the current state of a given perftest table & u ```sh make perftest-generate-pointer-table-extract \ - PERFTEST_TABLE_NAME=nhsd-nrlf--perftest-anjali-test-2-pointers-table + PERFTEST_TABLE_NAME=nhsd-nrlf--perftest-anjali-test-2-pointers-table ENV=perftest ``` +This will generate a CSV extract of the given pointer table containing a row per pointer. To run the perf tests, you will need an extract larger than the number of test iterations. The default extract size is 1.5 million - this can be changed in the Makefile command by updating the value of `--extract-size`. If the extract is too big, the test runners will take a long time to load the file. 
+ ## Assumptions / Caveats - Run performance tests in the perftest environment only\* diff --git a/tests/performance/consumer/perftest.config.json b/tests/performance/consumer/perftest.config.json index 4bbe6b4d9..b9b8bc47f 100644 --- a/tests/performance/consumer/perftest.config.json +++ b/tests/performance/consumer/perftest.config.json @@ -6,43 +6,43 @@ }, "scenarios": { "countDocumentReference": { - "tps": 5, + "tps": 1, "duration": "5m", "hold": "30m", "rampDown": "1m" }, "countPostDocumentReference": { - "tps": 5, + "tps": 1, "duration": "5m", "hold": "30m", "rampDown": "1m" }, "readDocumentReference": { - "tps": 5, + "tps": 1, "duration": "5m", "hold": "30m", "rampDown": "1m" }, "searchDocumentReference": { - "tps": 5, + "tps": 1, "duration": "5m", "hold": "30m", "rampDown": "1m" }, "searchDocumentReferenceByCategory": { - "tps": 5, + "tps": 1, "duration": "5m", "hold": "30m", "rampDown": "1m" }, "searchPostDocumentReference": { - "tps": 5, + "tps": 1, "duration": "5m", "hold": "30m", "rampDown": "1m" }, "searchPostDocumentReferenceByCategory": { - "tps": 5, + "tps": 1, "duration": "5m", "hold": "30m", "rampDown": "1m" diff --git a/tests/performance/perftest_environment.py b/tests/performance/perftest_environment.py index 3e6f6b0f3..135136cab 100644 --- a/tests/performance/perftest_environment.py +++ b/tests/performance/perftest_environment.py @@ -66,6 +66,7 @@ def __next__(self): def generate_pointer_table_extract( + extract_size=1500000, output_dir=".", ): """ @@ -79,6 +80,7 @@ def generate_pointer_table_extract( start_key = None buffer = [] buffer_size = 1_000_000 # 10k rows needs ~3MB of RAM, so 1M rows needs ~300MB + buffers_written = 0 with open(out, "w", newline="") as csv_file: writer = csv.writer(csv_file) @@ -109,12 +111,20 @@ def generate_pointer_table_extract( if len(buffer) >= buffer_size: print("Writing buffer to CSV...") # noqa: T201 writer.writerows(buffer) + buffers_written += 1 buffer.clear() start_key = response.get("LastEvaluatedKey", None) 
- done = start_key is None + + no_more_to_read = start_key is None + reached_desired_extract_size = ( + buffers_written * buffer_size + ) >= extract_size + + done = no_more_to_read or reached_desired_extract_size # Write any remaining rows in buffer if buffer: writer.writerows(buffer) + buffers_written += 1 print(f"Pointer extract CSV data written to {out}") # noqa: T201 create_extract_metadata_file(table_name, output_dir) diff --git a/tests/performance/producer/client_perftest.js b/tests/performance/producer/client_perftest.js index b649221e7..9909f01a2 100644 --- a/tests/performance/producer/client_perftest.js +++ b/tests/performance/producer/client_perftest.js @@ -2,7 +2,6 @@ import http from "k6/http"; import { ODS_CODE } from "../constants.js"; import { check } from "k6"; import { randomItem } from "https://jslib.k6.io/k6-utils/1.2.0/index.js"; -import { crypto } from "k6/experimental/webcrypto"; import { createRecord } from "../setup.js"; import { getHeaders, getFullUrl } from "../test-config.js"; import exec from "k6/execution"; diff --git a/tests/performance/producer/perftest.config.json b/tests/performance/producer/perftest.config.json index 20726b3ae..c899e8551 100644 --- a/tests/performance/producer/perftest.config.json +++ b/tests/performance/producer/perftest.config.json @@ -6,31 +6,31 @@ }, "scenarios": { "createDocumentReference": { - "tps": 5, + "tps": 1, "duration": "5m", "hold": "30m", "rampDown": "1m" }, "readDocumentReference": { - "tps": 5, + "tps": 1, "duration": "5m", "hold": "30m", "rampDown": "1m" }, "upsertDocumentReference": { - "tps": 5, + "tps": 1, "duration": "5m", "hold": "30m", "rampDown": "1m" }, "searchDocumentReference": { - "tps": 5, + "tps": 1, "duration": "5m", "hold": "30m", "rampDown": "1m" }, "searchPostDocumentReference": { - "tps": 5, + "tps": 1, "duration": "5m", "hold": "30m", "rampDown": "1m" diff --git a/tests/performance/test-config.js b/tests/performance/test-config.js index aab5511b3..66769ca09 100644 --- 
a/tests/performance/test-config.js +++ b/tests/performance/test-config.js @@ -9,10 +9,12 @@ const configPort = __ENV.TOKEN_REFRESH_PORT || 8765; const fetchConfig = () => { const res = http.get(`http://localhost:${configPort}`); - console.log("Fetched latest bearer token", res.status); if (res.error) { - throw new Error("Bearer token not found in config file", res.error); + throw new Error( + "Unable to fetch config and latest bearer token", + res.error + ); } return res.json(); };