diff --git a/docker/docker-compose-w2.yml b/docker/docker-compose-w2.yml index 0770a68..01c30c8 100644 --- a/docker/docker-compose-w2.yml +++ b/docker/docker-compose-w2.yml @@ -6,7 +6,7 @@ services: environment: - discovery.type=single-node - http.max_content_length=1000mb - - "OPENSEARCH_JAVA_OPTS=-Xms1G -Xmx1G" + - "OPENSEARCH_JAVA_OPTS=-Xms2G -Xmx2G" # Avoid creating security-auditlog index, since that results in unallocated shards on a single node setup. # This will send security audit events to stdout instead https://opensearch.org/docs/latest/security/audit-logs/storage-types/ - plugins.security.audit.type=debug @@ -24,8 +24,8 @@ services: deploy: resources: limits: - cpus: '1' - memory: 2GB + cpus: '4' + memory: 8GB opensearch-dashboards: image: opensearchproject/opensearch-dashboards:2.6.0 container_name: opensearch-dashboards diff --git a/week2/project results/Screen Shot 2023-05-06 at 17.58.33.png b/week2/project results/Screen Shot 2023-05-06 at 17.58.33.png new file mode 100644 index 0000000..28de109 Binary files /dev/null and b/week2/project results/Screen Shot 2023-05-06 at 17.58.33.png differ diff --git a/week2/project results/Screen Shot 2023-05-06 at 17.58.47.png b/week2/project results/Screen Shot 2023-05-06 at 17.58.47.png new file mode 100644 index 0000000..71c9ad8 Binary files /dev/null and b/week2/project results/Screen Shot 2023-05-06 at 17.58.47.png differ diff --git a/week2/project results/Screen Shot 2023-05-06 at 17.59.02.png b/week2/project results/Screen Shot 2023-05-06 at 17.59.02.png new file mode 100644 index 0000000..edc34d2 Binary files /dev/null and b/week2/project results/Screen Shot 2023-05-06 at 17.59.02.png differ diff --git a/week2/project results/Screen Shot 2023-05-06 at 17.59.13.png b/week2/project results/Screen Shot 2023-05-06 at 17.59.13.png new file mode 100644 index 0000000..d6052f6 Binary files /dev/null and b/week2/project results/Screen Shot 2023-05-06 at 17.59.13.png differ diff --git a/week2/project 
results/Screen Shot 2023-05-06 at 17.59.22.png b/week2/project results/Screen Shot 2023-05-06 at 17.59.22.png new file mode 100644 index 0000000..aa09d18 Binary files /dev/null and b/week2/project results/Screen Shot 2023-05-06 at 17.59.22.png differ diff --git a/week2/project results/Screen Shot 2023-05-06 at 17.59.29.png b/week2/project results/Screen Shot 2023-05-06 at 17.59.29.png new file mode 100644 index 0000000..0e32953 Binary files /dev/null and b/week2/project results/Screen Shot 2023-05-06 at 17.59.29.png differ diff --git a/week2/project results/Screen Shot 2023-05-06 at 17.59.37.png b/week2/project results/Screen Shot 2023-05-06 at 17.59.37.png new file mode 100644 index 0000000..d73e9ad Binary files /dev/null and b/week2/project results/Screen Shot 2023-05-06 at 17.59.37.png differ diff --git a/week2/project results/Screen Shot 2023-05-06 at 17.59.45.png b/week2/project results/Screen Shot 2023-05-06 at 17.59.45.png new file mode 100644 index 0000000..f1b2854 Binary files /dev/null and b/week2/project results/Screen Shot 2023-05-06 at 17.59.45.png differ diff --git a/week2/project results/Screen Shot 2023-05-06 at 17.59.55.png b/week2/project results/Screen Shot 2023-05-06 at 17.59.55.png new file mode 100644 index 0000000..1fa88da Binary files /dev/null and b/week2/project results/Screen Shot 2023-05-06 at 17.59.55.png differ diff --git a/week2/project results/results.txt b/week2/project results/results.txt new file mode 100644 index 0000000..31ca4f8 --- /dev/null +++ b/week2/project results/results.txt @@ -0,0 +1,39 @@ +Week 2 + +Level 1 +curl -k -X PUT -u admin:admin "https://localhost:9200/bbuy_products" -H 'Content-Type: application/json' -d @week1/bbuy_products.json + +python week2/index.py -s /workspace/datasets/product_data/products -w 16 -b 500 -m 50000 +INFO:Indexing /workspace/datasets/product_data/products to bbuy_products with 16 workers, refresh_interval of -1 to host localhost with a maximum number of docs sent per file per worker of 
50000 and 500 per batch. +INFO:Done. 1275077 were indexed in 11.064095348049886 minutes. Total accumulated time spent in `bulk` indexing: 128.65915842517472 minutes + +~2k docs/sec + +re-index content: +python week2/index.py -s /workspace/datasets/product_data/products -w 16 -b 500 -m 50000 +INFO:Indexing /workspace/datasets/product_data/products to bbuy_products with 16 workers, refresh_interval of -1 to host localhost with a maximum number of docs sent per file per worker of 50000 and 500 per batch. +INFO:Done. 1275077 were indexed in 10.461924422199566 minutes. Total accumulated time spent in `bulk` indexing: 120.99868232431861 minutes + +~2k docs/sec + +python week2/query.py -q /workspace/datasets/train.csv -w 4 -m 25000 +INFO:Finished running 25000 queries in 4.871648958350609 minutes + +85 queries/sec + +Level 2: 2 CPU, 4GB + +python week2/index.py -s /workspace/datasets/product_data/products -w 16 -b 500 -m 50000 + +-- documents indexed per second went down, and time spent indexing went up. +-- deleted the index and tried again. + +INFO:Indexing /workspace/datasets/product_data/products to bbuy_products with 16 workers, refresh_interval of -1 to host localhost with a maximum number of docs sent per file per worker of 50000 and 500 per batch. +INFO:Done. 1275077 were indexed in 12.691176588032977 minutes. Total accumulated time spent in `bulk` indexing: 85.20092873292354 minutes + +python week2/query.py -q /workspace/datasets/train.csv -w 4 -m 25000 + +CPU usage was the limiting factor once again. + +Level 3: 4 CPU, 8GB +Results are in the screenshot images in this directory. \ No newline at end of file