diff --git a/.github/workflows/pxf-ci.yml b/.github/workflows/pxf-ci.yml index 660c47d4e..28b40d8f0 100644 --- a/.github/workflows/pxf-ci.yml +++ b/.github/workflows/pxf-ci.yml @@ -301,10 +301,142 @@ jobs: exit 1 fi + # Stage 2b: Testcontainers-based tests + build-pxf-testcontainer-image: + name: Build PXF Testcontainer Image + runs-on: ubuntu-latest + steps: + - name: Checkout PXF source + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Build pxf-cbdb testcontainer image + run: | + docker build \ + -t pxf/cbdb-testcontainer:1 \ + automation/src/main/resources/testcontainers/pxf-cbdb + docker save pxf/cbdb-testcontainer:1 > /tmp/pxf-cbdb-testcontainer-image.tar + + - name: Upload pxf-cbdb testcontainer image + uses: actions/upload-artifact@v4 + with: + name: pxf-cbdb-testcontainer-image + path: /tmp/pxf-cbdb-testcontainer-image.tar + retention-days: 1 + + pxf-testcontainer-test: + name: "TC Test - ${{ matrix.tc_group }} (${{ matrix.test_mode }})" + needs: [build-pxf-testcontainer-image] + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + - tc_group: jdbc-tc + test_mode: external-table + use_fdw: "false" + - tc_group: jdbc-tc + test_mode: fdw + use_fdw: "true" + steps: + + - name: Checkout PXF source + uses: actions/checkout@v4 + with: + fetch-depth: 1 + submodules: true + + - name: Set up JDK ${{ env.JAVA_VERSION }} + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: ${{ env.JAVA_VERSION }} + + - name: Download pxf-cbdb testcontainer image + uses: actions/download-artifact@v4 + with: + name: pxf-cbdb-testcontainer-image + path: /tmp + + - name: Load pxf-cbdb testcontainer image + run: | + docker load < /tmp/pxf-cbdb-testcontainer-image.tar + + - name: Build PXF stage artifacts (no unit tests) + run: | + make -C server stage-notest + + - name: Run Testcontainers tests - ${{ matrix.tc_group }} (${{ matrix.test_mode }}) + id: run_test + continue-on-error: true + timeout-minutes: 120 + 
working-directory: automation + env: + PXF_HOME: ${{ github.workspace }}/server/build/stage + run: | + make test-tc TC_GROUP=${{ matrix.tc_group }} USE_FDW=${{ matrix.use_fdw }} + + - name: Collect artifacts and generate stats + if: always() + id: collect_artifacts + run: | + mkdir -p artifacts/logs + TC_GROUP="${{ matrix.tc_group }}" + TEST_MODE="${{ matrix.test_mode }}" + TEST_RESULT="${{ steps.run_test.outcome }}" + + TOTAL=0; PASSED=0; FAILED=0; SKIPPED=0 + for xml in automation/target/surefire-reports/TEST-*.xml; do + if [ -f "$xml" ]; then + tests=$(grep -oP 'tests="\K\d+' "$xml" 2>/dev/null | head -1 || echo "0") + failures=$(grep -oP 'failures="\K\d+' "$xml" 2>/dev/null | head -1 || echo "0") + errors=$(grep -oP 'errors="\K\d+' "$xml" 2>/dev/null | head -1 || echo "0") + skipped=$(grep -oP 'skipped="\K\d+' "$xml" 2>/dev/null | head -1 || echo "0") + TOTAL=$((TOTAL + tests)) + FAILED=$((FAILED + failures + errors)) + SKIPPED=$((SKIPPED + skipped)) + fi + done + PASSED=$((TOTAL - FAILED - SKIPPED)) + + cat > artifacts/test_stats.json <<EOF + {"tc_group": "$TC_GROUP", "test_mode": "$TEST_MODE", "result": "$TEST_RESULT", "total": $TOTAL, "passed": $PASSED, "failed": $FAILED, "skipped": $SKIPPED} + EOF + echo "failed_count=$FAILED" >> $GITHUB_OUTPUT + echo "skipped_count=$SKIPPED" >> $GITHUB_OUTPUT + echo "Test stats for tc:$TC_GROUP ($TEST_MODE): total=$TOTAL, passed=$PASSED, failed=$FAILED, skipped=$SKIPPED" + + - name: Upload test artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-results-tc-${{ matrix.tc_group }}-${{ matrix.test_mode }} + path: artifacts/** + if-no-files-found: ignore + retention-days: 7 + + - name: Check test result + if: always() + run: | + FAILED_COUNT="${{ steps.collect_artifacts.outputs.failed_count || 0 }}" + if [ "${{ steps.run_test.outcome }}" == "failure" ] || [ "$FAILED_COUNT" -gt 0 ]; then + echo "Testcontainer test group ${{ matrix.tc_group }} (${{ matrix.test_mode }}) failed (Failures: $FAILED_COUNT)" + exit 1 + fi + # Stage 3: Summary job test-summary: name: Test Summary - needs: [pxf-test] + needs: [pxf-test, pxf-testcontainer-test] if: always() runs-on: ubuntu-latest steps: diff --git 
a/automation/Makefile b/automation/Makefile index a475ce954..33f91eafe 100755 --- a/automation/Makefile +++ b/automation/Makefile @@ -22,6 +22,11 @@ ifneq "$(GROUP)" "" MAVEN_TEST_OPTS+= -Dgroups=$(GROUP) endif +EXCLUDED_GROUPS ?= testcontainers +ifneq "$(EXCLUDED_GROUPS)" "" + MAVEN_TEST_OPTS+= -DexcludedGroups=$(EXCLUDED_GROUPS) +endif + MAVEN_TEST_OPTS+= -Djava.awt.headless=true -DuseFDW=$(USE_FDW) -Duser.timezone=UTC ifneq "$(OFFLINE)" "true" @@ -94,7 +99,11 @@ MVN=mvn all: test check-env: - @if [ -z "$(PXF_HOME)" ]; then echo 'ERROR: PXF_HOME must be set'; exit 1; fi + @if [ -z "$(PXF_HOME)" ]; then \ + echo 'ERROR: PXF_HOME must be set'; \ + echo 'Example: export PXF_HOME="$(abspath ../server/build/stage)"'; \ + exit 1; \ + fi symlink_pxf_jars: check-env @if [ -d "$(PXF_HOME)/application" ]; then \ @@ -245,6 +254,18 @@ else @ls src/test/java/org/apache/cloudberry/pxf/automation/features/*/*Test.java | sed 's/.*\///g' | sed 's/\.java//g' | awk '{print "* ", $$1}' endif +# Run Testcontainers-based tests. 
+# Usage: +# make test-tc => run all testcontainers tests +# make test-tc TC_GROUP=jdbc-tc => run only jdbc-tc group +.PHONY: test-tc +test-tc: check-env symlink_pxf_jars pxf_regress + $(MVN) -B -e -Djava.awt.headless=true -Duser.timezone=UTC \ + -DuseFDW=$(USE_FDW) \ + -Dgroups=$(or $(TC_GROUP),testcontainers) \ + -DexcludedGroups= \ + test + .PHONY: pxf_regress pxf_regress: $(MAKE) -C pxf_regress diff --git a/automation/pom.xml b/automation/pom.xml index e294cac07..1746e1627 100644 --- a/automation/pom.xml +++ b/automation/pom.xml @@ -76,6 +76,12 @@ + + src/main/resources + + **/* + + src/test/resources @@ -177,6 +183,30 @@ 4.2.0 + + org.testcontainers + testcontainers + 2.0.3 + + + + org.apache.commons + commons-lang3 + 3.17.0 + + + + commons-io + commons-io + 2.17.0 + + + + org.apache.commons + commons-compress + 1.26.2 + + org.jsystemtest jsystemCore @@ -221,6 +251,13 @@ 42.7.2 + + com.clickhouse + clickhouse-jdbc + 0.9.6 + all + + org.springframework spring @@ -262,19 +299,19 @@ com.fasterxml.jackson.core jackson-core - 2.14.3 + 2.20.2 com.fasterxml.jackson.core jackson-databind - 2.14.3 + 2.20.2 com.fasterxml.jackson.core jackson-annotations - 2.14.3 + 2.20 diff --git a/automation/sqlrepo/features/jdbc/clickhouse/read_types/expected/query01.ans b/automation/sqlrepo/features/jdbc/clickhouse/read_types/expected/query01.ans new file mode 100644 index 000000000..9920d9a00 --- /dev/null +++ b/automation/sqlrepo/features/jdbc/clickhouse/read_types/expected/query01.ans @@ -0,0 +1,47 @@ +-- start_ignore +-- end_ignore +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- @description ClickHouse JDBC read: expect one row of primitive types from PXF (values compared by regress output) +SET timezone='utc'; +SET + +SET bytea_output='hex'; +SET + +SELECT + i_int, + s_small, + b_big, + f_float32, + d_float64, + b_bool, + dec, + t_text, + bin, + d_date, + d_ts, + d_tstz, + d_uuid +FROM pxf_ch_clickhouse_read_types +LIMIT 1; + i_int | s_small | b_big | f_float32 | d_float64 | b_bool | dec | t_text | bin | d_date | d_ts | d_tstz | d_uuid +-------+---------+-------+-----------+-----------+--------+-------------------+--------+------------+------------+-----------------------+-----------------------+-------------------------------------- + 1 | 2 | 3 | 1.25 | 3.1415926 | t | 12345.6789012345 | hello | \x5b36352c36362c36372c36385d | 2020-01-02 | 2020-01-02 03:04:05.006 | 2020-01-02 03:04:05.006+00 | 550e8400-e29b-41d4-a716-446655440000 +(1 row) + diff --git a/automation/sqlrepo/features/jdbc/clickhouse/read_types/sql/query01.sql b/automation/sqlrepo/features/jdbc/clickhouse/read_types/sql/query01.sql new file mode 100644 index 000000000..66faa8569 --- /dev/null +++ b/automation/sqlrepo/features/jdbc/clickhouse/read_types/sql/query01.sql @@ -0,0 +1,37 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. 
The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- @description ClickHouse JDBC read: expect one row of primitive types from PXF (values compared by regress output) +SET timezone='utc'; +SET bytea_output='hex'; + +SELECT + i_int, + s_small, + b_big, + f_float32, + d_float64, + b_bool, + dec, + t_text, + bin, + d_date, + d_ts, + d_tstz, + d_uuid +FROM pxf_ch_clickhouse_read_types +LIMIT 1; diff --git a/automation/sqlrepo/features/jdbc/clickhouse/write_types/expected/query01.ans b/automation/sqlrepo/features/jdbc/clickhouse/write_types/expected/query01.ans new file mode 100644 index 000000000..96237a184 --- /dev/null +++ b/automation/sqlrepo/features/jdbc/clickhouse/write_types/expected/query01.ans @@ -0,0 +1,78 @@ +-- start_ignore +-- end_ignore +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- @description ClickHouse JDBC write: insert full row then verify via readable external table +SET timezone='utc'; +SET + +SET bytea_output='hex'; +SET + +INSERT INTO pxf_ch_clickhouse_write_types ( + i_int, + s_small, + b_big, + f_float32, + d_float64, + b_bool, + dec, + t_text, + bin, + d_date, + d_ts, + d_tstz, + d_uuid +) VALUES ( + 1, + 2, + 3, + 1.25, + 3.1415926, + true, + CAST('12345.6789012345' AS numeric), + 'hello', + decode('41424344', 'hex'), + DATE '2020-01-02', + TIMESTAMP '2020-01-02 03:04:05.006', + TIMESTAMPTZ '2020-01-02 03:04:05.006+00', + '550e8400-e29b-41d4-a716-446655440000'::uuid +); +INSERT 0 1 + +SELECT + i_int, + s_small, + b_big, + f_float32, + d_float64, + b_bool, + dec, + t_text, + bin, + d_date, + d_ts, + d_tstz, + d_uuid +FROM pxf_ch_clickhouse_write_verify +LIMIT 1; + i_int | s_small | b_big | f_float32 | d_float64 | b_bool | dec | t_text | bin | d_date | d_ts | d_tstz | d_uuid +-------+---------+-------+-----------+-----------+--------+-------------------+--------+------------+------------+-----------------------+-----------------------+-------------------------------------- + 1 | 2 | 3 | 1.25 | 3.1415926 | t | 12345.6789012345 | hello | \x41424344 | 2020-01-02 | 2020-01-02 03:04:05.006 | 2020-01-02 03:04:05.006+00 | 550e8400-e29b-41d4-a716-446655440000 +(1 row) + diff --git a/automation/sqlrepo/features/jdbc/clickhouse/write_types/sql/query01.sql b/automation/sqlrepo/features/jdbc/clickhouse/write_types/sql/query01.sql new file mode 100644 index 000000000..b69aa963a --- /dev/null +++ 
b/automation/sqlrepo/features/jdbc/clickhouse/write_types/sql/query01.sql @@ -0,0 +1,67 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- @description ClickHouse JDBC write: insert full row then verify via readable external table +SET timezone='utc'; +SET bytea_output='hex'; + +INSERT INTO pxf_ch_clickhouse_write_types ( + i_int, + s_small, + b_big, + f_float32, + d_float64, + b_bool, + dec, + t_text, + bin, + d_date, + d_ts, + d_tstz, + d_uuid +) VALUES ( + 1, + 2, + 3, + 1.25, + 3.1415926, + true, + CAST('12345.6789012345' AS numeric), + 'hello', + decode('41424344', 'hex'), + DATE '2020-01-02', + TIMESTAMP '2020-01-02 03:04:05.006', + TIMESTAMPTZ '2020-01-02 03:04:05.006+00', + '550e8400-e29b-41d4-a716-446655440000'::uuid +); + +SELECT + i_int, + s_small, + b_big, + f_float32, + d_float64, + b_bool, + dec, + t_text, + bin, + d_date, + d_ts, + d_tstz, + d_uuid +FROM pxf_ch_clickhouse_write_verify +LIMIT 1; diff --git a/automation/src/main/java/org/apache/cloudberry/pxf/automation/applications/CloudberryApplication.java b/automation/src/main/java/org/apache/cloudberry/pxf/automation/applications/CloudberryApplication.java new file mode 100644 index 000000000..d9b54cb81 --- /dev/null +++ 
b/automation/src/main/java/org/apache/cloudberry/pxf/automation/applications/CloudberryApplication.java @@ -0,0 +1,309 @@ +package org.apache.cloudberry.pxf.automation.applications; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import com.google.common.collect.Lists; +import org.apache.cloudberry.pxf.automation.structures.tables.basic.Table; +import org.apache.cloudberry.pxf.automation.structures.tables.pxf.ExternalTable; +import org.apache.cloudberry.pxf.automation.testcontainers.PXFCloudberryContainer; +import org.postgresql.copy.CopyManager; +import org.postgresql.core.BaseConnection; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.List; +import java.util.Properties; + +/** + * TestObject that provides methods to work with Cloudberry DB + */ +public class CloudberryApplication implements AutoCloseable { + + private static final int MAX_RETRIES = 10; + private static final long RETRY_INTERVAL_MS = 5_000; + + private final PXFCloudberryContainer container; + private 
final String jdbcUrl; + private final String userName; + private Connection connection; + private Statement statement; + + public CloudberryApplication(PXFCloudberryContainer container) { + this.container = container; + this.jdbcUrl = getCloudberryMappedJdbcUrl(); + this.userName = container.getCloudberryUser(); + } + + public CloudberryApplication(PXFCloudberryContainer container, String dbName) { + this.container = container; + this.jdbcUrl = getCloudberryMappedJdbcUrl(dbName); + this.userName = container.getCloudberryUser(); + } + + public void connect() throws Exception { + if (statement != null) { + return; + } + Properties props = new Properties(); + if (userName != null) { + props.setProperty("user", userName); + } + + Exception lastException = null; + for (int attempt = 1; attempt <= MAX_RETRIES; attempt++) { + try { + Class.forName("org.postgresql.Driver"); + connection = DriverManager.getConnection(jdbcUrl, props); + statement = connection.createStatement(); + System.out.println("[CloudberryApplication] Connected to " + jdbcUrl); + return; + } catch (Exception e) { + lastException = e; + System.out.println("[CloudberryApplication] Connection attempt " + attempt + " failed: " + e.getMessage()); + Thread.sleep(RETRY_INTERVAL_MS); + } + } + throw new RuntimeException("Failed to connect to CBDB at " + jdbcUrl + " after " + MAX_RETRIES + " attempts", lastException); + } + + public String getCloudberryMappedJdbcUrl() { + return getCloudberryMappedJdbcUrl("pxfautomation"); + } + + public String getCloudberryMappedJdbcUrl(String dbName) { + return "jdbc:postgresql://localhost:" + container.getCloudberryMappedPort() + "/" + dbName; + } + + public String getCloudberryInternalJdbcUrl() { + return "jdbc:postgresql://localhost:" + container.getCloudberryInternalPort() + "/pxfautomation"; + } + + + /** + * Drops (if exists) and creates the table, then verifies it exists. 
+ */ + public void createTableAndVerify(Table table) throws Exception { + dropTable(table, true); + runQuery(table.constructCreateStmt()); + if (!checkTableExists(table)) { + throw new RuntimeException("Table " + table.getName() + " does not exist after creation"); + } + } + + public void dropTable(Table table, boolean cascade) throws Exception { + runQuery(table.constructDropStmt(cascade), true); + if (table instanceof ExternalTable) { + String dropForeign = String.format("DROP FOREIGN TABLE IF EXISTS %s%s", + table.getFullName(), cascade ? " CASCADE" : ""); + runQuery(dropForeign, true); + } + } + + /** + * Loads data from a file into a table using PostgreSQL COPY protocol. + * Uses {@link CopyManager} over JDBC instead of psql over SSH. + */ + public void copyFromFile(Table table, File path, String delimiter, String nullChar, boolean csv) throws Exception { + StringBuilder copyCmd = new StringBuilder(); + copyCmd.append("COPY ").append(table.getName()).append(" FROM STDIN"); + + String copyParams = buildCopyParams(delimiter, nullChar, csv); + if (!copyParams.isEmpty()) { + copyCmd.append(" ").append(copyParams); + } + + CopyManager copyManager = new CopyManager(connection.unwrap(BaseConnection.class)); + try (BufferedReader reader = new BufferedReader(new FileReader(path))) { + long rows = copyManager.copyIn(copyCmd.toString(), reader); + System.out.println("[CloudberryApplication] COPY loaded " + rows + " rows into " + table.getName()); + } + } + + /** + * Inserts rows from a source Table (in-memory data) into the target table. 
+ */ + public void insertData(Table source, Table target) throws Exception { + List<List<String>> data = source.getData(); + if (data == null || data.isEmpty()) { + return; + } + + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < data.size(); i++) { + List<String> row = data.get(i); + sb.append("("); + for (int j = 0; j < row.size(); j++) { + sb.append("E'").append(row.get(j)).append("'"); + if (j < row.size() - 1) { + sb.append(","); + } + } + sb.append(")"); + if (i < data.size() - 1) { + sb.append(","); + } + } + + String query = "INSERT INTO " + target.getName() + " VALUES " + sb.toString(); + runQuery(query); + } + + public void runQuery(String sql) throws Exception { + runQuery(sql, false); + } + + public void runQuery(String sql, boolean ignoreFail) throws Exception { + try { + statement.execute(sql); + } catch (SQLException e) { + if (!ignoreFail) { + throw e; + } + } + } + + public void createDatabase(String dbName) throws Exception { + try { + runQuery("CREATE DATABASE " + dbName); + } catch (Exception e) { + if (!e.getMessage().contains("already exists")) { + throw e; + } + } + } + + public void createExtension(String extensionName, boolean ignoreFail) throws Exception { + runQuery("CREATE EXTENSION IF NOT EXISTS " + extensionName, ignoreFail); + } + + + public void createTestFDW(boolean ignoreFail) throws Exception { + runQuery("DROP FOREIGN DATA WRAPPER IF EXISTS test_pxf_fdw CASCADE", ignoreFail); + runQuery("CREATE FOREIGN DATA WRAPPER test_pxf_fdw HANDLER pxf_fdw_handler " + + "VALIDATOR pxf_fdw_validator OPTIONS (protocol 'test', mpp_execute 'all segments')", ignoreFail); + } + + public void createSystemFDW(boolean ignoreFail) throws Exception { + runQuery("DROP FOREIGN DATA WRAPPER IF EXISTS system_pxf_fdw CASCADE", ignoreFail); + runQuery("CREATE FOREIGN DATA WRAPPER system_pxf_fdw HANDLER pxf_fdw_handler " + + "VALIDATOR pxf_fdw_validator OPTIONS (protocol 'system', mpp_execute 'all segments')", ignoreFail); + } + public void 
createForeignServers(boolean ignoreFail) throws Exception { + List<String> servers = Lists.newArrayList( + "default_hdfs", + "default_hive", + "db-hive_jdbc", // Needed for JdbcHiveTest + "default_hbase", + "default_jdbc", // Needed for JdbcHiveTest and other JdbcTest which refers to the default server. + "database_jdbc", + "db-session-params_jdbc", + "default_file", + "default_s3", + "default_gs", + "default_abfss", + "default_wasbs", + "s3_s3", + "s3-invalid_s3", + "s3-non-existent_s3", + "hdfs-non-secure_hdfs", + "hdfs-secure_hdfs", + "hdfs-ipa_hdfs", + "default_test", + "default_system"); + + for (String server : servers) { + String foreignServerName = server.replace("-", "_"); + String pxfServerName = server.substring(0, server.lastIndexOf("_")); // strip protocol at the end + String fdwName = server.substring(server.lastIndexOf("_") + 1) + "_pxf_fdw"; // strip protocol at the end + runQuery(String.format("CREATE SERVER IF NOT EXISTS %s FOREIGN DATA WRAPPER %s OPTIONS(config '%s')", + foreignServerName, fdwName, pxfServerName), ignoreFail); + runQuery(String.format("CREATE USER MAPPING IF NOT EXISTS FOR CURRENT_USER SERVER %s", foreignServerName), + ignoreFail); + } + } + + public boolean checkDatabaseExists(String dbName) throws Exception { + ResultSet rs = statement.executeQuery( + "SELECT 1 FROM pg_database WHERE datname = '" + dbName + "'"); + return rs.next(); + } + + public boolean checkTableExists(Table table) throws Exception { + DatabaseMetaData meta = connection.getMetaData(); + String schema = table.getSchema(); + if (schema == null) { + schema = "public"; + } + ResultSet rs = meta.getTables(null, schema, table.getName(), null); + return rs.next(); + } + + public String getUserName() { + return userName; + } + + public PXFCloudberryContainer getContainer() { + return container; + } + + @Override + public void close() throws Exception { + if (statement != null) { + try { statement.close(); } catch (Exception ignored) {} + statement = null; + } + if 
(connection != null) { + try { connection.close(); } catch (Exception ignored) {} + connection = null; + } + } + + private String buildCopyParams(String delimiter, String nullChar, boolean csv) { + StringBuilder params = new StringBuilder(); + if (csv) { + params.append("CSV "); + } + if (delimiter != null) { + params.append("DELIMITER E'").append(stripEQuote(delimiter)).append("' "); + } + if (nullChar != null) { + params.append("NULL E'").append(stripEQuote(nullChar)).append("' "); + } + return params.toString().trim(); + } + + private static String stripEQuote(String value) { + if (value.startsWith("E'") && value.endsWith("'")) { + return value.substring(2, value.length() - 1); + } + if (value.startsWith("'") && value.endsWith("'")) { + return value.substring(1, value.length() - 1); + } + return value; + } + +} \ No newline at end of file diff --git a/automation/src/main/java/org/apache/cloudberry/pxf/automation/applications/PXFApplication.java b/automation/src/main/java/org/apache/cloudberry/pxf/automation/applications/PXFApplication.java new file mode 100644 index 000000000..ea67fb28f --- /dev/null +++ b/automation/src/main/java/org/apache/cloudberry/pxf/automation/applications/PXFApplication.java @@ -0,0 +1,105 @@ +package org.apache.cloudberry.pxf.automation.applications; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import org.apache.cloudberry.pxf.automation.testcontainers.PXFCloudberryContainer; +import org.testcontainers.containers.Container.ExecResult; + +import java.io.IOException; + +/** + * Manages PXF server configuration inside the container. + * Writes config files (jdbc-site.xml, s3-site.xml, etc.) and restarts the PXF process. + */ +public class PXFApplication { + + private static final String SCRIPTS_PREFIX = + "/home/gpadmin/workspace/cloudberry-pxf/automation/src/main/resources/testcontainers/pxf-cbdb/script"; + + private final PXFCloudberryContainer container; + + public PXFApplication(PXFCloudberryContainer container) { + this.container = container; + } + + public void configureJdbcServers() throws IOException, InterruptedException { + System.out.println("[PXFApplication] Configuring JDBC servers (database, db-session-params, db-hive)..."); + + String script = String.join("\n", + "set -e", + "source " + SCRIPTS_PREFIX + "/pxf-env.sh", + "PXF_BASE_SERVERS=${PXF_BASE}/servers", + "TEMPLATES_DIR=${PXF_HOME}/templates", + + "mkdir -p ${PXF_BASE_SERVERS}/database", + "cp ${TEMPLATES_DIR}/jdbc-site.xml ${PXF_BASE_SERVERS}/database/", + "sed -i 's|YOUR_DATABASE_JDBC_DRIVER_CLASS_NAME|org.postgresql.Driver|' ${PXF_BASE_SERVERS}/database/jdbc-site.xml", + "sed -i 's|YOUR_DATABASE_JDBC_URL|jdbc:postgresql://localhost:7000/pxfautomation|' ${PXF_BASE_SERVERS}/database/jdbc-site.xml", + "sed -i 's|YOUR_DATABASE_JDBC_USER||' ${PXF_BASE_SERVERS}/database/jdbc-site.xml", + "sed -i 's|YOUR_DATABASE_JDBC_PASSWORD||' 
${PXF_BASE_SERVERS}/database/jdbc-site.xml", + "cp ${PXF_BASE_SERVERS}/database/jdbc-site.xml ${PXF_BASE_SERVERS}/database/testuser-user.xml", + "sed -i 's|pxfautomation|template1|' ${PXF_BASE_SERVERS}/database/testuser-user.xml", + "cp /home/gpadmin/workspace/cloudberry-pxf/automation/src/test/resources/report.sql ${PXF_BASE_SERVERS}/database/", + + "mkdir -p ${PXF_BASE_SERVERS}/db-session-params", + "cp ${TEMPLATES_DIR}/jdbc-site.xml ${PXF_BASE_SERVERS}/db-session-params/", + "sed -i 's|YOUR_DATABASE_JDBC_DRIVER_CLASS_NAME|org.postgresql.Driver|' ${PXF_BASE_SERVERS}/db-session-params/jdbc-site.xml", + "sed -i 's|YOUR_DATABASE_JDBC_URL|jdbc:postgresql://localhost:7000/pxfautomation|' ${PXF_BASE_SERVERS}/db-session-params/jdbc-site.xml", + "sed -i 's|YOUR_DATABASE_JDBC_USER||' ${PXF_BASE_SERVERS}/db-session-params/jdbc-site.xml", + "sed -i 's|YOUR_DATABASE_JDBC_PASSWORD||' ${PXF_BASE_SERVERS}/db-session-params/jdbc-site.xml", + "sed -i 's||jdbc.session.property.client_min_messagesdebug1|' ${PXF_BASE_SERVERS}/db-session-params/jdbc-site.xml", + "sed -i 's||jdbc.session.property.default_statistics_target123|' ${PXF_BASE_SERVERS}/db-session-params/jdbc-site.xml", + + "mkdir -p ${PXF_BASE_SERVERS}/db-hive", + "cp ${TEMPLATES_DIR}/jdbc-site.xml ${PXF_BASE_SERVERS}/db-hive/", + "sed -i 's|YOUR_DATABASE_JDBC_DRIVER_CLASS_NAME|org.apache.hive.jdbc.HiveDriver|' ${PXF_BASE_SERVERS}/db-hive/jdbc-site.xml", + "sed -i 's|YOUR_DATABASE_JDBC_URL|jdbc:hive2://localhost:10000/default|' ${PXF_BASE_SERVERS}/db-hive/jdbc-site.xml", + "sed -i 's|YOUR_DATABASE_JDBC_USER||' ${PXF_BASE_SERVERS}/db-hive/jdbc-site.xml", + "sed -i 's|YOUR_DATABASE_JDBC_PASSWORD||' ${PXF_BASE_SERVERS}/db-hive/jdbc-site.xml", + "cp /home/gpadmin/workspace/cloudberry-pxf/automation/src/test/resources/hive-report.sql ${PXF_BASE_SERVERS}/db-hive/" + ); + + ExecResult result = container.execInContainer("bash", "-l", "-c", script); + if (result.getExitCode() != 0) { + throw new RuntimeException( + "JDBC server 
configuration failed (exit " + result.getExitCode() + "):\n" + + result.getStdout() + "\n" + result.getStderr()); + } + + restartPxf(); + + System.out.println("[PXFApplication] JDBC servers configured and PXF restarted"); + } + + public void restartPxf() throws IOException, InterruptedException { + String script = String.join("\n", + "set -e", + "source " + SCRIPTS_PREFIX + "/pxf-env.sh", + "$PXF_HOME/bin/pxf restart" + ); + ExecResult result = container.execInContainer("bash", "-l", "-c", script); + if (result.getExitCode() != 0) { + throw new RuntimeException( + "PXF restart failed (exit " + result.getExitCode() + "):\n" + + result.getStdout() + "\n" + result.getStderr()); + } + System.out.println("[PXFApplication] PXF restarted"); + } +} diff --git a/automation/src/main/java/org/apache/cloudberry/pxf/automation/applications/RegressApplication.java b/automation/src/main/java/org/apache/cloudberry/pxf/automation/applications/RegressApplication.java new file mode 100644 index 000000000..654bae0c3 --- /dev/null +++ b/automation/src/main/java/org/apache/cloudberry/pxf/automation/applications/RegressApplication.java @@ -0,0 +1,97 @@ +package org.apache.cloudberry.pxf.automation.applications; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import org.apache.cloudberry.pxf.automation.testcontainers.PXFCloudberryContainer; +import org.testcontainers.containers.Container.ExecResult; + +/** + * Runs {@code pxf_regress} SQL tests inside the TestContainers-managed container. + * Replaces the SSH-based {@code Regress} system object. + */ +public class RegressApplication { + + private static final String REGRESS_DIR = "/home/gpadmin/workspace/cloudberry-pxf/automation/pxf_regress"; + private static final String SQL_REPO_DIR = "/home/gpadmin/workspace/cloudberry-pxf/automation/sqlrepo"; + private static final String DB_NAME = "pxfautomation"; + /** Written by {@code pxf_regress} under each test directory when comparisons fail. */ + private static final String REGRESSION_DIFFS_FILE = "regression.diffs"; + + private final PXFCloudberryContainer container; + + public RegressApplication(PXFCloudberryContainer container) { + this.container = container; + } + + /** + * Runs a SQL test using {@code pxf_regress} inside the container. + * + * @param sqlTestPath relative path under {@code sqlrepo/}, e.g. 
{@code "features/jdbc/single_fragment"} + * @throws Exception if the test fails or the command errors out + */ + public void runSqlTest(String sqlTestPath) throws Exception { + System.out.println("[RegressApplication] Running SQL test: " + sqlTestPath); + + String command = String.join(" ", + "cd " + SQL_REPO_DIR + " &&", + "GPHOME=${GPHOME:-/usr/local/cloudberry-db}", + "PATH=\"${GPHOME}/bin:$PATH\"", + "PGHOST=localhost", + "PGPORT=7000", + "PGDATABASE=" + DB_NAME, + REGRESS_DIR + "/pxf_regress", + sqlTestPath); + + ExecResult result = container.execInContainer("bash", "-l", "-c", command); + String output = result.getStdout(); + if (!output.isEmpty()) { + System.out.println(output); + } + String errOutput = result.getStderr(); + if (errOutput != null && !errOutput.isEmpty()) { + System.err.println(errOutput); + } + + if (result.getExitCode() != 0) { + printPxfRegressDiffsToStdout(sqlTestPath); + throw new RuntimeException( + "pxf_regress FAILED for '" + sqlTestPath + "' (exit " + result.getExitCode() + "):\n" + output); + } + System.out.println("[RegressApplication] Test passed: " + sqlTestPath); + } + + /** + * Prints the aggregated diff file produced by {@code pxf_regress} (if present) to stdout. 
+ */ + private void printPxfRegressDiffsToStdout(String sqlTestPath) throws Exception { + String diffsPath = SQL_REPO_DIR + "/" + sqlTestPath + "/" + REGRESSION_DIFFS_FILE; + ExecResult cat = container.execInContainer("cat", diffsPath); + String diffText = cat.getStdout(); + if (cat.getExitCode() == 0 && diffText != null && !diffText.isEmpty()) { + System.out.println(); + System.out.println("===== pxf_regress " + REGRESSION_DIFFS_FILE + " ====="); + System.out.println(diffText); + return; + } + System.out.println(); + System.out.println("[RegressApplication] No readable " + REGRESSION_DIFFS_FILE + " at " + diffsPath + + " (cat exit " + cat.getExitCode() + ")"); + } +} \ No newline at end of file diff --git a/automation/src/main/java/org/apache/cloudberry/pxf/automation/testcontainers/ClasspathDockerContainerBuilder.java b/automation/src/main/java/org/apache/cloudberry/pxf/automation/testcontainers/ClasspathDockerContainerBuilder.java new file mode 100644 index 000000000..08a6e0cb4 --- /dev/null +++ b/automation/src/main/java/org/apache/cloudberry/pxf/automation/testcontainers/ClasspathDockerContainerBuilder.java @@ -0,0 +1,111 @@ +package org.apache.cloudberry.pxf.automation.testcontainers; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; + +/** + * + */ +public class ClasspathDockerContainerBuilder { + + private ClasspathDockerContainerBuilder() { + } + + /** + * Builds named Docker image from resources in the classpath. + * + * @param imageName - name of the image to build + * @param resourceDirectory - resource path to Dockerfile's folder + * @param resources - list of files to copy (relative to resourceDirectory) + */ + public static void ensureImageExists(String imageName, String resourceDirectory, String[] resources) { + if (imageExists(imageName)) { + System.out.println("=== Image '" + imageName + "' already exists locally, skip build ==="); + return; + } + try { + Path contextDir = Files.createTempDirectory("tc-docker-context-"); + for (String resource : resources) { + Path target = contextDir.resolve(resource); + Files.createDirectories(target.getParent()); + try (InputStream is = ClasspathDockerContainerBuilder.class + .getClassLoader() + .getResourceAsStream(resourceDirectory + "/" + resource)) { + if (is == null) { + throw new IllegalStateException("Classpath resource not found: " + resource); + } + Files.copy(is, target, StandardCopyOption.REPLACE_EXISTING); + } + } + dockerBuild(contextDir.toFile(), imageName); + } catch (IOException e) { + throw new RuntimeException("Failed to prepare Docker build context from classpath", e); + } + } + + private static boolean imageExists(String imageName) { + try { + Process process = new ProcessBuilder("docker", "image", "inspect", imageName) + .redirectErrorStream(true) + .start(); + int exitCode = process.waitFor(); + return exitCode == 0; + } catch 
(IOException | InterruptedException e) { + throw new RuntimeException("Failed to check Docker image existence: " + imageName, e); + } + } + + private static void dockerBuild(File contextDir, String tag) { + System.out.println("=== docker build -t " + tag + " " + contextDir + " ==="); + try { + ProcessBuilder pb = new ProcessBuilder( + "docker", "build", + "-t", tag, ".") + .directory(contextDir) + .redirectErrorStream(true); + Process process = pb.start(); + try (BufferedReader reader = new BufferedReader( + new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) { + String line; + while ((line = reader.readLine()) != null) { + System.out.println(line); + } + } + int exitCode = process.waitFor(); + if (exitCode != 0) { + throw new RuntimeException( + "docker build failed for '" + tag + "' (exit " + exitCode + "). " + + "Context dir: " + contextDir.getAbsolutePath()); + } + System.out.println("=== Image '" + tag + "' built successfully ==="); + } catch (IOException | InterruptedException e) { + throw new RuntimeException("Failed to build Docker image '" + tag + "'", e); + } + } +} diff --git a/automation/src/main/java/org/apache/cloudberry/pxf/automation/testcontainers/ClickHouseContainer.java b/automation/src/main/java/org/apache/cloudberry/pxf/automation/testcontainers/ClickHouseContainer.java new file mode 100644 index 000000000..7501cf791 --- /dev/null +++ b/automation/src/main/java/org/apache/cloudberry/pxf/automation/testcontainers/ClickHouseContainer.java @@ -0,0 +1,83 @@ +package org.apache.cloudberry.pxf.automation.testcontainers; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.Network; +import org.testcontainers.containers.wait.strategy.Wait; +import org.testcontainers.utility.DockerImageName; + +/** + * TestContainers wrapper around ClickHouse server. + * + * The container joins a shared Docker network with alias `clickhouse`, + * so PXF inside the Cloudberry container can reach it at `clickhouse:8123` (HTTP). + * + */ +public class ClickHouseContainer extends GenericContainer { + + public static final int HTTP_PORT = 8123; + + private static final String DEFAULT_IMAGE = "clickhouse/clickhouse-server"; + private static final String NETWORK_ALIAS_PREFIX = "clickhouse-"; + + /** + * Credentials for the test container. ClickHouse 24+ restricts network access for `default` + * until a password is set; `CLICKHOUSE_PASSWORD` configures the server and JDBC must match. 
+ */ + public static final String CLICKHOUSE_USER = "default"; + public static final String CLICKHOUSE_PASSWORD = "pxf-test"; + + private final String networkAlias; + + public ClickHouseContainer(String tag, Network network) { + super(DockerImageName.parse(DEFAULT_IMAGE + ":" + tag)); + + // generate unique DNS name for this Clickhouse container: + this.networkAlias = NETWORK_ALIAS_PREFIX + tag.replaceAll("[-.]", ""); + + super.withNetwork(network) + .withNetworkAliases(this.networkAlias) + .withExposedPorts(HTTP_PORT) + .withEnv("CLICKHOUSE_USER", CLICKHOUSE_USER) + .withEnv("CLICKHOUSE_PASSWORD", CLICKHOUSE_PASSWORD) + .waitingFor(Wait.forHttp("/ping").forPort(HTTP_PORT)); + } + + /** Embedded DNS name of this container on the Testcontainers network (for JDBC from other containers). */ + public String getNetworkAlias() { + return networkAlias; + } + + /** JDBC URL over HTTP, reachable from the host (mapped `HTTP_PORT`). */ + public String getJdbcUrl() { + return "jdbc:clickhouse://localhost:" + getMappedPort(HTTP_PORT) + "/default"; + } + + /** JDBC URL over HTTP for PXF / other containers on the same Docker network. */ + public String getInternalJdbcUrl() { + return "jdbc:clickhouse://" + networkAlias + ":" + HTTP_PORT + "/default"; + } + + public int getHttpMappedPort() { + return getMappedPort(HTTP_PORT); + } + +} diff --git a/automation/src/main/java/org/apache/cloudberry/pxf/automation/testcontainers/PXFCloudberryContainer.java b/automation/src/main/java/org/apache/cloudberry/pxf/automation/testcontainers/PXFCloudberryContainer.java new file mode 100644 index 000000000..24392e3d2 --- /dev/null +++ b/automation/src/main/java/org/apache/cloudberry/pxf/automation/testcontainers/PXFCloudberryContainer.java @@ -0,0 +1,227 @@ +package org.apache.cloudberry.pxf.automation.testcontainers; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import com.github.dockerjava.api.DockerClient; +import com.github.dockerjava.api.async.ResultCallback; +import com.github.dockerjava.api.command.ExecCreateCmdResponse; +import com.github.dockerjava.api.model.Frame; +import org.testcontainers.DockerClientFactory; +import org.testcontainers.containers.BindMode; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.Network; +import org.testcontainers.containers.wait.strategy.AbstractWaitStrategy; +import org.testcontainers.containers.wait.strategy.Wait; +import org.testcontainers.utility.DockerImageName; +import org.testcontainers.utility.MountableFile; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.LinkOption; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; + +/** + * PXF + Cloudberry colocated testcontainer. + * + * Cloudberry is built during image creation. + * Demo cluster and PXF are initialised at runtime via {@code entrypoint.sh}. + * + * Use {@link #getInstance()} to get a singleton that is started once per + * automation JVM. The container shares a Docker {@link Network} with other + * test containers so they can communicate by hostname. 
+ */ +public class PXFCloudberryContainer extends GenericContainer { + + private static final String IMAGE_NAME = "pxf/cbdb-testcontainer:1"; + public static final int CLOUDBERRY_PORT = 7000; + public static final int PXF_PORT = 5888; + public static final String CLOUDBERRY_USER = "gpadmin"; + + private static final String CONTAINER_GRADLE_RO_CACHE = "/home/gpadmin/.gradle-host-cache"; + private static final String CONTAINER_REPO_DIR = "/home/gpadmin/workspace/cloudberry-pxf"; + private static final String CONTAINER_SCRIPT_DIR = + CONTAINER_REPO_DIR + "/automation/src/main/resources/testcontainers/pxf-cbdb/script"; + + /* files required by `server`/`fdw`/`external-table` Makefiles. */ + private static final String[] HOST_ROOT_FILES = {"version", "api_version", "common.mk"}; + + private static final Network network = Network.newNetwork(); + private static PXFCloudberryContainer instance; + + private PXFCloudberryContainer(String repoPath) { + super(DockerImageName.parse(IMAGE_NAME)); + Path root = Paths.get(repoPath).toAbsolutePath().normalize(); + + withNetwork(network) + .withNetworkAliases("mdw") + .withExposedPorts(CLOUDBERRY_PORT, PXF_PORT) + .withCommand("tail", "-f", "/dev/null") + .withCreateContainerCmdModifier(cmd -> cmd.withHostName("mdw")) + .waitingFor(new AbstractWaitStrategy() { + @Override + protected void waitUntilReady() { + // No-op: we shouldn't wait for processes to run here + // will start applications with entrypoint.sh + } + }) + .withStartupTimeout(Duration.ofMinutes(25)) + .withPrivilegedMode(true) + // Copy directories to the container at runtime: + .withCopyToContainer( + MountableFile.forHostPath(root.resolve("external-table").toString()), + CONTAINER_REPO_DIR + "/external-table") + .withCopyToContainer( + MountableFile.forHostPath(root.resolve("fdw").toString()), + CONTAINER_REPO_DIR + "/fdw") + .withCopyToContainer( + MountableFile.forHostPath(root.resolve("server").toString()), + CONTAINER_REPO_DIR + "/server") + .withCopyToContainer( 
+ MountableFile.forHostPath(root.resolve("automation").toString()), + CONTAINER_REPO_DIR + "/automation"); + // Copy required files to the container at runtime: + for (String name : HOST_ROOT_FILES) { + withCopyToContainer( + MountableFile.forHostPath(root.resolve(name).toString()), + CONTAINER_REPO_DIR + "/" + name); + } + + // mount /home/username/.gradle/caches to the container to speed up build + Path hostGradleCache = Paths.get(System.getProperty("user.home"), ".gradle", "caches"); + boolean hasHostGradleCache = Files.exists(hostGradleCache, LinkOption.NOFOLLOW_LINKS); + if (hasHostGradleCache) { + withFileSystemBind(hostGradleCache.toString(), CONTAINER_GRADLE_RO_CACHE, BindMode.READ_ONLY); + withEnv("GRADLE_RO_DEP_CACHE", CONTAINER_GRADLE_RO_CACHE); + } + } + + /** + * Returns a singleton container, starting it and running the environment + * setup on first access. Thread-safe. + */ + public static synchronized PXFCloudberryContainer getInstance() { + if (instance == null) { + String repo = resolveProperty("pxf.test.repo.path", findRepoPath()); + + ClasspathDockerContainerBuilder.ensureImageExists( + IMAGE_NAME, + "testcontainers/pxf-cbdb/", + new String[]{ + "Dockerfile", + "script/build_cloudberrry.sh" + }); + + instance = new PXFCloudberryContainer(repo); + instance.start(); + Runtime.getRuntime().addShutdownHook(new Thread(instance::stop)); + + try { + instance.runEntrypoint(); + instance.waitingFor(Wait.forListeningPorts(CLOUDBERRY_PORT, PXF_PORT)); + } catch (Exception e) { + instance.stop(); + instance = null; + throw new RuntimeException("Failed to initialize PXF container", e); + } + } + return instance; + } + + + private void runEntrypoint() throws IOException, InterruptedException { + logger().info("Running entrypoint.sh inside container (this takes several minutes)..."); + int exitCode = execInContainerWithLiveOutput( + "bash", "-l", "-c", CONTAINER_SCRIPT_DIR + "/entrypoint.sh 2>&1"); + if (exitCode != 0) { + throw new 
RuntimeException("entrypoint.sh failed (exit " + exitCode + ")"); + } + logger().info("entrypoint.sh completed successfully"); + } + + private int execInContainerWithLiveOutput(String... command) throws InterruptedException { + DockerClient client = DockerClientFactory.instance().client(); + ExecCreateCmdResponse exec = client.execCreateCmd(getContainerId()) + .withCmd(command) + .withAttachStdout(true) + .withAttachStderr(true) + .exec(); + + client.execStartCmd(exec.getId()) + .exec(new ResultCallback.Adapter() { + @Override + public void onNext(Frame frame) { + System.out.print(new String(frame.getPayload(), StandardCharsets.UTF_8)); + } + }) + .awaitCompletion(); + + Long exitCode = client.inspectExecCmd(exec.getId()).exec().getExitCodeLong(); + return exitCode != null ? exitCode.intValue() : -1; + } + + private static String resolveProperty(String key, String fallback) { + String value = System.getProperty(key); + return (value != null && !value.isEmpty()) ? value : fallback; + } + + private static String findRepoPath() { + File dir = new File(System.getProperty("user.dir")); + for (int i = 0; i < 5; i++) { + if (new File(dir, "automation/pom.xml").exists()) { + return dir.getAbsolutePath(); + } + dir = dir.getParentFile(); + if (dir == null) + break; + } + throw new IllegalStateException( + "Cannot auto-detect cloudberry-pxf repo root. 
Set -Dpxf.test.repo.path=..."); + } + + + public Network getSharedNetwork() { + return network; + } + + public int getCloudberryMappedPort() { + return getMappedPort(CLOUDBERRY_PORT); + } + + public int getCloudberryInternalPort() { + return CLOUDBERRY_PORT; + } + + public String getCloudberryUser() { + return CLOUDBERRY_USER; + } + + public String getPxfInternalHost() { + return "localhost"; + } + + public int getPxfInternalPort() { + return PXF_PORT; + } + +} diff --git a/automation/src/main/resources/testcontainers/pxf-cbdb/Dockerfile b/automation/src/main/resources/testcontainers/pxf-cbdb/Dockerfile new file mode 100644 index 000000000..ae91e6ab0 --- /dev/null +++ b/automation/src/main/resources/testcontainers/pxf-cbdb/Dockerfile @@ -0,0 +1,73 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# -------------------------------------------------------------------- +FROM apache/incubator-cloudberry:cbdb-build-ubuntu22.04-latest + +ENV DEBIAN_FRONTEND=noninteractive + +# bake in java 11 into image +RUN sudo apt-get update && \ + sudo apt-get install -y --no-install-recommends \ + openjdk-11-jdk-headless \ + curl ca-certificates git unzip maven make \ + wget lsb-release locales openssh-server iproute2 sudo && \ + sudo rm -rf /var/lib/apt/lists/* + +# Install Gradle and warm wrapper cache (it will download GRADLE_VERSION again) +# (version must match server/gradle/wrapper/gradle-wrapper.properties) +ARG GRADLE_VERSION=6.8.2 +RUN curl -fsSL "https://services.gradle.org/distributions/gradle-${GRADLE_VERSION}-bin.zip" \ + -o /tmp/gradle.zip && \ + sudo unzip -q /tmp/gradle.zip -d /opt && \ + sudo ln -s "/opt/gradle-${GRADLE_VERSION}/bin/gradle" /usr/local/bin/gradle && \ + rm /tmp/gradle.zip +RUN cd /tmp && gradle init && gradle wrapper --gradle-version ${GRADLE_VERSION} && ./gradlew javaToolchains +RUN rm -rf /tmp/gradle /tmp/gradlew /tmp/gradlew.bat /tmp/.gradle +ENV GRADLE_HOME="/opt/gradle-${GRADLE_VERSION}" + +# Go toolchain for building pxf_regress inside the container +ARG GO_VERSION=1.21.13 +RUN ARCH=$(uname -m) && \ + case "$ARCH" in \ + x86_64) GARCH=amd64 ;; \ + aarch64|arm64) GARCH=arm64 ;; \ + *) echo "unsupported arch: $ARCH" >&2; exit 1 ;; \ + esac && \ + curl -fsSL "https://go.dev/dl/go${GO_VERSION}.linux-${GARCH}.tar.gz" -o /tmp/go.tgz && \ + sudo rm -rf /usr/local/go && \ + sudo tar -C /usr/local -xzf /tmp/go.tgz && \ + rm /tmp/go.tgz +ENV PATH="/usr/local/go/bin:${PATH}" + +# Env vars that scripts expect +ENV GPHD_ROOT=/home/gpadmin/workspace/singlecluster +ENV GPHOME=/usr/local/cloudberry-db + +RUN sudo mkdir -p /home/gpadmin/workspace && \ + sudo chown -R gpadmin:gpadmin /home/gpadmin/workspace + +# Clone Cloudberry source (parametrized via build args) +ARG CLOUDBERRY_REPO=https://github.com/apache/cloudberry.git +ARG 
CLOUDBERRY_BRANCH=main +RUN git clone --depth 1 -b ${CLOUDBERRY_BRANCH} \ + ${CLOUDBERRY_REPO} /home/gpadmin/workspace/cloudberry + +# Copy and run the build script (demo cluster is created at runtime) +COPY script/build_cloudberrry.sh /tmp/build_cloudberrry.sh +RUN bash /tmp/build_cloudberrry.sh \ No newline at end of file diff --git a/automation/src/main/resources/testcontainers/pxf-cbdb/script/build_cloudberrry.sh b/automation/src/main/resources/testcontainers/pxf-cbdb/script/build_cloudberrry.sh new file mode 100755 index 000000000..a5b867757 --- /dev/null +++ b/automation/src/main/resources/testcontainers/pxf-cbdb/script/build_cloudberrry.sh @@ -0,0 +1,142 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# -------------------------------------------------------------------- +# Install sudo & git +sudo apt update && sudo apt install -y sudo git + +# Required configuration +## Add Cloudberry environment setup to .bashrc +echo -e '\n# Add Cloudberry entries +if [ -f /usr/local/cloudberry-db/cloudberry-env.sh ]; then + source /usr/local/cloudberry-db/cloudberry-env.sh +fi +## US English with UTF-8 character encoding +export LANG=en_US.UTF-8 +' >> /home/gpadmin/.bashrc +## Set up SSH for passwordless access +mkdir -p /home/gpadmin/.ssh +if [ ! -f /home/gpadmin/.ssh/id_rsa ]; then + ssh-keygen -t rsa -b 2048 -C 'apache-cloudberry-dev' -f /home/gpadmin/.ssh/id_rsa -N "" +fi +cat /home/gpadmin/.ssh/id_rsa.pub >> /home/gpadmin/.ssh/authorized_keys +## Set proper SSH directory permissions +chmod 700 /home/gpadmin/.ssh +chmod 600 /home/gpadmin/.ssh/authorized_keys +chmod 644 /home/gpadmin/.ssh/id_rsa.pub + +# Configure system settings +sudo tee /etc/security/limits.d/90-db-limits.conf << 'EOF' +## Core dump file size limits for gpadmin +gpadmin soft core unlimited +gpadmin hard core unlimited +## Open file limits for gpadmin +gpadmin soft nofile 524288 +gpadmin hard nofile 524288 +## Process limits for gpadmin +gpadmin soft nproc 131072 +gpadmin hard nproc 131072 +EOF + +# Verify resource limits +ulimit -a + +# Install basic system packages +sudo apt update +sudo apt install -y bison \ + bzip2 \ + cmake \ + curl \ + flex \ + gcc \ + g++ \ + iproute2 \ + iputils-ping \ + language-pack-en \ + locales \ + libapr1-dev \ + libbz2-dev \ + libcurl4-gnutls-dev \ + libevent-dev \ + libkrb5-dev \ + libipc-run-perl \ + libldap2-dev \ + libpam0g-dev \ + libprotobuf-dev \ + libreadline-dev \ + libssl-dev \ + libuv1-dev \ + liblz4-dev \ + libxerces-c-dev \ + libxml2-dev \ + libyaml-dev \ + libzstd-dev \ + libperl-dev \ + make \ + pkg-config \ + protobuf-compiler \ + python3-dev \ + python3-pip \ + python3-setuptools \ + rsync \ + libsnappy-dev + +# Continue as gpadmin user + + +# 
Prepare the build environment for Apache Cloudberry +sudo rm -rf /usr/local/cloudberry-db +sudo chmod a+w /usr/local +mkdir -p /usr/local/cloudberry-db +sudo chown -R gpadmin:gpadmin /usr/local/cloudberry-db + +# Run configure +cd ~/workspace/cloudberry +./configure --prefix=/usr/local/cloudberry-db \ + --disable-external-fts \ + --enable-gpcloud \ + --enable-ic-proxy \ + --enable-mapreduce \ + --enable-orafce \ + --enable-orca \ + --disable-pax \ + --disable-pxf \ + --enable-tap-tests \ + --with-gssapi \ + --with-ldap \ + --with-libxml \ + --with-lz4 \ + --with-pam \ + --with-perl \ + --with-pgport=5432 \ + --with-python \ + --with-pythonsrc-ext \ + --with-ssl=openssl \ + --with-uuid=e2fs \ + --with-includes=/usr/include/xercesc + +# Build and install Cloudberry and its contrib modules +make -j$(nproc) -C ~/workspace/cloudberry +make -j$(nproc) -C ~/workspace/cloudberry/contrib +make install -C ~/workspace/cloudberry +make install -C ~/workspace/cloudberry/contrib + +# Verify the installation +/usr/local/cloudberry-db/bin/postgres --gp-version +/usr/local/cloudberry-db/bin/postgres --version +ldd /usr/local/cloudberry-db/bin/postgres diff --git a/automation/src/main/resources/testcontainers/pxf-cbdb/script/build_pxf.sh b/automation/src/main/resources/testcontainers/pxf-cbdb/script/build_pxf.sh new file mode 100755 index 000000000..750460d08 --- /dev/null +++ b/automation/src/main/resources/testcontainers/pxf-cbdb/script/build_pxf.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. 
You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +set -euo pipefail + +source /usr/local/cloudberry-db/cloudberry-env.sh + +case "$(uname -m)" in + aarch64|arm64) JAVA_HOME=${JAVA_HOME:-/usr/lib/jvm/java-11-openjdk-arm64} ;; + x86_64|amd64) JAVA_HOME=${JAVA_HOME:-/usr/lib/jvm/java-11-openjdk-amd64} ;; + *) JAVA_HOME=${JAVA_HOME:-/usr/lib/jvm/java-11-openjdk-amd64} ;; +esac +export GPHOME=/usr/local/cloudberry-db +export PATH=$GPHOME/bin:$JAVA_HOME/bin:$PATH + +# Ensure source/build tree is owned by gpadmin (build runs as gpadmin) +sudo chown -R gpadmin:gpadmin /home/gpadmin/workspace/cloudberry-pxf +sudo chown -R gpadmin:gpadmin /usr/local/cloudberry-db + +export PXF_HOME=/usr/local/pxf +sudo mkdir -p "$PXF_HOME" +sudo chmod -R a+rwX "$PXF_HOME" + +# Build and Install PXF +cd /home/gpadmin/workspace/cloudberry-pxf +make -C external-table install +make -C fdw install +make -C server install-server +make -C server install-jdbc-drivers + +# Set up PXF environment +export PXF_BASE=$HOME/pxf-base +export PATH=$PXF_HOME/bin:$PATH +rm -rf "$PXF_BASE" +mkdir -p "$PXF_BASE" + +# Initialize PXF +pxf prepare +cp $PXF_HOME/lib/*.jar $PXF_BASE/lib/ +pxf start + +# Verify PXF is running +pxf status diff --git a/automation/src/main/resources/testcontainers/pxf-cbdb/script/entrypoint.sh b/automation/src/main/resources/testcontainers/pxf-cbdb/script/entrypoint.sh new file mode 100755 index 000000000..8dec2e822 --- /dev/null +++ b/automation/src/main/resources/testcontainers/pxf-cbdb/script/entrypoint.sh @@ -0,0 +1,256 @@ +#!/bin/bash +# 
-------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +set -euo pipefail +set -x + +log() { echo "[entrypoint][$(date '+%F %T')] $*"; } +die() { log "ERROR $*"; exit 1; } + +ROOT_DIR=/home/gpadmin/workspace +REPO_DIR=${ROOT_DIR}/cloudberry-pxf +PXF_SCRIPTS=${REPO_DIR}/automation/src/main/resources/testcontainers/pxf-cbdb/script +source "${PXF_SCRIPTS}/utils.sh" + +detect_java_paths() { + case "$(uname -m)" in + aarch64|arm64) JAVA_BUILD=/usr/lib/jvm/java-11-openjdk-arm64 ;; + x86_64|amd64) JAVA_BUILD=/usr/lib/jvm/java-11-openjdk-amd64 ;; + *) JAVA_BUILD=/usr/lib/jvm/java-11-openjdk-amd64 ;; + esac + export JAVA_BUILD +} + +setup_locale_and_packages() { + log "install locales" + sudo locale-gen en_US.UTF-8 ru_RU.CP1251 ru_RU.UTF-8 + sudo update-locale LANG=en_US.UTF-8 + sudo localedef -c -i ru_RU -f CP1251 ru_RU.CP1251 || true + export LANG=en_US.UTF-8 LANGUAGE=en_US:en LC_ALL=en_US.UTF-8 +} + +setup_ssh() { + log "configure ssh" + sudo ssh-keygen -A + sudo bash -c 'echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config' + sudo mkdir -p /etc/ssh/sshd_config.d + sudo bash -c 'cat 
>/etc/ssh/sshd_config.d/pxf-automation.conf </dev/null + echo "root ALL=(ALL) NOPASSWD: ALL" | sudo tee -a /etc/sudoers >/dev/null + + mkdir -p /home/gpadmin/.ssh + sudo chown -R gpadmin:gpadmin /home/gpadmin/.ssh + if [ ! -f /home/gpadmin/.ssh/id_rsa ]; then + sudo -u gpadmin ssh-keygen -q -t rsa -b 4096 -m PEM -C gpadmin -f /home/gpadmin/.ssh/id_rsa -N "" + fi + sudo -u gpadmin bash -lc 'cat /home/gpadmin/.ssh/id_rsa.pub >> /home/gpadmin/.ssh/authorized_keys' + sudo -u gpadmin chmod 0600 /home/gpadmin/.ssh/authorized_keys + ssh-keyscan -t rsa mdw cdw localhost 2>/dev/null > /home/gpadmin/.ssh/known_hosts || true + sudo rm -rf /run/nologin + sudo mkdir -p /var/run/sshd && sudo chmod 0755 /var/run/sshd + sudo /usr/sbin/sshd || die "Failed to start sshd" +} + +relax_pg_hba() { + local pg_hba=/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1/pg_hba.conf + if [ -f "${pg_hba}" ] && ! grep -q "127.0.0.1/32 trust" "${pg_hba}"; then + cat >> "${pg_hba}" <<'EOF' +host all all ::1/128 trust +host all all 0.0.0.0/0 trust +EOF + source /usr/local/cloudberry-db/cloudberry-env.sh >/dev/null 2>&1 || true + GPPORT=${GPPORT:-7000} + COORDINATOR_DATA_DIRECTORY=/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 + gpstop -u || true + fi +} + +setup_cloudberry() { + log "cleanup stale gpdemo data and PG locks" + rm -rf /home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs + rm -f /tmp/.s.PGSQL.700* +} + +create_demo_cluster() { + log "set up Cloudberry demo cluster" + source /usr/local/cloudberry-db/cloudberry-env.sh + make create-demo-cluster -C ~/workspace/cloudberry + source ~/workspace/cloudberry/gpAux/gpdemo/gpdemo-env.sh + psql -P pager=off template1 -c 'SELECT * from gp_segment_configuration' + psql template1 -c 'SELECT version()' +} + +build_pxf() { + log "build PXF" + "${PXF_SCRIPTS}/build_pxf.sh" +} + +# pxf_regress is copied from the host via Testcontainers; the binary may be macOS or wrong CPU. 
+# Rebuild here so RegressApplication runs a Linux executable matching the container arch. +build_pxf_regress() { + log "build pxf_regress (linux)" + export PATH="/usr/local/go/bin:${PATH}" + make -C "${REPO_DIR}/automation/pxf_regress" clean pxf_regress +} + +configure_pxf() { + log "configure PXF" + source "${PXF_SCRIPTS}/pxf-env.sh" + export PATH="$PXF_HOME/bin:$PATH" + export PXF_JVM_OPTS="-Xmx512m -Xms256m" + export PXF_HOST=localhost + echo "JAVA_HOME=${JAVA_BUILD}" >> "$PXF_BASE/conf/pxf-env.sh" + sed -i 's/# server.address=localhost/server.address=0.0.0.0/' "$PXF_BASE/conf/pxf-application.properties" + echo -e "\npxf.profile.dynamic.regex=test:.*" >> "$PXF_BASE/conf/pxf-application.properties" + cp -v "$PXF_HOME"/templates/{hdfs,mapred,yarn,core,hbase,hive}-site.xml "$PXF_BASE/servers/default" + for server_dir in "$PXF_BASE/servers/default" "$PXF_BASE/servers/default-no-impersonation"; do + if [ ! -d "$server_dir" ]; then + cp -r "$PXF_BASE/servers/default" "$server_dir" + fi + if [ ! -f "$server_dir/pxf-site.xml" ]; then + cat > "$server_dir/pxf-site.xml" <<'XML' + + + +XML + fi + done + if ! 
grep -q "pxf.service.user.name" "$PXF_BASE/servers/default-no-impersonation/pxf-site.xml"; then + sed -i 's## \n pxf.service.user.name\n foobar\n \n \n pxf.service.user.impersonation\n false\n \n#' "$PXF_BASE/servers/default-no-impersonation/pxf-site.xml" + fi + + # Configure pxf-profiles.xml for Parquet and test profiles + cat > "$PXF_BASE/conf/pxf-profiles.xml" <<'EOF' + + + + pxf:parquet + Profile for reading and writing Parquet files + + org.apache.cloudberry.pxf.plugins.hdfs.HdfsDataFragmenter + org.apache.cloudberry.pxf.plugins.hdfs.ParquetFileAccessor + org.apache.cloudberry.pxf.plugins.hdfs.ParquetResolver + + + + test:text + Test profile for text files + + org.apache.cloudberry.pxf.plugins.hdfs.HdfsDataFragmenter + org.apache.cloudberry.pxf.plugins.hdfs.LineBreakAccessor + org.apache.cloudberry.pxf.plugins.hdfs.StringPassResolver + + + +EOF + + cat > "$PXF_HOME/conf/pxf-profiles.xml" <<'EOF' + + + + pxf:parquet + Profile for reading and writing Parquet files + + org.apache.cloudberry.pxf.plugins.hdfs.HdfsDataFragmenter + org.apache.cloudberry.pxf.plugins.hdfs.ParquetFileAccessor + org.apache.cloudberry.pxf.plugins.hdfs.ParquetResolver + + + + test:text + Test profile for text files + + org.apache.cloudberry.pxf.plugins.hdfs.HdfsDataFragmenter + org.apache.cloudberry.pxf.plugins.hdfs.LineBreakAccessor + org.apache.cloudberry.pxf.plugins.hdfs.StringPassResolver + + + +EOF + + # Configure S3 settings + mkdir -p "$PXF_BASE/servers/s3" "$PXF_HOME/servers/s3" + + for s3_site in "$PXF_BASE/servers/s3/s3-site.xml" "$PXF_BASE/servers/default/s3-site.xml" "$PXF_HOME/servers/s3/s3-site.xml"; do + mkdir -p "$(dirname "$s3_site")" + cat > "$s3_site" <<'EOF' + + + + fs.s3a.endpoint + http://localhost:9000 + + + fs.s3a.access.key + admin + + + fs.s3a.secret.key + password + + + fs.s3a.path.style.access + true + + + fs.s3a.connection.ssl.enabled + false + + + fs.s3a.impl + org.apache.hadoop.fs.s3a.S3AFileSystem + + + fs.s3a.aws.credentials.provider + 
org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider + + +EOF + done + mkdir -p /home/gpadmin/.aws/ + cat > "/home/gpadmin/.aws/credentials" <<'EOF' +[default] +aws_access_key_id = admin +aws_secret_access_key = password +EOF + +} + +main() { + detect_java_paths + setup_locale_and_packages + setup_ssh + setup_cloudberry + create_demo_cluster + relax_pg_hba + build_pxf + build_pxf_regress + configure_pxf + health_check + log "entrypoint finished; environment ready for tests" +} + +main "$@" diff --git a/automation/src/main/resources/testcontainers/pxf-cbdb/script/pxf-env.sh b/automation/src/main/resources/testcontainers/pxf-cbdb/script/pxf-env.sh new file mode 100755 index 000000000..493063993 --- /dev/null +++ b/automation/src/main/resources/testcontainers/pxf-cbdb/script/pxf-env.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# -------------------------------------------------------------------- + +# Centralized environment for Cloudberry + PXF + Hadoop stack + +# -------------------------------------------------------------------- +# Architecture-aware Java selections +# -------------------------------------------------------------------- +case "$(uname -m)" in + aarch64|arm64) + JAVA_BUILD=${JAVA_BUILD:-/usr/lib/jvm/java-11-openjdk-arm64} + ;; + x86_64|amd64) + JAVA_BUILD=${JAVA_BUILD:-/usr/lib/jvm/java-11-openjdk-amd64} + ;; + *) + JAVA_BUILD=${JAVA_BUILD:-/usr/lib/jvm/java-11-openjdk} + ;; +esac + +# -------------------------------------------------------------------- +# Core paths +# -------------------------------------------------------------------- +export GPHOME=${GPHOME:-/usr/local/cloudberry-db} +export PXF_HOME=${PXF_HOME:-/usr/local/pxf} +export PXF_BASE=${PXF_BASE:-/home/gpadmin/pxf-base} +export GPHD_ROOT=${GPHD_ROOT:-/home/gpadmin/workspace/singlecluster} +export PATH="$GPHD_ROOT/bin:$JAVA_BUILD/bin:/usr/local/go/bin:$GPHOME/bin:$PXF_HOME/bin:$PATH" +export COMMON_JAVA_OPTS=${COMMON_JAVA_OPTS:-} + +# -------------------------------------------------------------------- +# Database defaults +# -------------------------------------------------------------------- +export PGHOST=${PGHOST:-localhost} +export PGPORT=${PGPORT:-7000} +export COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY:-/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1} +# set cloudberry timezone utc +export PGTZ=UTC + +# -------------------------------------------------------------------- +# Minio defaults +# -------------------------------------------------------------------- +export AWS_ACCESS_KEY_ID=admin +export AWS_SECRET_ACCESS_KEY=password +export PROTOCOL=minio +export ACCESS_KEY_ID=admin +export SECRET_ACCESS_KEY=password + +# -------------------------------------------------------------------- +# PXF defaults +# 
-------------------------------------------------------------------- +export PXF_JVM_OPTS=${PXF_JVM_OPTS:-"-Xmx512m -Xms256m"} +export PXF_HOST=${PXF_HOST:-localhost} + +# Source Cloudberry env and demo cluster if present +[ -f "$GPHOME/cloudberry-env.sh" ] && source "$GPHOME/cloudberry-env.sh" +[ -f "/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/gpdemo-env.sh" ] && source /home/gpadmin/workspace/cloudberry/gpAux/gpdemo/gpdemo-env.sh + +echo "[pxf-env] loaded (JAVA_BUILD=${JAVA_BUILD})" diff --git a/automation/src/main/resources/testcontainers/pxf-cbdb/script/utils.sh b/automation/src/main/resources/testcontainers/pxf-cbdb/script/utils.sh new file mode 100755 index 000000000..f2bec8811 --- /dev/null +++ b/automation/src/main/resources/testcontainers/pxf-cbdb/script/utils.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# -------------------------------------------------------------------- + +# Shared health-check helpers for entrypoint and run_tests +set -euo pipefail + +# Fallback log/die in case caller didn't define them +log() { echo "[utils][$(date '+%F %T')] $*"; } +die() { log "ERROR $*"; exit 1; } + +wait_port() { + local host="$1" port="$2" retries="${3:-10}" sleep_sec="${4:-2}" + local i + for i in $(seq 1 "${retries}"); do + if (echo >/dev/tcp/"${host}"/"${port}") >/dev/null 2>&1; then + return 0 + fi + sleep "${sleep_sec}" + done + return 1 +} + +check_pxf() { + if ! curl -sf http://localhost:5888/actuator/health >/dev/null 2>&1; then + die "PXF actuator health endpoint not responding" + fi +} + +check_cloudberry() { + # shellcheck disable=SC1091 + source /usr/local/cloudberry-db/cloudberry-env.sh >/dev/null 2>&1 || true + local port="${PGPORT:-7000}" + if ! psql -p "${port}" -d postgres -tAc "SELECT 1" >/dev/null 2>&1; then + die "Cloudberry is not responding on port ${port}" + fi +} + +health_check() { + log "sanity check PXF and Cloudberry" + check_pxf + check_cloudberry + log "all components healthy: PXF, Cloudberry" +} diff --git a/automation/src/test/java/org/apache/cloudberry/pxf/automation/AbstractTestcontainersTest.java b/automation/src/test/java/org/apache/cloudberry/pxf/automation/AbstractTestcontainersTest.java new file mode 100644 index 000000000..0b5d48df4 --- /dev/null +++ b/automation/src/test/java/org/apache/cloudberry/pxf/automation/AbstractTestcontainersTest.java @@ -0,0 +1,137 @@ +package org.apache.cloudberry.pxf.automation; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import listeners.CustomAutomationLogger; +import listeners.FDWSkipTestAnalyzer; +import org.apache.cloudberry.pxf.automation.applications.CloudberryApplication; +import org.apache.cloudberry.pxf.automation.applications.PXFApplication; +import org.apache.cloudberry.pxf.automation.applications.RegressApplication; +import org.apache.cloudberry.pxf.automation.testcontainers.PXFCloudberryContainer; +import org.apache.cloudberry.pxf.automation.utils.system.FDWUtils; +import org.apache.cloudberry.pxf.automation.utils.system.ProtocolUtils; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Listeners; +import reporters.CustomAutomationReport; + +@Listeners({CustomAutomationLogger.class, CustomAutomationReport.class, FDWSkipTestAnalyzer.class}) +public class AbstractTestcontainersTest { + + private static boolean sharedEnvironmentInitialized; + + protected final String pxfHost = "localhost"; + protected final String pxfPort = "5888"; + protected PXFCloudberryContainer container; + protected CloudberryApplication cloudberry; + protected RegressApplication regress; + + @BeforeClass(alwaysRun = true) + public final void doInit() throws Exception { + // redirect "doInit" logs to log file + CustomAutomationLogger.redirectStdoutStreamToFile(getClass().getSimpleName(), "doInit"); + + try { + container = PXFCloudberryContainer.getInstance(); + + try (CloudberryApplication bootstrap = new CloudberryApplication(container, "postgres")) { + bootstrap.connect(); + createTestDatabases(bootstrap); + 
} + + cloudberry = new CloudberryApplication(container); + cloudberry.connect(); + cloudberry.createExtension("pxf", true); + cloudberry.createExtension("pxf_fdw", true); + + if (!sharedEnvironmentInitialized) { + // Ensure PXF JDBC server configs exist for SERVER=database/db-session-params tests. + new PXFApplication(container).configureJdbcServers(); + if (FDWUtils.useFDW) { + cloudberry.createTestFDW(true); + cloudberry.createSystemFDW(true); + cloudberry.createForeignServers(true); + } + sharedEnvironmentInitialized = true; + } + + regress = new RegressApplication(container); + + // run users before class + beforeClass(); + } finally { + CustomAutomationLogger.revertStdoutStream(); + } + + } + + @AfterClass(alwaysRun = true) + public final void clean() throws Exception { + if (ProtocolUtils.getPxfTestKeepData().equals("true")) { + return; + } + CustomAutomationLogger.redirectStdoutStreamToFile(getClass().getSimpleName(), "clean"); + try { + cloudberry.close(); + } finally { + CustomAutomationLogger.revertStdoutStream(); + } + } + + /** + * clean up after the class finished + * + * @throws Exception + */ + protected void afterClass() throws Exception { + } + + /** + * Preparations needed before the class starting + * + * @throws Exception + */ + protected void beforeClass() throws Exception { + } + + /** + * clean up after the test method had finished + * + * @throws Exception + */ + protected void afterMethod() throws Exception { + } + + /** + * Preparations needed before the test method starting + * + * @throws Exception + */ + protected void beforeMethod() throws Exception { + } + + + private void createTestDatabases(CloudberryApplication bootstrap) throws Exception { + bootstrap.createDatabase("pxfautomation"); + bootstrap.createDatabase("pxfautomation_encoding"); + bootstrap.runQuery("SELECT 1"); + System.out.println("[" + getClass().getSimpleName() + "] Test databases created"); + } +} diff --git 
a/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/jdbc/JdbcClickhouseTest.java b/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/jdbc/JdbcClickhouseTest.java new file mode 100644 index 000000000..f3a8b4db6 --- /dev/null +++ b/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/jdbc/JdbcClickhouseTest.java @@ -0,0 +1,320 @@ +package org.apache.cloudberry.pxf.automation.features.jdbc; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import annotations.WorksWithFDW; +import org.apache.cloudberry.pxf.automation.AbstractTestcontainersTest; +import org.apache.cloudberry.pxf.automation.structures.tables.pxf.ExternalTable; +import org.apache.cloudberry.pxf.automation.structures.tables.utils.TableFactory; +import org.apache.cloudberry.pxf.automation.testcontainers.ClickHouseContainer; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Factory; +import org.testng.annotations.Test; + +import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; +import java.sql.Connection; +import java.sql.Date; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Timestamp; +import java.util.Properties; +import java.util.UUID; + +@WorksWithFDW +public class JdbcClickhouseTest extends AbstractTestcontainersTest { + + private static final String CLICKHOUSE_DRIVER = "com.clickhouse.jdbc.ClickHouseDriver"; + + private static final String CLICKHOUSE_TABLE_READ = "pxf_types_read"; + private static final String CLICKHOUSE_TABLE_WRITE = "pxf_types_write"; + + // PXF protocol compression options passed via LOCATION user parameters. + private static final String PROTOCOL_COMPRESS_ENABLED = "compress=true"; + private static final String PROTOCOL_COMPRESS_ALGORITHM_LZ4 = "compress_algorithm=lz4"; + private static final String HTTP_CONNECTION_PROVIDER_APACHE = "http_connection_provider=APACHE_HTTP_CLIENT"; + + /** PXF external/foreign table column definitions — same for read and write tests. 
*/ + private static final String[] CLICKHOUSE_PXF_FIELDS = new String[] { + "i_int int", + "s_small smallint", + "b_big bigint", + "f_float32 real", + "d_float64 double precision", + "b_bool boolean", + "dec numeric", + "t_text text", + "bin bytea", + "d_date date", + "d_ts timestamp", + "d_tstz timestamp with time zone", + "d_uuid uuid" + }; + + private static final int V_I_INT = 1; + private static final short V_S_SMALL = 2; + private static final long V_B_BIG = 3L; + private static final float V_F_FLOAT32 = 1.25f; + private static final double V_D_FLOAT64 = 3.1415926d; + private static final boolean V_B_BOOL = true; + private static final String V_DEC_TEXT = "12345.6789012345"; + private static final String V_T_TEXT = "hello"; + private static final String V_D_DATE = "2020-01-02"; + private static final String V_D_TS = "2020-01-02 03:04:05.006"; + private static final String V_D_UUID = "550e8400-e29b-41d4-a716-446655440000"; + /** Four-byte payload for binary data; same bytes as `decode('41424344','hex')` in regress SQL. */ + private static final byte[] V_BIN_BYTES = "ABCD".getBytes(StandardCharsets.US_ASCII); + + private final String dockerImageTag; + + private ClickHouseContainer clickhouseContainer; + + /** + * TestNG {@link Factory}: one test class instance per {@link #clickhouseVersions()} row (separate ClickHouse container). + */ + @Factory(dataProvider = "clickhouseVersions") + public static Object[] createInstances(String imageTag) { + return new Object[] { new JdbcClickhouseTest(imageTag) }; + } + + /** Docker image tags for `clickhouse/clickhouse-server` — same regress SQL for both. 
*/ + @DataProvider(name = "clickhouseVersions") + public static Object[][] clickhouseVersions() { + return new Object[][] { + // Test PXF & ClickHouse with old database versions + { "24" }, + // ClickHouse 25.10+ doesn't work with old JDBC drivers + // https://github.com/ClickHouse/clickhouse-java/issues/2636 + // { "26.1.4.35" }, + // { "26.2" }, + }; + } + + private JdbcClickhouseTest(String dockerImageTag) { + this.dockerImageTag = dockerImageTag; + } + + @Override + public void beforeClass() throws Exception { + clickhouseContainer = new ClickHouseContainer(dockerImageTag, container.getSharedNetwork()); + clickhouseContainer.start(); + + Assert.assertTrue(container.isRunning(), "PXFCloudberry container should be running"); + Assert.assertTrue(clickhouseContainer.isRunning(), "ClickHouse container should be running"); + } + + @Override + public void afterClass() throws Exception { + if (clickhouseContainer != null) { + clickhouseContainer.stop(); + } + } + + @Test(groups = {"testcontainers", "jdbc-tc"}) + public void readSupportedTypes() throws Exception { + runReadSupportedTypes(clickhouseContainer.getInternalJdbcUrl(), clickhouseContainer.getJdbcUrl(), false); + } + + @Test(groups = {"testcontainers", "jdbc-tc"}) + public void readSupportedTypesWithProtocolCompression() throws Exception { + runReadSupportedTypes(clickhouseContainer.getInternalJdbcUrl(), clickhouseContainer.getJdbcUrl(), true); + } + + @Test(groups = {"testcontainers", "jdbc-tc"}) + public void readSupportedTypesWithHttpConnectionProvider() throws Exception { + runReadSupportedTypes(clickhouseContainer.getInternalJdbcUrl(), clickhouseContainer.getJdbcUrl(), false, true); + } + + @Test(groups = {"testcontainers", "jdbc-tc"}) + public void writeSupportedTypes() throws Exception { + runWriteSupportedTypes(clickhouseContainer.getInternalJdbcUrl(), clickhouseContainer.getJdbcUrl(), false); + } + + @Test(groups = {"testcontainers", "jdbc-tc"}) + public void 
writeSupportedTypesWithProtocolCompression() throws Exception { + runWriteSupportedTypes(clickhouseContainer.getInternalJdbcUrl(), clickhouseContainer.getJdbcUrl(), true); + } + + @Test(groups = {"testcontainers", "jdbc-tc"}) + public void writeSupportedTypesWithHttpConnectionProvider() throws Exception { + runWriteSupportedTypes(clickhouseContainer.getInternalJdbcUrl(), clickhouseContainer.getJdbcUrl(), false, true); + } + + @Test(groups = {"testcontainers", "jdbc-tc"}) + public void writeSupportedTypesWithHttpConnectionProviderAndCompression() throws Exception { + runWriteSupportedTypes(clickhouseContainer.getInternalJdbcUrl(), clickhouseContainer.getJdbcUrl(), true, true); + } + + private void runReadSupportedTypes(String internalJdbcUrl, String externalJdbcUrl, boolean enableProtocolCompression) throws Exception { + runReadSupportedTypes(internalJdbcUrl, externalJdbcUrl, enableProtocolCompression, false); + } + + private void runReadSupportedTypes(String internalJdbcUrl, String externalJdbcUrl, boolean enableProtocolCompression, boolean enableHttpConnectionProvider) throws Exception { + createAndSeedClickhouseReadTable(externalJdbcUrl); + + ExternalTable pxfRead = TableFactory.getPxfJdbcReadableTable( + "pxf_ch_clickhouse_read_types", + CLICKHOUSE_PXF_FIELDS, + CLICKHOUSE_TABLE_READ, + CLICKHOUSE_DRIVER, + internalJdbcUrl, + ClickHouseContainer.CLICKHOUSE_USER, + "PASS=" + ClickHouseContainer.CLICKHOUSE_PASSWORD); + pxfRead.setHost(pxfHost); + pxfRead.setPort(pxfPort); + if (enableProtocolCompression) { + pxfRead.addUserParameter(PROTOCOL_COMPRESS_ENABLED); + pxfRead.addUserParameter(PROTOCOL_COMPRESS_ALGORITHM_LZ4); + } + if (enableHttpConnectionProvider) { + pxfRead.addUserParameter(HTTP_CONNECTION_PROVIDER_APACHE); + } + cloudberry.createTableAndVerify(pxfRead); + + try { + regress.runSqlTest("features/jdbc/clickhouse/read_types"); + } finally { + cloudberry.dropTable(pxfRead, true); + } + } + + private void runWriteSupportedTypes(String internalJdbcUrl, 
String externalJdbcUrl, boolean enableProtocolCompression) throws Exception { + runWriteSupportedTypes(internalJdbcUrl, externalJdbcUrl, enableProtocolCompression, false); + } + + private void runWriteSupportedTypes(String internalJdbcUrl, String externalJdbcUrl, boolean enableProtocolCompression, boolean enableHttpConnectionProvider) throws Exception { + createClickhouseWriteTable(externalJdbcUrl); + + ExternalTable pxfWrite = TableFactory.getPxfJdbcWritableTable( + "pxf_ch_clickhouse_write_types", + CLICKHOUSE_PXF_FIELDS, + CLICKHOUSE_TABLE_WRITE, + CLICKHOUSE_DRIVER, + internalJdbcUrl, + ClickHouseContainer.CLICKHOUSE_USER, + "PASS=" + ClickHouseContainer.CLICKHOUSE_PASSWORD); + pxfWrite.setHost(pxfHost); + pxfWrite.setPort(pxfPort); + if (enableProtocolCompression) { + pxfWrite.addUserParameter(PROTOCOL_COMPRESS_ENABLED); + pxfWrite.addUserParameter(PROTOCOL_COMPRESS_ALGORITHM_LZ4); + } + if (enableHttpConnectionProvider) { + pxfWrite.addUserParameter(HTTP_CONNECTION_PROVIDER_APACHE); + } + cloudberry.createTableAndVerify(pxfWrite); + + ExternalTable pxfVerify = TableFactory.getPxfJdbcReadableTable( + "pxf_ch_clickhouse_write_verify", + CLICKHOUSE_PXF_FIELDS, + CLICKHOUSE_TABLE_WRITE, + CLICKHOUSE_DRIVER, + internalJdbcUrl, + ClickHouseContainer.CLICKHOUSE_USER, + "PASS=" + ClickHouseContainer.CLICKHOUSE_PASSWORD); + pxfVerify.setHost(pxfHost); + pxfVerify.setPort(pxfPort); + if (enableProtocolCompression) { + pxfVerify.addUserParameter(PROTOCOL_COMPRESS_ENABLED); + pxfVerify.addUserParameter(PROTOCOL_COMPRESS_ALGORITHM_LZ4); + } + if (enableHttpConnectionProvider) { + pxfVerify.addUserParameter(HTTP_CONNECTION_PROVIDER_APACHE); + } + cloudberry.createTableAndVerify(pxfVerify); + + try { + regress.runSqlTest("features/jdbc/clickhouse/write_types"); + } finally { + cloudberry.dropTable(pxfVerify, true); + cloudberry.dropTable(pxfWrite, true); + } + } + + private void createAndSeedClickhouseReadTable(String jdbcUrl) throws SQLException { + try (Connection chConn 
= openClickhouseConnection(jdbcUrl)) { + createClickhouseServerTable(chConn, CLICKHOUSE_TABLE_READ); + insertClickhouseReadFixture(chConn); + } + } + + private void createClickhouseWriteTable(String externalJdbcUrl) throws SQLException { + try (Connection chConn = openClickhouseConnection(externalJdbcUrl)) { + createClickhouseServerTable(chConn, CLICKHOUSE_TABLE_WRITE); + } + } + + /** Creates ClickHouse MergeTree table ({@code DROP IF EXISTS} + {@code CREATE}). */ + private void createClickhouseServerTable(Connection chConn, String tableName) throws SQLException { + try (Statement st = chConn.createStatement()) { + st.execute("DROP TABLE IF EXISTS " + tableName); + st.execute("CREATE TABLE " + tableName + " ( " + + "i_int Int32, " + + "s_small Int16, " + + "b_big Int64, " + + "f_float32 Float32, " + + "d_float64 Float64, " + + "b_bool Bool, " + + "dec Decimal(38,10), " + + "t_text String, " + + "bin String, " // binary data + + "d_date Date, " + + "d_ts DateTime64(3,'UTC'), " + + "d_tstz DateTime64(3,'UTC'), " + + "d_uuid UUID " + + ") ENGINE = MergeTree ORDER BY (i_int)"); + } + } + + /** Inserts fixture row into {@link #CLICKHOUSE_TABLE_READ} for the read test. 
*/ + private void insertClickhouseReadFixture(Connection chConn) throws SQLException { + String insertSql = "INSERT INTO " + CLICKHOUSE_TABLE_READ + " (" + + "i_int, s_small, b_big, f_float32, d_float64, b_bool, dec, t_text, bin, d_date, d_ts, d_tstz, d_uuid" + + ") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"; + + try (PreparedStatement ps = chConn.prepareStatement(insertSql)) { + ps.setInt(1, V_I_INT); + ps.setShort(2, V_S_SMALL); + ps.setLong(3, V_B_BIG); + ps.setFloat(4, V_F_FLOAT32); + ps.setDouble(5, V_D_FLOAT64); + ps.setBoolean(6, V_B_BOOL); + ps.setBigDecimal(7, new BigDecimal(V_DEC_TEXT)); + ps.setString(8, V_T_TEXT); + ps.setBytes(9, V_BIN_BYTES); + ps.setDate(10, Date.valueOf(V_D_DATE)); + ps.setTimestamp(11, Timestamp.valueOf(V_D_TS)); + ps.setTimestamp(12, Timestamp.valueOf(V_D_TS)); + ps.setObject(13, UUID.fromString(V_D_UUID)); + ps.executeUpdate(); + } + } + + private Connection openClickhouseConnection(String jdbcUrl) throws SQLException { + Properties props = new Properties(); + props.setProperty("user", ClickHouseContainer.CLICKHOUSE_USER); + props.setProperty("password", ClickHouseContainer.CLICKHOUSE_PASSWORD); + return DriverManager.getConnection(jdbcUrl, props); + } + +} diff --git a/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/jdbc/JdbcTest.java b/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/jdbc/JdbcTest.java index 4ec3be33d..5539d8e90 100755 --- a/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/jdbc/JdbcTest.java +++ b/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/jdbc/JdbcTest.java @@ -1,23 +1,44 @@ package org.apache.cloudberry.pxf.automation.features.jdbc; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + import java.io.File; import annotations.FailsWithFDW; import annotations.WorksWithFDW; +import org.apache.cloudberry.pxf.automation.AbstractTestcontainersTest; +import org.apache.cloudberry.pxf.automation.applications.CloudberryApplication; import org.apache.cloudberry.pxf.automation.structures.tables.basic.Table; import org.apache.cloudberry.pxf.automation.structures.tables.pxf.ExternalTable; import org.apache.cloudberry.pxf.automation.structures.tables.utils.TableFactory; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import org.apache.cloudberry.pxf.automation.enums.EnumPartitionType; -import org.apache.cloudberry.pxf.automation.features.BaseFeature; - @WorksWithFDW -public class JdbcTest extends BaseFeature { +public class JdbcTest extends AbstractTestcontainersTest { private static final String POSTGRES_DRIVER_CLASS = "org.postgresql.Driver"; - private static final String GPDB_PXF_AUTOMATION_DB_JDBC = "jdbc:postgresql://"; + private static final String localDataResourcesFolder = "src/test/resources/data"; + private static final String[] TYPES_TABLE_FIELDS = new String[]{ "t1 text", "t2 text", @@ -61,6 +82,9 @@ public class JdbcTest extends BaseFeature { "count int", "max int"}; + @Deprecated + private CloudberryApplication gpdb; + private ExternalTable pxfJdbcSingleFragment; private ExternalTable pxfJdbcDateWideRangeOn; private ExternalTable 
pxfJdbcDateWideRangeOff; @@ -86,11 +110,16 @@ public class JdbcTest extends BaseFeature { private Table gpdbWritableTargetTableNoBatch, gpdbWritableTargetTablePool; private Table gpdbDeptTable, gpdbEmpTable; - @Override - protected void beforeClass() throws Exception { + @BeforeClass(alwaysRun = true) + public void setUp() throws Exception { + gpdb = cloudberry; // alias prepareData(); } + private void runSqlTest(String sqlTestPath) throws Exception { + regress.runSqlTest(sqlTestPath); + } + protected void prepareData() throws Exception { prepareTypesData(); prepareSingleFragment(); @@ -193,7 +222,7 @@ private void prepareSingleFragment() throws Exception { TYPES_TABLE_FIELDS, gpdbNativeTableTypes.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), gpdb.getUserName()); pxfJdbcSingleFragment.setHost(pxfHost); pxfJdbcSingleFragment.setPort(pxfPort); @@ -207,7 +236,7 @@ private void prepareMultipleFragmentsByEnum() throws Exception { TYPES_TABLE_FIELDS, gpdbNativeTableTypes.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), 13, "USD:UAH", "1", @@ -226,7 +255,7 @@ private void prepareMultipleFragmentsByInt() throws Exception { TYPES_TABLE_FIELDS, gpdbNativeTableTypes.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), 2, "1:6", "1", @@ -245,7 +274,7 @@ private void prepareMultipleFragmentsByDate() throws Exception { TYPES_TABLE_FIELDS, gpdbNativeTableTypes.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), 11, "2015-03-06:2015-03-20", "1:DAY", @@ -302,7 
+331,7 @@ private void prepareWritable() throws Exception { TYPES_TABLE_FIELDS, gpdbWritableTargetTable.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), gpdb.getUserName(), null); pxfJdbcWritable.setHost(pxfHost); pxfJdbcWritable.setPort(pxfPort); @@ -314,7 +343,7 @@ private void prepareWritable() throws Exception { TYPES_TABLE_FIELDS, dateTimeWritableTargetTableWithDateWideRangeOn.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), gpdb.getUserName(), null); pxfJdbcDateTimeWritableWithDateWideRangeOn.setHost(pxfHost); pxfJdbcDateTimeWritableWithDateWideRangeOn.setPort(pxfPort); @@ -326,7 +355,7 @@ private void prepareWritable() throws Exception { TYPES_TABLE_FIELDS, dateTimeWritableTargetTableWithDateWideRangeOff.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), gpdb.getUserName(), null); pxfJdbcDateTimeWritableWithDateWideRangeOff.setHost(pxfHost); pxfJdbcDateTimeWritableWithDateWideRangeOff.setPort(pxfPort); @@ -338,7 +367,7 @@ private void prepareWritable() throws Exception { TYPES_TABLE_FIELDS_SMALL, gpdbWritableTargetTableNoBatch.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), gpdb.getUserName(), "BATCH_SIZE=1"); pxfJdbcWritableNoBatch.setHost(pxfHost); pxfJdbcWritableNoBatch.setPort(pxfPort); @@ -349,7 +378,7 @@ private void prepareWritable() throws Exception { TYPES_TABLE_FIELDS_SMALL, gpdbWritableTargetTablePool.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + 
"/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), gpdb.getUserName(), "POOL_SIZE=2"); pxfJdbcWritablePool.setHost(pxfHost); pxfJdbcWritablePool.setPort(pxfPort); @@ -362,7 +391,7 @@ private void prepareColumns() throws Exception { COLUMNS_TABLE_FIELDS, gpdbNativeTableColumns.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), gpdb.getUserName()); pxfJdbcColumns.setHost(pxfHost); pxfJdbcColumns.setPort(pxfPort); @@ -375,7 +404,7 @@ private void prepareColumnProjectionSubsetInDifferentOrder() throws Exception { COLUMNS_TABLE_FIELDS_IN_DIFFERENT_ORDER_SUBSET, gpdbNativeTableColumns.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), gpdb.getUserName()); pxfJdbcColumnProjectionSubset.setHost(pxfHost); pxfJdbcColumnProjectionSubset.setPort(pxfPort); @@ -388,7 +417,7 @@ private void prepareColumnProjectionSuperset() throws Exception { COLUMNS_TABLE_FIELDS_SUPERSET, gpdbNativeTableColumns.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), gpdb.getUserName()); pxfJdbcColumnProjectionSuperset.setHost(pxfHost); pxfJdbcColumnProjectionSuperset.setPort(pxfPort); @@ -401,7 +430,7 @@ private void prepareFetchSizeZero() throws Exception { TYPES_TABLE_FIELDS, gpdbNativeTableTypes.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), gpdb.getUserName(), "FETCH_SIZE=0"); pxfJdbcSingleFragment.setHost(pxfHost); pxfJdbcSingleFragment.setPort(pxfPort); @@ -414,7 +443,7 @@ private void prepareDateWideRange() throws Exception { TYPES_TABLE_FIELDS, 
gpdbNativeTableTypesWithDateWideRange.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), gpdb.getUserName()); pxfJdbcDateWideRangeOn.setHost(pxfHost); pxfJdbcDateWideRangeOn.setPort(pxfPort); @@ -426,7 +455,7 @@ private void prepareDateWideRange() throws Exception { TYPES_TABLE_FIELDS, gpdbNativeTableTypesWithDateWideRange.getName(), POSTGRES_DRIVER_CLASS, - GPDB_PXF_AUTOMATION_DB_JDBC + gpdb.getMasterHost() + ":" + gpdb.getPort() + "/pxfautomation", + gpdb.getCloudberryMappedJdbcUrl("pxfautomation"), gpdb.getUserName()); pxfJdbcDateWideRangeOff.setHost(pxfHost); pxfJdbcDateWideRangeOff.setPort(pxfPort); @@ -461,22 +490,22 @@ private void prepareNamedQuery() throws Exception { gpdb.createTableAndVerify(pxfJdbcNamedQuery); } - @Test(groups = {"features", "gpdb", "security", "jdbc"}) + @Test(groups = {"testcontainers", "jdbc-tc"}) public void singleFragmentTable() throws Exception { runSqlTest("features/jdbc/single_fragment"); } - @Test(groups = {"features", "gpdb", "security", "jdbc"}) + @Test(groups = {"testcontainers", "jdbc-tc"}) public void multipleFragmentsTables() throws Exception { runSqlTest("features/jdbc/multiple_fragments"); } - @Test(groups = {"features", "gpdb", "security", "jdbc"}) + @Test(groups = {"testcontainers", "jdbc-tc"}) public void readServerConfig() throws Exception { runSqlTest("features/jdbc/server_config"); } - @Test(groups = {"features", "gpdb", "security", "jdbc"}) + @Test(groups = {"testcontainers", "jdbc-tc"}) public void readViewSessionParams() throws Exception { runSqlTest("features/jdbc/session_params"); } @@ -485,7 +514,7 @@ public void readViewSessionParams() throws Exception { // All the Writable Tests are failing with this Error: // ERROR: PXF server error : class java.io.DataInputStream cannot be cast to class // [B (java.io.DataInputStream and [B are in module java.base of loader 'bootstrap') - 
@Test(groups = {"features", "gpdb", "security", "jdbc"}) + @Test(groups = {"testcontainers", "jdbc-tc"}) public void jdbcWritableTable() throws Exception { runSqlTest("features/jdbc/writable"); } @@ -494,44 +523,44 @@ public void jdbcWritableTable() throws Exception { // All the Writable Tests are failing with this Error: // ERROR: PXF server error : class java.io.DataInputStream cannot be cast to class // [B (java.io.DataInputStream and [B are in module java.base of loader 'bootstrap') - @Test(groups = {"features", "gpdb", "security", "jdbc"}) + @Test(groups = {"testcontainers", "jdbc-tc"}) public void jdbcWritableTableWithDateWideRange() throws Exception { runSqlTest("features/jdbc/writable_date_wide_range"); } @FailsWithFDW - @Test(groups = {"features", "gpdb", "security", "jdbc"}) + @Test(groups = {"testcontainers", "jdbc-tc"}) public void jdbcWritableTableNoBatch() throws Exception { runSqlTest("features/jdbc/writable_nobatch"); } @FailsWithFDW - @Test(groups = {"features", "gpdb", "security", "jdbc"}) + @Test(groups = {"testcontainers", "jdbc-tc"}) public void jdbcWritableTablePool() throws Exception { runSqlTest("features/jdbc/writable_pool"); } - @Test(groups = {"features", "gpdb", "security", "jdbc"}) + @Test(groups = {"testcontainers", "jdbc-tc"}) public void jdbcColumns() throws Exception { runSqlTest("features/jdbc/columns"); } - @Test(groups = {"features", "gpdb", "security", "jdbc"}) + @Test(groups = {"testcontainers", "jdbc-tc"}) public void jdbcColumnProjection() throws Exception { runSqlTest("features/jdbc/column_projection"); } - @Test(groups = {"features", "gpdb", "security", "jdbc"}) + @Test(groups = {"testcontainers", "jdbc-tc"}) public void jdbcReadableTableNoBatch() throws Exception { runSqlTest("features/jdbc/readable_nobatch"); } - @Test(groups = {"features", "gpdb", "security", "jdbc"}) + @Test(groups = {"testcontainers", "jdbc-tc"}) public void jdbcReadableTableWithDateWideRange() throws Exception { 
runSqlTest("features/jdbc/readable_date_wide_range"); } - @Test(groups = {"features", "gpdb", "security", "jdbc"}) + @Test(groups = {"testcontainers", "jdbc-tc"}) public void jdbcNamedQuery() throws Exception { runSqlTest("features/jdbc/named_query"); } diff --git a/docs/content/jdbc_pxf.html.md.erb b/docs/content/jdbc_pxf.html.md.erb index cef58764b..e562a6428 100644 --- a/docs/content/jdbc_pxf.html.md.erb +++ b/docs/content/jdbc_pxf.html.md.erb @@ -187,6 +187,7 @@ Refer to the following topics for examples on how to use PXF to read data from a - [Reading From and Writing to a PostgreSQL Table](jdbc_pxf_postgresql.html) - [Reading From and Writing to a MySQL Table](jdbc_pxf_mysql.html) - [Reading From and Writing to an Oracle Table](jdbc_pxf_oracle.html) +- [Reading From and Writing to a ClickHouse Table](jdbc_pxf_clickhouse.html) - [Reading From and Writing to a Trino Table](jdbc_pxf_trino.html) ## About Using Named Queries diff --git a/docs/content/jdbc_pxf_clickhouse.html.md.erb b/docs/content/jdbc_pxf_clickhouse.html.md.erb new file mode 100644 index 000000000..d0b67534e --- /dev/null +++ b/docs/content/jdbc_pxf_clickhouse.html.md.erb @@ -0,0 +1,210 @@ +--- +title: 'Example: Reading From and Writing to a ClickHouse Table' +--- + + + +In this example, you: + +- Install the ClickHouse JDBC driver on the Apache Cloudberry (see [Registering PXF Library Dependencies](reg_jar_depend.html)) +- Create a ClickHouse table, and insert data into the table +- Create a PXF readable external table or a foreign table (`jdbc_pxf_fdw`) that references the ClickHouse table +- Read the data in the ClickHouse table using PXF +- Create a PXF writable external table or a writable foreign table that references the ClickHouse table +- Write data to the ClickHouse table using PXF +- Read the data in the ClickHouse table again + +## Create a ClickHouse Table + +Perform the following steps to create a ClickHouse table named `pxf_ch_types` in the `default` database: + +```sql +CREATE 
TABLE default.pxf_ch_types ( + i_int Int32, + s_small Int16, + b_big Int64, + f_float32 Float32, + d_float64 Float64, + b_bool Bool, + dec Decimal(38,10), + t_text String, + bin String, + d_date Date, + d_ts DateTime64(3,'UTC'), + d_tstz DateTime64(3,'UTC'), + d_uuid UUID +) ENGINE = MergeTree ORDER BY (i_int); + +INSERT INTO default.pxf_ch_types + (i_int, s_small, b_big, f_float32, d_float64, b_bool, dec, t_text, bin, d_date, d_ts, d_tstz, d_uuid) +VALUES + (1, 2, 3, 1.25, 3.1415926, true, toDecimal64('12345.6789012345', 10), 'hello', 'ABCD', + toDate('2020-01-02'), toDateTime64('2020-01-02 03:04:05.006', 3, 'UTC'), + toDateTime64('2020-01-02 03:04:05.006', 3, 'UTC'), toUUID('550e8400-e29b-41d4-a716-446655440000')); +``` + +## JDBC driver + +Place the ClickHouse JDBC driver JAR under `$PXF_BASE/lib` on coordinator and segment host, synchronize PXF, and restart PXF. + +You do **not** need a `servers/clickhouse/jdbc-site.xml` if you pass connection options in the external table `LOCATION` or in `CREATE SERVER ... OPTIONS` for a foreign table, as shown below. + +Automated tests exercise ClickHouse server versions **24** and **26** together with the ClickHouse JDBC drivers. + +For ClickHouse versions < 25.10 we tested clickhouse-jdbc **0.6.5**. In practice this driver tends to work more reliably when the HTTP stack uses **Apache HttpClient**; add the JDBC URL parameter `http_connection_provider=APACHE_HTTP_CLIENT` (for example `jdbc:clickhouse://clickhousehost:8123/default?http_connection_provider=APACHE_HTTP_CLIENT`). To use that mode, you must also place the **Apache HttpClient 5** JAR on the PXF classpath: copy `org.apache.httpcomponents.client5:httpclient5` (and its required dependencies, if any) into `$PXF_BASE/lib` next to the ClickHouse driver JAR, then synchronize PXF and restart PXF. See [Registering PXF Library Dependencies](reg_jar_depend.html). 
+ +For ClickHouse versions > 25.10 we recommend the latest clickhouse-jdbc-all (fat jar version with shaded dependencies). + +## Read from the ClickHouse Table + +You can use either an external table (PXF `pxf` protocol) or a foreign table (`jdbc_pxf_fdw`). + +### Using an external table + +Perform the following procedure to create a PXF external table that references the `pxf_ch_types` ClickHouse table, and reads the data in the table: + +1. Create a PXF readable external table using the `jdbc` profile. Substitute your ClickHouse host, port, and driver class as appropriate: + + ```sql + CREATE EXTERNAL TABLE pxf_ch_read( + i_int int, s_small smallint, b_big bigint, + f_float32 real, d_float64 double precision, b_bool boolean, + dec numeric, t_text text, bin bytea, d_date date, + d_ts timestamp, d_tstz timestamp with time zone, d_uuid uuid) + LOCATION ('pxf://default.pxf_ch_types?PROFILE=jdbc&JDBC_DRIVER=com.clickhouse.jdbc.ClickHouseDriver&DB_URL=jdbc:clickhouse://clickhousehost:8123/default') + FORMAT 'CUSTOM' (FORMATTER='pxfwritable_import'); + ``` + +2. Read the data: + + ```sql + SELECT * FROM pxf_ch_read ORDER BY i_int; + ``` + +### Using a foreign table + +For foreign tables use the `jdbc_pxf_fdw` foreign data wrapper. Define the JDBC driver and URL on the foreign server using `jdbc_driver` and `db_url` in `OPTIONS` (see [Overriding the JDBC Server Configuration with DDL](jdbc_pxf.html#jdbc_override)), then set `resource` to the remote table identifier (`<schema_name>.<table_name>`). + +**Advantage over external tables:** with the `pxf` protocol you must define two objects—a `CREATE EXTERNAL TABLE` for reads and a `CREATE WRITABLE EXTERNAL TABLE` for writes—because import and export use different formatters. With **foreign tables**, one `CREATE FOREIGN TABLE` is enough: the same definition supports both `SELECT` and `INSERT` (when the JDBC connector supports writes), so you avoid duplicated DDL and keep a single logical name for the remote table. + +1.
Create a foreign server, a user mapping, and one foreign table that references the remote ClickHouse table: + + ```sql + CREATE SERVER clickhouse_jdbc_srv FOREIGN DATA WRAPPER jdbc_pxf_fdw + OPTIONS ( + jdbc_driver 'com.clickhouse.jdbc.ClickHouseDriver', + db_url 'jdbc:clickhouse://clickhousehost:8123/default' + ); + + CREATE USER MAPPING FOR CURRENT_USER SERVER clickhouse_jdbc_srv; + + CREATE FOREIGN TABLE pxf_ch_fdw( + i_int int, s_small smallint, b_big bigint, + f_float32 real, d_float64 double precision, b_bool boolean, + dec numeric, t_text text, bin bytea, d_date date, + d_ts timestamp, d_tstz timestamp with time zone, d_uuid uuid + ) SERVER clickhouse_jdbc_srv + OPTIONS ( resource 'default.pxf_ch_types' ); + ``` + + Alternatively, you can keep credentials and JDBC settings in a PXF server configuration directory and reference it with `OPTIONS ( config 'your_server_dir' )` on `CREATE SERVER`; see [Configuring the JDBC Connector](jdbc_cfg.html). + +2. Read the data: + + ```sql + SELECT * FROM pxf_ch_fdw ORDER BY i_int; + ``` + +## Write to the ClickHouse Table + +### Using an external table + +Perform the following procedure to create a PXF writable external table that references the `pxf_ch_types` ClickHouse table, and writes data to the table: + +1. Create a PXF writable external table: + + ```sql + CREATE WRITABLE EXTERNAL TABLE pxf_ch_write( + i_int int, s_small smallint, b_big bigint, + f_float32 real, d_float64 double precision, b_bool boolean, + dec numeric, t_text text, bin bytea, d_date date, + d_ts timestamp, d_tstz timestamp with time zone, d_uuid uuid) + LOCATION ('pxf://default.pxf_ch_types?PROFILE=jdbc&JDBC_DRIVER=com.clickhouse.jdbc.ClickHouseDriver&DB_URL=jdbc:clickhouse://clickhousehost:8123/default') + FORMAT 'CUSTOM' (FORMATTER='pxfwritable_export'); + ``` + +2. 
Write data: + + ```sql + INSERT INTO pxf_ch_write VALUES + (2, 3, 4, 2.25, 4.1415926, true, CAST('12345.6789012345' AS numeric), + 'hello2', decode('41424344','hex'), DATE '2020-01-02', + TIMESTAMP '2020-01-02 03:04:05.006', + TIMESTAMPTZ '2020-01-02 03:04:05.006+00', + '550e8400-e29b-41d4-a716-446655440000'::uuid); + ``` + +3. Read the data back (using the readable external table from the previous section): + + ```sql + SELECT * FROM pxf_ch_read ORDER BY i_int; + ``` + +### Using a foreign table + +Reuse the same foreign server and foreign table `pxf_ch_fdw` from [Using a foreign table](#ex_readjdbc_fdw). An `INSERT` into that foreign table is routed to the remote ClickHouse table named in `resource`; you do **not** need a second foreign table for writes. + +1. Insert a row: + + ```sql + INSERT INTO pxf_ch_fdw VALUES + (2, 3, 4, 2.25, 4.1415926, true, CAST('12345.6789012345' AS numeric), + 'hello2', decode('41424344','hex'), DATE '2020-01-02', + TIMESTAMP '2020-01-02 03:04:05.006', + TIMESTAMPTZ '2020-01-02 03:04:05.006+00', + '550e8400-e29b-41d4-a716-446655440000'::uuid); + ``` + +2. Read the data back: + + ```sql + SELECT * FROM pxf_ch_fdw ORDER BY i_int; + ``` + +## Data Type Mapping and Limitations + +PXF supports the following JDBC data types (see [JDBC Data Types Supported](jdbc_pxf.html#datatypes)). +For ClickHouse, the typical JDBC-side mappings are: + +- `INTEGER`, `SMALLINT`, `BIGINT` → `Int32`, `Int16`, `Int64` +- `REAL`, `FLOAT8` → `Float32`, `Float64` +- `NUMERIC` → `Decimal(precision, scale)` +- `BOOLEAN` → `Bool` +- `VARCHAR`, `BPCHAR`, `TEXT` → `String` +- `DATE` → `Date` +- `TIMESTAMP` → `DateTime64(precision, 'UTC')` +- `TIMESTAMPTZ` → `DateTime64(precision, 'UTC')` +- `BYTEA` → `String` +- `UUID` → `UUID` + +Notes: + +- `UUID` and `TIMESTAMPTZ` are supported for reading and writing with PXF JDBC when mapped to ClickHouse `UUID` and `DateTime64` as above.
+- For `BYTEA`, prefer `String` on the ClickHouse side for reliable JDBC round-trip; use `FixedString(n)` only if you control the wire format and length. diff --git a/server/Makefile b/server/Makefile index edd2cbb17..f251a6520 100644 --- a/server/Makefile +++ b/server/Makefile @@ -40,7 +40,10 @@ help: @echo " - test - runs unit tests for all PXF modules" @echo " - coverage - runs unit tests for all PXF modules and gives a coverage report" @echo " - install - setup PXF in the configured deployPath" - @echo " - stage - build PXF server and stage resulting artifacts for packaging" + @echo " - stage - build PXF server and stage resulting artifacts for packaging (no JDBC drivers in build/stage/lib)" + @echo " - stage-notest - same as stage but skip tests (no JDBC drivers in build/stage/lib)" + @echo " - stage-jdbc-drivers - copy JDBC driver JARs to build/stage/lib" + @echo " - install-jdbc-drivers - setup PXF JDBC drivers in the configured deployPath" @echo " - doc - creates aggregate javadoc under docs" all: prepare-gradle-wrapper @@ -95,6 +98,18 @@ stage-notest: prepare-gradle-wrapper install -m 700 -d "build/stage/run" install -m 700 -d "build/stage/keytabs" +.PHONY: stage-jdbc-drivers +stage-jdbc-drivers: prepare-gradle-wrapper + ./gradlew $(PXF_GRADLE_PROPERTIES) stageJdbcDrivers + +.PHONY: install-jdbc-drivers +install-jdbc-drivers: stage-jdbc-drivers + @if [ -z "$(PXF_HOME)" ]; then \ + echo "ERROR: PXF_HOME is not set"; exit 2; \ + fi + mkdir -p "$(PXF_HOME)"/lib + cp -R build/stage/lib/.
"$(PXF_HOME)"/lib/ + clean: prepare-gradle-wrapper ./gradlew clean rm -rf build diff --git a/server/build.gradle b/server/build.gradle index e530d8727..57f31a35b 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -212,8 +212,9 @@ configure(javaProjects) { entry("avro") entry("avro-mapred") } - // Zstd support for Avro/Parquet + // Compression support for Avro/Parquet dependency("com.github.luben:zstd-jni:1.5.7-6") + dependency("at.yawk.lz4:lz4-java:1.10.3") // Jackson 1.x dependencies dependencySet(group:"org.codehaus.jackson", version:"1.9.13") { @@ -374,3 +375,11 @@ task stage(type: Copy) { doLast { new File("${buildDir}/stage/version").text = "${version}\n" } } + +task stageJdbcDrivers(type: Copy) { + description "Copy JDBC driver JARs from pxf-jdbc-drivers to build/stage/lib." + dependsOn ":pxf-jdbc-drivers:jar" + from(project(':pxf-jdbc-drivers').configurations.runtimeClasspath) + into("${buildDir}/stage/lib") + duplicatesStrategy = DuplicatesStrategy.INCLUDE +} diff --git a/server/pxf-jdbc-drivers/build.gradle b/server/pxf-jdbc-drivers/build.gradle new file mode 100644 index 000000000..fafafa93a --- /dev/null +++ b/server/pxf-jdbc-drivers/build.gradle @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +dependencies { + // PostgreSQL driver is used by PXF itself. + // Its version is defined in server/build.gradle + + // Clickhouse + implementation('com.clickhouse:clickhouse-jdbc:0.6.5') + implementation('com.clickhouse:clickhouse-jdbc:0.6.5:http') + implementation('at.yawk.lz4:lz4-java') + implementation('org.apache.httpcomponents.client5:httpclient5:5.3.1') { transitive = false } + implementation('org.apache.httpcomponents.core5:httpcore5:5.2.4') { transitive = false } + implementation('org.apache.httpcomponents.core5:httpcore5-h2:5.2.4') { transitive = false } + +} \ No newline at end of file diff --git a/server/settings.gradle b/server/settings.gradle index dd4196798..5df3ff21b 100644 --- a/server/settings.gradle +++ b/server/settings.gradle @@ -25,6 +25,7 @@ include 'pxf-service', 'pxf-hdfs', 'pxf-hive', 'pxf-jdbc', + 'pxf-jdbc-drivers', 'pxf-json', 'pxf-s3', 'pxf-diagnostic'