From 4e0bf7fd110f2d066a24c028e1d612cf06b630bc Mon Sep 17 00:00:00 2001 From: Neil Conway Date: Sat, 7 Feb 2026 15:09:17 -0500 Subject: [PATCH] Support fuzzing on datetime types and functions --- README.md | 4 +- src/sqlancer/datafusion/DataFusionErrors.java | 4 + src/sqlancer/datafusion/DataFusionSchema.java | 40 ++++- .../datafusion/ast/DataFusionConstant.java | 152 ++++++++++++++++ .../datafusion/gen/DataFusionBaseExpr.java | 25 +++ .../gen/DataFusionBaseExprFactory.java | 162 ++++++++++++++++-- 6 files changed, 373 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index d90a247b..b5a5c674 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ Notes for query generation: - String scalar functions/expression operators - Aggregate functions, `HAVING` clause - Window functions -- (TODO) Time related data type functions +- Time-related data type functions - (TODO) Subquery - (TODO) Queries from parquet, csv - (TODO) Exploit different configurations (change config knobs like `target_partition`, `prefer_hash_join` etc. @@ -104,7 +104,7 @@ Note: most oracles only apply to a subset of available query types, for advanced More context for below test oracles at https://github.com/sqlancer/sqlancer/tree/main - NoREC - TLP -- (TODO) PQS +- PQS - (TODO) DQP for logical bugs in joins - (TODO) [EET](https://www.usenix.org/conference/osdi24/presentation/jiang#:~:text=To%20find%20logic%20bugs%20in,is%20independent%20of%20query%20patterns.) for logic bugs in joins and subqueries # Bug Report diff --git a/src/sqlancer/datafusion/DataFusionErrors.java b/src/sqlancer/datafusion/DataFusionErrors.java index 9e62bbe9..72c385a2 100644 --- a/src/sqlancer/datafusion/DataFusionErrors.java +++ b/src/sqlancer/datafusion/DataFusionErrors.java @@ -47,6 +47,10 @@ public static void registerExpectedExecutionErrors(ExpectedErrors errors) { errors.add("This feature is not implemented: Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal"); errors.add( "This feature is not implemented: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal"); + errors.add("Error parsing timestamp"); + errors.add("Unable to cast to Date32 for converting from i64 to i32"); + errors.add("Invalid input syntax for type interval"); + errors.add("This feature is not implemented: DATE_BIN only supports literal values for the origin argument, not arrays"); /* * Known bugs diff --git a/src/sqlancer/datafusion/DataFusionSchema.java b/src/sqlancer/datafusion/DataFusionSchema.java index 2d08a347..35dd8137 100644 --- a/src/sqlancer/datafusion/DataFusionSchema.java +++ b/src/sqlancer/datafusion/DataFusionSchema.java @@ -115,7 +115,7 @@ private static List getTableColumns(SQLConnection con, String */ public enum DataFusionDataType { - STRING, BIGINT, DOUBLE, BOOLEAN, NULL; + STRING, BIGINT, DOUBLE, BOOLEAN, NULL, DATE, TIMESTAMP, TIME; public static DataFusionDataType getRandomWithoutNull() { DataFusionDataType dt; @@ -129,6 +129,10 @@ public boolean isNumeric() { return this == BIGINT || this == DOUBLE; } + public boolean isTemporal() { + return this == DATE || this == TIMESTAMP || this == TIME; + } + // How to parse type in DataFusion's catalog to `DataFusionDataType` // As displayed in: // create table t1(v1 int, v2 bigint); @@ -145,7 +149,15 @@ public static DataFusionDataType parseFromDataFusionCatalog(String typeString) { return DataFusionDataType.STRING; case "Utf8View": return DataFusionDataType.STRING; + case "Date32": + return DataFusionDataType.DATE; + case "Time64(ns)": + return DataFusionDataType.TIME; default: + // Handle Timestamp variants with timezone info + if (typeString.startsWith("Timestamp(")) { + return DataFusionDataType.TIMESTAMP; + } dfAssert(false, "Uncovered branch typeString: " + typeString); } @@ -182,6 +194,32 @@ public Node getRandomConstant(DataFusionGlobalState state) return DataFusionConstant.createNullConstant(); case STRING: return new DataFusionConstant.DataFusionStringConstant(state.getRandomly().getString()); + case DATE: + // Generate dates in a reasonable range: -10000 to 10000 days from epoch (1970-01-01) + long daysFromEpoch = state.getRandomly().getInteger(-10000, 10000); + return DataFusionConstant.createDateConstant(daysFromEpoch); + case TIMESTAMP: + // Generate timestamps: random value or specific edge cases + if (Randomly.getBooleanWithSmallProbability()) { + // Edge cases: very old, very new, or around epoch + long edgeTimestamp = Randomly.fromOptions( + 0L, // Unix epoch + -2208988800L, // 1900-01-01 + 946684800L, // 2000-01-01 + 1577836800L, // 2020-01-01 + 253402300799L // 9999-12-31 + ); + return DataFusionConstant.createTimestampConstant(edgeTimestamp); + } + // Random timestamp in reasonable range (use days as base, then convert to seconds) + // Generate days from epoch: -10000 to 20000 days, then multiply by seconds per day + long randomDays = state.getRandomly().getInteger(-10000, 20000); + long randomTimestamp = randomDays * 86400; // Convert to seconds + return DataFusionConstant.createTimestampConstant(randomTimestamp); + case TIME: + // Generate time values (0 to 86400 seconds in a day) + long secondsInDay = state.getRandomly().getInteger(0, 86400); + return DataFusionConstant.createTimeConstant(secondsInDay); default: dfAssert(false, "Unreachable. All branches should be eovered"); } diff --git a/src/sqlancer/datafusion/ast/DataFusionConstant.java b/src/sqlancer/datafusion/ast/DataFusionConstant.java index 0b084db2..1a86ac23 100644 --- a/src/sqlancer/datafusion/ast/DataFusionConstant.java +++ b/src/sqlancer/datafusion/ast/DataFusionConstant.java @@ -15,6 +15,18 @@ public static Node createNullConstant() { return new DataFusionNullConstant(); } + public static Node createDateConstant(long daysFromEpoch) { + return new DataFusionDateConstant(daysFromEpoch); + } + + public static Node createTimestampConstant(long secondsSinceEpoch) { + return new DataFusionTimestampConstant(secondsSinceEpoch); + } + + public static Node createTimeConstant(long secondsInDay) { + return new DataFusionTimeConstant(secondsInDay); + } + public static class DataFusionNullConstant extends DataFusionConstant { @Override @@ -134,4 +146,144 @@ public String toString() { } } + + public static class DataFusionDateConstant extends DataFusionConstant { + private final String value; + + public DataFusionDateConstant(long daysFromEpoch) { + // Convert days from epoch to date string + // Epoch is 1970-01-01, so we add/subtract days + long totalDays = daysFromEpoch; + + // Simple date calculation (not accounting for all edge cases, but good enough for fuzzing) + // Start from 1970-01-01 + int year = 1970; + int month = 1; + int day = 1; + + // Approximate calculation for fuzzing purposes + if (totalDays >= 0) { + year += (int) (totalDays / 365); + totalDays = totalDays % 365; + month = (int) (totalDays / 30) + 1; + day = (int) (totalDays % 30) + 1; + } else { + totalDays = -totalDays; + year -= (int) (totalDays / 365); + totalDays = totalDays % 365; + month = (int) (totalDays / 30) + 1; + day = (int) (totalDays % 30) + 1; + } + + // Keep values in valid ranges + if (year < 1) { + year = 1; + } + if (year > 9999) { + year = 9999; + } + if (month < 1) { + month = 1; + } + if (month > 12) { + month = 12; + } + if (day < 1) { + day = 1; + } + if (day > 28) { + day = 28; // Safe for all months + } + + this.value = String.format("%04d-%02d-%02d", year, month, day); + } + + @Override + public String toString() { + return "DATE '" + value + "'"; + } + } + + public static class DataFusionTimestampConstant extends DataFusionConstant { + private final String value; + + public DataFusionTimestampConstant(long secondsSinceEpoch) { + // Convert seconds to timestamp string YYYY-MM-DD HH:MM:SS + // Simple approximation for fuzzing + long totalSeconds = Math.abs(secondsSinceEpoch); + long seconds = totalSeconds % 60; + long totalMinutes = totalSeconds / 60; + long minutes = totalMinutes % 60; + long totalHours = totalMinutes / 60; + long hours = totalHours % 24; + long totalDays = totalHours / 24; + + // Calculate date from days (approximate) + int year = 1970; + int month = 1; + int day = 1; + + if (secondsSinceEpoch >= 0) { + year += (int) (totalDays / 365); + totalDays = totalDays % 365; + month = (int) (totalDays / 30) + 1; + day = (int) (totalDays % 30) + 1; + } else { + year -= (int) (totalDays / 365); + totalDays = totalDays % 365; + month = (int) (totalDays / 30) + 1; + day = (int) (totalDays % 30) + 1; + } + + // Keep in valid ranges + if (year < 1) { + year = 1; + } + if (year > 9999) { + year = 9999; + } + if (month < 1) { + month = 1; + } + if (month > 12) { + month = 12; + } + if (day < 1) { + day = 1; + } + if (day > 28) { + day = 28; + } + + this.value = String.format("%04d-%02d-%02d %02d:%02d:%02d", year, month, day, hours, minutes, seconds); + } + + @Override + public String toString() { + return "TIMESTAMP '" + value + "'"; + } + } + + public static class DataFusionTimeConstant extends DataFusionConstant { + private final String value; + + public DataFusionTimeConstant(long secondsInDay) { + // Convert seconds to HH:MM:SS + long totalSeconds = secondsInDay % 86400; // Ensure within a day + if (totalSeconds < 0) { + totalSeconds += 86400; + } + + long hours = totalSeconds / 3600; + long minutes = (totalSeconds % 3600) / 60; + long seconds = totalSeconds % 60; + + this.value = String.format("%02d:%02d:%02d", hours, minutes, seconds); + } + + @Override + public String toString() { + return "TIME '" + value + "'"; + } + } } diff --git a/src/sqlancer/datafusion/gen/DataFusionBaseExpr.java b/src/sqlancer/datafusion/gen/DataFusionBaseExpr.java index 59f83055..1fba1b52 100644 --- a/src/sqlancer/datafusion/gen/DataFusionBaseExpr.java +++ b/src/sqlancer/datafusion/gen/DataFusionBaseExpr.java @@ -303,6 +303,31 @@ public enum DataFusionBaseExprType { FUNC_REGEXP_REPLACE2, // regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i') // Time and Date Functions + // Date/Time Construction + FUNC_CURRENT_DATE, // current_date() + FUNC_CURRENT_TIME, // current_time() + FUNC_NOW, // now() + FUNC_TODAY, // today() + FUNC_MAKE_DATE, // make_date(year, month, day) + // Date/Time Conversion + FUNC_TO_DATE1, // to_date(expression) + FUNC_TO_DATE2, // to_date(expression, format) + FUNC_TO_TIMESTAMP1, // to_timestamp(expression) + FUNC_TO_TIMESTAMP2, // to_timestamp(expression, format) + FUNC_TO_TIMESTAMP_SECONDS1, // to_timestamp_seconds(expression) + FUNC_TO_TIMESTAMP_SECONDS2, // to_timestamp_seconds(expression, format) + FUNC_TO_TIMESTAMP_MILLIS1, // to_timestamp_millis(expression) + FUNC_TO_TIMESTAMP_MILLIS2, // to_timestamp_millis(expression, format) + FUNC_TO_TIMESTAMP_MICROS1, // to_timestamp_micros(expression) + FUNC_TO_TIMESTAMP_MICROS2, // to_timestamp_micros(expression, format) + FUNC_TO_TIMESTAMP_NANOS1, // to_timestamp_nanos(expression) + FUNC_TO_TIMESTAMP_NANOS2, // to_timestamp_nanos(expression, format) + FUNC_FROM_UNIXTIME, // from_unixtime(expression) + // Date/Time Extraction & Manipulation + FUNC_DATE_PART, // date_part('year', timestamp) + FUNC_DATE_TRUNC, // date_trunc('day', timestamp) + FUNC_DATE_BIN, // date_bin(interval '1 day', timestamp, origin) + FUNC_TO_CHAR, // to_char(timestamp, format) // Array Functions diff --git a/src/sqlancer/datafusion/gen/DataFusionBaseExprFactory.java b/src/sqlancer/datafusion/gen/DataFusionBaseExprFactory.java index 7151c308..17761044 100644 --- a/src/sqlancer/datafusion/gen/DataFusionBaseExprFactory.java +++ b/src/sqlancer/datafusion/gen/DataFusionBaseExprFactory.java @@ -26,13 +26,15 @@ public static DataFusionBaseExpr createExpr(DataFusionBaseExprType type) { Arrays.asList(DataFusionDataType.BOOLEAN), Arrays.asList(new ArgumentType.Fixed( new ArrayList<>(Arrays.asList(DataFusionDataType.STRING, DataFusionDataType.BOOLEAN, - DataFusionDataType.DOUBLE, DataFusionDataType.BIGINT, DataFusionDataType.NULL))))); + DataFusionDataType.DOUBLE, DataFusionDataType.BIGINT, DataFusionDataType.NULL, + DataFusionDataType.DATE, DataFusionDataType.TIMESTAMP, DataFusionDataType.TIME))))); case IS_NOT_NULL: return new DataFusionBaseExpr("IS NOT NULL", 1, DataFusionBaseExprCategory.UNARY_POSTFIX, Arrays.asList(DataFusionDataType.BOOLEAN), Arrays.asList(new ArgumentType.Fixed( new ArrayList<>(Arrays.asList(DataFusionDataType.STRING, DataFusionDataType.BOOLEAN, - DataFusionDataType.DOUBLE, DataFusionDataType.BIGINT, DataFusionDataType.NULL))))); + DataFusionDataType.DOUBLE, DataFusionDataType.BIGINT, DataFusionDataType.NULL, + DataFusionDataType.DATE, DataFusionDataType.TIMESTAMP, DataFusionDataType.TIME))))); case BITWISE_AND: return new DataFusionBaseExpr("&", 2, DataFusionBaseExprCategory.BINARY, Arrays.asList(DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE), @@ -114,63 +116,72 @@ public static DataFusionBaseExpr createExpr(DataFusionBaseExprType type) { Arrays.asList(DataFusionDataType.BOOLEAN), Arrays.asList( new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING, - DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN))), + DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN, + DataFusionDataType.DATE, DataFusionDataType.TIMESTAMP, DataFusionDataType.TIME))), new ArgumentType.SameAsFirstArgType())); case EQUAL2: return new DataFusionBaseExpr("==", 2, DataFusionBaseExprCategory.BINARY, Arrays.asList(DataFusionDataType.BOOLEAN), Arrays.asList( new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING, - DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN))), + DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN, + DataFusionDataType.DATE, DataFusionDataType.TIMESTAMP, DataFusionDataType.TIME))), new ArgumentType.SameAsFirstArgType())); case NOT_EQUAL: return new DataFusionBaseExpr("!=", 2, DataFusionBaseExprCategory.BINARY, Arrays.asList(DataFusionDataType.BOOLEAN), Arrays.asList( new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING, - DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN))), + DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN, + DataFusionDataType.DATE, DataFusionDataType.TIMESTAMP, DataFusionDataType.TIME))), new ArgumentType.SameAsFirstArgType())); case LESS_THAN: return new DataFusionBaseExpr("<", 2, DataFusionBaseExprCategory.BINARY, Arrays.asList(DataFusionDataType.BOOLEAN), Arrays.asList( new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING, - DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN))), + DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN, + DataFusionDataType.DATE, DataFusionDataType.TIMESTAMP, DataFusionDataType.TIME))), new ArgumentType.SameAsFirstArgType())); case LESS_THAN_OR_EQUAL_TO: return new DataFusionBaseExpr("<=", 2, DataFusionBaseExprCategory.BINARY, Arrays.asList(DataFusionDataType.BOOLEAN), Arrays.asList( new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING, - DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN))), + DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN, + DataFusionDataType.DATE, DataFusionDataType.TIMESTAMP, DataFusionDataType.TIME))), new ArgumentType.SameAsFirstArgType())); case GREATER_THAN: return new DataFusionBaseExpr(">", 2, DataFusionBaseExprCategory.BINARY, Arrays.asList(DataFusionDataType.BOOLEAN), Arrays.asList( new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING, - DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN))), + DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN, + DataFusionDataType.DATE, DataFusionDataType.TIMESTAMP, DataFusionDataType.TIME))), new ArgumentType.SameAsFirstArgType())); case GREATER_THAN_OR_EQUAL_TO: return new DataFusionBaseExpr(">=", 2, DataFusionBaseExprCategory.BINARY, Arrays.asList(DataFusionDataType.BOOLEAN), Arrays.asList( new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING, - DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN))), + DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN, + DataFusionDataType.DATE, DataFusionDataType.TIMESTAMP, DataFusionDataType.TIME))), new ArgumentType.SameAsFirstArgType())); case IS_DISTINCT_FROM: return new DataFusionBaseExpr("IS DISTINCT FROM", 2, DataFusionBaseExprCategory.BINARY, Arrays.asList(DataFusionDataType.BOOLEAN), Arrays.asList( new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING, - DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN))), + DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN, + DataFusionDataType.DATE, DataFusionDataType.TIMESTAMP, DataFusionDataType.TIME))), new ArgumentType.SameAsFirstArgType())); case IS_NOT_DISTINCT_FROM: return new DataFusionBaseExpr("IS NOT DISTINCT FROM", 2, DataFusionBaseExprCategory.BINARY, Arrays.asList(DataFusionDataType.BOOLEAN), Arrays.asList( new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING, - DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN))), + DataFusionDataType.BIGINT, DataFusionDataType.DOUBLE, DataFusionDataType.BOOLEAN, + DataFusionDataType.DATE, DataFusionDataType.TIMESTAMP, DataFusionDataType.TIME))), new ArgumentType.SameAsFirstArgType())); // String related operators case LIKE: @@ -595,6 +606,129 @@ public static DataFusionBaseExpr createExpr(DataFusionBaseExprType type) { Arrays.asList(new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))), new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))), new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))))); + + // Date/Time Functions - Construction + case FUNC_CURRENT_DATE: + return new DataFusionBaseExpr("CURRENT_DATE", 0, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.DATE), Arrays.asList()); + case FUNC_CURRENT_TIME: + return new DataFusionBaseExpr("CURRENT_TIME", 0, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIME), Arrays.asList()); + case FUNC_NOW: + return new DataFusionBaseExpr("NOW", 0, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), Arrays.asList()); + case FUNC_TODAY: + return new DataFusionBaseExpr("TODAY", 0, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.DATE), Arrays.asList()); + case FUNC_MAKE_DATE: + return new DataFusionBaseExpr("MAKE_DATE", 3, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.DATE), + Arrays.asList( + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.BIGINT))), + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.BIGINT))), + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.BIGINT))))); + + // Date/Time Functions - Conversion + case FUNC_TO_DATE1: + return new DataFusionBaseExpr("TO_DATE", 1, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.DATE), + Arrays.asList(new ArgumentType.Fixed(new ArrayList<>( + Arrays.asList(DataFusionDataType.STRING, DataFusionDataType.BIGINT))))); + case FUNC_TO_DATE2: + return new DataFusionBaseExpr("TO_DATE", 2, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.DATE), + Arrays.asList( + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))), + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))))); + case FUNC_TO_TIMESTAMP1: + return new DataFusionBaseExpr("TO_TIMESTAMP", 1, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), + Arrays.asList(new ArgumentType.Fixed(new ArrayList<>( + Arrays.asList(DataFusionDataType.STRING, DataFusionDataType.BIGINT))))); + case FUNC_TO_TIMESTAMP2: + return new DataFusionBaseExpr("TO_TIMESTAMP", 2, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), + Arrays.asList( + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))), + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))))); + case FUNC_TO_TIMESTAMP_SECONDS1: + return new DataFusionBaseExpr("TO_TIMESTAMP_SECONDS", 1, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), + Arrays.asList(new ArgumentType.Fixed(new ArrayList<>( + Arrays.asList(DataFusionDataType.STRING, DataFusionDataType.BIGINT))))); + case FUNC_TO_TIMESTAMP_SECONDS2: + return new DataFusionBaseExpr("TO_TIMESTAMP_SECONDS", 2, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), + Arrays.asList( + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))), + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))))); + case FUNC_TO_TIMESTAMP_MILLIS1: + return new DataFusionBaseExpr("TO_TIMESTAMP_MILLIS", 1, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), + Arrays.asList(new ArgumentType.Fixed(new ArrayList<>( + Arrays.asList(DataFusionDataType.STRING, DataFusionDataType.BIGINT))))); + case FUNC_TO_TIMESTAMP_MILLIS2: + return new DataFusionBaseExpr("TO_TIMESTAMP_MILLIS", 2, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), + Arrays.asList( + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))), + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))))); + case FUNC_TO_TIMESTAMP_MICROS1: + return new DataFusionBaseExpr("TO_TIMESTAMP_MICROS", 1, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), + Arrays.asList(new ArgumentType.Fixed(new ArrayList<>( + Arrays.asList(DataFusionDataType.STRING, DataFusionDataType.BIGINT))))); + case FUNC_TO_TIMESTAMP_MICROS2: + return new DataFusionBaseExpr("TO_TIMESTAMP_MICROS", 2, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), + Arrays.asList( + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))), + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))))); + case FUNC_TO_TIMESTAMP_NANOS1: + return new DataFusionBaseExpr("TO_TIMESTAMP_NANOS", 1, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), + Arrays.asList(new ArgumentType.Fixed(new ArrayList<>( + Arrays.asList(DataFusionDataType.STRING, DataFusionDataType.BIGINT))))); + case FUNC_TO_TIMESTAMP_NANOS2: + return new DataFusionBaseExpr("TO_TIMESTAMP_NANOS", 2, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), + Arrays.asList( + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))), + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))))); + case FUNC_FROM_UNIXTIME: + return new DataFusionBaseExpr("FROM_UNIXTIME", 1, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), + Arrays.asList(new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.BIGINT))))); + + // Date/Time Functions - Extraction & Manipulation + case FUNC_DATE_PART: + return new DataFusionBaseExpr("DATE_PART", 2, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.BIGINT), + Arrays.asList( + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))), + new ArgumentType.Fixed(new ArrayList<>( + Arrays.asList(DataFusionDataType.TIMESTAMP, DataFusionDataType.DATE))))); + case FUNC_DATE_TRUNC: + return new DataFusionBaseExpr("DATE_TRUNC", 2, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), + Arrays.asList( + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))), + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.TIMESTAMP))))); + case FUNC_DATE_BIN: + return new DataFusionBaseExpr("DATE_BIN", 3, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.TIMESTAMP), + Arrays.asList( + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))), + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.TIMESTAMP))), + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.TIMESTAMP))))); + case FUNC_TO_CHAR: + return new DataFusionBaseExpr("TO_CHAR", 2, DataFusionBaseExprCategory.FUNC, + Arrays.asList(DataFusionDataType.STRING), + Arrays.asList( + new ArgumentType.Fixed(new ArrayList<>( + Arrays.asList(DataFusionDataType.TIMESTAMP, DataFusionDataType.DATE, DataFusionDataType.TIME))), + new ArgumentType.Fixed(new ArrayList<>(Arrays.asList(DataFusionDataType.STRING))))); + case AGGR_MIN: return DataFusionBaseExpr.createCommonNumericAggrFuncSingleArg("MIN"); case AGGR_MAX: @@ -796,6 +930,12 @@ public static List getExprsWithReturnType(Optional exprsWithReturnType = allExpressions.stream() .filter(expr -> expr.possibleReturnTypes.contains(filterType)).collect(Collectors.toList()); + // For temporal types, we need to keep FUNC expressions since they're the only way to generate + // temporal values (we don't have temporal operators yet) + if (filterType.isTemporal()) { + return exprsWithReturnType; + } + if (Randomly.getBoolean()) { // Too many similar function, so test them less often return exprsWithReturnType;