Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ Notes for query generation:
- String scalar functions/expression operators
- Aggregate functions, `HAVING` clause
- Window functions
- (TODO) Time related data type functions
- Time-related data type functions
- (TODO) Subquery
- (TODO) Queries from parquet, csv
- (TODO) Exploit different configurations (change config knobs like `target_partition`, `prefer_hash_join`, etc.)
Expand All @@ -104,7 +104,7 @@ Note: most oracles only apply to a subset of available query types, for advanced
More context for below test oracles at https://github.com/sqlancer/sqlancer/tree/main
- NoREC
- TLP
- (TODO) PQS
- PQS
- (TODO) DQP for logical bugs in joins
- (TODO) [EET](https://www.usenix.org/conference/osdi24/presentation/jiang#:~:text=To%20find%20logic%20bugs%20in,is%20independent%20of%20query%20patterns.) for logic bugs in joins and subqueries
# Bug Report
Expand Down
4 changes: 4 additions & 0 deletions src/sqlancer/datafusion/DataFusionErrors.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ public static void registerExpectedExecutionErrors(ExpectedErrors errors) {
errors.add("This feature is not implemented: Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal");
errors.add(
"This feature is not implemented: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal");
errors.add("Error parsing timestamp");
errors.add("Unable to cast to Date32 for converting from i64 to i32");
errors.add("Invalid input syntax for type interval");
errors.add("This feature is not implemented: DATE_BIN only supports literal values for the origin argument, not arrays");

/*
* Known bugs
Expand Down
40 changes: 39 additions & 1 deletion src/sqlancer/datafusion/DataFusionSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ private static List<DataFusionColumn> getTableColumns(SQLConnection con, String
*/
public enum DataFusionDataType {

STRING, BIGINT, DOUBLE, BOOLEAN, NULL;
STRING, BIGINT, DOUBLE, BOOLEAN, NULL, DATE, TIMESTAMP, TIME;

public static DataFusionDataType getRandomWithoutNull() {
DataFusionDataType dt;
Expand All @@ -129,6 +129,10 @@ public boolean isNumeric() {
return this == BIGINT || this == DOUBLE;
}

/**
 * Tells whether this type is one of the date/time types.
 *
 * @return {@code true} for {@code DATE}, {@code TIMESTAMP} and {@code TIME}; {@code false} for every other type
 */
public boolean isTemporal() {
    switch (this) {
    case DATE:
    case TIMESTAMP:
    case TIME:
        return true;
    default:
        return false;
    }
}

// How to parse type in DataFusion's catalog to `DataFusionDataType`
// As displayed in:
// create table t1(v1 int, v2 bigint);
Expand All @@ -145,7 +149,15 @@ public static DataFusionDataType parseFromDataFusionCatalog(String typeString) {
return DataFusionDataType.STRING;
case "Utf8View":
return DataFusionDataType.STRING;
case "Date32":
return DataFusionDataType.DATE;
case "Time64(ns)":
return DataFusionDataType.TIME;
default:
// Handle Timestamp variants with timezone info
if (typeString.startsWith("Timestamp(")) {
return DataFusionDataType.TIMESTAMP;
}
dfAssert(false, "Uncovered branch typeString: " + typeString);
}

Expand Down Expand Up @@ -182,6 +194,32 @@ public Node<DataFusionExpression> getRandomConstant(DataFusionGlobalState state)
return DataFusionConstant.createNullConstant();
case STRING:
return new DataFusionConstant.DataFusionStringConstant(state.getRandomly().getString());
case DATE:
// Generate dates in a reasonable range: -10000 to 10000 days from epoch (1970-01-01)
long daysFromEpoch = state.getRandomly().getInteger(-10000, 10000);
return DataFusionConstant.createDateConstant(daysFromEpoch);
case TIMESTAMP:
// Generate timestamps: random value or specific edge cases
if (Randomly.getBooleanWithSmallProbability()) {
// Edge cases: very old, very new, or around epoch
long edgeTimestamp = Randomly.fromOptions(
0L, // Unix epoch
-2208988800L, // 1900-01-01
946684800L, // 2000-01-01
1577836800L, // 2020-01-01
253402300799L // 9999-12-31
);
return DataFusionConstant.createTimestampConstant(edgeTimestamp);
}
// Random timestamp in reasonable range (use days as base, then convert to seconds)
// Generate days from epoch: -10000 to 20000 days, then multiply by seconds per day
long randomDays = state.getRandomly().getInteger(-10000, 20000);
long randomTimestamp = randomDays * 86400; // Convert to seconds
return DataFusionConstant.createTimestampConstant(randomTimestamp);
case TIME:
// Generate time values (0 to 86400 seconds in a day)
long secondsInDay = state.getRandomly().getInteger(0, 86400);
return DataFusionConstant.createTimeConstant(secondsInDay);
default:
dfAssert(false, "Unreachable. All branches should be eovered");
}
Expand Down
152 changes: 152 additions & 0 deletions src/sqlancer/datafusion/ast/DataFusionConstant.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,18 @@ public static Node<DataFusionExpression> createNullConstant() {
return new DataFusionNullConstant();
}

/**
 * Creates a DATE constant node.
 *
 * @param daysFromEpoch
 *            day offset handed unchanged to the {@link DataFusionDateConstant} constructor
 *
 * @return a new {@link DataFusionDateConstant} built from {@code daysFromEpoch}
 */
public static Node<DataFusionExpression> createDateConstant(long daysFromEpoch) {
return new DataFusionDateConstant(daysFromEpoch);
}

/**
 * Creates a TIMESTAMP constant node.
 *
 * @param secondsSinceEpoch
 *            second offset handed unchanged to the {@link DataFusionTimestampConstant} constructor
 *
 * @return a new {@link DataFusionTimestampConstant} built from {@code secondsSinceEpoch}
 */
public static Node<DataFusionExpression> createTimestampConstant(long secondsSinceEpoch) {
return new DataFusionTimestampConstant(secondsSinceEpoch);
}

/**
 * Creates a TIME constant node.
 *
 * @param secondsInDay
 *            second-of-day value handed unchanged to the {@link DataFusionTimeConstant} constructor
 *
 * @return a new {@link DataFusionTimeConstant} built from {@code secondsInDay}
 */
public static Node<DataFusionExpression> createTimeConstant(long secondsInDay) {
return new DataFusionTimeConstant(secondsInDay);
}

public static class DataFusionNullConstant extends DataFusionConstant {

@Override
Expand Down Expand Up @@ -134,4 +146,144 @@ public String toString() {
}

}

/**
 * A DATE literal, rendered as {@code DATE 'YYYY-MM-DD'}.
 *
 * The day offset is converted with the proleptic ISO calendar, so negative offsets map to dates before 1970-01-01
 * and leap years and month lengths are respected.
 */
public static class DataFusionDateConstant extends DataFusionConstant {
    // Epoch-day bounds of 0001-01-01 and 9999-12-31: the range a four-digit year can express.
    private static final long MIN_EPOCH_DAY = java.time.LocalDate.of(1, 1, 1).toEpochDay();
    private static final long MAX_EPOCH_DAY = java.time.LocalDate.of(9999, 12, 31).toEpochDay();

    private final String value;

    /**
     * @param daysFromEpoch
     *            number of days relative to 1970-01-01 (negative values are earlier dates); values outside the
     *            range 0001-01-01..9999-12-31 are clamped to the nearest bound
     */
    public DataFusionDateConstant(long daysFromEpoch) {
        // Clamp first so that ofEpochDay never throws and the year always fits in four digits.
        long clampedDays = Math.max(MIN_EPOCH_DAY, Math.min(MAX_EPOCH_DAY, daysFromEpoch));
        java.time.LocalDate date = java.time.LocalDate.ofEpochDay(clampedDays);

        this.value = String.format("%04d-%02d-%02d", date.getYear(), date.getMonthValue(), date.getDayOfMonth());
    }

    /**
     * @return the SQL literal, e.g. {@code DATE '1969-12-31'}
     */
    @Override
    public String toString() {
        return "DATE '" + value + "'";
    }
}

/**
 * A TIMESTAMP literal, rendered as {@code TIMESTAMP 'YYYY-MM-DD HH:MM:SS'}.
 *
 * The second offset is interpreted as UTC and converted with the proleptic ISO calendar, so negative offsets map
 * to instants before 1970-01-01 00:00:00 and the date and time-of-day parts are exact.
 */
public static class DataFusionTimestampConstant extends DataFusionConstant {
    // Epoch-second bounds of 0001-01-01 00:00:00 and 9999-12-31 23:59:59 (UTC): the four-digit-year range.
    private static final long MIN_EPOCH_SECOND = java.time.LocalDateTime.of(1, 1, 1, 0, 0, 0)
            .toEpochSecond(java.time.ZoneOffset.UTC);
    private static final long MAX_EPOCH_SECOND = java.time.LocalDateTime.of(9999, 12, 31, 23, 59, 59)
            .toEpochSecond(java.time.ZoneOffset.UTC);

    private final String value;

    /**
     * @param secondsSinceEpoch
     *            number of seconds relative to 1970-01-01 00:00:00 (negative values are earlier instants); values
     *            outside the range 0001-01-01 00:00:00..9999-12-31 23:59:59 are clamped to the nearest bound
     */
    public DataFusionTimestampConstant(long secondsSinceEpoch) {
        // Clamp first so that ofEpochSecond never throws and the year always fits in four digits.
        long clampedSeconds = Math.max(MIN_EPOCH_SECOND, Math.min(MAX_EPOCH_SECOND, secondsSinceEpoch));
        java.time.LocalDateTime timestamp = java.time.LocalDateTime.ofEpochSecond(clampedSeconds, 0,
                java.time.ZoneOffset.UTC);

        this.value = String.format("%04d-%02d-%02d %02d:%02d:%02d", timestamp.getYear(),
                timestamp.getMonthValue(), timestamp.getDayOfMonth(), timestamp.getHour(), timestamp.getMinute(),
                timestamp.getSecond());
    }

    /**
     * @return the SQL literal, e.g. {@code TIMESTAMP '2000-01-01 00:00:00'}
     */
    @Override
    public String toString() {
        return "TIMESTAMP '" + value + "'";
    }
}

/**
 * A TIME literal, rendered as {@code TIME 'HH:MM:SS'}.
 */
public static class DataFusionTimeConstant extends DataFusionConstant {
    private final String value;

    /**
     * @param secondsInDay
     *            second offset within a day; any value (including negative ones) is wrapped into [0, 86400)
     */
    public DataFusionTimeConstant(long secondsInDay) {
        // floorMod always yields a non-negative remainder for a positive modulus.
        final long wrapped = Math.floorMod(secondsInDay, 86400L);

        final long hh = wrapped / 3600;
        final long mm = wrapped / 60 % 60;
        final long ss = wrapped % 60;

        this.value = String.format("%02d:%02d:%02d", hh, mm, ss);
    }

    /**
     * @return the SQL literal, e.g. {@code TIME '13:05:09'}
     */
    @Override
    public String toString() {
        return "TIME '" + value + "'";
    }
}
}
25 changes: 25 additions & 0 deletions src/sqlancer/datafusion/gen/DataFusionBaseExpr.java
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,31 @@ public enum DataFusionBaseExprType {
FUNC_REGEXP_REPLACE2, // regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i')

// Time and Date Functions
// Date/Time Construction
FUNC_CURRENT_DATE, // current_date()
FUNC_CURRENT_TIME, // current_time()
FUNC_NOW, // now()
FUNC_TODAY, // today()
FUNC_MAKE_DATE, // make_date(year, month, day)
// Date/Time Conversion
FUNC_TO_DATE1, // to_date(expression)
FUNC_TO_DATE2, // to_date(expression, format)
FUNC_TO_TIMESTAMP1, // to_timestamp(expression)
FUNC_TO_TIMESTAMP2, // to_timestamp(expression, format)
FUNC_TO_TIMESTAMP_SECONDS1, // to_timestamp_seconds(expression)
FUNC_TO_TIMESTAMP_SECONDS2, // to_timestamp_seconds(expression, format)
FUNC_TO_TIMESTAMP_MILLIS1, // to_timestamp_millis(expression)
FUNC_TO_TIMESTAMP_MILLIS2, // to_timestamp_millis(expression, format)
FUNC_TO_TIMESTAMP_MICROS1, // to_timestamp_micros(expression)
FUNC_TO_TIMESTAMP_MICROS2, // to_timestamp_micros(expression, format)
FUNC_TO_TIMESTAMP_NANOS1, // to_timestamp_nanos(expression)
FUNC_TO_TIMESTAMP_NANOS2, // to_timestamp_nanos(expression, format)
FUNC_FROM_UNIXTIME, // from_unixtime(expression)
// Date/Time Extraction & Manipulation
FUNC_DATE_PART, // date_part('year', timestamp)
FUNC_DATE_TRUNC, // date_trunc('day', timestamp)
FUNC_DATE_BIN, // date_bin(interval '1 day', timestamp, origin)
FUNC_TO_CHAR, // to_char(timestamp, format)

// Array Functions

Expand Down
Loading