From 1734d3f540e3ae83d84d4a498a6e5ff594aee288 Mon Sep 17 00:00:00 2001 From: Hu Shenggang Date: Tue, 30 Dec 2025 00:26:38 +0800 Subject: [PATCH] [feat](olap) Support lazy reading mode for pruned complex columns --- be/src/olap/olap_common.h | 1 + .../olap/rowset/segment_v2/column_reader.cpp | 360 +++++++++++----- be/src/olap/rowset/segment_v2/column_reader.h | 137 +++++- .../rowset/segment_v2/segment_iterator.cpp | 60 ++- .../olap/rowset/segment_v2/segment_iterator.h | 6 +- be/src/pipeline/exec/olap_scan_operator.cpp | 2 + be/src/pipeline/exec/olap_scan_operator.h | 1 + be/src/runtime/runtime_state.h | 5 + be/src/vec/exec/scan/olap_scanner.cpp | 1 + .../org/apache/doris/qe/SessionVariable.java | 2 + gensrc/thrift/PaloInternalService.thrift | 2 + .../complex_types/test_pruned_columns.out | 395 +++++++++++++++--- .../complex_types/test_pruned_columns.groovy | 179 ++++++-- 13 files changed, 967 insertions(+), 184 deletions(-) diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index 7c28084624a66d..2a660a11d6b704 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -322,6 +322,7 @@ struct OlapReaderStatistics { int64_t lazy_read_ns = 0; int64_t block_lazy_read_seek_num = 0; int64_t block_lazy_read_seek_ns = 0; + int64_t lazy_read_pruned_ns = 0; int64_t raw_rows_read = 0; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index fa5299df2f0030..78272d468280b0 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -940,8 +940,30 @@ Status ColumnReader::new_struct_iterator(ColumnIteratorUPtr* iterator, return Status::OK(); } -Result ColumnIterator::_get_sub_access_paths( - const TColumnAccessPaths& access_paths) { +void ColumnIterator::_convert_to_place_holder_column(vectorized::MutableColumnPtr& dst, + size_t count) { + if (_reading_flag != ReadingFlag::SKIP_READING && _reading_mode == ReadingMode::PREDICATE) { + 
if (!_place_holder_columns.contains(dst.get())) { + _place_holder_columns.emplace(dst.get()); + } + } + + if (_reading_mode != ReadingMode::LAZY) { + dst->insert_many_defaults(count); + } +} + +void ColumnIterator::_recovery_from_place_holder_column(vectorized::MutableColumnPtr& dst) { + if (_reading_mode == ReadingMode::LAZY) { + if (_place_holder_columns.contains(dst.get())) { + dst->clear(); + _place_holder_columns.erase(dst.get()); + } + } +} + +Result ColumnIterator::_process_sub_access_paths( + const TColumnAccessPaths& access_paths, const bool is_predicate) { TColumnAccessPaths sub_access_paths = access_paths; for (auto it = sub_access_paths.begin(); it != sub_access_paths.end();) { TColumnAccessPath& name_path = *it; @@ -960,7 +982,11 @@ Result ColumnIterator::_get_sub_access_paths( if (!name_path.data_access_path.path.empty()) { ++it; } else { - set_need_to_read(); + if (is_predicate) { + _reading_flag = ReadingFlag::READING_FOR_PREDICATE; + } else { + set_need_to_read(); + } it = sub_access_paths.erase(it); } } @@ -997,7 +1023,7 @@ Status MapFileColumnIterator::init(const ColumnIteratorOptions& opts) { } Status MapFileColumnIterator::seek_to_ordinal(ordinal_t ord) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Map column iterator column " << _column_name << " skip reading."; return Status::OK(); } @@ -1016,16 +1042,22 @@ Status MapFileColumnIterator::seek_to_ordinal(ordinal_t ord) { Status MapFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Map column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(*n); + _convert_to_place_holder_column(dst, *n); return Status::OK(); } + _recovery_from_place_holder_column(dst); + auto& column_map = assert_cast( dst->is_nullable() ? 
static_cast(*dst).get_nested_column() : *dst); auto column_offsets_ptr = column_map.get_offsets_column().assume_mutable(); + if (!need_to_read_meta_columns()) { + column_offsets_ptr = vectorized::ColumnMap::COffsets::create(); + } + bool offsets_has_null = false; ssize_t start = column_offsets_ptr->size(); RETURN_IF_ERROR(_offsets_iterator->next_batch(n, column_offsets_ptr, &offsets_has_null)); @@ -1053,7 +1085,7 @@ Status MapFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr column_map.get_values_ptr() = std::move(val_ptr); } - if (dst->is_nullable()) { + if (dst->is_nullable() && need_to_read_meta_columns()) { size_t num_read = *n; auto null_map_ptr = static_cast(*dst).get_null_map_column_ptr(); @@ -1077,16 +1109,20 @@ Status MapFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, vectorized::MutableColumnPtr& dst) { - if (_reading_flag == ReadingFlag::SKIP_READING) { - DLOG(INFO) << "File column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(count); + if (!need_to_read()) { + DLOG(INFO) << "Map column iterator column " << _column_name << " skip reading."; + _convert_to_place_holder_column(dst, count); return Status::OK(); } + + _recovery_from_place_holder_column(dst); + if (count == 0) { return Status::OK(); } + // resolve ColumnMap and nullable wrapper - const auto* column_map = vectorized::check_and_get_column( + auto* column_map = vectorized::check_and_get_column( dst->is_nullable() ? static_cast(*dst).get_nested_column() : *dst); auto offsets_ptr = column_map->get_offsets_column().assume_mutable(); @@ -1095,31 +1131,51 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t // 1. bulk read null-map if nullable std::vector null_mask; // 0: not null, 1: null - if (_map_reader->is_nullable()) { - // For nullable map columns, the destination column must also be nullable. 
- if (UNLIKELY(!dst->is_nullable())) { - return Status::InternalError( - "unexpected non-nullable destination column for nullable map reader"); + if (need_to_read_meta_columns()) { + if (_map_reader->is_nullable()) { + // For nullable map columns, the destination column must also be nullable. + if (UNLIKELY(!dst->is_nullable())) { + return Status::InternalError( + "unexpected non-nullable destination column for nullable map reader"); + } + auto null_map_ptr = + static_cast(*dst).get_null_map_column_ptr(); + size_t null_before = null_map_ptr->size(); + RETURN_IF_ERROR(_null_iterator->read_by_rowids(rowids, count, null_map_ptr)); + // extract a light-weight view to decide element reads + auto& null_map_col = assert_cast(*null_map_ptr); + null_mask.reserve(count); + for (size_t i = 0; i < count; ++i) { + null_mask.push_back(null_map_col.get_element(null_before + i)); + } + } else if (dst->is_nullable()) { + // In not-null to null linked-schemachange mode, + // the data files (including the meta in the footer) are not rewritten, + // so dst may be nullable (from the changed meta) while the old data is not nullable; + // if so, fill the null map with 0 (not null) for every row read. + auto null_map_ptr = + static_cast(*dst).get_null_map_column_ptr(); + auto& null_map = assert_cast(*null_map_ptr); + null_map.insert_many_vals(0, count); } - auto null_map_ptr = - static_cast(*dst).get_null_map_column_ptr(); - size_t null_before = null_map_ptr->size(); - RETURN_IF_ERROR(_null_iterator->read_by_rowids(rowids, count, null_map_ptr)); - // extract a light-weight view to decide element reads - auto& null_map_col = assert_cast(*null_map_ptr); - null_mask.reserve(count); - for (size_t i = 0; i < count; ++i) { - null_mask.push_back(null_map_col.get_element(null_before + i)); + } else { + if (_map_reader->is_nullable()) { + // For nullable map columns, the destination column must also be nullable. 
+ if (UNLIKELY(!dst->is_nullable())) { + return Status::InternalError( + "unexpected non-nullable destination column for nullable map reader"); + } + + vectorized::MutableColumnPtr null_map_ptr = + vectorized::ColumnVector::create(); + RETURN_IF_ERROR(_null_iterator->read_by_rowids(rowids, count, null_map_ptr)); + + auto& null_map_col = assert_cast(*null_map_ptr); + null_mask.reserve(count); + for (size_t i = 0; i < count; ++i) { + null_mask.push_back(null_map_col.get_element(i)); + } } - } else if (dst->is_nullable()) { - // in not-null to null linked-schemachange mode, - // actually we do not change dat data include meta in footer, - // so may dst from changed meta which is nullable but old data is not nullable, - // if so, we should set null_map to all null by default - auto null_map_ptr = - static_cast(*dst).get_null_map_column_ptr(); - auto& null_map = assert_cast(*null_map_ptr); - null_map.insert_many_vals(0, count); } // 2. bulk read start ordinals for requested rows @@ -1162,16 +1218,19 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t auto& next_starts_data = assert_cast(*next_starts_col).get_data(); std::vector sizes(count, 0); size_t acc = base; - const auto original_size = offsets.get_data().back(); - offsets.get_data().reserve(offsets.get_data().size() + count); + if (need_to_read_meta_columns()) { + offsets.get_data().reserve(offsets.get_data().size() + count); + } for (size_t i = 0; i < count; ++i) { - size_t sz = static_cast(next_starts_data[i] - starts_data[i]); + auto sz = static_cast(next_starts_data[i] - starts_data[i]); if (_map_reader->is_nullable() && !null_mask.empty() && null_mask[i]) { sz = 0; // null rows do not consume elements } sizes[i] = sz; acc += sz; - offsets.get_data().push_back(acc); + if (need_to_read_meta_columns()) { + offsets.get_data().push_back(acc); + } } // 6. 
read key/value elements for non-empty sizes @@ -1192,18 +1251,14 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t bool dummy_has_null = false; if (this_run != 0) { - if (_key_iterator->reading_flag() != ReadingFlag::SKIP_READING) { - RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start_idx)); - RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, &dummy_has_null)); - DCHECK(n == this_run); - } - - if (_val_iterator->reading_flag() != ReadingFlag::SKIP_READING) { - n = this_run; - RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start_idx)); - RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, &dummy_has_null)); - DCHECK(n == this_run); - } + RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start_idx)); + RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, &dummy_has_null)); + DCHECK(n == this_run); + + n = this_run; + RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start_idx)); + RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, &dummy_has_null)); + DCHECK(n == this_run); } start_idx = start; this_run = sz; @@ -1216,29 +1271,19 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t } size_t n = this_run; - const size_t total_count = offsets.get_data().back() - original_size; bool dummy_has_null = false; - if (_key_iterator->reading_flag() != ReadingFlag::SKIP_READING) { - if (this_run != 0) { - RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start_idx)); - RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, &dummy_has_null)); - DCHECK(n == this_run); - } - } else { - keys_ptr->insert_many_defaults(total_count); - } - - if (_val_iterator->reading_flag() != ReadingFlag::SKIP_READING) { - if (this_run != 0) { - n = this_run; - RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start_idx)); - RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, &dummy_has_null)); - DCHECK(n == this_run); - } - } else { - vals_ptr->insert_many_defaults(total_count); - } - + if (this_run != 0) { + 
RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start_idx)); + RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, &dummy_has_null)); + DCHECK(n == this_run); + + n = this_run; + RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start_idx)); + RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, &dummy_has_null)); + DCHECK(n == this_run); + } + column_map->get_keys_ptr() = std::move(keys_ptr); + column_map->get_values_ptr() = std::move(vals_ptr); return Status::OK(); } @@ -1265,8 +1310,18 @@ Status MapFileColumnIterator::set_access_paths(const TColumnAccessPaths& all_acc << " to READING_FOR_PREDICATE"; } - auto sub_all_access_paths = DORIS_TRY(_get_sub_access_paths(all_access_paths)); - auto sub_predicate_access_paths = DORIS_TRY(_get_sub_access_paths(predicate_access_paths)); + _pruned = true; + + auto sub_all_access_paths = DORIS_TRY(_process_sub_access_paths(all_access_paths, false)); + auto sub_predicate_access_paths = + DORIS_TRY(_process_sub_access_paths(predicate_access_paths, true)); + + if (sub_predicate_access_paths.empty() && _reading_flag == ReadingFlag::READING_FOR_PREDICATE) { + // if no sub-column in predicate_access_paths, but current column is READING_FOR_PREDICATE, + // then we should set key/value iterator to READING_FOR_PREDICATE too. 
+ _key_iterator->set_reading_flag_recursively(ReadingFlag::READING_FOR_PREDICATE); + _val_iterator->set_reading_flag_recursively(ReadingFlag::READING_FOR_PREDICATE); + } if (sub_all_access_paths.empty()) { return Status::OK(); @@ -1329,6 +1384,31 @@ Status MapFileColumnIterator::set_access_paths(const TColumnAccessPaths& all_acc return Status::OK(); } +void MapFileColumnIterator::set_reading_mode(ReadingMode mode) { + ColumnIterator::set_reading_mode(mode); + _key_iterator->set_reading_mode(mode); + _val_iterator->set_reading_mode(mode); +} + +void MapFileColumnIterator::finalize_lazy_mode(vectorized::MutableColumnPtr& dst) { + _recovery_from_place_holder_column(dst); + auto& map_column = assert_cast( + dst->is_nullable() ? static_cast(*dst).get_nested_column() + : *dst); + auto keys_ptr = map_column.get_keys().assume_mutable(); + auto vals_ptr = map_column.get_values().assume_mutable(); + _key_iterator->finalize_lazy_mode(keys_ptr); + _val_iterator->finalize_lazy_mode(vals_ptr); + map_column.get_keys_ptr() = std::move(keys_ptr); + map_column.get_values_ptr() = std::move(vals_ptr); +} + +void MapFileColumnIterator::set_reading_flag_recursively(ReadingFlag flag) { + set_reading_flag(flag); + _key_iterator->set_reading_flag_recursively(flag); + _val_iterator->set_reading_flag_recursively(flag); +} + //////////////////////////////////////////////////////////////////////////////// StructFileColumnIterator::StructFileColumnIterator( @@ -1357,12 +1437,14 @@ Status StructFileColumnIterator::init(const ColumnIteratorOptions& opts) { Status StructFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Struct column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(*n); + _convert_to_place_holder_column(dst, *n); return Status::OK(); } + _recovery_from_place_holder_column(dst); + auto& column_struct = assert_cast( 
dst->is_nullable() ? static_cast(*dst).get_nested_column() : *dst); @@ -1376,7 +1458,7 @@ Status StructFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumn column_struct.get_column_ptr(i) = std::move(sub_column_ptr); } - if (dst->is_nullable()) { + if (dst->is_nullable() && need_to_read_meta_columns()) { size_t num_read = *n; auto null_map_ptr = static_cast(*dst).get_null_map_column_ptr(); @@ -1400,7 +1482,7 @@ Status StructFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumn } Status StructFileColumnIterator::seek_to_ordinal(ordinal_t ord) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Struct column iterator column " << _column_name << " skip reading."; return Status::OK(); } @@ -1408,7 +1490,8 @@ Status StructFileColumnIterator::seek_to_ordinal(ordinal_t ord) { for (auto& column_iterator : _sub_column_iterators) { RETURN_IF_ERROR(column_iterator->seek_to_ordinal(ord)); } - if (_struct_reader->is_nullable()) { + + if (_struct_reader->is_nullable() && need_to_read_meta_columns()) { RETURN_IF_ERROR(_null_iterator->seek_to_ordinal(ord)); } return Status::OK(); @@ -1416,12 +1499,14 @@ Status StructFileColumnIterator::seek_to_ordinal(ordinal_t ord) { Status StructFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, vectorized::MutableColumnPtr& dst) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Struct column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(count); + _convert_to_place_holder_column(dst, count); return Status::OK(); } + _recovery_from_place_holder_column(dst); + if (count == 0) { return Status::OK(); } @@ -1485,8 +1570,12 @@ Status StructFileColumnIterator::set_access_paths( DLOG(INFO) << "Struct column iterator set sub-column " << _column_name << " to READING_FOR_PREDICATE"; } - auto sub_all_access_paths = DORIS_TRY(_get_sub_access_paths(all_access_paths)); - auto 
sub_predicate_access_paths = DORIS_TRY(_get_sub_access_paths(predicate_access_paths)); + + _pruned = true; + + auto sub_all_access_paths = DORIS_TRY(_process_sub_access_paths(all_access_paths, false)); + auto sub_predicate_access_paths = + DORIS_TRY(_process_sub_access_paths(predicate_access_paths, true)); const auto no_sub_column_to_skip = sub_all_access_paths.empty(); const auto no_predicate_sub_column = sub_predicate_access_paths.empty(); @@ -1521,6 +1610,10 @@ Status StructFileColumnIterator::set_access_paths( sub_predicate_access_paths_of_this.emplace_back(paths); } } + } else if (_reading_flag == ReadingFlag::READING_FOR_PREDICATE) { + // if no sub-column in predicate_access_paths, but current column is READING_FOR_PREDICATE, + // then we should set sub iterator to READING_FOR_PREDICATE too. + sub_iterator->set_reading_flag_recursively(ReadingFlag::READING_FOR_PREDICATE); } RETURN_IF_ERROR(sub_iterator->set_access_paths(sub_all_access_paths_of_this, @@ -1529,6 +1622,34 @@ Status StructFileColumnIterator::set_access_paths( return Status::OK(); } +void StructFileColumnIterator::set_reading_mode(ReadingMode mode) { + ColumnIterator::set_reading_mode(mode); + for (auto& sub_iterator : _sub_column_iterators) { + sub_iterator->set_reading_mode(mode); + } +} + +void StructFileColumnIterator::finalize_lazy_mode(vectorized::MutableColumnPtr& dst) { + _recovery_from_place_holder_column(dst); + auto& column_struct = assert_cast( + dst->is_nullable() ? 
static_cast(*dst).get_nested_column() + : *dst); + + for (size_t i = 0; i < _sub_column_iterators.size(); ++i) { + auto& sub_column = column_struct.get_column_ptr(i); + vectorized::MutableColumnPtr mutable_sub_column = sub_column->assume_mutable(); + _sub_column_iterators[i]->finalize_lazy_mode(mutable_sub_column); + sub_column = std::move(mutable_sub_column); + } +} + +void StructFileColumnIterator::set_reading_flag_recursively(ReadingFlag flag) { + set_reading_flag(flag); + for (const auto& sub_column_iterator : _sub_column_iterators) { + sub_column_iterator->set_reading_flag_recursively(flag); + } +} + //////////////////////////////////////////////////////////////////////////////// Status OffsetFileColumnIterator::init(const ColumnIteratorOptions& opts) { RETURN_IF_ERROR(_offset_iterator->init(opts)); @@ -1626,7 +1747,7 @@ Status ArrayFileColumnIterator::_seek_by_offsets(ordinal_t ord) { } Status ArrayFileColumnIterator::seek_to_ordinal(ordinal_t ord) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Array column iterator column " << _column_name << " skip reading."; return Status::OK(); } @@ -1640,18 +1761,26 @@ Status ArrayFileColumnIterator::seek_to_ordinal(ordinal_t ord) { Status ArrayFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) { - if (_reading_flag == ReadingFlag::SKIP_READING) { - DLOG(INFO) << "Array column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(*n); + if (!need_to_read()) { + DLOG(INFO) << "Array column iterator column " << _column_name + << " skip reading, reading mode" << static_cast(_reading_mode) + << ", reading flag: " << static_cast(_reading_flag); + _convert_to_place_holder_column(dst, *n); return Status::OK(); } - const auto* column_array = vectorized::check_and_get_column( + _recovery_from_place_holder_column(dst); + + auto* column_array = vectorized::check_and_get_column( dst->is_nullable() ? 
static_cast(*dst).get_nested_column() : *dst); bool offsets_has_null = false; auto column_offsets_ptr = column_array->get_offsets_column().assume_mutable(); + if (!need_to_read_meta_columns()) { + column_offsets_ptr = vectorized::ColumnArray::ColumnOffsets::create(); + } + ssize_t start = column_offsets_ptr->size(); RETURN_IF_ERROR(_offset_iterator->next_batch(n, column_offsets_ptr, &offsets_has_null)); if (*n == 0) { @@ -1670,7 +1799,7 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnP DCHECK(num_read == num_items); } - if (dst->is_nullable()) { + if (dst->is_nullable() && need_to_read_meta_columns()) { auto null_map_ptr = static_cast(*dst).get_null_map_column_ptr(); size_t num_read = *n; @@ -1690,17 +1819,21 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnP DCHECK(num_read == *n); } + column_array->get_data_ptr() = std::move(column_items_ptr); + return Status::OK(); } Status ArrayFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, vectorized::MutableColumnPtr& dst) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Array column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(count); + _convert_to_place_holder_column(dst, count); return Status::OK(); } + _recovery_from_place_holder_column(dst); + for (size_t i = 0; i < count; ++i) { // TODO(cambyzju): now read array one by one, need optimize later RETURN_IF_ERROR(seek_to_ordinal(rowids[i])); @@ -1720,6 +1853,26 @@ void ArrayFileColumnIterator::remove_pruned_sub_iterators() { _item_iterator->remove_pruned_sub_iterators(); } +void ArrayFileColumnIterator::set_reading_mode(ReadingMode mode) { + ColumnIterator::set_reading_mode(mode); + _item_iterator->set_reading_mode(mode); +} + +void ArrayFileColumnIterator::finalize_lazy_mode(vectorized::MutableColumnPtr& dst) { + _recovery_from_place_holder_column(dst); + auto* column_array = 
vectorized::check_and_get_column( + dst->is_nullable() ? static_cast(*dst).get_nested_column() + : *dst); + auto item_column_ptr = column_array->get_data().assume_mutable(); + _item_iterator->finalize_lazy_mode(item_column_ptr); + column_array->get_data_ptr() = std::move(item_column_ptr); +} + +void ArrayFileColumnIterator::set_reading_flag_recursively(ReadingFlag flag) { + set_reading_flag(flag); + _item_iterator->set_reading_flag_recursively(flag); +} + Status ArrayFileColumnIterator::set_access_paths(const TColumnAccessPaths& all_access_paths, const TColumnAccessPaths& predicate_access_paths) { if (all_access_paths.empty()) { @@ -1732,8 +1885,11 @@ Status ArrayFileColumnIterator::set_access_paths(const TColumnAccessPaths& all_a << " to READING_FOR_PREDICATE"; } - auto sub_all_access_paths = DORIS_TRY(_get_sub_access_paths(all_access_paths)); - auto sub_predicate_access_paths = DORIS_TRY(_get_sub_access_paths(predicate_access_paths)); + _pruned = true; + + auto sub_all_access_paths = DORIS_TRY(_process_sub_access_paths(all_access_paths, false)); + auto sub_predicate_access_paths = + DORIS_TRY(_process_sub_access_paths(predicate_access_paths, true)); const auto no_sub_column_to_skip = sub_all_access_paths.empty(); const auto no_predicate_sub_column = sub_predicate_access_paths.empty(); @@ -1752,6 +1908,10 @@ Status ArrayFileColumnIterator::set_access_paths(const TColumnAccessPaths& all_a path.data_access_path.path[0] = _item_iterator->column_name(); } } + } else if (_reading_flag == ReadingFlag::READING_FOR_PREDICATE) { + // if no sub-column in predicate_access_paths, but current column is READING_FOR_PREDICATE, + // then we should set item_iterator to READING_FOR_PREDICATE too. 
+ _item_iterator->set_reading_flag_recursively(ReadingFlag::READING_FOR_PREDICATE); } if (!no_sub_column_to_skip || !no_predicate_sub_column) { @@ -1803,7 +1963,7 @@ Status FileColumnIterator::init(const ColumnIteratorOptions& opts) { FileColumnIterator::~FileColumnIterator() = default; Status FileColumnIterator::seek_to_ordinal(ordinal_t ord) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "File column iterator column " << _column_name << " skip reading."; return Status::OK(); } @@ -1858,12 +2018,14 @@ Status FileColumnIterator::next_batch_of_zone_map(size_t* n, vectorized::Mutable Status FileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "File column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(*n); + _convert_to_place_holder_column(dst, *n); return Status::OK(); } + _recovery_from_place_holder_column(dst); + size_t curr_size = dst->byte_size(); dst->reserve(*n); size_t remaining = *n; @@ -1924,12 +2086,14 @@ Status FileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& d Status FileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, vectorized::MutableColumnPtr& dst) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "File column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(count); + _convert_to_place_holder_column(dst, count); return Status::OK(); } + _recovery_from_place_holder_column(dst); + size_t remaining = count; size_t total_read_count = 0; size_t nrows_to_read = 0; diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index a8046c5e76c1f8..df4305672d7c66 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ 
-19,6 +19,7 @@ #include #include +#include #include #include // for size_t @@ -403,12 +404,65 @@ class ColumnIterator { virtual void remove_pruned_sub_iterators() {}; + enum class ReadingMode : int { + NORMAL, // default mode + PREDICATE, + LAZY + }; + + virtual void set_reading_mode(ReadingMode mode) { _reading_mode = mode; } + + virtual bool need_to_read() const { + switch (_reading_mode) { + case ReadingMode::NORMAL: + return _reading_flag != ReadingFlag::SKIP_READING; + case ReadingMode::PREDICATE: + return _reading_flag == ReadingFlag::READING_FOR_PREDICATE; + case ReadingMode::LAZY: + return _reading_flag == ReadingFlag::NEED_TO_READ; + default: + return false; + } + } + + // Whether need to read meta columns, such as null map column, offset column. + bool need_to_read_meta_columns() const { + if (_reading_flag == ReadingFlag::SKIP_READING) { + return false; + } + switch (_reading_mode) { + case ReadingMode::NORMAL: + case ReadingMode::PREDICATE: + return true; + case ReadingMode::LAZY: + return _reading_flag != ReadingFlag::READING_FOR_PREDICATE; + } + return false; + } + + virtual void finalize_lazy_mode(vectorized::MutableColumnPtr& dst) { + _recovery_from_place_holder_column(dst); + } + + virtual void set_reading_flag_recursively(ReadingFlag flag) { set_reading_flag(flag); } + + bool is_pruned() const { return _pruned; } + protected: - Result _get_sub_access_paths(const TColumnAccessPaths& access_paths); + void _convert_to_place_holder_column(vectorized::MutableColumnPtr& dst, size_t count); + + void _recovery_from_place_holder_column(vectorized::MutableColumnPtr& dst); + + Result _process_sub_access_paths(const TColumnAccessPaths& access_paths, + const bool is_predicate); ColumnIteratorOptions _opts; ReadingFlag _reading_flag {ReadingFlag::NORMAL_READING}; + ReadingMode _reading_mode {ReadingMode::NORMAL}; std::string _column_name; + bool _pruned {false}; + + std::set _place_holder_columns; }; // This iterator is used to read column data from file @@ 
-560,6 +614,33 @@ class MapFileColumnIterator final : public ColumnIterator { void remove_pruned_sub_iterators() override; + void set_reading_mode(ReadingMode mode) override; + + bool need_to_read() const override { + switch (_reading_mode) { + case ReadingMode::NORMAL: + return _reading_flag != ReadingFlag::SKIP_READING; + case ReadingMode::PREDICATE: + return _reading_flag == ReadingFlag::READING_FOR_PREDICATE; + case ReadingMode::LAZY: + // In lazy mode, some of the key/value columns may still need to be read. + // For example: + // For a map column, the reading flags of its key/value may be: + // - Key: NEED_TO_READ + // - Value: READING_FOR_PREDICATE + // Then the reading flag of the map column should be READING_FOR_PREDICATE, + // so in LAZY mode the map iterator must still run in order to read the Key. + return _reading_flag == ReadingFlag::NEED_TO_READ || + _reading_flag == ReadingFlag::READING_FOR_PREDICATE; + default: + return false; + } + } + + void finalize_lazy_mode(vectorized::MutableColumnPtr& dst) override; + + void set_reading_flag_recursively(ReadingFlag flag) override; + private: std::shared_ptr _map_reader = nullptr; ColumnIteratorUPtr _null_iterator; @@ -596,6 +677,33 @@ class StructFileColumnIterator final : public ColumnIterator { void remove_pruned_sub_iterators() override; + void set_reading_mode(ReadingMode mode) override; + + bool need_to_read() const override { + switch (_reading_mode) { + case ReadingMode::NORMAL: + return _reading_flag != ReadingFlag::SKIP_READING; + case ReadingMode::PREDICATE: + return _reading_flag == ReadingFlag::READING_FOR_PREDICATE; + case ReadingMode::LAZY: + // In lazy mode, some of the sub-columns may still need to be read. + // For example: + // For a struct column, the reading flags of its sub-columns may be: + // - col1: NEED_TO_READ + // - col2: SKIP_READING + // - col3: READING_FOR_PREDICATE + // Then the reading flag of the struct column should be READING_FOR_PREDICATE, + // so in LAZY mode the struct iterator must still run in order to read col1. 
+ return _reading_flag == ReadingFlag::NEED_TO_READ || + _reading_flag == ReadingFlag::READING_FOR_PREDICATE; + default: + return false; + } + } + + void finalize_lazy_mode(vectorized::MutableColumnPtr& dst) override; + void set_reading_flag_recursively(ReadingFlag flag) override; + private: std::shared_ptr _struct_reader = nullptr; ColumnIteratorUPtr _null_iterator; @@ -630,6 +738,33 @@ class ArrayFileColumnIterator final : public ColumnIterator { void remove_pruned_sub_iterators() override; + void set_reading_mode(ReadingMode mode) override; + + bool need_to_read() const override { + switch (_reading_mode) { + case ReadingMode::NORMAL: + return _reading_flag != ReadingFlag::SKIP_READING; + case ReadingMode::PREDICATE: + return _reading_flag == ReadingFlag::READING_FOR_PREDICATE; + case ReadingMode::LAZY: + // In lazy mode, parts of the item column may still need to be read. + // For example: + // For an array of struct, the item's sub-column reading flags may be: + // - col1: NEED_TO_READ + // - col2: READING_FOR_PREDICATE + // Then the reading flag of the array column should be READING_FOR_PREDICATE, + // so in LAZY mode the array iterator must still run in order to read col1. 
+ return _reading_flag == ReadingFlag::NEED_TO_READ || + _reading_flag == ReadingFlag::READING_FOR_PREDICATE; + default: + return false; + } + } + + void finalize_lazy_mode(vectorized::MutableColumnPtr& dst) override; + + void set_reading_flag_recursively(ReadingFlag flag) override; + private: std::shared_ptr _array_reader = nullptr; std::unique_ptr _offset_iterator; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 3e52a9815abd36..ef2a5344b1d4c2 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -374,6 +375,10 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { _score_runtime = _opts.score_runtime; _ann_topn_runtime = _opts.ann_topn_runtime; + _enable_prune_nested_column = _opts.io_ctx.reader_type == ReaderType::READER_QUERY && + _opts.runtime_state && + _opts.runtime_state->enable_prune_nested_column(); + if (opts.output_columns != nullptr) { _output_columns = *(opts.output_columns); } @@ -1736,6 +1741,17 @@ Status SegmentIterator::_vec_init_lazy_materialization() { if (_is_common_expr_column[cid] || _is_pred_column[cid]) { auto loc = _schema_block_id_map[cid]; _columns_to_filter.push_back(loc); + + const auto field_type = _schema->column(cid)->type(); + if (_is_common_expr_column[cid] && _enable_prune_nested_column && + (field_type == FieldType::OLAP_FIELD_TYPE_STRUCT || + field_type == FieldType::OLAP_FIELD_TYPE_ARRAY || + field_type == FieldType::OLAP_FIELD_TYPE_MAP)) { + DCHECK(_column_iterators[cid]); + if (_column_iterators[cid]->is_pruned()) { + _support_lazy_read_pruned_columns.emplace(cid); + } + } } } @@ -2080,6 +2096,13 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint16 }) } + const bool read_for_predicate = _support_lazy_read_pruned_columns.contains(cid); + if (read_for_predicate) { + 
_column_iterators[cid]->set_reading_mode(ColumnIterator::ReadingMode::PREDICATE); + } else { + _column_iterators[cid]->set_reading_mode(ColumnIterator::ReadingMode::NORMAL); + } + if (is_continuous) { size_t rows_read = nrows_read; _opts.stats->predicate_column_read_seek_num += 1; @@ -2262,7 +2285,8 @@ Status SegmentIterator::_read_columns_by_rowids(std::vector& read_colu std::vector& rowid_vector, uint16_t* sel_rowid_idx, size_t select_size, vectorized::MutableColumns* mutable_columns, - bool init_condition_cache) { + bool init_condition_cache, + bool read_for_predicate) { SCOPED_RAW_TIMER(&_opts.stats->lazy_read_ns); std::vector rowids(select_size); @@ -2306,6 +2330,15 @@ Status SegmentIterator::_read_columns_by_rowids(std::vector& read_colu "SegmentIterator meet invalid column, return columns size {}, cid {}", _current_return_columns.size(), cid); } + + const bool should_read_for_predicate = + read_for_predicate && _support_lazy_read_pruned_columns.contains(cid); + if (should_read_for_predicate) { + _column_iterators[cid]->set_reading_mode(ColumnIterator::ReadingMode::PREDICATE); + } else { + _column_iterators[cid]->set_reading_mode(ColumnIterator::ReadingMode::NORMAL); + } + RETURN_IF_ERROR(_column_iterators[cid]->read_by_rowids(rowids.data(), select_size, _current_return_columns[cid])); } @@ -2483,7 +2516,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { SCOPED_RAW_TIMER(&_opts.stats->non_predicate_read_ns); RETURN_IF_ERROR(_read_columns_by_rowids( _common_expr_column_ids, _block_rowids, _sel_rowid_idx.data(), - _selected_size, &_current_return_columns)); + _selected_size, &_current_return_columns, false, true)); _replace_version_col_if_needed(_common_expr_column_ids, _selected_size); RETURN_IF_ERROR(_process_columns(_common_expr_column_ids, block)); } @@ -2514,7 +2547,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { RETURN_IF_ERROR(_read_columns_by_rowids( _non_predicate_columns, _block_rowids, 
_sel_rowid_idx.data(), _selected_size, &_current_return_columns, - _opts.condition_cache_digest && !_find_condition_cache)); + _opts.condition_cache_digest && !_find_condition_cache, false)); _replace_version_col_if_needed(_non_predicate_columns, _selected_size); } else { if (_opts.condition_cache_digest && !_find_condition_cache) { @@ -2526,6 +2559,27 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { } } } + + if (!_support_lazy_read_pruned_columns.empty()) { + SCOPED_RAW_TIMER(&_opts.stats->lazy_read_pruned_ns); + DorisVector rowids(_selected_size); + for (size_t i = 0; i < _selected_size; ++i) { + rowids[i] = _block_rowids[_sel_rowid_idx[i]]; + } + + for (auto cid : _support_lazy_read_pruned_columns) { + auto loc = _schema_block_id_map[cid]; + auto column = block->get_by_position(loc).column->assume_mutable(); + _column_iterators[cid]->set_reading_mode(ColumnIterator::ReadingMode::LAZY); + if (_selected_size > 0) { + RETURN_IF_ERROR(_column_iterators[cid]->read_by_rowids(rowids.data(), + _selected_size, column)); + } + _column_iterators[cid]->finalize_lazy_mode(column); + _column_iterators[cid]->set_reading_mode(ColumnIterator::ReadingMode::NORMAL); + block->get_by_position(loc).column = std::move(column); + } + } } // step5: output columns diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 5ef63c3c6ec4c2..1a80125a83e8a3 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -232,7 +232,8 @@ class SegmentIterator : public RowwiseIterator { std::vector& rowid_vector, uint16_t* sel_rowid_idx, size_t select_size, vectorized::MutableColumns* mutable_columns, - bool init_condition_cache = false); + bool init_condition_cache = false, + bool read_for_predicate = false); Status copy_column_data_by_selector(vectorized::IColumn* input_col_ptr, vectorized::MutableColumnPtr& output_col, @@ -417,6 +418,9 @@ class 
SegmentIterator : public RowwiseIterator { bool _is_need_short_eval = false; bool _is_need_expr_eval = false; + std::set _support_lazy_read_pruned_columns; + bool _enable_prune_nested_column = false; + // fields for vectorization execution std::vector _vec_pred_column_ids; // keep columnId of columns for vectorized predicate evaluation diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp index 7738c195474f64..f23a20e0f664ae 100644 --- a/be/src/pipeline/exec/olap_scan_operator.cpp +++ b/be/src/pipeline/exec/olap_scan_operator.cpp @@ -177,6 +177,8 @@ Status OlapScanLocalState::_init_profile() { _lazy_read_seek_timer = ADD_TIMER(_segment_profile, "LazyReadSeekTime"); _lazy_read_seek_counter = ADD_COUNTER(_segment_profile, "LazyReadSeekCount", TUnit::UNIT); + _lazy_read_pruned_timer = ADD_TIMER(_segment_profile, "LazyReadPrunedTime"); + _output_col_timer = ADD_TIMER(_segment_profile, "OutputColumnTime"); _stats_filtered_counter = ADD_COUNTER(_segment_profile, "RowsStatsFiltered", TUnit::UNIT); diff --git a/be/src/pipeline/exec/olap_scan_operator.h b/be/src/pipeline/exec/olap_scan_operator.h index 331091a36504c4..90bfa3ff915ae1 100644 --- a/be/src/pipeline/exec/olap_scan_operator.h +++ b/be/src/pipeline/exec/olap_scan_operator.h @@ -185,6 +185,7 @@ class OlapScanLocalState final : public ScanLocalState { RuntimeProfile::Counter* _lazy_read_timer = nullptr; RuntimeProfile::Counter* _lazy_read_seek_timer = nullptr; RuntimeProfile::Counter* _lazy_read_seek_counter = nullptr; + RuntimeProfile::Counter* _lazy_read_pruned_timer = nullptr; // total pages read // used by segment v2 diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index e7c66436981b66..cd9cd0414b56ec 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -543,6 +543,11 @@ class RuntimeState { return _query_options.__isset.enable_parallel_scan && _query_options.enable_parallel_scan; } + bool 
enable_prune_nested_column() const { + return _query_options.__isset.enable_prune_nested_column && + _query_options.enable_prune_nested_column; + } + bool is_read_csv_empty_line_as_null() const { return _query_options.__isset.read_csv_empty_line_as_null && _query_options.read_csv_empty_line_as_null; diff --git a/be/src/vec/exec/scan/olap_scanner.cpp b/be/src/vec/exec/scan/olap_scanner.cpp index 9f72d5caeddf86..9363319ec37a8d 100644 --- a/be/src/vec/exec/scan/olap_scanner.cpp +++ b/be/src/vec/exec/scan/olap_scanner.cpp @@ -732,6 +732,7 @@ void OlapScanner::_collect_profile_before_close() { COUNTER_UPDATE(local_state->_predicate_column_read_seek_counter, stats.predicate_column_read_seek_num); COUNTER_UPDATE(local_state->_lazy_read_timer, stats.lazy_read_ns); + COUNTER_UPDATE(local_state->_lazy_read_pruned_timer, stats.lazy_read_pruned_ns); COUNTER_UPDATE(local_state->_lazy_read_seek_timer, stats.block_lazy_read_seek_ns); COUNTER_UPDATE(local_state->_lazy_read_seek_counter, stats.block_lazy_read_seek_num); COUNTER_UPDATE(local_state->_output_col_timer, stats.output_col_ns); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 5ea2da385e666c..f6d72a85a876f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -4927,6 +4927,8 @@ public TQueryOptions toThrift() { tResult.setReadCsvEmptyLineAsNull(readCsvEmptyLineAsNull); tResult.setSerdeDialect(getSerdeDialect()); + tResult.setEnablePruneNestedColumn(enablePruneNestedColumns); + tResult.setEnableMatchWithoutInvertedIndex(enableMatchWithoutInvertedIndex); tResult.setEnableFallbackOnMissingInvertedIndex(enableFallbackOnMissingInvertedIndex); tResult.setEnableInvertedIndexSearcherCache(enableInvertedIndexSearcherCache); diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 
4d9a7dcde2dae0..171615db37de91 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -422,6 +422,8 @@ struct TQueryOptions { 182: optional i32 ivf_nprobe = 1; + 183: optional bool enable_prune_nested_column = false; + // For cloud, to control if the content would be written into file cache // In write path, to control if the content would be written into file cache. // In read path, read from file cache or remote storage when execute query. diff --git a/regression-test/data/datatype_p0/complex_types/test_pruned_columns.out b/regression-test/data/datatype_p0/complex_types/test_pruned_columns.out index b3312aa670c066..054c14dc57348d 100644 --- a/regression-test/data/datatype_p0/complex_types/test_pruned_columns.out +++ b/regression-test/data/datatype_p0/complex_types/test_pruned_columns.out @@ -1,86 +1,367 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !sql -- -1 {"city":"beijing", "data":[{1:{"a":10, "b":20}, 2:{"a":30, "b":40}}], "value":1} -2 {"city":"shanghai", "data":[{2:{"a":50, "b":40}, 1:{"a":70, "b":80}}], "value":2} -3 {"city":"guangzhou", "data":[{1:{"a":90, "b":60}, 2:{"a":110, "b":40}}], "value":3} -4 {"city":"shenzhen", "data":[{2:{"a":130, "b":20}, 1:{"a":150, "b":40}}], "value":4} -5 {"city":"hangzhou", "data":[{1:{"a":170, "b":80}, 2:{"a":190, "b":40}}], "value":5} -6 {"city":"nanjing", "data":[{2:{"a":210, "b":60}, 1:{"a":230, "b":40}}], "value":6} -7 {"city":"tianjin", "data":[{1:{"a":250, "b":20}, 2:{"a":270, "b":40}}], "value":7} -8 {"city":"chongqing", "data":[{2:{"a":290, "b":80}, 1:{"a":310, "b":40}}], "value":8} -9 {"city":"wuhan", "data":[{1:{"a":330, "b":60}, 2:{"a":350, "b":40}}], "value":9} -10 {"city":"xian", "data":[{2:{"a":370, "b":20}, 1:{"a":390, "b":40}}], "value":10} -11 {"city":"changsha", "data":[{1:{"a":410, "b":80}, 2:{"a":430, "b":40}}], "value":11} -12 {"city":"qingdao", "data":[{2:{"a":450, "b":60}, 1:{"a":470, "b":40}}], 
"value":12} -13 {"city":"dalian", "data":[{1:{"a":490, "b":20}, 2:{"a":510, "b":40}}], "value":13} +\N 300 +beijing 300 +chengdu 300 +guangzhou 300 +hangzhou 300 +nanjing 300 +shanghai 300 +shenzhen 300 +wuhan 300 +xian 300 -- !sql1 -- -1 [10] +1 [10, 5] + +-- !sql1_1 -- + +-- !sql1_2 -- -- !sql2 -- -1 beijing -2 shanghai +0 beijing +1 shanghai +2 shenzhen 3 guangzhou -4 shenzhen -5 hangzhou -6 nanjing -7 tianjin -8 chongqing -9 wuhan -10 xian -11 changsha -12 qingdao -13 dalian +4 hangzhou +5 chengdu +6 wuhan +7 xian +8 nanjing +9 \N +10 beijing +11 shanghai +12 shenzhen +13 guangzhou +14 hangzhou +15 chengdu +16 wuhan +17 xian +18 nanjing +19 \N + +-- !sql2_1 -- +100 beijing +101 shanghai +102 shenzhen +103 guangzhou +104 hangzhou +105 chengdu +106 wuhan +107 xian +108 nanjing +109 \N +110 beijing +111 shanghai +112 shenzhen +113 guangzhou +114 hangzhou +115 chengdu +116 wuhan +117 xian +118 nanjing +119 \N + +-- !sql2_2 -- +2999 \N +2998 nanjing +2997 xian +2996 wuhan +2995 chengdu +2994 hangzhou +2993 guangzhou +2992 shenzhen +2991 shanghai +2990 beijing +2989 \N +2988 nanjing +2987 xian +2986 wuhan +2985 chengdu +2984 hangzhou +2983 guangzhou +2982 shenzhen +2981 shanghai +2980 beijing -- !sql3 -- -1 [{1:{"a":10, "b":20}, 2:{"a":30, "b":40}}] -2 [{2:{"a":50, "b":40}, 1:{"a":70, "b":80}}] -3 [{1:{"a":90, "b":60}, 2:{"a":110, "b":40}}] -4 [{2:{"a":130, "b":20}, 1:{"a":150, "b":40}}] -5 [{1:{"a":170, "b":80}, 2:{"a":190, "b":40}}] -6 [{2:{"a":210, "b":60}, 1:{"a":230, "b":40}}] -7 [{1:{"a":250, "b":20}, 2:{"a":270, "b":40}}] -8 [{2:{"a":290, "b":80}, 1:{"a":310, "b":40}}] -9 [{1:{"a":330, "b":60}, 2:{"a":350, "b":40}}] -10 [{2:{"a":370, "b":20}, 1:{"a":390, "b":40}}] -11 [{1:{"a":410, "b":80}, 2:{"a":430, "b":40}}] -12 [{2:{"a":450, "b":60}, 1:{"a":470, "b":40}}] -13 [{1:{"a":490, "b":20}, 2:{"a":510, "b":40}}] +0 [{1:{"a":0, "b":0}, 2:{"a":20, "b":10}}, {1:{"a":0, "b":0}, 2:{"a":0, "b":0}}] +1 [{1:{"a":10, "b":11}, 2:{"a":30, "b":20}}, {2:{"a":5, "b":2.5}, 
3:{"a":3, "b":1.5}}] +2 [{1:{"a":20, "b":22}, 2:{"a":40, "b":30}}, {3:{"a":10, "b":5}, 4:{"a":6, "b":3}}] +3 [{1:{"a":30, "b":33}, 2:{"a":50, "b":40}}, {1:{"a":15, "b":7.5}, 5:{"a":9, "b":4.5}}] +4 [{1:{"a":40, "b":44}, 2:{"a":60, "b":50}}, {2:{"a":20, "b":10}, 6:{"a":12, "b":6}}] +5 [{1:{"a":50, "b":50}, 2:{"a":70, "b":60}}, {3:{"a":25, "b":12.5}, 2:{"a":15, "b":7.5}}] +6 [{1:{"a":60, "b":61}, 2:{"a":80, "b":70}}, {1:{"a":30, "b":15}, 3:{"a":18, "b":9}}] +7 [{1:{"a":70, "b":72}, 2:{"a":90, "b":80}}, {2:{"a":35, "b":17.5}, 4:{"a":21, "b":10.5}}] +8 [{1:{"a":80, "b":83}, 2:{"a":100, "b":90}}, {3:{"a":40, "b":20}, 5:{"a":24, "b":12}}] +9 [{1:{"a":90, "b":94}, 2:{"a":110, "b":100}}, {1:{"a":45, "b":22.5}, 6:{"a":27, "b":13.5}}] +10 [{1:{"a":100, "b":100}, 2:{"a":120, "b":10}}, {2:{"a":30, "b":15}}] +11 [{1:{"a":110, "b":111}, 2:{"a":130, "b":20}}, {3:{"a":33, "b":16.5}}] +12 [{1:{"a":120, "b":122}, 2:{"a":140, "b":30}}, {1:{"a":60, "b":30}, 4:{"a":36, "b":18}}] +13 [{1:{"a":130, "b":133}, 2:{"a":150, "b":40}}, {2:{"a":65, "b":32.5}, 5:{"a":39, "b":19.5}}] +14 [{1:{"a":140, "b":144}, 2:{"a":160, "b":50}}, {3:{"a":70, "b":35}, 6:{"a":42, "b":21}}] +15 [{1:{"a":150, "b":150}, 2:{"a":170, "b":60}}, {1:{"a":75, "b":37.5}, 2:{"a":45, "b":22.5}}] +16 [{1:{"a":160, "b":161}, 2:{"a":180, "b":70}}, {2:{"a":80, "b":40}, 3:{"a":48, "b":24}}] +17 [{1:{"a":170, "b":172}, 2:{"a":190, "b":80}}, {3:{"a":85, "b":42.5}, 4:{"a":51, "b":25.5}}] +18 [{1:{"a":180, "b":183}, 2:{"a":200, "b":90}}, {1:{"a":90, "b":45}, 5:{"a":54, "b":27}}] +19 [{1:{"a":190, "b":194}, 2:{"a":210, "b":100}}, {2:{"a":95, "b":47.5}, 6:{"a":57, "b":28.5}}] + +-- !sql3_1 -- +200 [{1:{"a":2000, "b":2000}, 2:{"a":2020, "b":10}}, {3:{"a":1000, "b":500}, 2:{"a":600, "b":300}}] +201 [{1:{"a":2010, "b":2011}, 2:{"a":2030, "b":20}}, {1:{"a":1005, "b":502.5}, 3:{"a":603, "b":301.5}}] +202 [{1:{"a":2020, "b":2022}, 2:{"a":2040, "b":30}}, {2:{"a":1010, "b":505}, 4:{"a":606, "b":303}}] +203 [{1:{"a":2030, "b":2033}, 
2:{"a":2050, "b":40}}, {3:{"a":1015, "b":507.5}, 5:{"a":609, "b":304.5}}] +204 [{1:{"a":2040, "b":2044}, 2:{"a":2060, "b":50}}, {1:{"a":1020, "b":510}, 6:{"a":612, "b":306}}] +205 [{1:{"a":2050, "b":2050}, 2:{"a":2070, "b":60}}, {2:{"a":615, "b":307.5}}] +206 [{1:{"a":2060, "b":2061}, 2:{"a":2080, "b":70}}, {3:{"a":618, "b":309}}] +207 [{1:{"a":2070, "b":2072}, 2:{"a":2090, "b":80}}, {1:{"a":1035, "b":517.5}, 4:{"a":621, "b":310.5}}] +208 [{1:{"a":2080, "b":2083}, 2:{"a":2100, "b":90}}, {2:{"a":1040, "b":520}, 5:{"a":624, "b":312}}] +209 [{1:{"a":2090, "b":2094}, 2:{"a":2110, "b":100}}, {3:{"a":1045, "b":522.5}, 6:{"a":627, "b":313.5}}] +210 [{1:{"a":2100, "b":2100}, 2:{"a":2120, "b":10}}, {1:{"a":1050, "b":525}, 2:{"a":630, "b":315}}] +211 [{1:{"a":2110, "b":2111}, 2:{"a":2130, "b":20}}, {2:{"a":1055, "b":527.5}, 3:{"a":633, "b":316.5}}] +212 [{1:{"a":2120, "b":2122}, 2:{"a":2140, "b":30}}, {3:{"a":1060, "b":530}, 4:{"a":636, "b":318}}] +213 [{1:{"a":2130, "b":2133}, 2:{"a":2150, "b":40}}, {1:{"a":1065, "b":532.5}, 5:{"a":639, "b":319.5}}] +214 [{1:{"a":2140, "b":2144}, 2:{"a":2160, "b":50}}, {2:{"a":1070, "b":535}, 6:{"a":642, "b":321}}] +215 [{1:{"a":2150, "b":2150}, 2:{"a":2170, "b":60}}, {3:{"a":1075, "b":537.5}, 2:{"a":645, "b":322.5}}] +216 [{1:{"a":2160, "b":2161}, 2:{"a":2180, "b":70}}, {1:{"a":1080, "b":540}, 3:{"a":648, "b":324}}] +217 [{1:{"a":2170, "b":2172}, 2:{"a":2190, "b":80}}, {2:{"a":1085, "b":542.5}, 4:{"a":651, "b":325.5}}] +218 [{1:{"a":2180, "b":2183}, 2:{"a":2200, "b":90}}, {3:{"a":1090, "b":545}, 5:{"a":654, "b":327}}] +219 [{1:{"a":2190, "b":2194}, 2:{"a":2210, "b":100}}, {1:{"a":1095, "b":547.5}, 6:{"a":657, "b":328.5}}] + +-- !sql3_2 -- +2999 [{1:{"a":29990, "b":29994}, 2:{"a":30010, "b":100}}, {3:{"a":14995, "b":7497.5}, 6:{"a":8997, "b":4498.5}}] +2998 [{1:{"a":29980, "b":29983}, 2:{"a":30000, "b":90}}, {2:{"a":14990, "b":7495}, 5:{"a":8994, "b":4497}}] +2997 [{1:{"a":29970, "b":29972}, 2:{"a":29990, "b":80}}, {1:{"a":14985, 
"b":7492.5}, 4:{"a":8991, "b":4495.5}}] +2996 [{1:{"a":29960, "b":29961}, 2:{"a":29980, "b":70}}, {3:{"a":8988, "b":4494}}] +2995 [{1:{"a":29950, "b":29950}, 2:{"a":29970, "b":60}}, {2:{"a":8985, "b":4492.5}}] +2994 [{1:{"a":29940, "b":29944}, 2:{"a":29960, "b":50}}, {1:{"a":14970, "b":7485}, 6:{"a":8982, "b":4491}}] +2993 [{1:{"a":29930, "b":29933}, 2:{"a":29950, "b":40}}, {3:{"a":14965, "b":7482.5}, 5:{"a":8979, "b":4489.5}}] +2992 [{1:{"a":29920, "b":29922}, 2:{"a":29940, "b":30}}, {2:{"a":14960, "b":7480}, 4:{"a":8976, "b":4488}}] +2991 [{1:{"a":29910, "b":29911}, 2:{"a":29930, "b":20}}, {1:{"a":14955, "b":7477.5}, 3:{"a":8973, "b":4486.5}}] +2990 [{1:{"a":29900, "b":29900}, 2:{"a":29920, "b":10}}, {3:{"a":14950, "b":7475}, 2:{"a":8970, "b":4485}}] +2989 [{1:{"a":29890, "b":29894}, 2:{"a":29910, "b":100}}, {2:{"a":14945, "b":7472.5}, 6:{"a":8967, "b":4483.5}}] +2988 [{1:{"a":29880, "b":29883}, 2:{"a":29900, "b":90}}, {1:{"a":14940, "b":7470}, 5:{"a":8964, "b":4482}}] +2987 [{1:{"a":29870, "b":29872}, 2:{"a":29890, "b":80}}, {3:{"a":14935, "b":7467.5}, 4:{"a":8961, "b":4480.5}}] +2986 [{1:{"a":29860, "b":29861}, 2:{"a":29880, "b":70}}, {2:{"a":14930, "b":7465}, 3:{"a":8958, "b":4479}}] +2985 [{1:{"a":29850, "b":29850}, 2:{"a":29870, "b":60}}, {1:{"a":14925, "b":7462.5}, 2:{"a":8955, "b":4477.5}}] +2984 [{1:{"a":29840, "b":29844}, 2:{"a":29860, "b":50}}, {3:{"a":14920, "b":7460}, 6:{"a":8952, "b":4476}}] +2983 [{1:{"a":29830, "b":29833}, 2:{"a":29850, "b":40}}, {2:{"a":14915, "b":7457.5}, 5:{"a":8949, "b":4474.5}}] +2982 [{1:{"a":29820, "b":29822}, 2:{"a":29840, "b":30}}, {1:{"a":14910, "b":7455}, 4:{"a":8946, "b":4473}}] +2981 [{1:{"a":29810, "b":29811}, 2:{"a":29830, "b":20}}, {3:{"a":8943, "b":4471.5}}] +2980 [{1:{"a":29800, "b":29800}, 2:{"a":29820, "b":10}}, {2:{"a":8940, "b":4470}}] -- !sql4 -- -1 [{1:{"a":10, "b":20}, 2:{"a":30, "b":40}}] -2 [{2:{"a":50, "b":40}, 1:{"a":70, "b":80}}] -3 [{1:{"a":90, "b":60}, 2:{"a":110, "b":40}}] -5 [{1:{"a":170, "b":80}, 
2:{"a":190, "b":40}}] -7 [{1:{"a":250, "b":20}, 2:{"a":270, "b":40}}] -9 [{1:{"a":330, "b":60}, 2:{"a":350, "b":40}}] -11 [{1:{"a":410, "b":80}, 2:{"a":430, "b":40}}] -13 [{1:{"a":490, "b":20}, 2:{"a":510, "b":40}}] +3 [{1:{"a":30, "b":33}, 2:{"a":50, "b":40}}, {1:{"a":15, "b":7.5}, 5:{"a":9, "b":4.5}}] +13 [{1:{"a":130, "b":133}, 2:{"a":150, "b":40}}, {2:{"a":65, "b":32.5}, 5:{"a":39, "b":19.5}}] +23 [{1:{"a":230, "b":233}, 2:{"a":250, "b":40}}, {3:{"a":115, "b":57.5}, 5:{"a":69, "b":34.5}}] +33 [{1:{"a":330, "b":333}, 2:{"a":350, "b":40}}, {1:{"a":165, "b":82.5}, 5:{"a":99, "b":49.5}}] +43 [{1:{"a":430, "b":433}, 2:{"a":450, "b":40}}, {2:{"a":215, "b":107.5}, 5:{"a":129, "b":64.5}}] +53 [{1:{"a":530, "b":533}, 2:{"a":550, "b":40}}, {3:{"a":265, "b":132.5}, 5:{"a":159, "b":79.5}}] +63 [{1:{"a":630, "b":633}, 2:{"a":650, "b":40}}, {1:{"a":315, "b":157.5}, 5:{"a":189, "b":94.5}}] +73 [{1:{"a":730, "b":733}, 2:{"a":750, "b":40}}, {2:{"a":365, "b":182.5}, 5:{"a":219, "b":109.5}}] +83 [{1:{"a":830, "b":833}, 2:{"a":850, "b":40}}, {3:{"a":415, "b":207.5}, 5:{"a":249, "b":124.5}}] +93 [{1:{"a":930, "b":933}, 2:{"a":950, "b":40}}, {1:{"a":465, "b":232.5}, 5:{"a":279, "b":139.5}}] +103 [{1:{"a":1030, "b":1033}, 2:{"a":1050, "b":40}}, {2:{"a":515, "b":257.5}, 5:{"a":309, "b":154.5}}] +113 [{1:{"a":1130, "b":1133}, 2:{"a":1150, "b":40}}, {3:{"a":565, "b":282.5}, 5:{"a":339, "b":169.5}}] +123 [{1:{"a":1230, "b":1233}, 2:{"a":1250, "b":40}}, {1:{"a":615, "b":307.5}, 5:{"a":369, "b":184.5}}] +133 [{1:{"a":1330, "b":1333}, 2:{"a":1350, "b":40}}, {2:{"a":665, "b":332.5}, 5:{"a":399, "b":199.5}}] +143 [{1:{"a":1430, "b":1433}, 2:{"a":1450, "b":40}}, {3:{"a":715, "b":357.5}, 5:{"a":429, "b":214.5}}] +153 [{1:{"a":1530, "b":1533}, 2:{"a":1550, "b":40}}, {1:{"a":765, "b":382.5}, 5:{"a":459, "b":229.5}}] +163 [{1:{"a":1630, "b":1633}, 2:{"a":1650, "b":40}}, {2:{"a":815, "b":407.5}, 5:{"a":489, "b":244.5}}] +173 [{1:{"a":1730, "b":1733}, 2:{"a":1750, "b":40}}, {3:{"a":865, "b":432.5}, 
5:{"a":519, "b":259.5}}] +183 [{1:{"a":1830, "b":1833}, 2:{"a":1850, "b":40}}, {1:{"a":915, "b":457.5}, 5:{"a":549, "b":274.5}}] +193 [{1:{"a":1930, "b":1933}, 2:{"a":1950, "b":40}}, {2:{"a":965, "b":482.5}, 5:{"a":579, "b":289.5}}] + +-- !sql4_1 -- +1003 [{1:{"a":10030, "b":10033}, 2:{"a":10050, "b":40}}, {2:{"a":5015, "b":2507.5}, 5:{"a":3009, "b":1504.5}}] +1013 [{1:{"a":10130, "b":10133}, 2:{"a":10150, "b":40}}, {3:{"a":5065, "b":2532.5}, 5:{"a":3039, "b":1519.5}}] +1023 [{1:{"a":10230, "b":10233}, 2:{"a":10250, "b":40}}, {1:{"a":5115, "b":2557.5}, 5:{"a":3069, "b":1534.5}}] +1033 [{1:{"a":10330, "b":10333}, 2:{"a":10350, "b":40}}, {2:{"a":5165, "b":2582.5}, 5:{"a":3099, "b":1549.5}}] +1043 [{1:{"a":10430, "b":10433}, 2:{"a":10450, "b":40}}, {3:{"a":5215, "b":2607.5}, 5:{"a":3129, "b":1564.5}}] +1053 [{1:{"a":10530, "b":10533}, 2:{"a":10550, "b":40}}, {1:{"a":5265, "b":2632.5}, 5:{"a":3159, "b":1579.5}}] +1063 [{1:{"a":10630, "b":10633}, 2:{"a":10650, "b":40}}, {2:{"a":5315, "b":2657.5}, 5:{"a":3189, "b":1594.5}}] +1073 [{1:{"a":10730, "b":10733}, 2:{"a":10750, "b":40}}, {3:{"a":5365, "b":2682.5}, 5:{"a":3219, "b":1609.5}}] +1083 [{1:{"a":10830, "b":10833}, 2:{"a":10850, "b":40}}, {1:{"a":5415, "b":2707.5}, 5:{"a":3249, "b":1624.5}}] +1093 [{1:{"a":10930, "b":10933}, 2:{"a":10950, "b":40}}, {2:{"a":5465, "b":2732.5}, 5:{"a":3279, "b":1639.5}}] +1103 [{1:{"a":11030, "b":11033}, 2:{"a":11050, "b":40}}, {3:{"a":5515, "b":2757.5}, 5:{"a":3309, "b":1654.5}}] +1113 [{1:{"a":11130, "b":11133}, 2:{"a":11150, "b":40}}, {1:{"a":5565, "b":2782.5}, 5:{"a":3339, "b":1669.5}}] +1123 [{1:{"a":11230, "b":11233}, 2:{"a":11250, "b":40}}, {2:{"a":5615, "b":2807.5}, 5:{"a":3369, "b":1684.5}}] +1133 [{1:{"a":11330, "b":11333}, 2:{"a":11350, "b":40}}, {3:{"a":5665, "b":2832.5}, 5:{"a":3399, "b":1699.5}}] +1143 [{1:{"a":11430, "b":11433}, 2:{"a":11450, "b":40}}, {1:{"a":5715, "b":2857.5}, 5:{"a":3429, "b":1714.5}}] +1153 [{1:{"a":11530, "b":11533}, 2:{"a":11550, "b":40}}, 
{2:{"a":5765, "b":2882.5}, 5:{"a":3459, "b":1729.5}}] +1163 [{1:{"a":11630, "b":11633}, 2:{"a":11650, "b":40}}, {3:{"a":5815, "b":2907.5}, 5:{"a":3489, "b":1744.5}}] +1173 [{1:{"a":11730, "b":11733}, 2:{"a":11750, "b":40}}, {1:{"a":5865, "b":2932.5}, 5:{"a":3519, "b":1759.5}}] +1183 [{1:{"a":11830, "b":11833}, 2:{"a":11850, "b":40}}, {2:{"a":5915, "b":2957.5}, 5:{"a":3549, "b":1774.5}}] +1193 [{1:{"a":11930, "b":11933}, 2:{"a":11950, "b":40}}, {3:{"a":5965, "b":2982.5}, 5:{"a":3579, "b":1789.5}}] + +-- !sql4_2 -- +2993 [{1:{"a":29930, "b":29933}, 2:{"a":29950, "b":40}}, {3:{"a":14965, "b":7482.5}, 5:{"a":8979, "b":4489.5}}] +2983 [{1:{"a":29830, "b":29833}, 2:{"a":29850, "b":40}}, {2:{"a":14915, "b":7457.5}, 5:{"a":8949, "b":4474.5}}] +2973 [{1:{"a":29730, "b":29733}, 2:{"a":29750, "b":40}}, {1:{"a":14865, "b":7432.5}, 5:{"a":8919, "b":4459.5}}] +2963 [{1:{"a":29630, "b":29633}, 2:{"a":29650, "b":40}}, {3:{"a":14815, "b":7407.5}, 5:{"a":8889, "b":4444.5}}] +2953 [{1:{"a":29530, "b":29533}, 2:{"a":29550, "b":40}}, {2:{"a":14765, "b":7382.5}, 5:{"a":8859, "b":4429.5}}] +2943 [{1:{"a":29430, "b":29433}, 2:{"a":29450, "b":40}}, {1:{"a":14715, "b":7357.5}, 5:{"a":8829, "b":4414.5}}] +2933 [{1:{"a":29330, "b":29333}, 2:{"a":29350, "b":40}}, {3:{"a":14665, "b":7332.5}, 5:{"a":8799, "b":4399.5}}] +2923 [{1:{"a":29230, "b":29233}, 2:{"a":29250, "b":40}}, {2:{"a":14615, "b":7307.5}, 5:{"a":8769, "b":4384.5}}] +2913 [{1:{"a":29130, "b":29133}, 2:{"a":29150, "b":40}}, {1:{"a":14565, "b":7282.5}, 5:{"a":8739, "b":4369.5}}] +2903 [{1:{"a":29030, "b":29033}, 2:{"a":29050, "b":40}}, {3:{"a":14515, "b":7257.5}, 5:{"a":8709, "b":4354.5}}] +2893 [{1:{"a":28930, "b":28933}, 2:{"a":28950, "b":40}}, {2:{"a":14465, "b":7232.5}, 5:{"a":8679, "b":4339.5}}] +2883 [{1:{"a":28830, "b":28833}, 2:{"a":28850, "b":40}}, {1:{"a":14415, "b":7207.5}, 5:{"a":8649, "b":4324.5}}] +2873 [{1:{"a":28730, "b":28733}, 2:{"a":28750, "b":40}}, {3:{"a":14365, "b":7182.5}, 5:{"a":8619, "b":4309.5}}] +2863 
[{1:{"a":28630, "b":28633}, 2:{"a":28650, "b":40}}, {2:{"a":14315, "b":7157.5}, 5:{"a":8589, "b":4294.5}}] +2853 [{1:{"a":28530, "b":28533}, 2:{"a":28550, "b":40}}, {1:{"a":14265, "b":7132.5}, 5:{"a":8559, "b":4279.5}}] +2843 [{1:{"a":28430, "b":28433}, 2:{"a":28450, "b":40}}, {3:{"a":14215, "b":7107.5}, 5:{"a":8529, "b":4264.5}}] +2833 [{1:{"a":28330, "b":28333}, 2:{"a":28350, "b":40}}, {2:{"a":14165, "b":7082.5}, 5:{"a":8499, "b":4249.5}}] +2823 [{1:{"a":28230, "b":28233}, 2:{"a":28250, "b":40}}, {1:{"a":14115, "b":7057.5}, 5:{"a":8469, "b":4234.5}}] +2813 [{1:{"a":28130, "b":28133}, 2:{"a":28150, "b":40}}, {3:{"a":14065, "b":7032.5}, 5:{"a":8439, "b":4219.5}}] +2803 [{1:{"a":28030, "b":28033}, 2:{"a":28050, "b":40}}, {2:{"a":14015, "b":7007.5}, 5:{"a":8409, "b":4204.5}}] -- !sql5 -- -1 beijing -2 shanghai 3 guangzhou -5 hangzhou -7 tianjin -9 wuhan -11 changsha -13 dalian +13 guangzhou +23 guangzhou +33 guangzhou +43 guangzhou +53 guangzhou +63 guangzhou +73 guangzhou +83 guangzhou +93 guangzhou +103 guangzhou +113 guangzhou +123 guangzhou +133 guangzhou +143 guangzhou +153 guangzhou +163 guangzhou +173 guangzhou +183 guangzhou +193 guangzhou -- !sql5_1 -- -61 +1003 guangzhou +1013 guangzhou +1023 guangzhou +1033 guangzhou +1043 guangzhou +1053 guangzhou +1063 guangzhou +1073 guangzhou +1083 guangzhou +1093 guangzhou +1103 guangzhou +1113 guangzhou +1123 guangzhou +1133 guangzhou +1143 guangzhou +1153 guangzhou +1163 guangzhou +1173 guangzhou +1183 guangzhou +1193 guangzhou -- !sql5_2 -- +2993 guangzhou +2983 guangzhou +2973 guangzhou +2963 guangzhou +2953 guangzhou +2943 guangzhou +2933 guangzhou +2923 guangzhou +2913 guangzhou +2903 guangzhou +2893 guangzhou +2883 guangzhou +2873 guangzhou +2863 guangzhou +2853 guangzhou +2843 guangzhou +2833 guangzhou +2823 guangzhou +2813 guangzhou +2803 guangzhou + +-- !sql5_3 -- +61 + +-- !sql5_4 -- 61 -- !sql6 -- -2 +5 12.5 +15 \N +25 \N +35 87.5 +45 \N +55 \N +65 162.5 +75 \N +85 \N +95 237.5 +105 \N +115 \N +125 312.5 
+135 \N +145 \N +155 387.5 +165 \N +175 \N +185 462.5 +195 \N + +-- !sql6_1 -- +1005 \N +1015 \N +1025 2562.5 +1035 \N +1045 \N +1055 2637.5 +1065 \N +1075 \N +1085 2712.5 +1095 \N +1105 \N +1115 2787.5 +1125 \N +1135 \N +1145 2862.5 +1155 \N +1165 \N +1175 2937.5 +1185 \N +1195 \N + +-- !sql6_2 -- +2995 \N +2985 \N +2975 7437.5 +2965 \N +2955 \N +2945 7362.5 +2935 \N +2925 \N +2915 7287.5 +2905 \N +2895 \N +2885 7212.5 +2875 \N +2865 \N +2855 7137.5 +2845 \N +2835 \N +2825 7062.5 +2815 \N +2805 \N -- !sql7 -- +2 + +-- !sql8 -- 0.41 0.99 --- !sql8 -- +-- !sql9 -- \N added_z diff --git a/regression-test/suites/datatype_p0/complex_types/test_pruned_columns.groovy b/regression-test/suites/datatype_p0/complex_types/test_pruned_columns.groovy index a99b7b6da5998b..51fa13d9263629 100644 --- a/regression-test/suites/datatype_p0/complex_types/test_pruned_columns.groovy +++ b/regression-test/suites/datatype_p0/complex_types/test_pruned_columns.groovy @@ -16,6 +16,7 @@ // under the License. suite("test_pruned_columns") { + sql "set batch_size = 32;" sql """DROP TABLE IF EXISTS `tbl_test_pruned_columns`""" sql """ CREATE TABLE `tbl_test_pruned_columns` ( @@ -23,58 +24,161 @@ suite("test_pruned_columns") { `s` struct>>, value:int> NULL ) ENGINE=OLAP DUPLICATE KEY(`id`) - DISTRIBUTED BY RANDOM BUCKETS AUTO + DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ( "replication_allocation" = "tag.location.default: 1" ); """ sql """ - insert into `tbl_test_pruned_columns` values - (1, named_struct('city', 'beijing', 'data', array(map(1, named_struct('a', 10, 'b', 20.0), 2, named_struct('a', 30, 'b', 40))), 'value', 1)), - (2, named_struct('city', 'shanghai', 'data', array(map(2, named_struct('a', 50, 'b', 40.0), 1, named_struct('a', 70, 'b', 80))), 'value', 2)), - (3, named_struct('city', 'guangzhou', 'data', array(map(1, named_struct('a', 90, 'b', 60.0), 2, named_struct('a', 110, 'b', 40))), 'value', 3)), - (4, named_struct('city', 'shenzhen', 'data', array(map(2, named_struct('a', 130, 
'b', 20.0), 1, named_struct('a', 150, 'b', 40))), 'value', 4)), - (5, named_struct('city', 'hangzhou', 'data', array(map(1, named_struct('a', 170, 'b', 80.0), 2, named_struct('a', 190, 'b', 40))), 'value', 5)), - (6, named_struct('city', 'nanjing', 'data', array(map(2, named_struct('a', 210, 'b', 60.0), 1, named_struct('a', 230, 'b', 40))), 'value', 6)), - (7, named_struct('city', 'tianjin', 'data', array(map(1, named_struct('a', 250, 'b', 20.0), 2, named_struct('a', 270, 'b', 40))), 'value', 7)), - (8, named_struct('city', 'chongqing', 'data', array(map(2, named_struct('a', 290, 'b', 80.0), 1, named_struct('a', 310, 'b', 40))), 'value', 8)), - (9, named_struct('city', 'wuhan', 'data', array(map(1, named_struct('a', 330, 'b', 60.0), 2, named_struct('a', 350, 'b', 40))), 'value', 9)), - (10, named_struct('city', 'xian', 'data', array(map(2, named_struct('a', 370, 'b', 20.0), 1, named_struct('a', 390, 'b', 40))), 'value', 10)), - (11, named_struct('city', 'changsha', 'data', array(map(1, named_struct('a', 410, 'b', 80.0), 2, named_struct('a', 430, 'b', 40))), 'value', 11)), - (12, named_struct('city', 'qingdao', 'data', array(map(2, named_struct('a', 450, 'b', 60.0), 1, named_struct('a', 470, 'b', 40))), 'value', 12)), - (13, named_struct('city', 'dalian', 'data', array(map(1, named_struct('a', 490, 'b', 20.0), 2, named_struct('a', 510, 'b', 40))), 'value', 13)); + insert into `tbl_test_pruned_columns` + select + number as id, + named_struct( + 'city', + case (number % 10) + when 0 then 'beijing' + when 1 then 'shanghai' + when 2 then 'shenzhen' + when 3 then 'guangzhou' + when 4 then 'hangzhou' + when 5 then 'chengdu' + when 6 then 'wuhan' + when 7 then 'xian' + when 8 then 'nanjing' + else null + end, + 'data', + array( + map( + 1, named_struct('a', number * 10, 'b', (number * 10 + number % 5) * 1.0), + 2, named_struct('a', number * 10 + 20, 'b', (number % 10 + 1) * 10.0) + ), + map( + (number % 3 + 1), named_struct('a', number * 5, 'b', number * 2.5), + (number % 
5 + 2), named_struct('a', number * 3, 'b', number * 1.5) + ) + ), + 'value', + number + ) as s + from numbers("number" = "3000"); """ qt_sql """ - select * from `tbl_test_pruned_columns` order by 1; + select struct_element(s, 'city'), count() from `tbl_test_pruned_columns` group by struct_element(s, 'city') order by 1, 2; """ qt_sql1 """ - select b.id, array_map(x -> struct_element(map_values(x)[1], 'a'), struct_element(s, 'data')) from `tbl_test_pruned_columns` t join (select 1 id) b on t.id = b.id order by 1; + select + b.id + , array_map(x -> struct_element(map_values(x)[1], 'a') + , struct_element(s, 'data')) + from `tbl_test_pruned_columns` t join (select 1 id) b on t.id = b.id + order by 1, 2 limit 0, 20; + """ + + qt_sql1_1 """ + select + b.id + , array_map(x -> struct_element(map_values(x)[1], 'a') + , struct_element(s, 'data')) + from `tbl_test_pruned_columns` t join (select 1 id) b on t.id = b.id + order by 1, 2 limit 100, 20; + """ + + qt_sql1_2 """ + select + b.id + , array_map(x -> struct_element(map_values(x)[1], 'a') + , struct_element(s, 'data')) + from `tbl_test_pruned_columns` t join (select 1 id) b on t.id = b.id + order by 1 desc, 2 limit 100, 20; """ qt_sql2 """ - select id, struct_element(s, 'city') from `tbl_test_pruned_columns` order by 1; + select id, struct_element(s, 'city') from `tbl_test_pruned_columns` order by 1 limit 0, 20; + """ + + qt_sql2_1 """ + select id, struct_element(s, 'city') from `tbl_test_pruned_columns` order by 1 limit 100, 20; + """ + + qt_sql2_2 """ + select id, struct_element(s, 'city') from `tbl_test_pruned_columns` order by 1 desc limit 0, 20; """ qt_sql3 """ - select id, struct_element(s, 'data') from `tbl_test_pruned_columns` order by 1; + select id, struct_element(s, 'data') from `tbl_test_pruned_columns` order by 1 limit 0, 20; + """ + + qt_sql3_1 """ + select id, struct_element(s, 'data') from `tbl_test_pruned_columns` order by 1 limit 200, 20; + """ + + qt_sql3_2 """ + select id, struct_element(s, 'data') 
from `tbl_test_pruned_columns` order by 1 desc limit 0, 20; """ qt_sql4 """ - select id, struct_element(s, 'data') from `tbl_test_pruned_columns` where struct_element(struct_element(s, 'data')[1][2], 'b') = 40 order by 1; + select + id + , struct_element(s, 'data') + from `tbl_test_pruned_columns` + where struct_element(struct_element(s, 'data')[1][2], 'b') = 40 + order by 1 limit 0, 20; + """ + + qt_sql4_1 """ + select + id + , struct_element(s, 'data') + from `tbl_test_pruned_columns` + where struct_element(struct_element(s, 'data')[1][2], 'b') = 40 + order by 1 limit 100, 20; + """ + + qt_sql4_2 """ + select + id + , struct_element(s, 'data') + from `tbl_test_pruned_columns` + where struct_element(struct_element(s, 'data')[1][2], 'b') = 40 + order by 1 desc limit 0, 20; """ qt_sql5 """ - select id, struct_element(s, 'city') from `tbl_test_pruned_columns` where struct_element(struct_element(s, 'data')[1][2], 'b') = 40 order by 1; + select + id + , struct_element(s, 'city') + from `tbl_test_pruned_columns` + where struct_element(struct_element(s, 'data')[1][2], 'b') = 40 + order by 1, 2 limit 0, 20; """ qt_sql5_1 """ - select /*+ set enable_prune_nested_column = 1; */ sum(s.value) from `tbl_test_pruned_columns` where id in(1,2,3,4,8,9,10,11,13); + select + id + , struct_element(s, 'city') + from `tbl_test_pruned_columns` + where struct_element(struct_element(s, 'data')[1][2], 'b') = 40 + order by 1, 2 limit 100, 20; """ qt_sql5_2 """ + select + id + , struct_element(s, 'city') + from `tbl_test_pruned_columns` + where struct_element(struct_element(s, 'data')[1][2], 'b') = 40 + order by 1 desc, 2 limit 0, 20; + """ + + qt_sql5_3 """ + select /*+ set enable_prune_nested_column = 1; */ sum(s.value) from `tbl_test_pruned_columns` where id in(1,2,3,4,8,9,10,11,13); + """ + + qt_sql5_4 """ select /*+ set enable_prune_nested_column = 0; */ sum(s.value) from `tbl_test_pruned_columns` where id in(1,2,3,4,8,9,10,11,13); """ @@ -98,10 +202,37 @@ suite("test_pruned_columns") { 
""" qt_sql6 """ - select count(struct_element(dynamic_attributes['theme_preference'], 'confidence_score')) from `tbl_test_pruned_columns_map`; + select + id + , struct_element(struct_element(s, 'data')[2][3], 'b') + from `tbl_test_pruned_columns` + where struct_element(s, 'city') = 'chengdu' + order by 1, 2 limit 0, 20; + """ + + qt_sql6_1 """ + select + id + , struct_element(struct_element(s, 'data')[2][3], 'b') + from `tbl_test_pruned_columns` + where struct_element(s, 'city') = 'chengdu' + order by 1, 2 limit 100, 20; + """ + + qt_sql6_2 """ + select + id + , struct_element(struct_element(s, 'data')[2][3], 'b') + from `tbl_test_pruned_columns` + where struct_element(s, 'city') = 'chengdu' + order by 1 desc, 2 limit 0, 20; """ qt_sql7 """ + select count(struct_element(dynamic_attributes['theme_preference'], 'confidence_score')) from `tbl_test_pruned_columns_map`; + """ + + qt_sql8 """ select struct_element(dynamic_attributes['theme_preference'], 'confidence_score') from `tbl_test_pruned_columns_map` order by id; """ @@ -134,7 +265,7 @@ suite("test_pruned_columns") { INSERT INTO nested_sc_tbl VALUES (3, struct(30.5, 'v3', 888), array(struct(500, 600, 'added_z'), struct(501, 601, 'added_z_2')), map('k3', struct(3, 3.3))); """ - qt_sql8 """ + qt_sql9 """ select struct_element(element_at(arr_s, 1), 'z') as inner_z FROM nested_sc_tbl ORDER BY id; """ } \ No newline at end of file