Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[opt](mow) merge delete bitmap when compaction #41451

Open
wants to merge 4 commits into
base: branch-2.0
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1192,6 +1192,8 @@ DEFINE_mInt64(tablet_meta_serialize_size_limit, "1610612736");
DEFINE_Validator(tablet_meta_serialize_size_limit,
[](const int64_t config) -> bool { return config < 1717986918; });

// When true, compaction output delete-bitmap entries whose version is lower than the
// compaction output version's upper bound (`_output_version.second`) are stored under that
// version instead of their original one, which reduces the number of distinct delete
// bitmap keys. Disabled by default.
DEFINE_mBool(merge_mow_delete_bitmap_when_compaction, "false");

// clang-format off
#ifdef BE_TEST
// test s3
Expand Down
2 changes: 2 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1240,6 +1240,8 @@ DECLARE_mBool(ignore_schema_change_check);

DECLARE_mInt64(tablet_meta_serialize_size_limit);

DECLARE_mBool(merge_mow_delete_bitmap_when_compaction);

#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
Expand Down
9 changes: 5 additions & 4 deletions be/src/olap/compaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ Status Compaction::do_compaction_impl(int64_t permits) {
_tablet->calc_compaction_output_rowset_delete_bitmap(
_input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows,
&location_map, _tablet->tablet_meta()->delete_bitmap(),
&output_rowset_delete_bitmap);
&output_rowset_delete_bitmap, _output_version.second);
if (!allow_delete_in_cumu_compaction()) {
missed_rows_size = missed_rows.size();
if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION &&
Expand Down Expand Up @@ -830,7 +830,7 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) {
_tablet->calc_compaction_output_rowset_delete_bitmap(
_input_rowsets, _rowid_conversion, 0, version.second + 1, missed_rows.get(),
location_map.get(), _tablet->tablet_meta()->delete_bitmap(),
&output_rowset_delete_bitmap);
&output_rowset_delete_bitmap, _output_version.second);
if (missed_rows) {
missed_rows_size = missed_rows->size();
if (stats != nullptr && stats->merged_rows != missed_rows_size &&
Expand Down Expand Up @@ -876,7 +876,8 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) {
DeleteBitmap txn_output_delete_bitmap(_tablet->tablet_id());
_tablet->calc_compaction_output_rowset_delete_bitmap(
_input_rowsets, _rowid_conversion, 0, UINT64_MAX, missed_rows.get(),
location_map.get(), *it.delete_bitmap.get(), &txn_output_delete_bitmap);
location_map.get(), *it.delete_bitmap.get(), &txn_output_delete_bitmap,
_output_version.second);
if (config::enable_merge_on_write_correctness_check) {
RowsetIdUnorderedSet rowsetids;
rowsetids.insert(_output_rowset->rowset_id());
Expand All @@ -897,7 +898,7 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) {
_tablet->calc_compaction_output_rowset_delete_bitmap(
_input_rowsets, _rowid_conversion, version.second, UINT64_MAX,
missed_rows.get(), location_map.get(), _tablet->tablet_meta()->delete_bitmap(),
&output_rowset_delete_bitmap);
&output_rowset_delete_bitmap, _output_version.second);

if (missed_rows) {
DCHECK_EQ(missed_rows->size(), missed_rows_size);
Expand Down
21 changes: 17 additions & 4 deletions be/src/olap/tablet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3817,7 +3817,8 @@ void Tablet::calc_compaction_output_rowset_delete_bitmap(
const std::vector<RowsetSharedPtr>& input_rowsets, const RowIdConversion& rowid_conversion,
uint64_t start_version, uint64_t end_version, std::set<RowLocation>* missed_rows,
std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>>* location_map,
const DeleteBitmap& input_delete_bitmap, DeleteBitmap* output_rowset_delete_bitmap) {
const DeleteBitmap& input_delete_bitmap, DeleteBitmap* output_rowset_delete_bitmap,
uint64_t min_version) {
RowLocation src;
RowLocation dst;
for (auto& rowset : input_rowsets) {
Expand All @@ -3831,6 +3832,11 @@ void Tablet::calc_compaction_output_rowset_delete_bitmap(
for (auto iter = subset_map.delete_bitmap.begin();
iter != subset_map.delete_bitmap.end(); ++iter) {
auto cur_version = std::get<2>(iter->first);
auto output_version = cur_version;
if (config::merge_mow_delete_bitmap_when_compaction && cur_version < min_version) {
output_version = min_version;
}
size_t size = 0;
for (auto index = iter->second.begin(); index != iter->second.end(); ++index) {
src.row_id = *index;
if (rowid_conversion.get(src, &dst) != 0) {
Expand All @@ -3847,12 +3853,19 @@ void Tablet::calc_compaction_output_rowset_delete_bitmap(
<< dst.rowset_id << "|" << dst.segment_id << "|" << dst.row_id
<< " src location: |" << src.rowset_id << "|" << src.segment_id
<< "|" << src.row_id << " start version: " << start_version
<< "end version" << end_version;
<< "end version" << end_version << " cur_version: " << cur_version;
++size;
if (location_map) {
(*location_map)[rowset].emplace_back(src, dst);
}
output_rowset_delete_bitmap->add({dst.rowset_id, dst.segment_id, cur_version},
dst.row_id);
output_rowset_delete_bitmap->add(
{dst.rowset_id, dst.segment_id, output_version}, dst.row_id);
}
if (size > 0 && output_version > cur_version) {
LOG(WARNING) << "Convert " << size
<< " rows delete bitmap from bitmap key: " << src.rowset_id << "|"
<< src.segment_id << "|" << cur_version
<< " to rs: " << dst.rowset_id << ", version: " << output_version;
}
}
}
Expand Down
10 changes: 9 additions & 1 deletion be/src/olap/tablet.h
Original file line number Diff line number Diff line change
Expand Up @@ -510,12 +510,20 @@ class Tablet : public BaseTablet {
CalcDeleteBitmapToken* token, RowsetWriter* rowset_writer = nullptr);

Status update_delete_bitmap(TabletTxnInfo* txn_info, int64_t txn_id);
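/**
* min_version: the minimum version used for entries written to the output delete bitmap.
* When config::merge_mow_delete_bitmap_when_compaction is enabled, an entry whose version
* is lower than min_version is written with version min_version instead.
* As shown in #41447, there are still some unidentified issues that affect the
* import speed.
* However, merging the delete bitmaps reduces their number, which improves
* the load speed.
*/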
void calc_compaction_output_rowset_delete_bitmap(
const std::vector<RowsetSharedPtr>& input_rowsets,
const RowIdConversion& rowid_conversion, uint64_t start_version, uint64_t end_version,
std::set<RowLocation>* missed_rows,
std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>>* location_map,
const DeleteBitmap& input_delete_bitmap, DeleteBitmap* output_rowset_delete_bitmap);
const DeleteBitmap& input_delete_bitmap, DeleteBitmap* output_rowset_delete_bitmap,
uint64_t min_version);
void merge_delete_bitmap(const DeleteBitmap& delete_bitmap);
Status check_rowid_conversion(
RowsetSharedPtr dst_rowset,
Expand Down
14 changes: 13 additions & 1 deletion be/src/olap/tablet_meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1095,6 +1095,9 @@ void DeleteBitmap::subset(const BitmapKey& start, const BitmapKey& end,
}

void DeleteBitmap::merge(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) {
VLOG_DEBUG << "Merge rs: " << std::get<0>(bmk) << "|seg: " << std::get<1>(bmk) << "|"
<< std::get<2>(bmk) << " " << segment_delete_bitmap.cardinality()
<< " to delete bitmap in " << _tablet_id;
std::lock_guard l(lock);
auto [iter, succ] = delete_bitmap.emplace(bmk, segment_delete_bitmap);
if (!succ) {
Expand All @@ -1106,7 +1109,16 @@ void DeleteBitmap::merge(const DeleteBitmap& other) {
std::lock_guard l(lock);
for (auto& i : other.delete_bitmap) {
auto [j, succ] = this->delete_bitmap.insert(i);
if (!succ) j->second |= i.second;
if (!succ) {
VLOG_DEBUG << "Merge rs: " << std::get<0>(j->first) << "|seg: " << std::get<1>(j->first)
<< "|" << std::get<2>(j->first) << " " << j->second.cardinality()
<< " to delete bitmap in " << _tablet_id;
j->second |= i.second;
} else {
VLOG_DEBUG << "Insert rs: " << std::get<0>(j->first)
<< "|seg: " << std::get<1>(j->first) << "|" << std::get<2>(j->first) << " "
<< j->second.cardinality() << " to delete bitmap in " << _tablet_id;
}
}
}

Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/tablet_meta_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,10 +289,10 @@ Status TabletMetaManager::remove_old_version_delete_bitmap(DataDir* store, TTabl
remove_keys.push_back(key);
return true;
};
auto st = meta->iterate(META_COLUMN_FAMILY_INDEX, begin_key, get_remove_keys_func);
LOG(INFO) << "remove old version delete bitmap, tablet_id: " << tablet_id
<< " version: " << version << " removed keys size: " << remove_keys.size();
;
RETURN_IF_ERROR(meta->iterate(META_COLUMN_FAMILY_INDEX, begin_key, get_remove_keys_func));
RETURN_IF_ERROR(st);
return meta->remove(META_COLUMN_FAMILY_INDEX, remove_keys);
}

Expand Down
3 changes: 1 addition & 2 deletions be/src/olap/task/engine_storage_migration_task.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@
#ifndef DORIS_BE_SRC_OLAP_TASK_ENGINE_STORAGE_MIGRATION_TASK_H
#define DORIS_BE_SRC_OLAP_TASK_ENGINE_STORAGE_MIGRATION_TASK_H

#include <stdint.h>

#include <gen_cpp/olap_file.pb.h>
liutang123 marked this conversation as resolved.
Show resolved Hide resolved
#include <stdint.h>

liutang123 marked this conversation as resolved.
Show resolved Hide resolved
#include <mutex>
#include <shared_mutex>
Expand Down
Loading