Skip to content

Commit

Permalink
Merge pull request #249 from 1261385937/performance_improve
Browse files Browse the repository at this point in the history
Improve ColumnString performance (26%) by avoiding vector reallocation
  • Loading branch information
Enmk authored Nov 23, 2022
2 parents 51c62ce + 4f463cb commit 4a186a8
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 22 deletions.
40 changes: 18 additions & 22 deletions clickhouse/columns/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ void ColumnFixedString::Append(std::string_view str) {
+ std::to_string(str.size()) + " bytes.");
}

if (data_.capacity() - data_.size() < str.size())
{
if (data_.capacity() - data_.size() < str.size()) {
// round up to the next block size
const auto new_size = (((data_.size() + string_size_) / DEFAULT_BLOCK_SIZE) + 1) * DEFAULT_BLOCK_SIZE;
data_.reserve(new_size);
Expand Down Expand Up @@ -129,13 +128,11 @@ struct ColumnString::Block
data_(new CharT[capacity])
{}

inline auto GetAvailable() const
{
inline auto GetAvailable() const {
return capacity - size;
}

std::string_view AppendUnsafe(std::string_view str)
{
std::string_view AppendUnsafe(std::string_view str) {
const auto pos = &data_[size];

memcpy(pos, str.data(), str.size());
Expand All @@ -144,13 +141,11 @@ struct ColumnString::Block
return std::string_view(pos, str.size());
}

auto GetCurrentWritePos()
{
auto GetCurrentWritePos() {
return &data_[size];
}

std::string_view ConsumeTailAsStringViewUnsafe(size_t len)
{
std::string_view ConsumeTailAsStringViewUnsafe(size_t len) {
const auto start = &data_[size];
size += len;
return std::string_view(start, len);
Expand All @@ -166,14 +161,21 @@ ColumnString::ColumnString()
{
}

/// Creates an empty string column and reserves capacity in `items_` and
/// `blocks_` for roughly `element_count` values, so that later appends do not
/// have to reallocate those vectors.
ColumnString::ColumnString(size_t element_count)
    : Column(Type::CreateString())
{
    // Heuristic: budget one block per 100 elements, but never fewer than one.
    // The divisor is arbitrary and assumes string values of about ~40 bytes.
    const size_t estimated_blocks = std::max<size_t>(1, element_count / 100);

    items_.reserve(element_count);
    blocks_.reserve(estimated_blocks);
}

/// Builds a column from the strings in `data`, preserving their order.
///
/// One block with capacity ComputeTotalSize(data) is created up front
/// (presumably the combined length of all strings — confirm against the
/// helper), and `items_` is reserved for data.size() entries, so every value
/// can then be added through AppendUnsafe() without growing either container.
ColumnString::ColumnString(const std::vector<std::string>& data)
    : ColumnString()
{
    items_.reserve(data.size());
    blocks_.emplace_back(ComputeTotalSize(data));

    for (const auto & s : data) {
        AppendUnsafe(s);
    }
}
Expand All @@ -194,21 +196,15 @@ ColumnString::~ColumnString()
{}

/// Appends `str` to the column.
///
/// The characters are copied into the last storage block. If there is no block
/// yet, or the last one has less free space than `str.length()`, a new block
/// with capacity max(DEFAULT_BLOCK_SIZE, str.size()) is started first, so a
/// value larger than the default block size still fits in a single block.
/// The view returned by Block::AppendUnsafe() is what gets stored in `items_`.
void ColumnString::Append(std::string_view str) {
    if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) {
        blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size()));
    }

    items_.emplace_back(blocks_.back().AppendUnsafe(str));
}

/// Appends a NUL-terminated C string to the column.
///
/// Delegates to Append(std::string_view) so that block selection and the copy
/// happen in exactly one place and the value is stored exactly once.
/// `str` must not be null: its length is measured with strlen().
void ColumnString::Append(const char* str) {
    Append(std::string_view(str, strlen(str)));
}

void ColumnString::Append(std::string&& steal_value) {
Expand Down Expand Up @@ -295,10 +291,10 @@ ColumnRef ColumnString::Slice(size_t begin, size_t len) const {

if (begin < items_.size()) {
len = std::min(len, items_.size() - begin);
result->items_.reserve(len);

result->blocks_.emplace_back(ComputeTotalSize(items_, begin, len));
for (size_t i = begin; i < begin + len; ++i)
{
for (size_t i = begin; i < begin + len; ++i) {
result->Append(items_[i]);
}
}
Expand Down
1 change: 1 addition & 0 deletions clickhouse/columns/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class ColumnString : public Column {
ColumnString();
~ColumnString();

explicit ColumnString(size_t element_count);
explicit ColumnString(const std::vector<std::string> & data);
explicit ColumnString(std::vector<std::string>&& data);
ColumnString& operator=(const ColumnString&) = delete;
Expand Down

0 comments on commit 4a186a8

Please sign in to comment.