Skip to content

Commit

Permalink
Replace get_common_parts with concat_dedup
Browse files Browse the repository at this point in the history
  • Loading branch information
AntoinePrv committed Nov 6, 2023
1 parent 645a9b7 commit f06f5e9
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 103 deletions.
11 changes: 6 additions & 5 deletions libmamba/include/mamba/util/string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,15 +215,16 @@ namespace mamba::util
rsplit(std::wstring_view input, std::wstring_view sep, std::size_t max_split = SIZE_MAX);

/**
* Return the largest suffix from @p str1 that is in @p str2.
* Concatenate strings while removing the suffix of the first that may be a prefix of the second.
*
* Comparisons are done as if comparing elements in a split given by @p sep.
* For instance "private/channel" and "channel/label/foo" with separator "/"
* would return "channel", but "private/chan" and "channel/label/foo"
* would return the empty string.
* would return "private/channel/label/foo", but "private/chan" and "channel/label/foo"
* would return "private/chan/channel/label/foo".
*/
std::string_view
ending_splits_in(std::string_view str1, std::string_view str2, std::string_view sep);
std::string concat_dedup_splits(std::string_view str1, std::string_view str2, char sep);
std::string
concat_dedup_splits(std::string_view str1, std::string_view str2, std::string_view sep);

void replace_all(std::string& data, std::string_view search, std::string_view replace);
void replace_all(std::wstring& data, std::wstring_view search, std::wstring_view replace);
Expand Down
26 changes: 6 additions & 20 deletions libmamba/src/core/channel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -372,26 +372,12 @@ namespace mamba
// where `name == private/testchannel` and we need to join the remaining label part
// of the channel (e.g. -c testchannel/mylabel/xyz)
// needs to result in `name = private/testchannel/mylabel/xyz`
std::string combined_name = it->second.url().path();
if (combined_name != name)
{
// Find common string between `name` and `combined_name`
auto common_str = util::ending_splits_in(combined_name, name, "/");
// Combine names properly
if (common_str.empty())
{
url.append_path(name);
combined_name += "/" + name;
}
else
{
// NOTE We assume that the `common_str`, if not empty, is necessarily at the
// beginning of `name` and at the end of `combined_name` (I don't know about
// other use cases for now)
combined_name += name.substr(common_str.size());
url.append_path(name.substr(common_str.size()));
}
}
std::string combined_name = util::concat_dedup_splits(
util::rstrip(url.path(), '/'),
util::lstrip(name, '/'),
'/'
);
url.set_path(combined_name);

set_fallback_credential_from_db(url, m_context.authentication_info());
return Channel(
Expand Down
117 changes: 71 additions & 46 deletions libmamba/src/util/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -740,73 +740,98 @@ namespace mamba::util

namespace
{
auto
get_common_part_valid_substr(std::string_view sub, std::string_view main, std::string_view sep)
-> bool
template <typename Char, typename CharOrStrView>
auto starts_with_split(
std::basic_string_view<Char> str,
std::basic_string_view<Char> prefix,
CharOrStrView sep
) -> bool
{
auto start = main.find(sub);
auto end = start + sub.size();
const auto sep_size = sep.size();
const auto main_size = main.size();
auto end = prefix.size();
const auto sep_size = detail::length(sep);
const auto str_size = str.size();
return
// The substring is found
(start != std::string_view::npos)
&& (
// Either it starts at the begining
(start == 0)
// Or it is found after a separator
|| ((start >= sep_size) && starts_with(main.substr(start - sep_size), sep))
)
starts_with(str, prefix)
&& (
// Either it ends at the end
(end == main_size)
(end == str_size)
// Or it is found before a separator
|| ((end <= main_size) && ends_with(main.substr(0, end + sep_size), sep))
|| ((end <= str_size) && ends_with(str.substr(0, end + sep_size), sep))
);
}
}

std::string_view
ending_splits_in(std::string_view str1, std::string_view str2, std::string_view sep)
{
static constexpr auto npos = std::string_view::npos;

if (str1.empty() || str2.empty())
template <typename Char, typename CharOrStrView>
auto remove_suffix_splits(
std::basic_string_view<Char> str1,
std::basic_string_view<Char> str2,
CharOrStrView sep
) -> std::basic_string_view<Char>
{
return {};
}
static constexpr auto npos = std::basic_string_view<Char>::npos;

auto split1 = str1.rfind(sep);
assert(!str1.empty());
assert(!str2.empty());
const auto sep_size = detail::length(sep);
assert(sep_size > 0);

// str1 has only one segment, easy base case
if (split1 == npos)
{
if (get_common_part_valid_substr(str1, str2, sep))
{
return str1;
}
else
auto get_common_candidate = [&](auto split)
{ return str1.substr((split == npos) ? 0 : split + sep_size); };

auto split1 = str1.rfind(sep);

// In the case we did not find a match, we try a bigger common part
while (!starts_with_split(str2, get_common_candidate(split1), sep))
{
return {};
if ((split1 == npos) || (split1 < sep_size))
{
// No further possibility to find a match, nothing to remove
return str1;
}
// Add the next split element
split1 = str1.rfind(sep, split1 - sep_size);
}

return str1.substr(0, (split1 == npos) ? 0 : split1);
}

auto candidate = str1.substr(split1 + sep.size());
auto best_candidate = std::string_view{};
// In the case we find a match, we try to grow it as much as possible
while (get_common_part_valid_substr(candidate, str2, sep))
template <typename Char, typename CharOrStrView>
auto concat_dedup_splits_impl(
std::basic_string_view<Char> str1,
std::basic_string_view<Char> str2,
CharOrStrView sep
) -> std::basic_string<Char>
{
best_candidate = candidate;
if ((split1 == npos) || (split1 == 0))
if (str1.empty())
{
return std::string(str2);
}
if (str2.empty())
{
return std::string(str1);
}
if (detail::length(sep) < 1)
{
break;
throw std::invalid_argument("Cannot split on empty separator");
}
split1 = str1.rfind(sep, split1 - sep.size());
candidate = str1.substr((split1 == npos) ? 0 : (split1 + sep.size()));
auto str1_no_suffix = remove_suffix_splits(str1, str2, sep);
if (str1_no_suffix.empty())
{
return concat(str1_no_suffix, str2);
}
return concat(str1_no_suffix, sep, str2);
}
}

// Return the best match, or nothing, we are not interested in non terminating matches
return best_candidate;
// Overload taking a single-character separator.
// Forwards all three arguments unchanged to concat_dedup_splits_impl and
// returns its result.
auto concat_dedup_splits(std::string_view str1, std::string_view str2, char sep) -> std::string
{
    return concat_dedup_splits_impl(str1, str2, sep);
}

// Overload taking a (possibly multi-character) separator as a string view.
// Forwards all three arguments unchanged to concat_dedup_splits_impl and
// returns its result.
auto concat_dedup_splits(std::string_view str1, std::string_view str2, std::string_view sep)
    -> std::string
{
    return concat_dedup_splits_impl(str1, str2, sep);
}

/*****************************************
Expand Down
97 changes: 65 additions & 32 deletions libmamba/tests/src/util/test_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -502,38 +502,71 @@ namespace
CHECK_EQ(concat("aa", std::string("bb"), std::string_view("cc"), 'd'), "aabbccd");
}

TEST_CASE("ending_splits_in")
{
CHECK_EQ(ending_splits_in("", "", "/"), "");
CHECK_EQ(ending_splits_in("", "test", "/"), "");
CHECK_EQ(ending_splits_in("test", "test", "/"), "test");
CHECK_EQ(ending_splits_in("test/chan", "test/chan", "/"), "test/chan");
CHECK_EQ(ending_splits_in("st/ch", "test/chan", "/"), "");
CHECK_EQ(ending_splits_in("st/chan", "test/chan", "/"), "chan");
CHECK_EQ(ending_splits_in("st/chan/abc", "test/chan/abc", "/"), "chan/abc");
CHECK_EQ(ending_splits_in("test/an/abc", "test/chan/abc", "/"), "abc");
CHECK_EQ(ending_splits_in("test/chan/label", "label/abcd/xyz", "/"), "label");
CHECK_EQ(ending_splits_in("test/chan/label", "chan/label/abcd", "/"), "chan/label");
CHECK_EQ(ending_splits_in("test/chan/label", "abcd/chan/label", "/"), "chan/label");
CHECK_EQ(ending_splits_in("test", "abcd", "/"), "");
CHECK_EQ(ending_splits_in("test", "abcd/xyz", "/"), "");
CHECK_EQ(ending_splits_in("test/xyz", "abcd/xyz", "/"), "xyz");
CHECK_EQ(ending_splits_in("test/xyz", "abcd/gef", "/"), "");
CHECK_EQ(ending_splits_in("abcd/test", "abcd/xyz", "/"), "");
CHECK_EQ(ending_splits_in("test/ch", "test/chan", "/"), "");
CHECK_EQ(ending_splits_in("pkgs/main", "pkgs/main/noarch", "/"), "pkgs/main");

CHECK_EQ(ending_splits_in("", "", "."), "");
CHECK_EQ(ending_splits_in("", "test", "."), "");
CHECK_EQ(ending_splits_in("test", "test", "."), "test");
CHECK_EQ(ending_splits_in("test.chan", "test.chan", "."), "test.chan");
CHECK_EQ(ending_splits_in("test.chan.label", "chan.label.abcd", "."), "chan.label");
CHECK_EQ(ending_splits_in("test/chan/label", "chan/label/abcd", "."), "");
CHECK_EQ(ending_splits_in("st/ch", "test/chan", "."), "");
CHECK_EQ(ending_splits_in("st.ch", "test.chan", "."), "");

CHECK_EQ(ending_splits_in("test..chan", "test..chan", ".."), "test..chan");
CHECK_EQ(ending_splits_in("test./chan", "test./chan", "./"), "test./chan");
// Checks concat_dedup_splits against a range of separators (single and
// multi-character), then against inputs whose separator differs from the one
// passed to the function.
TEST_CASE("concat_dedup_splits")
{
    for (std::string_view sep : { "/", "//", "/////", "./", "./." })
    {
        CAPTURE(sep);

        // Both inputs empty.
        CHECK_EQ(concat_dedup_splits("", "", sep), "");

        // One input empty: the expected result is the other input, unchanged.
        CHECK_EQ(
            concat_dedup_splits(fmt::format("test{}chan", sep), "", sep),
            fmt::format("test{}chan", sep)
        );
        CHECK_EQ(
            concat_dedup_splits("", fmt::format("test{}chan", sep), sep),
            fmt::format("test{}chan", sep)
        );

        // The whole first string is the leading split of the second.
        CHECK_EQ(
            concat_dedup_splits("test", fmt::format("test{}chan", sep), sep),
            fmt::format("test{}chan", sep)
        );

        // Nothing in common: plain join with the separator.
        CHECK_EQ(concat_dedup_splits("test", "chan", sep), fmt::format("test{}chan", sep));

        // The last split of the first string is the (start of the) second string.
        CHECK_EQ(
            concat_dedup_splits(fmt::format("test{}chan", sep), "chan", sep),
            fmt::format("test{}chan", sep)
        );
        CHECK_EQ(
            concat_dedup_splits(fmt::format("test{}chan", sep), fmt::format("chan{}foo", sep), sep),
            fmt::format("test{}chan{}foo", sep, sep)
        );

        // "foo" only matches part of the split "chan-foo", so nothing is removed.
        CHECK_EQ(
            concat_dedup_splits(
                fmt::format("test{}chan-foo", sep),
                fmt::format("foo{}bar", sep),
                sep
            ),
            // Exactly three separators are expected, one per replacement field.
            fmt::format("test{}chan-foo{}foo{}bar", sep, sep, sep)
        );

        // "ab" appears at the start of the first and the end of the second;
        // only the shared "chan" split is expected to be merged.
        CHECK_EQ(
            concat_dedup_splits(
                fmt::format("ab{}test{}chan", sep, sep),
                fmt::format("chan{}foo{}ab", sep, sep),
                sep
            ),
            fmt::format("ab{}test{}chan{}foo{}ab", sep, sep, sep, sep)
        );

        // Leading separator on the first input and trailing separator on the
        // second are expected to be kept.
        CHECK_EQ(
            concat_dedup_splits(
                fmt::format("{}test{}chan", sep, sep),
                fmt::format("chan{}foo{}", sep, sep),
                sep
            ),
            fmt::format("{}test{}chan{}foo{}", sep, sep, sep, sep)
        );

        // Same splits in swapped order: only the shared "chan" is merged.
        CHECK_EQ(
            concat_dedup_splits(
                fmt::format("test{}chan", sep),
                fmt::format("chan{}test", sep),
                sep
            ),
            fmt::format("test{}chan{}test", sep, sep)
        );
    }

    // Separator "//" does not occur in the inputs, so they are joined as-is.
    CHECK_EQ(concat_dedup_splits("test/chan", "chan/foo", "//"), "test/chan//chan/foo");
    // Single-character separator overload.
    CHECK_EQ(concat_dedup_splits("test/chan", "chan/foo", '/'), "test/chan/foo");
}
}
} // namespace mamba

0 comments on commit f06f5e9

Please sign in to comment.