Skip to content

Commit

Permalink
Refactor ending_splits_in
Browse files Browse the repository at this point in the history
  • Loading branch information
AntoinePrv committed Nov 3, 2023
1 parent 6239da8 commit bdd65ed
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 85 deletions.
19 changes: 11 additions & 8 deletions libmamba/include/mamba/util/string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,17 @@ namespace mamba::util
std::vector<std::wstring>
rsplit(std::wstring_view input, std::wstring_view sep, std::size_t max_split = SIZE_MAX);

/**
* Return the largest suffix of @p str1 that is found in @p str2.
*
* Comparisons are done as if comparing the elements of a split on @p sep.
* For instance "private/channel" and "channel/label/foo" with separator "/"
* would return "channel", but "private/chan" and "channel/label/foo"
* would return the empty string.
*
* The returned value is a view into @p str1 (or an empty view when nothing matches),
* so it must not outlive the data that @p str1 refers to.
*/
std::string_view
ending_splits_in(std::string_view str1, std::string_view str2, std::string_view sep);

void replace_all(std::string& data, std::string_view search, std::string_view replace);
void replace_all(std::wstring& data, std::wstring_view search, std::wstring_view replace);

Expand Down Expand Up @@ -655,14 +666,6 @@ namespace mamba::util
return hex_string(buffer, buffer.size());
}

/**
* Return the parts shared by two strings, where parts are the blocks located between
* occurrences of @p sep.
*
* The shared parts are expected to be located at the end of @p str1: leading blocks of
* @p str1 are dropped (searching from left to right) until the remainder is found
* in @p str2.
* @p str1 is considered smaller than (or equal to) @p str2.
* cf. Channels use case.
*/
std::string get_common_parts(std::string_view str1, std::string_view str2, std::string_view sep);
}

#endif
2 changes: 1 addition & 1 deletion libmamba/src/core/channel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ namespace mamba
if (combined_name != name)
{
// Find common string between `name` and `combined_name`
auto common_str = util::get_common_parts(combined_name, name, "/");
auto common_str = util::ending_splits_in(combined_name, name, "/");
// Combine names properly
if (common_str.empty())
{
Expand Down
118 changes: 75 additions & 43 deletions libmamba/src/util/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,81 @@ namespace mamba::util
return rsplit<decltype(input)::value_type>(input, sep, max_split);
}

/*************************************
* Implementation of ending_splits *
*************************************/

namespace
{
    /**
     * Check whether @p sub appears in @p str as a run of whole splits.
     *
     * An occurrence of @p sub is accepted when it is delimited on both sides, that is
     * when it starts at the beginning of @p str or right after @p sep, and ends at the
     * end of @p str or right before @p sep.
     * Every occurrence is inspected, not only the first one, so that "chan" is accepted
     * in "xchan/chan".
     *
     * @param sub The candidate run of splits.
     * @param str The string searched for the candidate.
     * @param sep The separator delimiting the splits.
     * @return True if at least one occurrence of @p sub is properly delimited.
     */
    auto
    get_common_part_valid_substr(std::string_view sub, std::string_view str, std::string_view sep)
        -> bool
    {
        static constexpr auto npos = std::string_view::npos;

        const auto sub_size = sub.size();
        const auto sep_size = sep.size();
        const auto str_size = str.size();

        for (auto start = str.find(sub); start != npos; start = str.find(sub, start + 1))
        {
            const auto end = start + sub_size;

            const bool delimited_before =
                // Either it starts at the beginning
                (start == 0)
                // Or it is found right after a separator
                || ((start >= sep_size) && (str.compare(start - sep_size, sep_size, sep) == 0));

            const bool delimited_after =
                // Either it ends at the end
                (end == str_size)
                // Or it is found right before a complete separator. Checking the remaining
                // length explicitly avoids accepting a match that overlaps a trailing
                // separator.
                || ((str_size - end >= sep_size) && (str.compare(end, sep_size, sep) == 0));

            if (delimited_before && delimited_after)
            {
                return true;
            }
        }
        return false;
    }
}

/**
 * Return the largest suffix of @p str1, made of whole splits, that is found in @p str2.
 *
 * Both strings are compared as if they were split on @p sep.
 * The candidate starts as the last split of @p str1 and is grown one split at a time
 * towards the beginning of @p str1 for as long as it is found in @p str2.
 *
 * @param str1 The string whose ending splits are looked for.
 * @param str2 The string in which the splits are searched.
 * @param sep The separator delimiting the splits. When empty, no split can happen and
 *            each string is treated as a single split.
 * @return A view into @p str1 on the largest match, or an empty view if there is none.
 */
std::string_view
ending_splits_in(std::string_view str1, std::string_view str2, std::string_view sep)
{
    static constexpr auto npos = std::string_view::npos;

    if (str1.empty() || str2.empty())
    {
        return {};
    }

    // Without a separator there is a single split on each side, and searching for an
    // empty separator would never make progress.
    if (sep.empty())
    {
        return (str1 == str2) ? str1 : std::string_view{};
    }

    const auto sep_size = sep.size();
    auto best_candidate = std::string_view{};

    // Position of the separator preceding the candidate, or npos when the candidate
    // is the whole of str1.
    auto split1 = str1.rfind(sep);

    // In the case we find a match, we try to grow it as much as possible
    while (true)
    {
        const auto candidate = str1.substr((split1 == npos) ? 0 : (split1 + sep_size));
        if (!get_common_part_valid_substr(candidate, str2, sep))
        {
            break;
        }
        best_candidate = candidate;
        if (split1 == npos)
        {
            // The whole of str1 matched, nothing left to grow
            break;
        }
        // Move to the previous non-overlapping separator. There is no room for one when
        // fewer than sep_size characters remain; computing `split1 - sep_size` in that
        // case would wrap around and restart the search from the end of str1.
        split1 = (split1 >= sep_size) ? str1.rfind(sep, split1 - sep_size) : npos;
    }

    // Return the best match, or nothing, we are not interested in non terminating matches
    return best_candidate;
}

/*****************************************
* Implementation of replace functions *
*****************************************/
Expand Down Expand Up @@ -794,47 +869,4 @@ namespace mamba::util
}

}

/********************************************************
* Implementation of Channels use case util function *
*******************************************************/

/**
 * Return the blocks (delimited by @p sep) that @p str1 and @p str2 have in common.
 *
 * Leading blocks of @p str1 are dropped, from left to right, until the remainder is
 * found in @p str2. The remainder is then split on @p sep and only the blocks that are
 * also blocks of @p str2 are kept, so that partial block matches are discarded.
 *
 * @param str1 The string whose ending blocks are looked for.
 * @param str2 The string in which the blocks are searched.
 * @param sep The separator delimiting the blocks.
 * @return The common blocks joined with @p sep, or an empty string if there are none.
 */
std::string get_common_parts(std::string_view str1, std::string_view str2, std::string_view sep)
{
    // Drop leading blocks of str1 until what remains is found somewhere in str2
    std::string common_str{ str1 };
    while (str2.find(common_str) == std::string::npos)
    {
        const auto sep_pos = common_str.find(sep);
        if (sep_pos == std::string::npos)
        {
            return "";
        }
        // Skip the whole separator, which may be longer than one character.
        // Always advance by at least one character so that the loop makes progress
        // even with an empty separator.
        common_str.erase(0, sep_pos + std::max<std::size_t>(sep.size(), 1));
    }

    // Case of non empty common_str
    // Check that subparts of common_str are not substrings of elements between the sep
    const auto vec1 = split(common_str, sep);
    const auto vec2 = split(str2, sep);
    std::vector<std::string> res_vec;
    for (std::size_t idx = 0; idx < vec1.size(); ++idx)
    {
        const auto& block = vec1[idx];
        if (std::find(vec2.begin(), vec2.end(), block) != vec2.end())
        {
            res_vec.emplace_back(block);
        }
        else if (idx != 0)
        {
            // Only the first block may be a partial match that gets skipped, any later
            // mismatch ends the common part.
            break;
        }
    }

    return join(sep, res_vec);
}
}
68 changes: 35 additions & 33 deletions libmamba/tests/src/util/test_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
#include "doctest-printer/array.hpp"
#include "doctest-printer/optional.hpp"

namespace mamba::util
using namespace mamba::util;

namespace
{
TEST_SUITE("util::string")
{
Expand Down Expand Up @@ -442,7 +444,7 @@ namespace mamba::util
CHECK_EQ(joined, "a-bc-d");
}
{
std::vector<fs::u8path> to_join = { "/a", "bc", "d" };
std::vector<mamba::fs::u8path> to_join = { "/a", "bc", "d" };
auto joined = join("/", to_join);
static_assert(std::is_same<decltype(joined), decltype(to_join)::value_type>::value);
CHECK_EQ(joined, "/a/bc/d");
Expand Down Expand Up @@ -500,37 +502,37 @@ namespace mamba::util
CHECK_EQ(concat("aa", std::string("bb"), std::string_view("cc"), 'd'), "aabbccd");
}

TEST_CASE("get_common_parts")
{
CHECK_EQ(get_common_parts("", "", "/"), "");
CHECK_EQ(get_common_parts("", "test", "/"), "");
CHECK_EQ(get_common_parts("test", "test", "/"), "test");
CHECK_EQ(get_common_parts("test/chan", "test/chan", "/"), "test/chan");
CHECK_EQ(get_common_parts("st/ch", "test/chan", "/"), "");
CHECK_EQ(get_common_parts("st/chan", "test/chan", "/"), "chan");
CHECK_EQ(get_common_parts("st/chan/abc", "test/chan/abc", "/"), "chan/abc");
CHECK_EQ(get_common_parts("test/ch", "test/chan", "/"), "test");
CHECK_EQ(get_common_parts("test/an/abc", "test/chan/abc", "/"), "abc");
CHECK_EQ(get_common_parts("test/chan/label", "label/abcd/xyz", "/"), "label");
CHECK_EQ(get_common_parts("test/chan/label", "chan/label/abcd", "/"), "chan/label");
CHECK_EQ(get_common_parts("test/chan/label", "abcd/chan/label", "/"), "chan/label");
CHECK_EQ(get_common_parts("test", "abcd", "/"), "");
CHECK_EQ(get_common_parts("test", "abcd/xyz", "/"), "");
CHECK_EQ(get_common_parts("test/xyz", "abcd/xyz", "/"), "xyz");
CHECK_EQ(get_common_parts("test/xyz", "abcd/gef", "/"), "");
CHECK_EQ(get_common_parts("abcd/test", "abcd/xyz", "/"), "");

CHECK_EQ(get_common_parts("", "", "."), "");
CHECK_EQ(get_common_parts("", "test", "."), "");
CHECK_EQ(get_common_parts("test", "test", "."), "test");
CHECK_EQ(get_common_parts("test.chan", "test.chan", "."), "test.chan");
CHECK_EQ(get_common_parts("test.chan.label", "chan.label.abcd", "."), "chan.label");
CHECK_EQ(get_common_parts("test/chan/label", "chan/label/abcd", "."), "");
CHECK_EQ(get_common_parts("st/ch", "test/chan", "."), "");
CHECK_EQ(get_common_parts("st.ch", "test.chan", "."), "");

CHECK_EQ(get_common_parts("test..chan", "test..chan", ".."), "test..chan");
// Checks ending_splits_in against single-character and multi-character separators.
TEST_CASE("ending_splits_in")
{
// Separator "/": empty inputs, full matches, partial matches and mismatches
CHECK_EQ(ending_splits_in("", "", "/"), "");
CHECK_EQ(ending_splits_in("", "test", "/"), "");
CHECK_EQ(ending_splits_in("test", "test", "/"), "test");
CHECK_EQ(ending_splits_in("test/chan", "test/chan", "/"), "test/chan");
CHECK_EQ(ending_splits_in("st/ch", "test/chan", "/"), "");
CHECK_EQ(ending_splits_in("st/chan", "test/chan", "/"), "chan");
CHECK_EQ(ending_splits_in("st/chan/abc", "test/chan/abc", "/"), "chan/abc");
CHECK_EQ(ending_splits_in("test/an/abc", "test/chan/abc", "/"), "abc");
CHECK_EQ(ending_splits_in("test/chan/label", "label/abcd/xyz", "/"), "label");
CHECK_EQ(ending_splits_in("test/chan/label", "chan/label/abcd", "/"), "chan/label");
CHECK_EQ(ending_splits_in("test/chan/label", "abcd/chan/label", "/"), "chan/label");
CHECK_EQ(ending_splits_in("test", "abcd", "/"), "");
CHECK_EQ(ending_splits_in("test", "abcd/xyz", "/"), "");
CHECK_EQ(ending_splits_in("test/xyz", "abcd/xyz", "/"), "xyz");
CHECK_EQ(ending_splits_in("test/xyz", "abcd/gef", "/"), "");
CHECK_EQ(ending_splits_in("abcd/test", "abcd/xyz", "/"), "");
// The last split of the first string only matches a prefix of a split: no result
CHECK_EQ(ending_splits_in("test/ch", "test/chan", "/"), "");

// Separator ".", including inputs where "/" is present but is not the separator
CHECK_EQ(ending_splits_in("", "", "."), "");
CHECK_EQ(ending_splits_in("", "test", "."), "");
CHECK_EQ(ending_splits_in("test", "test", "."), "test");
CHECK_EQ(ending_splits_in("test.chan", "test.chan", "."), "test.chan");
CHECK_EQ(ending_splits_in("test.chan.label", "chan.label.abcd", "."), "chan.label");
CHECK_EQ(ending_splits_in("test/chan/label", "chan/label/abcd", "."), "");
CHECK_EQ(ending_splits_in("st/ch", "test/chan", "."), "");
CHECK_EQ(ending_splits_in("st.ch", "test.chan", "."), "");

// Multi-character separators
CHECK_EQ(ending_splits_in("test..chan", "test..chan", ".."), "test..chan");
CHECK_EQ(ending_splits_in("test./chan", "test./chan", "./"), "test./chan");
}
}

} // namespace mamba

0 comments on commit bdd65ed

Please sign in to comment.