Skip to content

Commit

Permalink
revert changes to named_shuffle and arch
Browse files Browse the repository at this point in the history
  • Loading branch information
SadiinsoSnowfall committed Nov 25, 2024
1 parent c6ff5b3 commit 93b76d9
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 204 deletions.
22 changes: 11 additions & 11 deletions include/eve/arch/cpu/top_bits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,24 +114,24 @@ namespace detail

static constexpr bool is_cheap_impl()
{
if constexpr ( has_emulated_abi_v<logical_type> ) return true;
else if constexpr ( is_aggregated ) return top_bits<half_logical>::is_cheap;
if ( has_emulated_abi_v<logical_type> ) return true;
if constexpr ( is_aggregated ) return top_bits<half_logical>::is_cheap;

else if constexpr ( x86_abi<abi_type> ) return true;
else if constexpr ( ppc_abi<abi_type> ) return true;
if ( x86_abi<abi_type> ) return true;
if ( ppc_abi<abi_type> ) return true;

else if constexpr ( arm_abi<abi_type> )
if ( arm_abi<abi_type> )
{
if constexpr ( static_size == 1 ) return true;
else if constexpr ( static_size * sizeof(scalar_type) <= 4 ) return true;
else if constexpr ( current_api >= eve::asimd )
if ( static_size == 1 ) return true;
if ( static_size * sizeof(scalar_type) <= 4 ) return true;
if ( current_api >= eve::asimd )
{
if constexpr ( sizeof(scalar_type) >= 2 ) return true;
if ( sizeof(scalar_type) >= 2 ) return true;
return static_size <= 8; // 16 chars is expensive
}
else return false;
return false;
}
else return false;
return false;
}

public:
Expand Down
8 changes: 4 additions & 4 deletions include/eve/arch/riscv/rvv_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@ constexpr auto rvv_lmul_v = []
{
constexpr std::ptrdiff_t m1_len = __riscv_v_fixed_vlen;
constexpr std::ptrdiff_t min_len = m1_len * sizeof(scalar_type) / 8;
constexpr std::ptrdiff_t expected_len = sizeof(scalar_type) * 8 * cardinal::value;
constexpr std::ptrdiff_t reg_len = std::max(min_len, expected_len);
if constexpr ( reg_len >= m1_len ) return static_cast<int>(reg_len / m1_len);
std::ptrdiff_t expected_len = sizeof(scalar_type) * 8 * cardinal::value;
std::ptrdiff_t reg_len = std::max(min_len, expected_len);
if( reg_len >= m1_len ) return static_cast<int>(reg_len / m1_len);
else return -static_cast<int>(m1_len / reg_len);
}();

template<plain_scalar_value scalar_type, typename cardinal>
constexpr auto rvv_logical_ratio_v = []
{
constexpr auto lmul = rvv_lmul_v<scalar_type, cardinal>;
auto lmul = rvv_lmul_v<scalar_type, cardinal>;
constexpr auto element_size = sizeof(scalar_type) * 8;
return lmul > 0 ? element_size / lmul : element_size * (-lmul);
}();
Expand Down
68 changes: 30 additions & 38 deletions include/eve/module/core/named_shuffles/blend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,50 +95,42 @@ struct blend_t
level(as<half_t> {}, as<half_t> {}, g, p1));
}

else if constexpr ( ((I == 0) && ...) ) return 0;
else if constexpr ( ((I == 1) && ...) ) return 0;
if( ((I == 0) && ...) ) return 0;
if( ((I == 1) && ...) ) return 0;

else if constexpr ( current_api >= sve ) return logical_simd_value<T> ? 6 : 2;
else if constexpr ( current_api >= avx512 ) return logical_simd_value<T> ? 6 : 2;
else if constexpr ( current_api >= vmx ) return 3;
else
if( current_api >= sve ) return logical_simd_value<T> ? 6 : 2;
if( current_api >= avx512 ) return logical_simd_value<T> ? 6 : 2;
if( current_api >= vmx ) return 3;

const std::ptrdiff_t g_size = sizeof(element_type_t<T>) * G;
const std::size_t reg_size = sizeof(element_type_t<T>) * T::size();
const std::size_t count_from_x = ((I == 0) + ...);
const std::size_t count_from_y = ((I == 1) + ...);

if( current_api >= neon )
{
constexpr std::ptrdiff_t g_size = sizeof(element_type_t<T>) * G;
constexpr std::size_t reg_size = sizeof(element_type_t<T>) * T::size();
constexpr std::size_t count_from_x = ((I == 0) + ...);
constexpr std::size_t count_from_y = ((I == 1) + ...);
if( current_api >= asimd && (count_from_x == 1 || count_from_y == 1) ) return 2;
return 3;
}

if constexpr ( current_api >= neon )
{
if constexpr ( current_api >= asimd && (count_from_x == 1 || count_from_y == 1) ) return 2;
else return 3;
}
else if constexpr ( current_api >= sse2 )
{
if constexpr( current_api == avx && reg_size >= 32 && g_size <= 2 )
{
using half_t = decltype(T {}.slice(lower_));
auto [p0, p1] = detail::idxm::slice_pattern<pattern_t<I...>::size() / 2>(p);
auto l0 = level(as<half_t> {}, as<half_t> {}, g, p0);
auto l1 = level(as<half_t> {}, as<half_t> {}, g, p1);
return detail::idxm::add_shuffle_levels(std::array<std::ptrdiff_t, 3> {l0, l1, 4});
}
else if constexpr ( current_api >= sse4_1 )
{
return g_size >= 4 ? 2 : 3;
}
else
{
if constexpr ( g_size >= 8 ) return 2;
else if constexpr ( g_size == 2 && reg_size == 4 ) return 6;
else return 7;
}
}
else
if( current_api >= sse2 )
{
if constexpr( current_api == avx && reg_size >= 32 && g_size <= 2 )
{
return 2;
using half_t = decltype(T {}.slice(lower_));
auto [p0, p1] = detail::idxm::slice_pattern<pattern_t<I...>::size() / 2>(p);
auto l0 = level(as<half_t> {}, as<half_t> {}, g, p0);
auto l1 = level(as<half_t> {}, as<half_t> {}, g, p1);
return detail::idxm::add_shuffle_levels(std::array<std::ptrdiff_t, 3> {l0, l1, 4});
}
if( current_api >= sse4_1 ) return g_size >= 4 ? 2 : 3;

if( g_size >= 8 ) return 2;
if( g_size == 2 && reg_size == 4 ) return 6;
return 7;
}

return 2;
}

template<simd_value T, std::ptrdiff_t G>
Expand Down
60 changes: 30 additions & 30 deletions include/eve/module/core/named_shuffles/broadcast_lane.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ struct broadcast_lane_t
template<simd_value T, std::ptrdiff_t G, std::ptrdiff_t I>
static constexpr std::ptrdiff_t level(eve::as<T> tgt, eve::fixed<G> g, eve::index_t<I> i)
{
constexpr std::size_t reg_size = sizeof(element_type_t<T>) * T::size();
constexpr std::ptrdiff_t g_size = sizeof(element_type_t<T>) * G;
const std::size_t reg_size = sizeof(element_type_t<T>) * T::size();
const std::ptrdiff_t g_size = sizeof(element_type_t<T>) * G;

if constexpr( eve::has_aggregated_abi_v<T> )
{
Expand All @@ -80,47 +80,47 @@ struct broadcast_lane_t
else if constexpr( current_api >= vmx ) return 2;
else if constexpr( current_api >= sve )
{
if constexpr ( !logical_value<T> ) return g_size > 8 ? 3 : 2;
else if constexpr ( G == 1 ) return 4;
else if constexpr ( g_size <= 8 ) return 6;
else return 7;
if( !logical_value<T> ) return g_size > 8 ? 3 : 2;
if( G == 1 ) return 4;
if( g_size <= 8 ) return 6;
return 7;
}
else if constexpr( current_api >= neon )
{
if constexpr ( current_api >= asimd ) return 2;
else if constexpr ( reg_size <= 8 ) return 2;
else return 4;
if( current_api >= asimd ) return 2;
if( reg_size <= 8 ) return 2;
return 4;
}

// x86

if constexpr (current_api == avx512 && logical_value<T>)
if (current_api == avx512 && logical_value<T>)
{
if constexpr (G == 1) return 4;
else return level(detail::mask_type(tgt), g, i) + 4;
if (G == 1) return 4;
return level(detail::mask_type(tgt), g, i) + 4;
}
else if constexpr (reg_size == 64)
{
if constexpr (g_size >= 16) return 2;
else if constexpr (g_size >= 2) return 3;
else return 4;
}
else if constexpr (reg_size == 32)

if (reg_size == 64)
{
if constexpr (g_size >= 16) return 2;
if constexpr (current_api == avx) return 4;
if constexpr (g_size >= 8) return 2;
if constexpr (g_size >= 4) return 3;
if constexpr (g_size >= 2 && current_api == avx512) return 3;
else return 4;
if (g_size >= 16) return 2;
if (g_size >= 2) return 3;
return 4;
}
else

if (reg_size == 32)
{
if constexpr ( g_size >= 4 ) return 2;
else if constexpr ( g_size == 2 && reg_size <= 8 ) return 2;
else if constexpr ( current_api >= ssse3 ) return 3;
else return 4;
if (g_size >= 16) return 2;
if (current_api == avx) return 4;
if (g_size >= 8) return 2;
if (g_size >= 4) return 3;
if (g_size >= 2 && current_api == avx512) return 3;
return 4;
}

if ( g_size >= 4 ) return 2;
if ( g_size == 2 && reg_size <= 8 ) return 2;
if ( current_api >= ssse3 ) return 3;
return 4;
}
};

Expand Down
101 changes: 48 additions & 53 deletions include/eve/module/core/named_shuffles/reverse.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,73 +66,68 @@ struct reverse_t
if constexpr( eve::has_aggregated_abi_v<T> )
{
if constexpr( G == T::size() / 2 ) return 0;
else
{
using half_t = decltype(T {}.slice(lower_));
return level(as<half_t> {}, g);
}
using half_t = decltype(T {}.slice(lower_));
return level(as<half_t> {}, g);
}
else
{
constexpr std::ptrdiff_t g_size = sizeof(element_type_t<T>) * G;
constexpr std::size_t reg_size = sizeof(element_type_t<T>) * T::size();
constexpr bool is_expected_cardinal = T::size() == eve::expected_cardinal_v<element_type_t<T>>;

if constexpr ( current_api >= sve )
const std::ptrdiff_t g_size = sizeof(element_type_t<T>) * G;
const std::size_t reg_size = sizeof(element_type_t<T>) * T::size();
const bool is_expected_cardinal = T::size() == eve::expected_cardinal_v<element_type_t<T>>;

if( current_api >= sve )
{
if( !logical_value<T> )
{
if constexpr ( !logical_value<T> )
{
if constexpr ( reg_size <= 8 ) return 2;
else if constexpr ( is_expected_cardinal && g_size <= 8 ) return 2;
else if constexpr ( is_expected_cardinal && g_size == reg_size / 2 ) return 2;
else return 3;
}
else if constexpr ( is_expected_cardinal && g_size <= 8 ) return 2;
else return level(detail::mask_type(tgt), g) + 4;
if( reg_size <= 8 ) return 2;
if( is_expected_cardinal && g_size <= 8 ) return 2;
if( is_expected_cardinal && g_size == reg_size / 2 ) return 2;
return 3;
}
if( is_expected_cardinal && g_size <= 8 ) return 2;
return level(detail::mask_type(tgt), g) + 4;
}

else if constexpr (current_api >= neon) {
if constexpr ( reg_size <= 8 ) return 2;
if constexpr ( g_size == 8 ) return 2;
if constexpr ( current_api >= asimd ) return 3;
else return 4;
}
if (current_api >= neon) {
if ( reg_size <= 8 ) return 2;
if ( g_size == 8 ) return 2;
if ( current_api >= asimd ) return 3;
return 4;
}

else if constexpr ( current_api >= vmx ) return 3;
if( current_api >= vmx ) return 3;

else if constexpr ( current_api == avx512 && logical_value<T> ) { return level(detail::mask_type(tgt), g) + 4; }
if( current_api == avx512 && logical_value<T> ) { return level(detail::mask_type(tgt), g) + 4; }

else if constexpr ( current_api >= avx2 && reg_size >= 32 )
{
if constexpr ( g_size >= 16 ) return 2;
else if constexpr ( g_size >= 8 ) return reg_size == 64 ? 3 : 2;
else if constexpr ( g_size >= 4 ) return 3;
else if constexpr ( g_size == 2 && current_api >= avx512 ) return 3;
else return 5;
}
if( current_api >= avx2 && reg_size >= 32 )
{
if( g_size >= 16 ) return 2;
if( g_size >= 8 ) return reg_size == 64 ? 3 : 2;
if( g_size >= 4 ) return 3;
if( g_size == 2 && current_api >= avx512 ) return 3;
return 5;
}

else if constexpr ( current_api == avx && reg_size >= 32 )
{
if constexpr ( g_size >= 16 ) return 2;
else if constexpr ( g_size >= 4 ) return 4;
else if constexpr ( g_size == 2 && current_api >= avx512 ) return 3;
else return 9;
}
if( current_api == avx && reg_size >= 32 )
{
if( g_size >= 16 ) return 2;
if( g_size >= 4 ) return 4;
if( g_size == 2 && current_api >= avx512 ) return 3;
return 9;
}

else if constexpr ( g_size >= 4 ) return 2;
else if constexpr ( g_size == 2 && reg_size <= 8 ) return 2;
if( g_size >= 4 ) return 2;
if( g_size == 2 && reg_size <= 8 ) return 2;

else if constexpr ( current_api >= ssse3 ) return 3;
if( current_api >= ssse3 ) return 3;

else if constexpr ( g_size == 2 ) return 6;
if( g_size == 2 ) return 6;

// chars on sse2
else if constexpr ( reg_size == 2 ) return 6;
// chars on sse2
if( reg_size == 2 ) return 6;

// swap chars + reverse shorts
else if constexpr ( reg_size <= 8 ) return 8;
else return 12;
}
// swap chars + reverse shorts
if( reg_size <= 8 ) return 8;
return 12;
}
};

Expand Down
Loading

0 comments on commit 93b76d9

Please sign in to comment.