Skip to content

Commit

Permalink
Merge pull request #107 from tarudoodi/release/ccl_2021.11.2
Browse files Browse the repository at this point in the history
Intel(R) oneAPI Collective Communications Library (oneCCL) 2021.11.2
  • Loading branch information
tarudoodi authored Dec 21, 2023
2 parents ba967fc + b519563 commit 8d18c7b
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 67 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ endif()

# Product version components (major / minor / update) and release status.
set(CCL_MAJOR_VERSION "2021")
set(CCL_MINOR_VERSION "11")
set(CCL_UPDATE_VERSION "0")
set(CCL_UPDATE_VERSION "2")
set(CCL_PRODUCT_STATUS "Gold")
# Configure-time build stamp. The format string ends in "Z", the UTC
# designator, so UTC is requested explicitly: without the UTC keyword
# string(TIMESTAMP) reports local time and the "Z" suffix would be wrong.
string(TIMESTAMP CCL_PRODUCT_BUILD_DATE "%Y-%m-%dT %H:%M:%SZ" UTC)
# Project-provided helper (defined outside this view), called for "git".
get_vcs_properties("git")
Expand Down
130 changes: 65 additions & 65 deletions src/kernels/bf16.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,68 +13,68 @@
See the License for the specific language governing permissions and
limitations under the License.
*/
#pragma once

#ifdef CCL_BF16_GPU_TRUNCATE

// Widen a bf16 bit pattern to fp32: move the 16 bits into the upper half of a
// 32-bit word (lower half zero) and reinterpret that word as a float.
float __bf16_to_fp32(ushort V) {
    return as_float(convert_uint(V) << 16);
}

// Narrow fp32 to bf16 by truncation: view the float's bits as two ushorts and
// return component .s1; the other component is discarded without rounding.
// NOTE(review): .s1 being the high-order half presumes a little-endian
// device - confirm.
ushort __fp32_to_bf16(float V) {
    return as_ushort2(V).s1;
}

#else // CCL_BF16_GPU_TRUNCATE

#ifdef cl_intel_bfloat16_conversions
#pragma OPENCL EXTENSION cl_intel_bfloat16_conversions : enable
#else // cl_intel_bfloat16_conversions

// This branch is taken when the cl_intel_bfloat16_conversions macro is not
// defined, so the SPIR-V conversion intrinsics are declared by hand here.
// First: float -> bf16 bits (ushort). Second: bf16 bits (ushort) -> float.
ushort __builtin_spirv_OpConvertFToBF16INTEL_f32(float);
float __builtin_spirv_OpConvertBF16ToFINTEL_i16(ushort);

// Fallback definitions of the two intel_convert_* conversion functions,
// forwarding to the intrinsics declared above.
#define __ovld __attribute__((overloadable))

// float -> bf16 bit pattern.
ushort __ovld intel_convert_bfloat16_as_ushort(float f) {
    const ushort bf16_bits = __builtin_spirv_OpConvertFToBF16INTEL_f32(f);
    return bf16_bits;
}

// bf16 bit pattern -> float.
float __ovld intel_convert_as_bfloat16_float(ushort u) {
    const float widened = __builtin_spirv_OpConvertBF16ToFINTEL_i16(u);
    return widened;
}

#endif // cl_intel_bfloat16_conversions

// bf16 -> fp32 via the intel_convert_as_bfloat16_float conversion function
// (non-truncating build configuration).
float __bf16_to_fp32(ushort V) {
    const float widened = intel_convert_as_bfloat16_float(V);
    return widened;
}

// fp32 -> bf16 via the intel_convert_bfloat16_as_ushort conversion function
// (non-truncating build configuration).
ushort __fp32_to_bf16(float V) {
    const ushort bf16_bits = intel_convert_bfloat16_as_ushort(V);
    return bf16_bits;
}

#endif // CCL_BF16_GPU_TRUNCATE

// Defines __bf16_sum_<T>: widens both operands to fp32, adds them, and
// converts the sum back to bf16.
#define DEFINE_BF16SUM_OP(T) \
    T __bf16_sum_##T(T lhs, T rhs) { \
        const float lhs_fp32 = __bf16_to_fp32(lhs); \
        const float rhs_fp32 = __bf16_to_fp32(rhs); \
        return __fp32_to_bf16(lhs_fp32 + rhs_fp32); \
    }

// Defines __bf16_prod_<T>: widens both operands to fp32, multiplies them, and
// converts the product back to bf16.
#define DEFINE_BF16PROD_OP(T) \
    T __bf16_prod_##T(T lhs, T rhs) { \
        const float lhs_fp32 = __bf16_to_fp32(lhs); \
        const float rhs_fp32 = __bf16_to_fp32(rhs); \
        return __fp32_to_bf16(lhs_fp32 * rhs_fp32); \
    }

// Defines __bf16_min_<T>: widens both operands to fp32, takes min() of the
// two, and converts the result back to bf16.
#define DEFINE_BF16MIN_OP(T) \
    T __bf16_min_##T(T lhs, T rhs) { \
        const float lhs_fp32 = __bf16_to_fp32(lhs); \
        const float rhs_fp32 = __bf16_to_fp32(rhs); \
        return __fp32_to_bf16(min(lhs_fp32, rhs_fp32)); \
    }

// Defines __bf16_max_<T>: widens both operands to fp32, takes max() of the
// two, and converts the result back to bf16.
#define DEFINE_BF16MAX_OP(T) \
    T __bf16_max_##T(T lhs, T rhs) { \
        const float lhs_fp32 = __bf16_to_fp32(lhs); \
        const float rhs_fp32 = __bf16_to_fp32(rhs); \
        return __fp32_to_bf16(max(lhs_fp32, rhs_fp32)); \
    }
#pragma once

#ifdef CCL_BF16_GPU_TRUNCATE

// Widen a bf16 bit pattern to fp32: move the 16 bits into the upper half of a
// 32-bit word (lower half zero) and reinterpret that word as a float.
float __bf16_to_fp32(ushort V) {
    return as_float(convert_uint(V) << 16);
}

// Narrow fp32 to bf16 by truncation: view the float's bits as two ushorts and
// return component .s1; the other component is discarded without rounding.
// NOTE(review): .s1 being the high-order half presumes a little-endian
// device - confirm.
ushort __fp32_to_bf16(float V) {
    return as_ushort2(V).s1;
}

#else // CCL_BF16_GPU_TRUNCATE

#ifdef cl_intel_bfloat16_conversions
#pragma OPENCL EXTENSION cl_intel_bfloat16_conversions : enable
#else // cl_intel_bfloat16_conversions

// This branch is taken when the cl_intel_bfloat16_conversions macro is not
// defined, so the conversion intrinsics are declared by hand here.
// NOTE(review): the __builtin_IB_* names look like compiler-internal
// built-ins rather than SPIR-V-named intrinsics - confirm against the
// target compiler.
// First: float -> bf16 bits (ushort). Second: bf16 bits (ushort) -> float.
ushort __builtin_IB_ftobf_1(float);
float __builtin_IB_bftof_1(ushort);

// Fallback definitions of the two intel_convert_* conversion functions,
// forwarding to the intrinsics declared above.
#define __ovld __attribute__((overloadable))

// float -> bf16 bit pattern.
ushort __ovld intel_convert_bfloat16_as_ushort(float f) {
    const ushort bf16_bits = __builtin_IB_ftobf_1(f);
    return bf16_bits;
}

// bf16 bit pattern -> float.
float __ovld intel_convert_as_bfloat16_float(ushort u) {
    const float widened = __builtin_IB_bftof_1(u);
    return widened;
}

#endif // cl_intel_bfloat16_conversions

// bf16 -> fp32 via the intel_convert_as_bfloat16_float conversion function
// (non-truncating build configuration).
float __bf16_to_fp32(ushort V) {
    const float widened = intel_convert_as_bfloat16_float(V);
    return widened;
}

// fp32 -> bf16 via the intel_convert_bfloat16_as_ushort conversion function
// (non-truncating build configuration).
ushort __fp32_to_bf16(float V) {
    const ushort bf16_bits = intel_convert_bfloat16_as_ushort(V);
    return bf16_bits;
}

#endif // CCL_BF16_GPU_TRUNCATE

// Defines __bf16_sum_<T>: widens both operands to fp32, adds them, and
// converts the sum back to bf16.
#define DEFINE_BF16SUM_OP(T) \
    T __bf16_sum_##T(T lhs, T rhs) { \
        const float lhs_fp32 = __bf16_to_fp32(lhs); \
        const float rhs_fp32 = __bf16_to_fp32(rhs); \
        return __fp32_to_bf16(lhs_fp32 + rhs_fp32); \
    }

// Defines __bf16_prod_<T>: widens both operands to fp32, multiplies them, and
// converts the product back to bf16.
#define DEFINE_BF16PROD_OP(T) \
    T __bf16_prod_##T(T lhs, T rhs) { \
        const float lhs_fp32 = __bf16_to_fp32(lhs); \
        const float rhs_fp32 = __bf16_to_fp32(rhs); \
        return __fp32_to_bf16(lhs_fp32 * rhs_fp32); \
    }

// Defines __bf16_min_<T>: widens both operands to fp32, takes min() of the
// two, and converts the result back to bf16.
#define DEFINE_BF16MIN_OP(T) \
    T __bf16_min_##T(T lhs, T rhs) { \
        const float lhs_fp32 = __bf16_to_fp32(lhs); \
        const float rhs_fp32 = __bf16_to_fp32(rhs); \
        return __fp32_to_bf16(min(lhs_fp32, rhs_fp32)); \
    }

// Defines __bf16_max_<T>: widens both operands to fp32, takes max() of the
// two, and converts the result back to bf16.
#define DEFINE_BF16MAX_OP(T) \
    T __bf16_max_##T(T lhs, T rhs) { \
        const float lhs_fp32 = __bf16_to_fp32(lhs); \
        const float rhs_fp32 = __bf16_to_fp32(rhs); \
        return __fp32_to_bf16(max(lhs_fp32, rhs_fp32)); \
    }
Binary file modified src/kernels/kernels.spv
Binary file not shown.
2 changes: 1 addition & 1 deletion third-party-programs.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Intel(R) oneAPI Collective Communications Library (oneCCL)
2021.11.1 Third Party Programs File
2021.11.2 Third Party Programs File

This file is the "third-party-programs.txt" file specified in the associated
Intel end user license agreement for the Intel software you are licensing.
Expand Down

0 comments on commit 8d18c7b

Please sign in to comment.