Merge pull request #107 from tarudoodi/release/ccl_2021.11.2

Intel(R) oneAPI Collective Communications Library (oneCCL) 2021.11.2
oneapi-src · Dec 21, 2023 · 8d18c7b · 8d18c7b
2 parents ba967fc + b519563
commit 8d18c7b
Show file tree

Hide file tree

Showing 4 changed files with 67 additions and 67 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -303,7 +303,7 @@ endif()
 
 set(CCL_MAJOR_VERSION     "2021")
 set(CCL_MINOR_VERSION     "11")
-set(CCL_UPDATE_VERSION    "0")
+set(CCL_UPDATE_VERSION    "2")
 set(CCL_PRODUCT_STATUS    "Gold")
 string(TIMESTAMP CCL_PRODUCT_BUILD_DATE "%Y-%m-%dT %H:%M:%SZ")
 get_vcs_properties("git")

diff --git a/src/kernels/bf16.h b/src/kernels/bf16.h
@@ -13,68 +13,68 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 */
-#pragma once
-
-#ifdef CCL_BF16_GPU_TRUNCATE
-
-float __bf16_to_fp32(ushort V) {
-    uint temp = convert_uint(V) << 16;
-    return as_float(temp);
-}
-
-ushort __fp32_to_bf16(float V) {
-    ushort2 temp = as_ushort2(V);
-    return temp.s1;
-}
-
-#else // CCL_BF16_GPU_TRUNCATE
-
-#ifdef cl_intel_bfloat16_conversions
-#pragma OPENCL EXTENSION cl_intel_bfloat16_conversions : enable
-#else // cl_intel_bfloat16_conversions
-
-// declare SPIR-V intrinsics directly
-ushort __builtin_spirv_OpConvertFToBF16INTEL_f32(float);
-float __builtin_spirv_OpConvertBF16ToFINTEL_i16(ushort);
-
-// implement built-in functions using these intrinsics
-#define __ovld __attribute__((overloadable))
-ushort __ovld intel_convert_bfloat16_as_ushort(float f) {
-    return __builtin_spirv_OpConvertFToBF16INTEL_f32(f);
-}
-
-float __ovld intel_convert_as_bfloat16_float(ushort u) {
-    return __builtin_spirv_OpConvertBF16ToFINTEL_i16(u);
-}
-
-#endif // cl_intel_bfloat16_conversions
-
-float __bf16_to_fp32(ushort V) {
-    return intel_convert_as_bfloat16_float(V);
-}
-
-ushort __fp32_to_bf16(float V) {
-    return intel_convert_bfloat16_as_ushort(V);
-}
-
-#endif // CCL_BF16_GPU_TRUNCATE
-
-#define DEFINE_BF16SUM_OP(T) \
-    T __bf16_sum_##T(T lhs, T rhs) { \
-        return __fp32_to_bf16(__bf16_to_fp32(lhs) + __bf16_to_fp32(rhs)); \
-    }
-
-#define DEFINE_BF16PROD_OP(T) \
-    T __bf16_prod_##T(T lhs, T rhs) { \
-        return __fp32_to_bf16(__bf16_to_fp32(lhs) * __bf16_to_fp32(rhs)); \
-    }
-
-#define DEFINE_BF16MIN_OP(T) \
-    T __bf16_min_##T(T lhs, T rhs) { \
-        return __fp32_to_bf16(min(__bf16_to_fp32(lhs), __bf16_to_fp32(rhs))); \
-    }
-
-#define DEFINE_BF16MAX_OP(T) \
-    T __bf16_max_##T(T lhs, T rhs) { \
-        return __fp32_to_bf16(max(__bf16_to_fp32(lhs), __bf16_to_fp32(rhs))); \
-    }
+#pragma once
+
+#ifdef CCL_BF16_GPU_TRUNCATE
+
+float __bf16_to_fp32(ushort V) {
+    uint temp = convert_uint(V) << 16;
+    return as_float(temp);
+}
+
+ushort __fp32_to_bf16(float V) {
+    ushort2 temp = as_ushort2(V);
+    return temp.s1;
+}
+
+#else // CCL_BF16_GPU_TRUNCATE
+
+#ifdef cl_intel_bfloat16_conversions
+#pragma OPENCL EXTENSION cl_intel_bfloat16_conversions : enable
+#else // cl_intel_bfloat16_conversions
+
+// declare the __builtin_IB_* bf16 conversion intrinsics directly
+ushort __builtin_IB_ftobf_1(float);
+float __builtin_IB_bftof_1(ushort);
+
+// implement built-in functions using these intrinsics
+#define __ovld __attribute__((overloadable))
+ushort __ovld intel_convert_bfloat16_as_ushort(float f) {
+    return __builtin_IB_ftobf_1(f);
+}
+
+float __ovld intel_convert_as_bfloat16_float(ushort u) {
+    return __builtin_IB_bftof_1(u);
+}
+
+#endif // cl_intel_bfloat16_conversions
+
+float __bf16_to_fp32(ushort V) {
+    return intel_convert_as_bfloat16_float(V);
+}
+
+ushort __fp32_to_bf16(float V) {
+    return intel_convert_bfloat16_as_ushort(V);
+}
+
+#endif // CCL_BF16_GPU_TRUNCATE
+
+#define DEFINE_BF16SUM_OP(T) \
+    T __bf16_sum_##T(T lhs, T rhs) { \
+        return __fp32_to_bf16(__bf16_to_fp32(lhs) + __bf16_to_fp32(rhs)); \
+    }
+
+#define DEFINE_BF16PROD_OP(T) \
+    T __bf16_prod_##T(T lhs, T rhs) { \
+        return __fp32_to_bf16(__bf16_to_fp32(lhs) * __bf16_to_fp32(rhs)); \
+    }
+
+#define DEFINE_BF16MIN_OP(T) \
+    T __bf16_min_##T(T lhs, T rhs) { \
+        return __fp32_to_bf16(min(__bf16_to_fp32(lhs), __bf16_to_fp32(rhs))); \
+    }
+
+#define DEFINE_BF16MAX_OP(T) \
+    T __bf16_max_##T(T lhs, T rhs) { \
+        return __fp32_to_bf16(max(__bf16_to_fp32(lhs), __bf16_to_fp32(rhs))); \
+    }
diff --git a/src/kernels/kernels.spv b/src/kernels/kernels.spv
diff --git a/third-party-programs.txt b/third-party-programs.txt
@@ -1,5 +1,5 @@
 Intel(R) oneAPI Collective Communications Library (oneCCL)
-2021.11.1 Third Party Programs File
+2021.11.2 Third Party Programs File
 
 This file is the "third-party-programs.txt" file specified in the associated
 Intel end user license agreement for the Intel software you are licensing.