forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Utils.h
125 lines (106 loc) · 3.15 KB
/
Utils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#pragma once
#include <ATen/Config.h>
#include <ATen/core/List.h>
#include <ATen/core/Tensor.h>
#include <c10/util/ArrayRef.h>
#if !defined(__s390x__)
#include <cpuinfo.h>
#endif
#include <vector>
#if AT_MKLDNN_ENABLED()
#include <ideep/tensor.hpp>
#endif // AT_MKLDNN_ENABLED()
namespace at { namespace native {
// Declaration only; the definition is not part of this header.
// Returns a tuple of three Tensors. Judging by the name, this is the float32
// MKLDNN layer-norm path that takes a weight and bias and normalizes over the
// last index -- TODO confirm against the definition, including what each
// tuple element holds and what `inplace` (defaults to false) changes.
std::tuple<Tensor, Tensor, Tensor> mkldnn_layer_norm_last_index_weight_bias_f32(
const Tensor& input,
IntArrayRef normalized_shape, const Tensor& weight, const Tensor& bias,
double eps, bool inplace = false);
// Declaration only; the definition is not part of this header.
// Takes the input size, kernel size, stride, left/right padding and dilation
// as integer arrays plus a ceil_mode flag, and returns a vector of int64_t.
// Presumably the result is the pooling output size per dimension, with
// ceil_mode selecting ceil vs. floor rounding -- verify against the
// definition.
std::vector<int64_t> pool_output_sizes(
IntArrayRef input_size,
IntArrayRef kernel_size,
IntArrayRef stride,
IntArrayRef padding_l,
IntArrayRef padding_r,
IntArrayRef dilation,
bool ceil_mode);
// Declaration only; the definition is not part of this header.
// Takes the four tensors of a fused binary op (input, other, weight, bias)
// and returns nothing. NOTE(review): the name suggests it validates the
// tensors and reports a failure by raising -- confirm which properties are
// checked in the definition.
void check_mkldnn_binary_fusion_inputs(
const Tensor& input,
const Tensor& other,
const Tensor& weight,
const Tensor& bias);
// ConvTranspose padding adjustment: derives MKLDNN's right-hand padding
// (padding_r) from PyTorch's `padding` and `output_padding`.
//
// PyTorch uses padding/output_padding:
//   osize = (isize - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + output_padding + 1
//
// MKLDNN uses padding_l/padding_r:
//   osize = (isize - 1) * stride - padding_l - padding_r + dilation * (kernel_size - 1) + 1
//
// So: padding_l = padding, padding_r = padding - output_padding
//
// Returns a vector with one entry per element of `padding`, where
// entry d is padding[d] - output_padding[d].
// Raises (via TORCH_CHECK) when `output_padding` has fewer elements than
// `padding`; without that guard the loop would read past the end of
// `output_padding`.
static inline std::vector<int64_t> padding_r(
    IntArrayRef padding, IntArrayRef output_padding)
{
  const auto dim = padding.size();
  TORCH_CHECK(
      output_padding.size() >= dim,
      "padding_r: output_padding has ",
      output_padding.size(),
      " elements but padding has ",
      dim);
  std::vector<int64_t> pad_r(dim);
  for (const auto d : c10::irange(dim)) {
    pad_r[d] = padding[d] - output_padding[d];
  }
  return pad_r;
}
// Everything in this section refers to ideep types, so it is only declared
// when MKLDNN support is compiled in.
#if AT_MKLDNN_ENABLED()
// Callable that builds an ideep::attr_t from a list of optional scalars and
// an optional string. Presumably these are the fused op's scalar arguments
// and its algorithm name -- confirm against where the maps are populated.
using AttrFunction = std::function<ideep::attr_t(
torch::List<c10::optional<at::Scalar>>,
c10::optional<c10::string_view>)>;
// Accessors returning const references to string-keyed lookup tables; the
// definitions live outside this header. Judging by the names, they map a
// fusion op name to its attribute builder (unary) or to the ideep algorithm
// (unary / binary) -- verify against the definitions.
const std::map<c10::string_view, AttrFunction>& fusion_unary_attr_map();
const std::map<c10::string_view, ideep::algorithm>& fusion_unary_alg_map();
const std::map<c10::string_view, ideep::algorithm>& fusion_binary_alg_map();
#endif // AT_MKLDNN_ENABLED()
};
// Reports whether the ARM bf16 extension is usable on this machine.
#if !defined(__aarch64__)
// Not an aarch64 build: the answer is a compile-time constant.
constexpr bool mkldnn_bf16_device_check_arm() {
  return false;
}
#else
// aarch64 build: ask cpuinfo at run time. The feature query is only made
// once cpuinfo_initialize() has reported success.
inline bool mkldnn_bf16_device_check_arm() {
  if (!cpuinfo_initialize()) {
    return false;
  }
  return cpuinfo_has_arm_bf16();
}
#endif
// Runtime capability checks for the reduced-precision MKLDNN paths.
//
// x86-64 is detected with both the GCC/Clang macro (__x86_64__) and the MSVC
// macro (_M_X64). MSVC never defines __x86_64__, so testing only that macro
// makes both checks report "unsupported" on Windows x64 builds. _M_X64 is
// also defined when targeting ARM64EC, which is not an x86-64 CPU, so that
// case is excluded explicitly.
#if AT_MKLDNN_ENABLED()
// Returns true when the bf16 MKLDNN path can be used on this CPU.
inline bool mkldnn_bf16_device_check() {
#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))
  // Use ideep to check bf16 on X64 as cpuinfo has no avx_ne_convert check.
  return ideep::has_bf16_type_support();
#else
  // Every other architecture defers to the ARM-specific check.
  return mkldnn_bf16_device_check_arm();
#endif
}
// Returns true when the fp16 MKLDNN path can be used on this CPU.
// Only x86-64 is ever reported as supported here.
inline bool mkldnn_fp16_device_check() {
#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))
  return ideep::has_fp16_type_support();
#else
  return false;
#endif
}
#else
// MKLDNN is not compiled in: neither low-precision path is available.
inline bool mkldnn_bf16_device_check() {
  return false;
}
inline bool mkldnn_fp16_device_check() {
  return false;
}
#endif
inline void mkldnn_check_low_precision(ScalarType input_t, std::string name) {
if (input_t == ScalarType::BFloat16) {
TORCH_CHECK(
mkldnn_bf16_device_check(),
name,
": bf16 path needs the cpu support avx_ne_convert or avx512bw, avx512vl and avx512dq");
} else if (input_t == ScalarType::Half) {
TORCH_CHECK(
mkldnn_fp16_device_check(),
name,
": fp16 path needs the cpu support avx_ne_convert or avx512_fp16");
}
}
}