[FOR DRAFT-PR ONLY] Enable -ffp-mode=fast for armclang #2755

Draft · wants to merge 1 commit into main
6 changes: 5 additions & 1 deletion tensorflow/lite/kernels/internal/quantization_util.cc
@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@ limitations under the License.
 
 namespace tflite {
 
+#ifdef TFLITE_EMULATE_FLOAT
 namespace {
 // These constants are used to manipulate the binary representation of doubles.
 // Double-precision binary64 floating point format is:
@@ -49,6 +50,7 @@ constexpr uint32_t kFractionShift = 22;
 constexpr uint32_t kFractionRoundingMask = 0x003fffff;
 constexpr uint32_t kFractionRoundingThreshold = 0x00200000;
 }  // namespace
+#endif
 
 void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
                         int* shift) {
@@ -122,6 +124,7 @@ void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
   *left_shift = shift;
 }
 
+#ifdef TFLITE_EMULATE_FLOAT
 int64_t IntegerFrExp(double input, int* shift) {
   // Make sure our assumptions about the double layout hold.
   TFLITE_CHECK_EQ(8, sizeof(double));
@@ -278,6 +281,7 @@ int IntegerDoubleCompare(double a, double b) {
     return 0;
   }
 }
+#endif
 
 void PreprocessSoftmaxScaling(double beta, double input_scale,
                               int input_integer_bits,
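For context behind this guard: the kFraction* constants and IntegerFrExp/IntegerDoubleCompare exist so that, when TFLITE_EMULATE_FLOAT is defined, the quantization math can be done by manipulating the IEEE 754 binary64 bit pattern directly instead of relying on hardware double arithmetic. Below is a minimal, standalone sketch of that idea — decomposing a double into an integer mantissa and a shift. The function name, the zero handling, and the omission of negative/denormal inputs are simplifications for illustration; this is not the library's implementation.

```cpp
// Illustrative only: a frexp-style decomposition of a positive, normal double
// using its IEEE 754 binary64 layout (1 sign bit, 11 exponent bits with bias
// 1023, 52 fraction bits). Hypothetical helper, not tflite::IntegerFrExp.
#include <cstdint>
#include <cstdio>
#include <cstring>

int64_t FrExpBits(double input, int* shift) {
  uint64_t bits;
  static_assert(sizeof(bits) == sizeof(double), "expects 64-bit double");
  std::memcpy(&bits, &input, sizeof(bits));  // type-pun safely via memcpy

  const uint64_t fraction = bits & 0x000fffffffffffffULL;        // low 52 bits
  const int raw_exponent = static_cast<int>((bits >> 52) & 0x7ff);  // 11 bits

  if (raw_exponent == 0 && fraction == 0) {  // +0.0
    *shift = 0;
    return 0;
  }
  // Normal numbers carry an implicit leading 1, so the integer significand is
  // (1 << 52) | fraction and input == significand * 2^(raw_exponent - 1075).
  // Equivalently, input == (returned mantissa / 2^53) * 2^shift with the
  // mantissa/2^53 in [0.5, 1), matching frexp's convention.
  *shift = raw_exponent - 1022;
  return static_cast<int64_t>((1ULL << 52) | fraction);
}

int main() {
  int shift = 0;
  const int64_t m = FrExpBits(0.75, &shift);
  // 0.75 == 0.75 * 2^0, so shift == 0 and m == 0.75 * 2^53.
  std::printf("mantissa=%lld shift=%d\n", static_cast<long long>(m), shift);
}
```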
10 changes: 2 additions & 8 deletions tensorflow/lite/micro/kernels/activations_common.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -54,7 +54,6 @@ template <typename T>
 void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
                          ReluOpData* data) {
   float act_min = 0.0;
-  float act_max = std::numeric_limits<float>::infinity();
   double real_multiplier =
       static_cast<double>(input->params.scale / output->params.scale);
 
@@ -69,12 +68,7 @@ void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
       output->params.zero_point +
           static_cast<int32_t>(roundf(act_min / output->params.scale)));
   data->params.quantized_activation_max =
-      act_max == std::numeric_limits<float>::infinity()
-          ? static_cast<int32_t>(std::numeric_limits<T>::max())
-          : std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
-                     output->params.zero_point +
-                         static_cast<int32_t>(
-                             roundf(act_max / output->params.scale)));
+      static_cast<int32_t>(std::numeric_limits<T>::max());
   data->params.input_offset = input->params.zero_point;
   data->params.output_offset = output->params.zero_point;
 }
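A note on why the upper-bound logic collapses: as the deleted line shows, act_max in CalculateReluOpData was initialized to +infinity, and assuming it was never reassigned, the removed ternary always took its infinity branch, so quantized_activation_max reduces to the output type's maximum. It also drops a comparison against infinity, which a compiler operating under fast floating-point assumptions (such as armclang with -ffp-mode=fast) may no longer evaluate reliably. A self-contained sketch of the simplified bound computation, using hypothetical names (ReluQuantBounds) rather than the TFLM API:

```cpp
// Hypothetical illustration: for ReLU the upper activation bound is
// unbounded, so the quantized maximum is simply the integer type's maximum,
// independent of the output scale and zero point.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

struct QuantBounds {
  int32_t min;
  int32_t max;
};

template <typename T>
QuantBounds ReluQuantBounds(float output_scale, int32_t output_zero_point) {
  QuantBounds b;
  const float act_min = 0.0f;  // ReLU lower bound
  b.min = std::max(static_cast<int32_t>(std::numeric_limits<T>::min()),
                   output_zero_point +
                       static_cast<int32_t>(std::round(act_min / output_scale)));
  // No finite upper bound for ReLU, so no comparison with infinity is needed.
  b.max = static_cast<int32_t>(std::numeric_limits<T>::max());
  return b;
}

int main() {
  const QuantBounds b = ReluQuantBounds<int8_t>(/*output_scale=*/0.05f,
                                                /*output_zero_point=*/-128);
  std::printf("min=%d max=%d\n", static_cast<int>(b.min),
              static_cast<int>(b.max));  // min=-128 max=127
}
```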
4 changes: 3 additions & 1 deletion tensorflow/lite/micro/kernels/quantization_util_test.cc
@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -232,6 +232,7 @@ TF_LITE_MICRO_TEST(
   TF_LITE_MICRO_EXPECT_EQ(qp.zero_point, 255);
 }
 
+#ifdef TFLITE_EMULATE_FLOAT
 TF_LITE_MICRO_TEST(QuantizationUtilTest_IntegerFrExp) {
   int shift;
   int64_t result = tflite::IntegerFrExp(0.0, &shift);
@@ -412,6 +413,7 @@ TF_LITE_MICRO_TEST(QuantizationUtilTest_CalculateInputRadius) {
   TF_LITE_MICRO_EXPECT_EQ(tflite::CalculateInputRadius(3, 28), 7);
   TF_LITE_MICRO_EXPECT_EQ(tflite::CalculateInputRadius(4, 2), 503316480);
 }
+#endif
 
 TF_LITE_MICRO_TEST(QuantizationUtilTest_QuantizeMultiplierArray) {
   const double weights[] = {-4, -2, -1, -0.5, -0.25, -0.125, 0,
1 change: 0 additions & 1 deletion tensorflow/lite/micro/tools/make/Makefile
@@ -855,7 +855,6 @@ $(BINDIR)%.test_target: $(BINDIR)%_test
 # These are microcontroller-specific rules for converting the ELF output
 # of the linker into a binary image that can be loaded directly.
 ifeq ($(TOOLCHAIN), armclang)
-  CXXFLAGS += -ffp-mode=full
   FROMELF := ${TARGET_TOOLCHAIN_ROOT}$(TARGET_TOOLCHAIN_PREFIX)fromelf
 $(BINDIR)%.bin: $(BINDIR)%
 	@mkdir -p $(dir $@)