Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into pandas-metadata-fie…
Browse files Browse the repository at this point in the history
…ld-name
  • Loading branch information
jorisvandenbossche committed Dec 11, 2024
2 parents 900a5cd + e8a85dc commit fc92d71
Show file tree
Hide file tree
Showing 29 changed files with 628 additions and 81 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/dev_pr/issue_check.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ module.exports = async ({github, context}) => {
const pullRequestNumber = context.payload.number;
const title = context.payload.pull_request.title;
const issue = helpers.detectIssue(title)
if (issue){
if (issue && issue.kind === "github") {
await verifyGitHubIssue(github, context, pullRequestNumber, issue.id);
}
};
2 changes: 1 addition & 1 deletion .github/workflows/dev_pr/link.js
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ module.exports = async ({github, context}) => {
const pullRequestNumber = context.payload.number;
const title = context.payload.pull_request.title;
const issue = helpers.detectIssue(title);
if (issue){
if (issue && issue.kind === "github") {
await commentGitHubURL(github, context, pullRequestNumber, issue.id);
}
};
6 changes: 6 additions & 0 deletions ci/docker/debian-12-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,11 @@ RUN /arrow/ci/scripts/install_azurite.sh
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

# Prioritize system packages and local installation.
#
# The following dependencies will be downloaded due to missing/invalid packages
# provided by the distribution:
# - opentelemetry-cpp-dev is not packaged
ENV ARROW_ACERO=ON \
ARROW_AZURE=ON \
ARROW_BUILD_TESTS=ON \
Expand All @@ -134,6 +139,7 @@ ENV ARROW_ACERO=ON \
AWSSDK_SOURCE=BUNDLED \
Azure_SOURCE=BUNDLED \
google_cloud_cpp_storage_SOURCE=BUNDLED \
opentelemetry_cpp_SOURCE=BUNDLED \
ORC_SOURCE=BUNDLED \
PATH=/usr/lib/ccache/:$PATH \
PYTHON=python3 \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/fedora-39-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ ENV ARROW_ACERO=ON \
CC=gcc \
CXX=g++ \
google_cloud_cpp_storage_SOURCE=BUNDLED \
opentelemetry_cpp_SOURCE=BUNDLED \
PARQUET_BUILD_EXAMPLES=ON \
PARQUET_BUILD_EXECUTABLES=ON \
PATH=/usr/lib/ccache/:$PATH \
Expand Down
8 changes: 6 additions & 2 deletions ci/docker/ubuntu-20.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -135,16 +135,19 @@ RUN /arrow/ci/scripts/install_ceph.sh
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

# Prioritize system packages and local installation
# Prioritize system packages and local installation.
#
# The following dependencies will be downloaded due to missing/invalid packages
# provided by the distribution:
# - Abseil is not packaged
# - libc-ares-dev does not install CMake config files
# - flatbuffer is not packaged
# - libgtest-dev only provides sources
# - libprotobuf-dev only provides sources
# - opentelemetry-cpp-dev is not packaged
#
# ARROW-17051: this build uses static Protobuf, so we must also use
# static Arrow to run Flight/Flight SQL tests
# static Arrow to run Flight/Flight SQL tests.
ENV absl_SOURCE=BUNDLED \
ARROW_ACERO=ON \
ARROW_AZURE=OFF \
Expand Down Expand Up @@ -179,6 +182,7 @@ ENV absl_SOURCE=BUNDLED \
google_cloud_cpp_storage_SOURCE=BUNDLED \
gRPC_SOURCE=BUNDLED \
GTest_SOURCE=BUNDLED \
opentelemetry_cpp_SOURCE=BUNDLED \
ORC_SOURCE=BUNDLED \
PARQUET_BUILD_EXAMPLES=ON \
PARQUET_BUILD_EXECUTABLES=ON \
Expand Down
5 changes: 4 additions & 1 deletion ci/docker/ubuntu-22.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,13 @@ RUN /arrow/ci/scripts/install_azurite.sh
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

# Prioritize system packages and local installation
# Prioritize system packages and local installation.
#
# The following dependencies will be downloaded due to missing/invalid packages
# provided by the distribution:
# - Abseil is old
# - libc-ares-dev does not install CMake config files
# - opentelemetry-cpp-dev is not packaged
ENV absl_SOURCE=BUNDLED \
ARROW_ACERO=ON \
ARROW_AZURE=ON \
Expand Down Expand Up @@ -222,6 +224,7 @@ ENV absl_SOURCE=BUNDLED \
AWSSDK_SOURCE=BUNDLED \
Azure_SOURCE=BUNDLED \
google_cloud_cpp_storage_SOURCE=BUNDLED \
opentelemetry_cpp_SOURCE=BUNDLED \
ORC_SOURCE=BUNDLED \
PARQUET_BUILD_EXAMPLES=ON \
PARQUET_BUILD_EXECUTABLES=ON \
Expand Down
7 changes: 6 additions & 1 deletion ci/docker/ubuntu-24.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,11 @@ RUN /arrow/ci/scripts/install_azurite.sh
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

# Prioritize system packages and local installation
# Prioritize system packages and local installation.
#
# The following dependencies will be downloaded due to missing/invalid packages
# provided by the distribution:
# - opentelemetry-cpp-dev is not packaged
ENV ARROW_ACERO=ON \
ARROW_AZURE=ON \
ARROW_BUILD_STATIC=ON \
Expand Down Expand Up @@ -205,6 +209,7 @@ ENV ARROW_ACERO=ON \
AWSSDK_SOURCE=BUNDLED \
Azure_SOURCE=BUNDLED \
google_cloud_cpp_storage_SOURCE=BUNDLED \
opentelemetry_cpp_SOURCE=BUNDLED \
ORC_SOURCE=BUNDLED \
PARQUET_BUILD_EXAMPLES=ON \
PARQUET_BUILD_EXECUTABLES=ON \
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/cpp_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ else
-DgRPC_SOURCE=${gRPC_SOURCE:-} \
-DGTest_SOURCE=${GTest_SOURCE:-} \
-Dlz4_SOURCE=${lz4_SOURCE:-} \
-Dopentelemetry-cpp_SOURCE=${opentelemetry_cpp_SOURCE:-} \
-DORC_SOURCE=${ORC_SOURCE:-} \
-DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \
-DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \
Expand Down
4 changes: 3 additions & 1 deletion cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
"name": "features-minimal",
"hidden": true,
"cacheVariables": {
"ARROW_MIMALLOC": "OFF",
"ARROW_WITH_RE2": "OFF",
"ARROW_WITH_UTF8PROC": "OFF"
}
Expand All @@ -91,7 +92,8 @@
"ARROW_CSV": "ON",
"ARROW_DATASET": "ON",
"ARROW_FILESYSTEM": "ON",
"ARROW_JSON": "ON"
"ARROW_JSON": "ON",
"ARROW_MIMALLOC": "ON"
}
},
{
Expand Down
22 changes: 2 additions & 20 deletions cpp/cmake_modules/DefineOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -362,29 +362,11 @@ takes precedence over ccache if a storage backend is configured" ON)

define_option(ARROW_IPC "Build the Arrow IPC extensions" ON)

set(ARROW_JEMALLOC_DESCRIPTION "Build the Arrow jemalloc-based allocator")
if(WIN32
OR CMAKE_SYSTEM_NAME STREQUAL "FreeBSD"
OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch|ARM|arm"
OR NOT ARROW_ENABLE_THREADING)
# jemalloc is not supported on Windows.
#
# jemalloc is the default malloc implementation on FreeBSD and can't
# be built with --disable-libdl on FreeBSD. Because lazy-lock feature
# is required on FreeBSD. Lazy-lock feature requires libdl.
#
# jemalloc may have a problem on ARM.
# See also: https://github.com/apache/arrow/issues/44342
#
# jemalloc requires thread.
define_option(ARROW_JEMALLOC ${ARROW_JEMALLOC_DESCRIPTION} OFF)
else()
define_option(ARROW_JEMALLOC ${ARROW_JEMALLOC_DESCRIPTION} ON)
endif()
define_option(ARROW_JEMALLOC "Build the Arrow jemalloc-based allocator" OFF)

define_option(ARROW_JSON "Build Arrow with JSON support (requires RapidJSON)" OFF)

define_option(ARROW_MIMALLOC "Build the Arrow mimalloc-based allocator" OFF)
define_option(ARROW_MIMALLOC "Build the Arrow mimalloc-based allocator" ON)

define_option(ARROW_PARQUET
"Build the Parquet libraries"
Expand Down
7 changes: 3 additions & 4 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -738,9 +738,8 @@ if(DEFINED ENV{ARROW_ORC_URL})
set(ORC_SOURCE_URL "$ENV{ARROW_ORC_URL}")
else()
set_urls(ORC_SOURCE_URL
"https://www.apache.org/dyn/closer.cgi?action=download&filename=/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz"
"https://downloads.apache.org/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz"
"https://github.com/apache/orc/archive/rel/release-${ARROW_ORC_BUILD_VERSION}.tar.gz"
"https://www.apache.org/dyn/closer.lua/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz?action=download"
"https://dlcdn.apache.org/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz"
)
endif()

Expand Down Expand Up @@ -817,6 +816,7 @@ if(DEFINED ENV{ARROW_THRIFT_URL})
set(THRIFT_SOURCE_URL "$ENV{ARROW_THRIFT_URL}")
else()
set(THRIFT_SOURCE_URL
"https://www.apache.org/dyn/closer.lua/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz?action=download"
"https://dlcdn.apache.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
)
endif()
Expand Down Expand Up @@ -4971,7 +4971,6 @@ if(ARROW_WITH_OPENTELEMETRY)
# cURL is required whether we build from source or use an existing installation
# (OTel's cmake files do not call find_curl for you)
find_curl()
set(opentelemetry-cpp_SOURCE "AUTO")
resolve_dependency(opentelemetry-cpp)
set(ARROW_OPENTELEMETRY_LIBS
opentelemetry-cpp::trace
Expand Down
39 changes: 19 additions & 20 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,7 @@ if(WIN32)
list(APPEND ARROW_SYSTEM_LINK_LIBS "ws2_32")
endif()

if(NOT WIN32 AND NOT APPLE)
# Pass -lrt on Linux only
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
list(APPEND ARROW_SYSTEM_LINK_LIBS rt)
endif()

Expand Down Expand Up @@ -674,6 +673,7 @@ set(ARROW_TESTING_SRCS
testing/fixed_width_test_util.cc
testing/generator.cc
testing/gtest_util.cc
testing/math.cc
testing/process.cc
testing/random.cc
testing/util.cc)
Expand Down Expand Up @@ -726,9 +726,6 @@ set(ARROW_COMPUTE_SRCS
compute/function.cc
compute/function_internal.cc
compute/kernel.cc
compute/key_hash_internal.cc
compute/key_map_internal.cc
compute/light_array_internal.cc
compute/ordering.cc
compute/registry.cc
compute/kernels/chunked_internal.cc
Expand All @@ -747,20 +744,7 @@ set(ARROW_COMPUTE_SRCS
compute/kernels/vector_selection.cc
compute/kernels/vector_selection_filter_internal.cc
compute/kernels/vector_selection_internal.cc
compute/kernels/vector_selection_take_internal.cc
compute/row/encode_internal.cc
compute/row/compare_internal.cc
compute/row/grouper.cc
compute/row/row_encoder_internal.cc
compute/row/row_internal.cc
compute/util.cc
compute/util_internal.cc)

append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc)
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc)
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/compare_internal_avx2.cc)
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc)
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc)
compute/kernels/vector_selection_take_internal.cc)

if(ARROW_COMPUTE)
# Include the remaining kernels
Expand Down Expand Up @@ -793,10 +777,25 @@ if(ARROW_COMPUTE)
compute/kernels/vector_replace.cc
compute/kernels/vector_run_end_encode.cc
compute/kernels/vector_select_k.cc
compute/kernels/vector_sort.cc)
compute/kernels/vector_sort.cc
compute/key_hash_internal.cc
compute/key_map_internal.cc
compute/light_array_internal.cc
compute/row/encode_internal.cc
compute/row/compare_internal.cc
compute/row/grouper.cc
compute/row/row_encoder_internal.cc
compute/row/row_internal.cc
compute/util.cc
compute/util_internal.cc)

append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic_avx2.cc)
append_runtime_avx512_src(ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic_avx512.cc)
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc)
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc)
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/compare_internal_avx2.cc)
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc)
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc)
endif()

arrow_add_object_library(ARROW_COMPUTE ${ARROW_COMPUTE_SRCS})
Expand Down
18 changes: 10 additions & 8 deletions cpp/src/arrow/compute/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,18 @@ add_arrow_test(internals_test
function_test.cc
exec_test.cc
kernel_test.cc
light_array_test.cc
registry_test.cc
key_hash_test.cc
row/compare_test.cc
row/grouper_test.cc
row/row_encoder_internal_test.cc
row/row_test.cc
util_internal_test.cc)
registry_test.cc)

add_arrow_compute_test(expression_test SOURCES expression_test.cc)
add_arrow_compute_test(row_test
SOURCES
key_hash_test.cc
light_array_test.cc
row/compare_test.cc
row/grouper_test.cc
row/row_encoder_internal_test.cc
row/row_test.cc
util_internal_test.cc)

add_arrow_benchmark(function_benchmark PREFIX "arrow-compute")

Expand Down
8 changes: 7 additions & 1 deletion cpp/src/arrow/compute/api_scalar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -732,20 +732,26 @@ void RegisterScalarOptions(FunctionRegistry* registry) {

SCALAR_ARITHMETIC_UNARY(AbsoluteValue, "abs", "abs_checked")
SCALAR_ARITHMETIC_UNARY(Acos, "acos", "acos_checked")
SCALAR_ARITHMETIC_UNARY(Acosh, "acosh", "acosh_checked")
SCALAR_ARITHMETIC_UNARY(Asin, "asin", "asin_checked")
SCALAR_ARITHMETIC_UNARY(Atanh, "atanh", "atanh_checked")
SCALAR_ARITHMETIC_UNARY(Cos, "cos", "cos_checked")
SCALAR_ARITHMETIC_UNARY(Ln, "ln", "ln_checked")
SCALAR_ARITHMETIC_UNARY(Log10, "log10", "log10_checked")
SCALAR_ARITHMETIC_UNARY(Log1p, "log1p", "log1p_checked")
SCALAR_ARITHMETIC_UNARY(Log2, "log2", "log2_checked")
SCALAR_ARITHMETIC_UNARY(Sqrt, "sqrt", "sqrt_checked")
SCALAR_ARITHMETIC_UNARY(Negate, "negate", "negate_checked")
SCALAR_ARITHMETIC_UNARY(Sin, "sin", "sin_checked")
SCALAR_ARITHMETIC_UNARY(Sqrt, "sqrt", "sqrt_checked")
SCALAR_ARITHMETIC_UNARY(Tan, "tan", "tan_checked")
SCALAR_EAGER_UNARY(Asinh, "asinh")
SCALAR_EAGER_UNARY(Atan, "atan")
SCALAR_EAGER_UNARY(Cosh, "cosh")
SCALAR_EAGER_UNARY(Exp, "exp")
SCALAR_EAGER_UNARY(Expm1, "expm1")
SCALAR_EAGER_UNARY(Sign, "sign")
SCALAR_EAGER_UNARY(Sinh, "sinh")
SCALAR_EAGER_UNARY(Tanh, "tanh")

// Thin wrapper: forwards to CallFunction with the function name "round",
// passing `arg` as the sole argument along with `options` and `ctx`.
Result<Datum> Round(const Datum& arg, RoundOptions options, ExecContext* ctx) {
return CallFunction("round", {arg}, &options, ctx);
}
46 changes: 46 additions & 0 deletions cpp/src/arrow/compute/api_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,52 @@ Result<Datum> Atan(const Datum& arg, ExecContext* ctx = NULLPTR);
ARROW_EXPORT
Result<Datum> Atan2(const Datum& y, const Datum& x, ExecContext* ctx = NULLPTR);

/// \brief Compute the hyperbolic sine of the array values.
/// \param[in] arg The values to compute the hyperbolic sine for.
/// \param[in] ctx the function execution context, optional
/// \return the elementwise hyperbolic sine of the values
ARROW_EXPORT
Result<Datum> Sinh(const Datum& arg, ExecContext* ctx = NULLPTR);

/// \brief Compute the hyperbolic cosine of the array values.
/// \param[in] arg The values to compute the hyperbolic cosine for.
/// \param[in] ctx the function execution context, optional
/// \return the elementwise hyperbolic cosine of the values
ARROW_EXPORT
Result<Datum> Cosh(const Datum& arg, ExecContext* ctx = NULLPTR);

/// \brief Compute the hyperbolic tangent of the array values.
/// \param[in] arg The values to compute the hyperbolic tangent for.
/// \param[in] ctx the function execution context, optional
/// \return the elementwise hyperbolic tangent of the values
ARROW_EXPORT
Result<Datum> Tanh(const Datum& arg, ExecContext* ctx = NULLPTR);

/// \brief Compute the inverse hyperbolic sine of the array values.
/// \param[in] arg The values to compute the inverse hyperbolic sine for.
/// \param[in] ctx the function execution context, optional
/// \return the elementwise inverse hyperbolic sine of the values
ARROW_EXPORT
Result<Datum> Asinh(const Datum& arg, ExecContext* ctx = NULLPTR);

/// \brief Compute the inverse hyperbolic cosine of the array values.
/// \param[in] arg The values to compute the inverse hyperbolic cosine for.
/// \param[in] options arithmetic options (enable/disable overflow checking), optional
/// \param[in] ctx the function execution context, optional
/// \return the elementwise inverse hyperbolic cosine of the values
///
/// NOTE(review): for acosh the condition guarded by `options` is presumably an
/// out-of-domain input (arg < 1) rather than an overflow -- confirm against the
/// kernel and reword the `options` description if so.
ARROW_EXPORT
Result<Datum> Acosh(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
ExecContext* ctx = NULLPTR);

/// \brief Compute the inverse hyperbolic tangent of the array values.
/// \param[in] arg The values to compute the inverse hyperbolic tangent for.
/// \param[in] options arithmetic options (enable/disable overflow checking), optional
/// \param[in] ctx the function execution context, optional
/// \return the elementwise inverse hyperbolic tangent of the values
///
/// NOTE(review): for atanh the condition guarded by `options` is presumably an
/// out-of-domain input (|arg| > 1) rather than an overflow -- confirm against the
/// kernel and reword the `options` description if so.
ARROW_EXPORT
Result<Datum> Atanh(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
ExecContext* ctx = NULLPTR);

/// \brief Get the natural log of a value.
///
/// If argument is null the result will be null.
Expand Down
Loading

0 comments on commit fc92d71

Please sign in to comment.