Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

apache arrow library update #5962

Open
Altina-oz opened this issue Dec 12, 2024 · 1 comment
Open

apache arrow library update #5962

Altina-oz opened this issue Dec 12, 2024 · 1 comment

Comments

@Altina-oz
Copy link
Contributor

Altina-oz commented Dec 12, 2024

你在什么场景下需要该功能?

Update the xmake.lua script for the Apache Arrow library. Some of Arrow's CMake options have been removed in newer releases, so the current xmake.lua can no longer be used to install later versions of the Arrow library.

描述可能的解决方案

The following xmake.lua builds successfully on my local computer (Ubuntu 24.04):

package("myarrow")
    -- Upstream project metadata.
    set_homepage("https://arrow.apache.org/")
    set_description("Apache Arrow is a multi-language toolbox for accelerated data interchange and in-memory processing")
    set_license("Apache-2.0")

    -- Source locations: release tarball and the upstream git repository.
    add_urls("https://github.com/apache/arrow/archive/refs/tags/apache-arrow-$(version).tar.gz",
             "https://github.com/apache/arrow.git")

    add_versions("18.1.0", "026ecabd74f7b075f6c74e5448132ba40f35688a29d07616bcc1bd976676706c")

    -- Boolean package options, listed as {name, description, default}.
    -- They are registered below in exactly this order. Entries that are
    -- commented out are options the author has not enabled yet.
    local boolean_options = {
        -- {"build_utilities", "CBuild Arrow commandline utilities", false},
        {"compute", "Build all computational kernel functions", false},
        {"csv", "CSV reader module", false},
        -- {"cuda", " CUDA integration for GPU development. Depends on NVIDIA CUDA toolkit. The CUDA toolchain used to build the library can be customized by using the $CUDA_HOME environment variable.", false},
        {"dataset", "Dataset API, implies the Filesystem API", false},
        {"filesystem", "Filesystem API for accessing local and remote filesystems", false},
        {"flight", "Arrow Flight RPC system, which depends at least on gRPC", false},
        {"flight_sql", "Arrow Flight SQL", false},
        {"gandiva", "Gandiva expression compiler, depends on LLVM, Protocol Buffers, and re2", false},
        -- {"gandiva_java", "Gandiva JNI bindings for Java", false},
        -- {"gcs", "Build Arrow with GCS support (requires the GCloud SDK for C++)", false},
        {"hdfs", "Arrow integration with libhdfs for accessing the Hadoop Filesystem", false},
        {"json", "JSON reader module", true},
        {"orc", "Arrow integration with Apache ORC", false},
        {"parquet", "Apache Parquet libraries and Arrow integration", false},
        {"parquet_require_encryption", "Parquet Modular Encryption", false},
        -- {"s3", "Support for Amazon S3-compatible filesystems", false},
        {"substrait", "Build with support for Substrait", false},
        -- {"tensorflow", "Apache Parquet libraries and Arrow integration", false},

        -- Allocators.
        -- NOTE(review): the original author noted that Arrow uses vendored
        -- mimalloc and jemalloc and questioned whether these two should ever
        -- be mapped to external dependencies - confirm.
        {"mimalloc", "Build the Arrow mimalloc-based allocator", false},
        {"jemalloc", "Build the Arrow jemalloc-based allocator", true},

        -- Useful extensions.
        {"with_re2", "Build with support for regular expressions using the re2 library, on by default and used when ARROW_COMPUTE or ARROW_GANDIVA is ON", true},
        {"with_utf8proc", "Build with support for Unicode properties using the utf8proc library, on by default and used when ARROW_COMPUTE or ARROW_GANDIVA is ON", true},
        {"with_thrift", "Build support for thrift", false},
        {"with_grpc", "Build support for gRPC", false},
        {"with_rapidjson", "Build support for rapidjson", false},

        -- Compression codecs available in Arrow.
        {"with_brotli", "Build support for Brotli compression", false},
        {"with_bz2", "Build support for BZ2 compression", false},
        {"with_lz4", "Build support for lz4 compression", false},
        {"with_snappy", "Build support for Snappy compression", false},
        {"with_zlib", "Build support for zlib (gzip) compression", false},
        {"with_zstd", "Build support for ZSTD compression", false},

        -- Arrow IPC extension.
        {"ipc", "build the IPC extensions", true},

        -- If true, arrow will look for shared libraries for third party dependency.
        -- {"shared_dep", "Use shared library for dependency", false},
    }
    for _, option in ipairs(boolean_options) do
        add_configs(option[1], {description = option[2], default = option[3], type = "boolean"})
    end

    add_deps("cmake", "boost", "xsimd")

    -- System libraries: pthread on Linux and macOS, plus execinfo on macOS.
    if is_plat("linux", "macosx") then
        add_syslinks("pthread")
    end
    if is_plat("macosx") then
        add_syslinks("execinfo")
    end



    -- Load hook: selects the libraries to link and the xmake packages to
    -- depend on, based on the options enabled for this package.
    -- @param package  the xmake package instance being loaded
    on_load(function (package)
        -- Optional component libraries are listed before the core libraries.
        if package:config("parquet") then
            package:add("links", "parquet")
        end
        if package:config("dataset") then
            package:add("links", "arrow_dataset")
        end
        package:add("links", "arrow", "arrow_bundled_dependencies")

        -- Ordered list of {option name, packages required when it is enabled}.
        local option_deps = {
            {"parquet_require_encryption", {"rapidjson"}},
            {"parquet",                    {"thrift", "zlib"}},
            {"compute",                    {"re2", "utf8proc"}},
            {"gandiva",                    {"re2", "utf8proc"}},
            {"with_thrift",                {"thrift", "zlib"}},
            {"flight",                     {"grpc", "zlib", "re2", "protobuf-cpp"}},
            {"orc",                        {"protobuf-cpp"}},
            {"substrait",                  {"protobuf-cpp"}},
            {"with_grpc",                  {"grpc", "zlib", "re2"}},
            {"json",                       {"rapidjson"}},
            -- NOTE(review): no `with_protobuf` option appears to be declared
            -- for this package, so this entry is presumably inert - confirm.
            {"with_protobuf",              {"protobuf-cpp"}},
            {"with_re2",                   {"re2"}},
            {"with_utf8proc",              {"utf8proc"}},
            {"with_brotli",                {"brotli"}},
            -- The bzip2 library is packaged as "bzip2", not "bz2".
            {"with_bz2",                   {"bzip2"}},
            {"with_lz4",                   {"lz4"}},
            -- The option is declared as "with_snappy"; checking "snappy"
            -- would never match and the dependency would be skipped.
            {"with_snappy",                {"snappy"}},
            {"with_zlib",                  {"zlib"}},
            {"with_zstd",                  {"zstd"}},
        }

        -- Several options share dependencies; request each package only once.
        local requested = {}
        for _, entry in ipairs(option_deps) do
            if package:config(entry[1]) then
                for _, dep in ipairs(entry[2]) do
                    if not requested[dep] then
                        requested[dep] = true
                        package:add("deps", dep)
                    end
                end
            end
        end
    end)

    -- Install hook for Linux, macOS and BSD: translates the package options
    -- into CMake definitions and builds Arrow from its `cpp` subdirectory.
    -- @param package  the xmake package instance being installed
    on_install("linux", "macosx", "bsd", function (package)
        -- Render a truthy/falsy value as a CMake boolean.
        local function on_off(value)
            return value and "ON" or "OFF"
        end

        local configs = {
            "-DARROW_DEPENDENCY_SOURCE=SYSTEM",
            "-DARROW_THREADING=ON"
        }

        table.insert(configs, "-DCMAKE_BUILD_TYPE=" .. (package:debug() and "Debug" or "Release"))

        local shared = package:config("shared")
        table.insert(configs, "-DARROW_BUILD_STATIC=" .. on_off(not shared))
        table.insert(configs, "-DARROW_BUILD_SHARED=" .. on_off(shared))
        -- NOTE(review): no `shared_dep` option appears to be declared for this
        -- package, in which case this is always OFF - confirm.
        table.insert(configs, "-DARROW_DEPENDENCY_USE_SHARED=" .. on_off(package:config("shared_dep")))

        -- Options whose CMake variable is not simply ARROW_<UPPERCASE NAME>.
        -- Arrow documents Parquet encryption as PARQUET_REQUIRE_ENCRYPTION,
        -- without the ARROW_ prefix; the prefixed form would be ignored.
        local cmake_names = {
            parquet_require_encryption = "PARQUET_REQUIRE_ENCRYPTION"
        }

        -- Forward every non-builtin package option as a CMake definition.
        for config, enabled in pairs(package:configs()) do
            if not package:extraconf("configs", config, "builtin") then
                local cmake_name = cmake_names[config] or ("ARROW_" .. string.upper(config))
                table.insert(configs, "-D" .. cmake_name .. "=" .. on_off(enabled))
            end
        end

        -- The C++ sources and their CMakeLists.txt live under `cpp`.
        os.cd("cpp")
        import("package.tools.cmake").install(package, configs)
    end)

    -- Test hook: checks that a small C++17 snippet using the Arrow API
    -- passes xmake's snippet check against the installed package.
    -- @param package  the xmake package instance under test
    on_test(function (package)
        local snippet = [[
            void test() {
                arrow::MemoryPool* pool = arrow::default_memory_pool();
                arrow::Int64Builder id_builder(pool);
                (void)id_builder;
            }
        ]]
        local check_options = {
            configs = {languages = "c++17"},
            includes = "arrow/api.h"
        }
        -- Pass the second return value along so assert can use it as the message.
        local ok, errors = package:check_cxxsnippets({test = snippet}, check_options)
        assert(ok, errors)
    end)

However, this only supports a subset of Arrow features, and only on Linux; I have no Windows environment right now. So it may not be a good idea to simply upload this script and open a PR with it.

cf. https://arrow.apache.org/docs/developers/cpp/building.html,
https://arrow.apache.org/docs/developers/cpp/windows.html

描述你认为的候选方案

No response

其他信息

No response

@Altina-oz
Copy link
Contributor Author

#5981

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant