From 64fc730f767de84835a5f1b4fc9b7831a3c2d15b Mon Sep 17 00:00:00 2001 From: Romain Poncet Date: Sun, 5 May 2024 20:07:12 +0100 Subject: [PATCH] Compatibility of schemas with nested types (#504) Hi, Here is a minimal example of the issue I've encountered. ```julia struct A x::Int end struct B a::A end v = [B(A(i)) for i =1:3] io = IOBuffer() Arrow.write(io, v; file=false) seekstart(io) Arrow.append(io, v) # throws ``` I don't know if this is really necessary, or if I'm not using this library properly, but this issue makes it difficult to append to Arrow files with nested types. Since I've only added more cases where the call to `append` can succeed, I do not think that this creates backward-compatibility issues. Thanks for the review! --------- Co-authored-by: Ben Baumgold <4933671+baumgold@users.noreply.github.com> --- Project.toml | 2 +- src/append.jl | 26 ++++++++++++++++++++++++-- test/runtests.jl | 19 +++++++++++++++++++ 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index b283dc9f..8f0cdf7d 100644 --- a/Project.toml +++ b/Project.toml @@ -17,7 +17,7 @@ name = "Arrow" uuid = "69666777-d1a9-59fb-9406-91d4454c9d45" authors = ["quinnj "] -version = "2.7.1" +version = "2.7.2" [deps] ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" diff --git a/src/append.jl b/src/append.jl index 5a9c2596..64a638b8 100644 --- a/src/append.jl +++ b/src/append.jl @@ -282,9 +282,31 @@ function is_equivalent_schema(sch1::Tables.Schema, sch2::Tables.Schema) for (t1, t2) in zip(sch1.types, sch2.types) tt1 = Base.nonmissingtype(t1) tt2 = Base.nonmissingtype(t2) - if t1 == t2 || - (tt1 <: AbstractVector && tt2 <: AbstractVector && eltype(tt1) == eltype(tt2)) + if t1 == t2 continue + elseif tt1 <: AbstractVector && tt2 <: AbstractVector && eltype(tt1) == eltype(tt2) + continue + elseif isstructtype(tt1) && isstructtype(tt2) + is_equivalent_type_by_field(tt1, tt2) + else + return false + end + end + true +end + +function
is_equivalent_type_by_field(T1, T2) + n1 = fieldcount(T1) + n2 = fieldcount(T2) + n1 != n2 && return false + + for i = 1:n1 + fieldname(T1, i) == fieldname(T2, i) || return false + + if fieldtype(T1, i) == fieldtype(T2, i) + continue + elseif isstructtype(T1) && isstructtype(T2) + is_equivalent_type_by_field(T1, T2) || continue else return false end diff --git a/test/runtests.jl b/test/runtests.jl index ed288b3d..6cf990a4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1042,5 +1042,24 @@ end @test tbl.f[2] === Foo493(4, 5) end end + + @testset "# 504" begin + struct Foo504 + x::Int + end + + struct Bar504 + a::Foo504 + end + + v = [Bar504(Foo504(i)) for i = 1:3] + io = IOBuffer() + Arrow.write(io, v; file=false) + seekstart(io) + Arrow.append(io, v) # testing the compatibility between the schema of the arrow Table, and the "schema" of v (using the fallback mechanism of Tables.jl) + seekstart(io) + t = Arrow.Table(io) + @test Arrow.Tables.rowcount(t) == 6 + end end # @testset "misc" end