From eafce3d1bbd5300d363cc61061321e8882ff8c46 Mon Sep 17 00:00:00 2001 From: edvaone Date: Thu, 25 Jul 2024 15:12:55 +0300 Subject: [PATCH] Convert DECIMAL type (#53) Author: edvardas --- .../parquet/readsupport/MapConverter.java | 28 +++++++++++++++++- .../search/SearchServiceTest.groovy | 24 +++++++++++++++ .../_delta_log/00000000000000000000.json | 4 +++ ...4f5a-9a24-0e41fa1c58f2-c000.snappy.parquet | Bin 0 -> 2873 bytes 4 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 src/test/resources/test_data_types/_delta_log/00000000000000000000.json create mode 100644 src/test/resources/test_data_types/part-00000-d31af104-2315-4f5a-9a24-0e41fa1c58f2-c000.snappy.parquet diff --git a/src/main/java/com/exacaster/deltafetch/search/parquet/readsupport/MapConverter.java b/src/main/java/com/exacaster/deltafetch/search/parquet/readsupport/MapConverter.java index b95a023..2ee2bb9 100644 --- a/src/main/java/com/exacaster/deltafetch/search/parquet/readsupport/MapConverter.java +++ b/src/main/java/com/exacaster/deltafetch/search/parquet/readsupport/MapConverter.java @@ -1,5 +1,7 @@ package com.exacaster.deltafetch.search.parquet.readsupport; +import java.math.BigDecimal; +import java.math.BigInteger; import org.apache.parquet.io.api.Binary; import org.apache.parquet.io.api.Converter; import org.apache.parquet.io.api.GroupConverter; @@ -51,7 +53,7 @@ public Optional visit( @Override public Optional visit( LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) { - return of(new SimplePrimitiveConverter(field.getName())); + return of(new DecimalConverter(field.getName(), decimalLogicalType.getScale())); } }).orElse(new SimplePrimitiveConverter(field.getName())); } @@ -149,4 +151,28 @@ public void addBinary(Binary value) { array.add(value.toStringUsingUTF8()); } } + + private class DecimalConverter extends PrimitiveConverter { + private final String name; + private final int scale; + + public DecimalConverter(String name, int scale) { + this.name = name; + this.scale = scale; + } + + @Override + public void addBinary(Binary value) { + BigInteger unscaledValue = new BigInteger(value.getBytes()); + BigDecimal decimalValue = new BigDecimal(unscaledValue, scale); + record.put(name, decimalValue); + } + + @Override + public void addLong(long value) { + BigInteger unscaledValue = BigInteger.valueOf(value); + BigDecimal decimalValue = new BigDecimal(unscaledValue, scale); + record.put(name, decimalValue); + } + } } \ No newline at end of file diff --git a/src/test/groovy/com/exacaster/deltafetch/search/SearchServiceTest.groovy b/src/test/groovy/com/exacaster/deltafetch/search/SearchServiceTest.groovy index 7897e12..67abd82 100644 --- a/src/test/groovy/com/exacaster/deltafetch/search/SearchServiceTest.groovy +++ b/src/test/groovy/com/exacaster/deltafetch/search/SearchServiceTest.groovy @@ -30,4 +30,28 @@ class SearchServiceTest extends Specification { then: "returns empty" result.isEmpty() } + def worksWithDifferentTypes() { + given: + def cache = Mock(SyncCache) { + get(*_) >> Optional.empty() + } + def conf = new Configuration() + def statsReader = new DeltaMetaReader(conf, cache) + def svc = new SearchService(statsReader, new Configuration()) + def path = getClass().getResource("/test_data_types").toString() + + when: + def result = svc.find(path, [new ColumnValueFilter("user_id", "555")], true, 1).findFirst() + + then: + result.isPresent() + result.get().getValue().get("trait_string") == "ACTIVE" + result.get().getValue().get("trait_decimal18_3") == 5.555 + result.get().getValue().get("trait_decimal21_3") == 55.123 + result.get().getValue().get("trait_double") == 12345.678 + result.get().getValue().get("trait_float") == 12345.678F + result.get().getValue().get("trait_int") == 12345 + result.get().getValue().get("trait_bigint") == 123456789012345 + result.get().getValue().get("trait_boolean") == true + } } diff --git a/src/test/resources/test_data_types/_delta_log/00000000000000000000.json b/src/test/resources/test_data_types/_delta_log/00000000000000000000.json new file mode 100644 index 0000000..beffb30 --- /dev/null +++ b/src/test/resources/test_data_types/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1721901554498,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2873"},"engineInfo":"Apache-Spark/3.4.2 Delta-Lake/2.4.0","txnId":"b5285209-e1ef-4844-8c25-9353a33e2b75"}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"17da5576-7e97-4e09-a01d-74f679b262e8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"user_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_decimal18_3\",\"type\":\"decimal(18,3)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_decimal21_3\",\"type\":\"decimal(21,3)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_double\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_float\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_int\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_bigint\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_boolean\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1721901552277}} +{"add":{"path":"part-00000-d31af104-2315-4f5a-9a24-0e41fa1c58f2-c000.snappy.parquet","partitionValues":{},"size":2873,"modificationTime":1721901554000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"user_id\":\"111\",\"trait_string\":\"ACTIVE\",\"trait_decimal18_3\":1.111,\"trait_decimal21_3\":11.123,\"trait_double\":3456.78,\"trait_float\":12345.678,\"trait_int\":12345,\"trait_bigint\":123456789012345},\"maxValues\":{\"user_id\":\"555\",\"trait_string\":\"DEACTIVATED\",\"trait_decimal18_3\":5.555,\"trait_decimal21_3\":55.123,\"trait_double\":12345.678,\"trait_float\":98765.43,\"trait_int\":98765,\"trait_bigint\":987654321098765},\"nullCount\":{\"user_id\":0,\"trait_string\":0,\"trait_decimal18_3\":0,\"trait_decimal21_3\":0,\"trait_double\":0,\"trait_float\":0,\"trait_int\":0,\"trait_bigint\":0,\"trait_boolean\":0}}"}} diff --git a/src/test/resources/test_data_types/part-00000-d31af104-2315-4f5a-9a24-0e41fa1c58f2-c000.snappy.parquet b/src/test/resources/test_data_types/part-00000-d31af104-2315-4f5a-9a24-0e41fa1c58f2-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..54d4725a4aef23f8c70a3db1450d5adba24956ea GIT binary patch literal 2873 zcmbtWduUr#7(X|;NlwzFTgT4jUbtWg>!pu)=}ns2l_06MIz^<+F@;s?O>$eq(xl5{ zGP+79QgjRjCo0&%gyM>pp;IhK(XoN-A5r{61@Qrn4$7Dy+vp$r!|$B?tgX^&67D(Y zd;Grde6O34!Gjt>A4{A1|L53*x}(8*bNP{{otYDA%NdQ1ajlYJIfrk z#+%l{anl;-rw)vQKX>2$jSHNkzP|^d zuTODkTk7-@zkK=g*`RZY6Timh4oo2w8(Hredda^X7(`kjTkAhvp7VPm8L}ctq&A-< z`k^@BNejUz36OhA44S!@#1?PrT|R~^vX3NUCvPr0Y^9^SE{XOYQsW9>I}@O9C&$C) zX{05QXTf&0cp5<r;B|=vck*yfLYhJ%~ixePXCtH)><0Vmo?5 zrh=3fZPfHqW31k&l`0c9g9k>N$tW6y(xhJ2`p44k5$Lsj$x-^G>NK~1mrH4uTo;q; zqH+<}OtIR`mkmTPkXlRwncOmzx2ZV9TPnI+(%Z z!iylFTnfl$Uy7_r!3^tC^Rxy0x9mbKL`BM4P=?+xQ|y+S1HKkiJ_-Uj?QW5^It6!6 zTY#@p@b4J-I=m}rf<5(P)~zR6>mlW02%=>V3DKDxG+LXfTUfkDEzU-1v{s<%5m=;E zzp*Wt$*hnAkt~cJkwbvbR&T1`Xa%S*q9lQYH7rCFEZoL>VJ;;_v09r@^(h^Ohsx%N zdfcehOVx@PNUObS3W=SYBGM%XClaysmCg8cUFp%I}X_m`642{G9XdB6d4V)O5o|#EH0OnjU z$JqZx@yt9I$|81X{mFFaPW`f!X-jTKOlkiGkxfz$n0?++yx8z{LG0s>Fkm*j2yiea zj2aMjgkiRd3sbJ{JonZ+!3A*%|LV)LGe~^gD&6}hYjLaAR`wE48TI