From e99191f0059c5feb6d63d3e31501f56a694939cc Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Fri, 2 Feb 2024 15:00:45 +0100 Subject: [PATCH] fix: omitting metadata during direct upload (#157) * fix: omitting metadata during direct upload * tests: make error more verbose * tests: ignore test files * tests: drop old code --- .gitignore | 1 + deepset_cloud_sdk/_api/files.py | 3 +- .../service/test_integration_files_service.py | 40 ++++++++++++++++++ tests/test_data/msmarco.10/.DS_Store | Bin 32772 -> 0 bytes tests/unit/api/test_files.py | 9 ++-- 5 files changed, 45 insertions(+), 8 deletions(-) delete mode 100644 tests/test_data/msmarco.10/.DS_Store diff --git a/.gitignore b/.gitignore index 780b818e..fc31042a 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,4 @@ cython_debug/ temp .idea .python-version +.DS_Store diff --git a/deepset_cloud_sdk/_api/files.py b/deepset_cloud_sdk/_api/files.py index a139454d..1efd8529 100644 --- a/deepset_cloud_sdk/_api/files.py +++ b/deepset_cloud_sdk/_api/files.py @@ -194,8 +194,7 @@ async def direct_upload_path( response = await self._deepset_cloud_api.post( workspace_name, "files", - files={"file": (file_name, file)}, - json={"meta": meta}, + files={"file": (file_name, file), "meta": (None, json.dumps(meta))}, params={"write_mode": write_mode.value}, ) if response.status_code != codes.CREATED or response.json().get("file_id") is None: diff --git a/tests/integration/service/test_integration_files_service.py b/tests/integration/service/test_integration_files_service.py index 9445b0eb..53cbaa20 100644 --- a/tests/integration/service/test_integration_files_service.py +++ b/tests/integration/service/test_integration_files_service.py @@ -30,6 +30,26 @@ async def test_direct_upload_path(self, integration_config: CommonConfig, worksp assert result.failed_upload_count == 0 assert len(result.failed) == 0 + names_of_uploaded_files = [ + file.name + for file in Path("./tests/test_data/msmarco.10").glob("*.txt") + if not file.name.endswith(".meta.json") + ] + # Check the metadata was uploaded correctly + files: List[File] = [] + async for file_batch in file_service.list_all( + workspace_name=workspace_name, + batch_size=11, + timeout_s=120, + ): + files += file_batch + + for file in files: + if file.name in names_of_uploaded_files: + assert ( + file.meta.get("source") == "msmarco" + ), f"Metadata was not uploaded correctly for file '{file.name}': {file.meta}" + async def test_async_upload( self, integration_config: CommonConfig, workspace_name: str, monkeypatch: MonkeyPatch ) -> None: @@ -49,6 +69,26 @@ async def test_async_upload( assert result.failed_upload_count == 0 assert len(result.failed) == 0 + names_of_uploaded_files = [ + file.name + for file in Path("./tests/test_data/msmarco.10").glob("*.txt") + if not file.name.endswith(".meta.json") + ] + # Check the metadata was uploaded correctly + files: List[File] = [] + async for file_batch in file_service.list_all( + workspace_name=workspace_name, + batch_size=11, + timeout_s=120, + ): + files += file_batch + + for file in files: + if file.name in names_of_uploaded_files: + assert ( + file.meta.get("source") == "msmarco" + ), f"Metadata was not uploaded correctly for file '{file.name}': {file.meta}" + async def test_upload_texts(self, integration_config: CommonConfig, workspace_name: str) -> None: async with FilesService.factory(integration_config) as file_service: files = [ diff --git a/tests/test_data/msmarco.10/.DS_Store b/tests/test_data/msmarco.10/.DS_Store deleted file mode 100644 index 6dfa2354e963a082010c49cbdaaf4d38b59a8804..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 32772 zcmeI5%Z^-E6^2iR00AQ_lMFGQBUx^@yY0@U(}^7iCKeKND+CY{3Qi<~l#yjA+yhQ{ z0Y8Fy13!Wv!H?iM6T|w}?&|Ezzs}lBP(-Sea@p1O*H?S3|E#^&KBu~uW!b#_yI<@r z%O}gSyks}-U;f^*yzrf6xoG#7?Y8~m``m`FZ`dOn`~Ne$w z&)z-oRg1t2_J+P?w-@bp&hmD>$=}CjijRN%!G}M8|APWgFZa@ zU%LqGBJlN%z@`lSrE_235bQ?2i@+`dK?JA|7wyLV$?5(5a_^y+1jPH~Zp3*MHDk(Fqv6X9>4eK&#J89|3kZVstG>+AM;N) zf7u*(HQ?ia#qU%(-~F%nWjAJJcSG_!RnB++*E=lit`Ny`3-)E)|7!f5YJ!jd75|#m zvpstk(tjKLulSuR=ez%VK07#<%}Y2y^4R1252~C`B>!=@iX{HOZsRTv*w^vL z{{vsBCioUf$*1~One28AEo+kBsd7I47ko001A_6BHNhWL6MT!LGuG`(KTps}in9<9Gin|8=SfKK|Eyj^<$eWbL(Mar~7UaHg8z|9ck6uK%*E3I3p(;G6#?|G>r`_rI1k z$?sG-AOA~!+5Zdrf3ha|ooa$_ksSDR(s+*wEc5UFe^h0S`A4WG_!h~K&pxo`Th?BS z_?0T>6G_P@lk7lAK3S9ePL=cB|BtIkmTSS-`-UIvzf%7tzf(={@xS6z{n!D}zI0U%yagZR`Idd&iY(f{*`4K07(ge=GhM z{6UrT-T#WeXCq+;OvazANq(ow`S|}2m#Rn>|8s>P6(|@#{+IktHNiLk2Yl8tEB+7o zjVk98Ny%sI*DV4v{$x$^JJkf=A}RTY#;z)9JO7fk*B?|_EB@zvL8F@BoBsu$W1nL= zh(G(^f+*Zj5h zpJh$PpB+;oaO&Ee@BUYOoD{Bq-T!L*ooa%Q|7*VP@z);R_W$DA?+HHsuld{jv!1$Y zGq61V-T%)k*OcS$im$>dRnB++k9^K!U5@LI`+wvQs+^Di|9HL{Ng026rSd5Vas9RX zKjK%a3BLJ1;ByQH^_Tv)xr)EUpLgA;az2p^`0VBc_0RpU$KR+X_~w7fX9lizGRFG< z#@#Aw+wpg*3BE;A@~Qn?g%bK-@;g<|Cz6uSN}ha5K3S9ePL=cB|M#j$Zv9{OfL7|i zby! zO^zRarJCU5f5oTvv6h$dr~XTRr^@;6f5oTw3fG_Re--~uHNnUKijVuk{V%e1e7nlp z*8iPqf{*_-pS|3b`XBHcRnB++kNg80J)KOHf2;q3Kd5p(k^Ix0Dw4(jTzi%Dzmngn zCiv!m$uC!51?vy|FZrD+=i`6LXC==HR33k1P4YX{1mFBG`K9^=^Pl_wah0{=fBH3Q z=9y}OZ;=f7;rS2h|8c~xR5_nW27F#8TL0pI&2LmW-~IojisaV+u&a@D^N(ds@;lW8 zAODYhj(jRW;6L~OIQ~JE^WFc7zh}`8=U?}~;&-YEKK@sHR)16zdHj*JH!oFLEB@!` zYg7|_{2%ezOVxbKTF9?dIp6(X^Vd3k{k<>y|KStdIiE=W`M8Q?@jtIb_M`n*%bFa2 zr<&kfBm+LXe=GGr;5VwAkN+jV%;UlQPu3*AQ%&$Kk^#S5%c%W_{~uOa+xnmW>%U(A zOFq39N0a8`f5q=q`S^*X;By{_^>5@q%bMU1s+{lse^f=X_@7mLGn0<>U+@Rj1RwuL z{P6l$>VL?uR5{=MuleLMh`;-P%s=CAR1wvEF0AX-y$jb z{5*@*>whJ`Q{{a3zZyT+(XgK?k3X^&@EcXm$Nzu1P(^a7mvk(7MuXn6e>S(E%uHNm$?27G$yp#LLlC-}Y-T#Vz;O-CN@BUZu?^F|f{IB?|K3L7j{Ja03RaqmSMQR5{=MulO9v`xXP~Kli^Hf2W$@?2meEhHZ z%}m;^fAN37Z&W$o{Xg={DlWeM*XqCE52~C`B>#H1ie&MBd8Kgwnff2{E7b(wBH8eD zkH7Zlw*M#c-Opjm`S`!#Z^zHkAI|?|O^&})P4F#}lF!sUmCiwVY@rh(O|C6=1kE^T||8w<0 zqnhC3f5m706|O(2|B~OSa=!b27oF`(N`}Pp#DdfZwPl`1pV1mwimuGIIQ6?Z$;FYsLSAYJ!jdHJ_cN zh;La7_>C&(yZ;rxoP7=WWKHrrRn8}pf4fvgviP5CA6Wga)c=6rs3!OpNy+c7Bxw6D z>c8Z7s+>N3{@bQ1mFZpL3#r}U# zf8GDjE7z3cZ&2lY_rKzEEepHJGXCy=#qU%(AOHXTd^M8o_{;9c%KrZi#qU%TeDlBH za~%QoH`xDh|Bw7ZmGg;Yz-RY2h`;+^^BdI!-~2E6tYxl6^@pt8yjx|h_>bR7cB?s$Y>c8Z7stG>+*Zgt? zh#tRXE#Nn*obUc0`Fx6not)tKCu@Q~s3!RMU-9|ub-4fM{(o9!t@xj-KO5BqAO9;p zJ0P4*mhpH0D}JZS`R@Ob&pML*&o%##{6UrT-T%+3NEZL|N}NYOJCeKgH~!cBMm535 z|BBD&xLD20_*4HSzfoc0$?sGXeEi??%g<5dEN*=Lz19DBZdY03=l@YJ zJJkdq|0_Pn5*0wkAOA~!r^@;6|B=s8O!lMv+pI14gDU6a{|9%fNb>Xl@O`OeEA?OS z2h{}MA}RP(KGxqs{lWi&Kd5p({vY_9N8;-1>Pmw4_$O-vU#KSd=6}hjlCS>!pLZTt zS;PPEJJkf=A}RQsdB=T$|J?s0e^BLoA}RRP9@Zd1|3TKo_y<+acmF@BB8mUY9E$rQ zzGY4FdC!$k49Pb5D- zt|E#5IgeIW@@KEq)<4$&!}x`2f^Ys8{Bj;GKL24^6Z}Dy^YOpnm$OK8Q*!<%Yl1(h zCioUf$!Gox`R@ORRo2Kq`*odaf^U%&eCimhfMESi{l7Kx2UX4|l7i3COdXE=XIT^c zL6!5}|BtFj7XO#)c-e!I`FH=1;~!KLeEdK1*?DC38~Bg)j~M@;%K7g9o=@jdX2GEU zasT(@7pe(9{vY{V`N7o;!Tu9jd-r^mHT+M%+Nmb^_`l~<$H{%X{<5qMe4)zu?tjH+ z?g`I-kTuEgR5_nW4!?heI>ui5>gOK?e^5>EEs}yyEX%c2`uMY~3I3qU`9xCisiUkW zgZe|(1bFq60a?^k2~{|_uk{7?V@ diff --git a/tests/unit/api/test_files.py b/tests/unit/api/test_files.py index 7f562598..5a3b8781 100644 --- a/tests/unit/api/test_files.py +++ b/tests/unit/api/test_files.py @@ -329,8 +329,7 @@ async def test_direct_upload_file(self, files_api: FilesAPI, mocked_deepset_clou mocked_deepset_cloud_api.post.assert_called_once_with( "test_workspace", "files", - files={"file": ("basic.txt", ANY)}, - json={"meta": {"key": "value"}}, + files={"file": ("basic.txt", ANY), "meta": (None, '{"key": "value"}')}, params={ "write_mode": "OVERWRITE", }, @@ -352,8 +351,7 @@ async def test_direct_upload_file_with_name(self, files_api: FilesAPI, mocked_de mocked_deepset_cloud_api.post.assert_called_once_with( "test_workspace", "files", - files={"file": ("my_file.txt", ANY)}, - json={"meta": {"key": "value"}}, + files={"file": ("my_file.txt", ANY), "meta": (None, '{"key": "value"}')}, params={"write_mode": "OVERWRITE"}, ) @@ -373,8 +371,7 @@ async def test_direct_upload_with_path_as_string(self, files_api: FilesAPI, mock mocked_deepset_cloud_api.post.assert_called_once_with( "test_workspace", "files", - files={"file": ("my_file.txt", ANY)}, - json={"meta": {"key": "value"}}, + files={"file": ("my_file.txt", ANY), "meta": (None, '{"key": "value"}')}, params={"write_mode": "FAIL"}, )