Skip to content

Commit

Permalink
extend the upload_file to upload directories
Browse files Browse the repository at this point in the history
  • Loading branch information
MAfarrag committed Dec 14, 2024
1 parent a67ea85 commit d8c6f71
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 25 deletions.
72 changes: 60 additions & 12 deletions src/unicloud/google_cloud/gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import fnmatch
import os
from pathlib import Path
from typing import List, Optional
from typing import List, Optional, Union

from google.cloud import storage
from google.oauth2 import service_account
Expand Down Expand Up @@ -271,29 +271,77 @@ def file_exists(self, file_name: str) -> bool:
blob = self.bucket.get_blob(file_name)
return False if blob is None else True

def upload_file(self, local_path: Union[str, Path], bucket_path: Union[str, Path]):
    """Upload a file or an entire directory to the GCS bucket.

    If `local_path` is a directory, every file below it (including files in
    nested subdirectories) is uploaded, and the directory structure relative
    to `local_path` is preserved under `bucket_path`.

    Parameters
    ----------
    local_path : Union[str, Path]
        The path to the local file or directory to upload.
        - For a single file, provide the full path to the file
          (e.g., "path/to/file.txt").
        - For a directory, provide the path to the directory
          (e.g., "path/to/directory/").
    bucket_path : Union[str, Path]
        The destination in the bucket. A `Path` is converted to a
        "/"-separated string before it is used as a blob name.
        - For a single file, the full blob name
          (e.g., "bucket/folder/file.txt").
        - For a directory upload, the prefix placed in front of every
          relative file path (e.g., "bucket/folder/"). Trailing slashes are
          ignored; an empty prefix uploads the files under their relative
          paths only.

    Raises
    ------
    FileNotFoundError
        If the `local_path` does not exist.
    ValueError
        If the `local_path` is neither a file nor a directory.

    Examples
    --------
    >>> Bucket_ID = "test-bucket"
    >>> PROJECT_ID = "py-project-id"
    >>> gcs = GCS(PROJECT_ID)  # doctest: +SKIP
    >>> my_bucket = gcs.get_bucket(Bucket_ID)  # doctest: +SKIP

    Upload a single file:

    >>> my_bucket.upload_file("local/file.txt", "bucket/folder/file.txt")  # doctest: +SKIP

    Upload an entire directory:

    >>> my_bucket.upload_file("local/directory/", "bucket/folder/")  # doctest: +SKIP

    Notes
    -----
    - For directory uploads, the relative structure of the local directory
      is preserved in the bucket.
    - One line is printed per uploaded file.
    """
    local_path = Path(local_path)
    # The signature accepts a Path destination, but blob names are built with
    # string operations below; normalise once so both branches see a str.
    if isinstance(bucket_path, Path):
        bucket_path = bucket_path.as_posix()

    if not local_path.exists():
        raise FileNotFoundError(f"The local path {local_path} does not exist.")

    if local_path.is_file():
        # Upload a single file
        blob = self.bucket.blob(bucket_path)
        blob.upload_from_filename(str(local_path))
        print(f"File {local_path} uploaded to {bucket_path}.")
    elif local_path.is_dir():
        # Hoisted out of the loop: the prefix is the same for every file.
        prefix = bucket_path.rstrip("/")
        for file in local_path.rglob("*"):
            if not file.is_file():
                continue
            # Preserve directory structure in the bucket
            relative_path = file.relative_to(local_path).as_posix()
            # Avoid a leading "/" in the blob name when the prefix is empty.
            bucket_file_path = f"{prefix}/{relative_path}" if prefix else relative_path
            blob = self.bucket.blob(bucket_file_path)
            blob.upload_from_filename(str(file))
            print(f"File {file} uploaded to {bucket_file_path}.")
    else:
        raise ValueError(
            f"The local path {local_path} is neither a file nor a directory."
        )

def download(self, file_name, local_path):
"""Download a file from GCS.
Expand Down
19 changes: 19 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
from pathlib import Path
from typing import Dict

import pytest

Expand All @@ -11,3 +13,20 @@ def test_file() -> Path:
@pytest.fixture
def test_file_content() -> str:
    """Provide the sample text content string used by the tests."""
    content = "This is a test file.\n"
    return content



@pytest.fixture
def upload_test_data() -> Dict[str, object]:
    """Describe the directory-upload scenario shared by the upload tests.

    Returns
    -------
    Dict[str, object]
        A mapping with three entries:
        - "local_dir": `Path` of the local directory to upload.
        - "bucket_path": `str` prefix under which the files are uploaded.
        - "expected_files": `set` of blob names expected after the upload.
    """
    local_dir = Path("tests/data/upload-dir")
    bucket_path = "upload-dir"
    # Blob names mirror the layout of `local_dir`, prefixed by `bucket_path`.
    expected_files = {
        f"{bucket_path}/file1.txt",
        f"{bucket_path}/subdir/file2.txt",
        f"{bucket_path}/subdir/file3.log",
    }
    return {
        "local_dir": local_dir,
        "bucket_path": bucket_path,
        "expected_files": expected_files,
    }
93 changes: 80 additions & 13 deletions tests/google_cloud/test_gcs_bucket.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import os
import shutil
import uuid
from pathlib import Path
from unittest.mock import MagicMock
from typing import Dict
from unittest.mock import MagicMock, patch

import pytest
from google.cloud import storage
Expand Down Expand Up @@ -33,21 +33,33 @@ def test_get_file(self):
assert isinstance(blob, storage.blob.Blob)

def test_file_exists(self):
    """`file_exists` is truthy for an existing file and falsy for a missing one."""
    existing_name = "211102_rabo_all_aois.geojson"
    missing_name = "non_existent_file.geojson"

    # file that exists
    assert self.bucket.file_exists(existing_name)
    # file that does not exist
    assert not self.bucket.file_exists(missing_name)

def test_upload_file(self, test_file: Path):
    """Upload a single file, check it is listed in the bucket, then remove it.

    Parameters
    ----------
    test_file : Path
        Fixture providing the local file to upload.
    """
    bucket_path = f"test-upload-gcs-bucket-{test_file.name}"
    self.bucket.upload_file(test_file, bucket_path)
    try:
        assert any(
            blob.name == bucket_path for blob in self.bucket.bucket.list_blobs()
        )
    finally:
        # Delete the uploaded blob even when the assertion fails, so the test
        # does not leave artefacts behind in the bucket.
        self.bucket.bucket.blob(bucket_path).delete()

def test_upload_directory_with_subdirectories_e2e(
    self, upload_test_data: Dict[str, Path]
):
    """Upload a directory tree and check every expected blob name is listed.

    Parameters
    ----------
    upload_test_data : dict
        Fixture mapping with the keys "local_dir", "bucket_path" and
        "expected_files".
    """
    local_dir = upload_test_data["local_dir"]
    bucket_path = upload_test_data["bucket_path"]
    expected_files = upload_test_data["expected_files"]

    self.bucket.upload_file(local_dir, bucket_path)

    uploaded_files = {blob.name for blob in self.bucket.bucket.list_blobs()}
    try:
        # Other blobs may live in the bucket; only require the expected subset.
        assert expected_files <= uploaded_files
    finally:
        # Cleanup runs even on failure; restrict it to blobs that were actually
        # listed so a partial upload does not trigger a delete of a missing blob.
        for blob_name in expected_files & uploaded_files:
            self.bucket.bucket.blob(blob_name).delete()

@pytest.mark.e2e
def test_download_single_file(self):
Expand Down Expand Up @@ -162,3 +174,58 @@ def test_download_single_file_from_gcs(self):

mock_bucket.blob.assert_called_once_with(file_name)
mock_blob.download_to_filename.assert_called_once_with(str(local_path))


class TestUploadMock:
    """Tests for `GCSBucket.upload_file` that run against a mocked bucket."""

    def test_upload_single_file(self):
        """A single file creates one blob and triggers one upload call."""
        blob_stub = MagicMock()
        bucket_stub = MagicMock()
        bucket_stub.blob.return_value = blob_stub
        wrapper = GCSBucket(bucket_stub)

        source = Path("local/file.txt")
        destination = "bucket/folder/file.txt"

        # The path is not created on disk, so the filesystem checks are faked.
        fake_exists = patch("pathlib.Path.exists", return_value=True)
        fake_is_file = patch("pathlib.Path.is_file", return_value=True)
        with fake_exists, fake_is_file:
            wrapper.upload_file(source, destination)

        bucket_stub.blob.assert_called_once_with(destination)
        blob_stub.upload_from_filename.assert_called_once_with(str(source))

    def test_upload_directory_with_subdirectories(self):
        """Every file returned by the directory walk is uploaded under the prefix."""
        bucket_stub = MagicMock()
        wrapper = GCSBucket(bucket_stub)

        root = Path("local/directory")
        prefix = "bucket/folder"
        nested = root / "subdir"
        fake_files = [
            root / "file1.txt",
            nested / "file2.txt",
            nested / "file3.log",
        ]

        # `is_file` answers in call order: the first answer (False) is consumed
        # by the check on the directory itself, the next three by the files.
        with (
            patch("pathlib.Path.exists", return_value=True),
            patch("pathlib.Path.is_dir", return_value=True),
            patch("pathlib.Path.rglob", return_value=fake_files),
            patch("pathlib.Path.is_file", side_effect=[False, True, True, True]),
        ):
            wrapper.upload_file(root, prefix)

        for path in fake_files:
            expected_blob = f"{prefix}/{path.relative_to(root).as_posix()}"
            bucket_stub.blob.assert_any_call(expected_blob)
            blob_stub = bucket_stub.blob(expected_blob)
            blob_stub.upload_from_filename.assert_any_call(str(path))

0 comments on commit d8c6f71

Please sign in to comment.