Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Support for Archive Uploads to GitHub-Owned Storage with Multipart Upload Option #1261

Closed
wants to merge 16 commits into from
Closed
29 changes: 28 additions & 1 deletion src/Octoshift/Services/GithubApi.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
Expand Down Expand Up @@ -783,7 +784,6 @@ public virtual async Task<IEnumerable<Mannequin>> GetMannequinsByLogin(string or
});
}


public virtual async Task<string> GetUserId(string login)
{
var url = $"{_apiUrl}/graphql";
Expand Down Expand Up @@ -1071,6 +1071,33 @@ mutation abortRepositoryMigration(
}
}

/// <summary>
/// Uploads a migration archive to GitHub-owned storage and returns the resulting archive URI.
/// </summary>
/// <param name="orgDatabaseId">Organization id; it is URL-escaped and placed in the upload path.</param>
/// <param name="isMultipart">
/// When <c>true</c> the archive is posted as multipart form data to the <c>blobs/uploads</c> endpoint;
/// otherwise the stream is posted directly with the archive name in the query string.
/// </param>
/// <param name="archiveName">File name reported for the archive.</param>
/// <param name="archiveContent">Stream that supplies the archive bytes.</param>
/// <returns><c>gei://archive/</c> followed by the <c>archiveId</c> value from the JSON response.</returns>
public virtual async Task<string> UploadArchiveToGithubStorage(string orgDatabaseId, bool isMultipart, string archiveName, Stream archiveContent)
{
    using var httpContent = new StreamContent(archiveContent);
    var archiveEndpoint = $"https://uploads.github.com/organizations/{orgDatabaseId.EscapeDataString()}/gei/archive";
    string response;

    // NOTE(review): a reviewer observed that _client.PostAsync may serialize the body to JSON
    // (ending up as string content) and log the entire body. If so, it should be taught to pass
    // MultipartFormDataContent/StreamContent through untouched and to log a placeholder such as
    // "BLOB" instead of the payload. Confirm against the client implementation.
    if (isMultipart)
    {
        using var content = new MultipartFormDataContent
        {
            { httpContent, "archive", archiveName }
        };

        response = await _client.PostAsync($"{archiveEndpoint}/blobs/uploads", content);
    }
    else
    {
        // The query string must start with a plain '?', with no backslashes, and the name must be
        // escaped so that characters such as '&', '#' or spaces cannot corrupt the URL.
        var escapedName = (archiveName ?? string.Empty).EscapeDataString();
        response = await _client.PostAsync($"{archiveEndpoint}?name={escapedName}", httpContent);
    }

    var data = JObject.Parse(response);
    return "gei://archive/" + (string)data["archiveId"];
}

private static object GetMannequinsPayload(string orgId)
{
var query = "query($id: ID!, $first: Int, $after: String)";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ public class MigrateRepoCommandArgsTests
private const string AWS_SECRET_ACCESS_KEY = "aws-secret-access-key";
private const string AWS_SESSION_TOKEN = "aws-session-token";
private const string AWS_REGION = "aws-region";
private const string AWS_BUCKET_NAME = "aws-bucket-name";
private const string AZURE_STORAGE_CONNECTION_STRING = "azure-storage-connection-string";

private const string AWS_BUCKET_NAME = "aws-bucket-name";
private const string BBS_HOST = "our-bbs-server.com";
private const string BBS_SERVER_URL = $"https://{BBS_HOST}";
private const string BBS_USERNAME = "bbs-username";
Expand Down Expand Up @@ -70,6 +71,44 @@ public void It_Throws_When_Aws_Bucket_Name_Not_Provided_But_Aws_Access_Key_Provi
.WithMessage("*AWS S3*--aws-bucket-name*");
}

// Validation must reject --use-github-storage combined with an AWS S3 bucket name.
[Fact]
public void It_Throws_When_Aws_Bucket_Name_Provided_With_UseGithubStorage_Option()
{
    // Arrange: only the two conflicting options under test are set. No Azure connection string
    // is supplied, so the outcome does not depend on the order in which Validate runs its checks.
    var args = new MigrateRepoCommandArgs
    {
        ArchivePath = ARCHIVE_PATH,
        GithubOrg = GITHUB_ORG,
        GithubRepo = GITHUB_REPO,
        AwsBucketName = AWS_BUCKET_NAME,
        UseGithubStorage = true
    };

    // Act + Assert
    args.Invoking(x => x.Validate(_mockOctoLogger.Object))
        .Should()
        .ThrowExactly<OctoshiftCliException>()
        .WithMessage("*--use-github-storage flag was provided with an AWS S3 Bucket name*");
}

// Validation must reject --use-github-storage combined with an Azure storage connection string.
// NOTE(review): the method name mentions the AWS bucket name, but the asserted message is about
// the GitHub-storage/Azure conflict; consider renaming the test to match.
[Fact]
public void It_Throws_When_Aws_Bucket_Name_Provided_With_AzureStorageConnectionString_Option()
{
    // Arrange: no AWS bucket name is set. With one present, the arrange would be identical to the
    // AWS/GitHub-storage conflict test and the Azure message asserted below could not be reached
    // reliably.
    var args = new MigrateRepoCommandArgs
    {
        ArchivePath = ARCHIVE_PATH,
        GithubOrg = GITHUB_ORG,
        GithubRepo = GITHUB_REPO,
        AzureStorageConnectionString = AZURE_STORAGE_CONNECTION_STRING,
        UseGithubStorage = true
    };

    // Act + Assert
    args.Invoking(x => x.Validate(_mockOctoLogger.Object))
        .Should()
        .ThrowExactly<OctoshiftCliException>()
        .WithMessage("*--use-github-storage flag was provided with a connection string for an Azure storage account*");
}

[Fact]
public void It_Throws_When_Aws_Bucket_Name_Not_Provided_But_Aws_Secret_Key_Provided()
{
Expand Down Expand Up @@ -186,25 +225,6 @@ public void It_Throws_When_Kerberos_Is_Set_And_Bbs_Username_Is_Provided()
.WithMessage("*--bbs-username*--kerberos*");
}

// Validate must throw when a BBS username is given without a BBS server URL.
[Fact]
public void Errors_If_BbsServer_Url_Not_Provided_But_Bbs_Username_Is_Provided()
begonaguereca marked this conversation as resolved.
Show resolved Hide resolved
{
// Arrange: an archive path plus a BBS username, but no BbsServerUrl
var args = new MigrateRepoCommandArgs
{
ArchivePath = ARCHIVE_PATH,
GithubOrg = GITHUB_ORG,
GithubRepo = GITHUB_REPO,
BbsUsername = BBS_USERNAME
};

// Act + Assert: the message must mention the three options in this order
args.Invoking(x => x.Validate(_mockOctoLogger.Object))
.Should()
.ThrowExactly<OctoshiftCliException>()
.WithMessage("*--bbs-username*--bbs-password*--bbs-server-url*");
}

[Fact]
public void Errors_If_BbsServer_Url_Not_Provided_But_Bbs_Password_Is_Provided()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,58 @@ public async Task Happy_Path_Full_Flow_Bbs_Credentials_Via_Environment()
));
}

// With UseGithubStorage set, the archive URL returned by UploadArchiveToGithubStorage must be
// passed on to StartBbsMigration.
[Fact]
public async Task Happy_Path_Uploads_To_Github_Storage()
{
    // Arrange
    _mockBbsApi.Setup(x => x.StartExport(BBS_PROJECT, BBS_REPO)).ReturnsAsync(BBS_EXPORT_ID);
    _mockBbsApi.Setup(x => x.GetExport(BBS_EXPORT_ID)).ReturnsAsync(("COMPLETED", "The export is complete", 100));
    _mockBbsArchiveDownloader.Setup(x => x.Download(BBS_EXPORT_ID, It.IsAny<string>())).ReturnsAsync(ARCHIVE_PATH);
    _mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(ARCHIVE_PATH)).ReturnsAsync(ARCHIVE_DATA);
    _mockGithubApi.Setup(x => x.GetOrganizationId(GITHUB_ORG).Result).Returns(GITHUB_ORG_ID);
    _mockGithubApi.Setup(x => x.CreateBbsMigrationSource(GITHUB_ORG_ID).Result).Returns(MIGRATION_SOURCE_ID);
    _mockGithubApi.SetupSequence(x => x.UploadArchiveToGithubStorage(It.IsAny<string>(), It.IsAny<bool>(), It.IsAny<string>(), It.IsAny<FileStream>()).Result).Returns("gei://archive/");

    var archiveFilePath = "./git_archive";
    File.WriteAllText(archiveFilePath, "I am an archive");

    try
    {
        // File.OpenRead keeps the content written above; File.Create would truncate the file
        // and hold it open with exclusive access.
        using var gitContentStream = File.OpenRead(archiveFilePath);
        _mockFileSystemProvider
            .SetupSequence(m => m.OpenRead(archiveFilePath))
            .Returns(gitContentStream);

        var args = new MigrateRepoCommandArgs
        {
            BbsServerUrl = BBS_SERVER_URL,
            BbsUsername = BBS_USERNAME,
            BbsPassword = BBS_PASSWORD,
            BbsProject = BBS_PROJECT,
            BbsRepo = BBS_REPO,
            SshUser = SSH_USER,
            SshPrivateKey = PRIVATE_KEY,
            ArchivePath = archiveFilePath,
            UseGithubStorage = true,
            GithubOrg = GITHUB_ORG,
            GithubRepo = GITHUB_REPO,
            GithubPat = GITHUB_PAT,
            QueueOnly = true,
        };

        // Act
        await _handler.Handle(args);

        // Assert
        _mockGithubApi.Verify(m => m.StartBbsMigration(
            MIGRATION_SOURCE_ID,
            BBS_REPO_URL,
            GITHUB_ORG_ID,
            GITHUB_REPO,
            GITHUB_PAT,
            "gei://archive/",
            null
        ));
    }
    finally
    {
        // The stream is disposed when the try block exits, so the file is no longer open here.
        // The fixture is removed even when the handler or the verification throws.
        File.Delete(archiveFilePath);
    }
}

[Fact]
public async Task Happy_Path_Deletes_Downloaded_Archive()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public void Should_Have_Options()
var command = new MigrateRepoCommand();
command.Should().NotBeNull();
command.Name.Should().Be("migrate-repo");
command.Options.Count.Should().Be(31);
command.Options.Count.Should().Be(32);

TestHelpers.VerifyCommandOption(command.Options, "bbs-server-url", true);
TestHelpers.VerifyCommandOption(command.Options, "bbs-project", true);
Expand Down Expand Up @@ -88,6 +88,7 @@ public void Should_Have_Options()
TestHelpers.VerifyCommandOption(command.Options, "keep-archive", false);
TestHelpers.VerifyCommandOption(command.Options, "no-ssl-verify", false);
TestHelpers.VerifyCommandOption(command.Options, "target-api-url", false);
TestHelpers.VerifyCommandOption(command.Options, "use-github-storage", false, true);
}

[Fact]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class MigrateRepoCommandArgsTests
private const string TARGET_REPO = "foo-target-repo";
private const string GITHUB_TARGET_PAT = "github-target-pat";
private const string AWS_BUCKET_NAME = "aws-bucket-name";

private const string GHES_API_URL = "foo-ghes-api.com";
[Fact]
public void Defaults_TargetRepo_To_SourceRepo()
{
Expand Down Expand Up @@ -67,6 +67,62 @@ public void Aws_Bucket_Name_Without_Ghes_Api_Url_Throws()
.WithMessage("*--aws-bucket-name*");
}

// --use-github-storage without a GHES API URL must be rejected by Validate.
[Fact]
public void UseGithubStorage_Without_Ghes_Api_Url_Throws()
{
    // Arrange: GitHub storage requested, GhesApiUrl left unset
    var args = new MigrateRepoCommandArgs
    {
        UseGithubStorage = true,
        GithubSourceOrg = SOURCE_ORG,
        SourceRepo = SOURCE_REPO,
        GithubTargetOrg = TARGET_ORG,
        TargetRepo = TARGET_REPO
    };

    // Act + Assert
    args.Invoking(x => x.Validate(_mockOctoLogger.Object))
        .Should()
        .ThrowExactly<OctoshiftCliException>()
        .WithMessage("*--use-github-storage*");
}

// --use-github-storage combined with an AWS bucket name must be rejected by Validate.
[Fact]
public void UseGithubStorage_And_Aws_Bucket_Name_Throws()
{
    // Arrange: both upload destinations requested for a GHES source
    var args = new MigrateRepoCommandArgs
    {
        GhesApiUrl = GHES_API_URL,
        UseGithubStorage = true,
        AwsBucketName = AWS_BUCKET_NAME,
        GithubSourceOrg = SOURCE_ORG,
        SourceRepo = SOURCE_REPO,
        GithubTargetOrg = TARGET_ORG,
        TargetRepo = TARGET_REPO
    };

    // Act + Assert
    args.Invoking(x => x.Validate(_mockOctoLogger.Object))
        .Should()
        .ThrowExactly<OctoshiftCliException>()
        .WithMessage("*--use-github-storage flag was provided with an AWS S3 Bucket name*");
}

// --use-github-storage combined with an Azure storage connection string must be rejected by Validate.
// NOTE(review): the method name mentions the AWS bucket name, but the asserted message is about
// the GitHub-storage/Azure conflict; consider renaming the test to match.
[Fact]
public void It_Throws_When_Aws_Bucket_Name_Provided_With_AzureStorageConnectionString_Option()
{
    // Arrange: the Azure connection string is the conflicting option here. Setting only an AWS
    // bucket name would exercise the AWS/GitHub-storage conflict instead of the one asserted below.
    var args = new MigrateRepoCommandArgs
    {
        SourceRepo = SOURCE_REPO,
        GithubSourceOrg = SOURCE_ORG,
        GithubTargetOrg = TARGET_ORG,
        TargetRepo = TARGET_REPO,
        AzureStorageConnectionString = "azure-storage-connection-string",
        GhesApiUrl = GHES_API_URL,
        UseGithubStorage = true
    };

    // Act + Assert
    args.Invoking(x => x.Validate(_mockOctoLogger.Object))
        .Should()
        .ThrowExactly<OctoshiftCliException>()
        .WithMessage("*--use-github-storage flag was provided with a connection string for an Azure storage account*");
}
[Fact]
public void No_Ssl_Verify_Without_Ghes_Api_Url_Throws()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,107 @@ public async Task Happy_Path_GithubSource_Ghes()
_mockFileSystemProvider.Verify(x => x.DeleteIfExists(metadataArchiveFilePath), Times.Once);
}

// With UseGithubStorage set for a GHES source, both archives must be uploaded to GitHub-owned
// storage (non-multipart) and the temporary archive files must be cleaned up.
[Fact]
public async Task Happy_Path_UseGithubStorage()
{
    // Arrange
    var githubOrgId = Guid.NewGuid().ToString();
    var migrationSourceId = Guid.NewGuid().ToString();
    var sourceGithubPat = Guid.NewGuid().ToString();
    var targetGithubPat = Guid.NewGuid().ToString();
    var githubRepoUrl = $"https://myghes/{SOURCE_ORG}/{SOURCE_REPO}";
    var migrationId = Guid.NewGuid().ToString();
    var gitArchiveId = 1;
    var metadataArchiveId = 2;
    var gitArchiveUrl = $"https://example.com/{gitArchiveId}";
    var metadataArchiveUrl = $"https://example.com/{metadataArchiveId}";
    var uploadedGitArchiveUrl = "gei://archive/1";
    var uploadedMetadataArchiveUrl = "gei://archive/2";
    var gitArchiveFilePath = "./gitdata_archive";
    var metadataArchiveFilePath = "./metadata_archive";

    File.WriteAllText(gitArchiveFilePath, "I am git archive");
    File.WriteAllText(metadataArchiveFilePath, "I am metadata archive");

    try
    {
        // File.OpenRead keeps the fixture content; File.Create would truncate both files and
        // hold them open with exclusive access.
        using var gitContentStream = File.OpenRead(gitArchiveFilePath);
        using var metaContentStream = File.OpenRead(metadataArchiveFilePath);

        _mockFileSystemProvider
            .SetupSequence(m => m.GetTempFileName())
            .Returns(gitArchiveFilePath)
            .Returns(metadataArchiveFilePath);
        _mockFileSystemProvider
            .SetupSequence(m => m.OpenRead(gitArchiveFilePath))
            .Returns(gitContentStream);
        _mockFileSystemProvider
            .SetupSequence(m => m.OpenRead(metadataArchiveFilePath))
            .Returns(metaContentStream);

        _mockTargetGithubApi.Setup(x => x.GetOrganizationId(TARGET_ORG).Result).Returns(githubOrgId);
        _mockTargetGithubApi.Setup(x => x.CreateGhecMigrationSource(githubOrgId).Result).Returns(migrationSourceId);
        _mockTargetGithubApi
            .Setup(x => x.StartMigration(
                migrationSourceId,
                githubRepoUrl,
                githubOrgId,
                TARGET_REPO,
                sourceGithubPat,
                targetGithubPat,
                uploadedGitArchiveUrl,
                uploadedMetadataArchiveUrl,
                false,
                null,
                false).Result)
            .Returns(migrationId);
        _mockTargetGithubApi.Setup(x => x.GetMigration(migrationId).Result).Returns((State: RepositoryMigrationStatus.Succeeded, TARGET_REPO, 0, null, null));
        _mockTargetGithubApi.Setup(x => x.DoesOrgExist(TARGET_ORG).Result).Returns(true);

        _mockSourceGithubApi.Setup(x => x.StartGitArchiveGeneration(SOURCE_ORG, SOURCE_REPO).Result).Returns(gitArchiveId);
        _mockSourceGithubApi.Setup(x => x.StartMetadataArchiveGeneration(SOURCE_ORG, SOURCE_REPO, false, false).Result).Returns(metadataArchiveId);
        _mockSourceGithubApi.Setup(x => x.GetArchiveMigrationStatus(SOURCE_ORG, gitArchiveId).Result).Returns(ArchiveMigrationStatus.Exported);
        _mockSourceGithubApi.Setup(x => x.GetArchiveMigrationStatus(SOURCE_ORG, metadataArchiveId).Result).Returns(ArchiveMigrationStatus.Exported);
        _mockSourceGithubApi.Setup(x => x.GetArchiveMigrationUrl(SOURCE_ORG, gitArchiveId).Result).Returns(gitArchiveUrl);
        _mockSourceGithubApi.Setup(x => x.GetArchiveMigrationUrl(SOURCE_ORG, metadataArchiveId).Result).Returns(metadataArchiveUrl);

        // The first upload call yields the git archive URL, the second the metadata archive URL.
        _mockTargetGithubApi.SetupSequence(x => x.UploadArchiveToGithubStorage(It.IsAny<string>(), It.IsAny<bool>(), It.IsAny<string>(), It.IsAny<FileStream>()).Result).Returns(uploadedGitArchiveUrl).Returns(uploadedMetadataArchiveUrl);

        _mockEnvironmentVariableProvider.Setup(m => m.SourceGithubPersonalAccessToken(It.IsAny<bool>())).Returns(sourceGithubPat);
        _mockEnvironmentVariableProvider.Setup(m => m.TargetGithubPersonalAccessToken(It.IsAny<bool>())).Returns(targetGithubPat);

        _mockGhesVersionChecker.Setup(m => m.AreBlobCredentialsRequired(GHES_API_URL)).ReturnsAsync(true);

        var args = new MigrateRepoCommandArgs
        {
            GithubSourceOrg = SOURCE_ORG,
            SourceRepo = SOURCE_REPO,
            GithubTargetOrg = TARGET_ORG,
            TargetRepo = TARGET_REPO,
            TargetApiUrl = TARGET_API_URL,
            GhesApiUrl = GHES_API_URL,
            UseGithubStorage = true,
        };

        // Act
        await _handler.Handle(args);

        // Assert
        _mockTargetGithubApi.Verify(x => x.GetMigration(migrationId));
        _mockTargetGithubApi.Verify(x => x.UploadArchiveToGithubStorage(It.IsAny<string>(), false, It.IsAny<string>(), gitContentStream));
        _mockTargetGithubApi.Verify(x => x.UploadArchiveToGithubStorage(It.IsAny<string>(), false, It.IsAny<string>(), metaContentStream));
        _mockFileSystemProvider.Verify(x => x.DeleteIfExists(gitArchiveFilePath), Times.Once);
        _mockFileSystemProvider.Verify(x => x.DeleteIfExists(metadataArchiveFilePath), Times.Once);
    }
    finally
    {
        // Both streams are disposed when the try block exits, so neither file is open here.
        // The fixtures are removed even when the handler or a verification throws.
        File.Delete(gitArchiveFilePath);
        File.Delete(metadataArchiveFilePath);
    }
}

[Fact]
public async Task Happy_Path_GithubSource_Ghes_Repo_Renamed()
{
Expand Down
Loading