From 91c5d6bc09bfcdf3be835e91ed99738298893927 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Mon, 16 Dec 2024 16:53:42 +0100 Subject: [PATCH 1/2] Move all top level ingestion functions into Ingestion namespace, rename files --- .../{Ingest.swift => Ingestion.swift} | 144 +++++++++--------- Sources/App/configure.swift | 2 +- Tests/AppTests/ErrorReportingTests.swift | 4 +- ...gestorTests.swift => IngestionTests.swift} | 22 +-- Tests/AppTests/MastodonTests.swift | 6 +- Tests/AppTests/MetricsTests.swift | 2 +- Tests/AppTests/PackageTests.swift | 6 +- Tests/AppTests/PipelineTests.swift | 8 +- 8 files changed, 95 insertions(+), 99 deletions(-) rename Sources/App/Commands/{Ingest.swift => Ingestion.swift} (81%) rename Tests/AppTests/{IngestorTests.swift => IngestionTests.swift} (96%) diff --git a/Sources/App/Commands/Ingest.swift b/Sources/App/Commands/Ingestion.swift similarity index 81% rename from Sources/App/Commands/Ingest.swift rename to Sources/App/Commands/Ingestion.swift index c19501bef..8b84e6e5c 100644 --- a/Sources/App/Commands/Ingest.swift +++ b/Sources/App/Commands/Ingestion.swift @@ -19,6 +19,7 @@ import Vapor enum Ingestion { + struct Error: ProcessingError { var packageId: Package.Id var underlyingError: UnderlyingError @@ -73,100 +74,95 @@ enum Ingestion { } } } -} -struct IngestCommand: AsyncCommand { - typealias Signature = SPICommand.Signature + struct Command: AsyncCommand { + typealias Signature = SPICommand.Signature - var help: String { "Run package ingestion (fetching repository metadata)" } + var help: String { "Run package ingestion (fetching repository metadata)" } - func run(using context: CommandContext, signature: SPICommand.Signature) async { - let client = context.application.client - let db = context.application.db - Current.setLogger(Logger(component: "ingest")) + func run(using context: CommandContext, signature: SPICommand.Signature) async { + let client = context.application.client + let db = context.application.db + Current.setLogger(Logger(component: "ingest")) - Self.resetMetrics() + Self.resetMetrics() - do { - try await ingest(client: client, database: db, mode: .init(signature: signature)) - } catch { - Current.logger().error("\(error.localizedDescription)") - } + do { + try await ingest(client: client, database: db, mode: .init(signature: signature)) + } catch { + Current.logger().error("\(error.localizedDescription)") + } - do { - try await AppMetrics.push(client: client, - jobName: "ingest") - } catch { - Current.logger().warning("\(error.localizedDescription)") + do { + try await AppMetrics.push(client: client, + jobName: "ingest") + } catch { + Current.logger().warning("\(error.localizedDescription)") + } } - } -} - -extension IngestCommand { - static func resetMetrics() { - AppMetrics.ingestMetadataSuccessCount?.set(0) - AppMetrics.ingestMetadataFailureCount?.set(0) + static func resetMetrics() { + AppMetrics.ingestMetadataSuccessCount?.set(0) + AppMetrics.ingestMetadataFailureCount?.set(0) + } } -} -/// Ingest via a given mode: either one `Package` identified by its `Id` or a limited number of `Package`s. -/// - Parameters: -/// - client: `Client` object -/// - database: `Database` object -/// - mode: process a single `Package.Id` or a `limit` number of packages -/// - Returns: future -func ingest(client: Client, - database: Database, - mode: SPICommand.Mode) async throws { - let start = DispatchTime.now().uptimeNanoseconds - defer { AppMetrics.ingestDurationSeconds?.time(since: start) } - - switch mode { - case .id(let id): - Current.logger().info("Ingesting (id: \(id)) ...") - let pkg = try await Package.fetchCandidate(database, id: id) - await ingest(client: client, database: database, packages: [pkg]) - - case .limit(let limit): - Current.logger().info("Ingesting (limit: \(limit)) ...") - let packages = try await Package.fetchCandidates(database, for: .ingestion, limit: limit) - Current.logger().info("Candidate count: \(packages.count)") - await ingest(client: client, database: database, packages: packages) - - case .url(let url): - Current.logger().info("Ingesting (url: \(url)) ...") - let pkg = try await Package.fetchCandidate(database, url: url) - await ingest(client: client, database: database, packages: [pkg]) + /// Ingest via a given mode: either one `Package` identified by its `Id` or a limited number of `Package`s. + /// - Parameters: + /// - client: `Client` object + /// - database: `Database` object + /// - mode: process a single `Package.Id` or a `limit` number of packages + /// - Returns: future + static func ingest(client: Client, + database: Database, + mode: SPICommand.Mode) async throws { + let start = DispatchTime.now().uptimeNanoseconds + defer { AppMetrics.ingestDurationSeconds?.time(since: start) } + + switch mode { + case .id(let id): + Current.logger().info("Ingesting (id: \(id)) ...") + let pkg = try await Package.fetchCandidate(database, id: id) + await ingest(client: client, database: database, packages: [pkg]) + + case .limit(let limit): + Current.logger().info("Ingesting (limit: \(limit)) ...") + let packages = try await Package.fetchCandidates(database, for: .ingestion, limit: limit) + Current.logger().info("Candidate count: \(packages.count)") + await ingest(client: client, database: database, packages: packages) + + case .url(let url): + Current.logger().info("Ingesting (url: \(url)) ...") + let pkg = try await Package.fetchCandidate(database, url: url) + await ingest(client: client, database: database, packages: [pkg]) + } } -} -/// Main ingestion function. Fetched package metadata from hosting provider and updates `Repositoy` and `Package`s. -/// - Parameters: -/// - client: `Client` object -/// - database: `Database` object -/// - packages: packages to be ingested -/// - Returns: future -func ingest(client: Client, - database: Database, - packages: [Joined]) async { - Current.logger().debug("Ingesting \(packages.compactMap {$0.model.id})") - AppMetrics.ingestCandidatesCount?.set(packages.count) - - await withTaskGroup(of: Void.self) { group in - for pkg in packages { - group.addTask { - await Ingestion.ingest(client: client, database: database, package: pkg) + /// Main ingestion function. Fetched package metadata from hosting provider and updates `Repositoy` and `Package`s. + /// - Parameters: + /// - client: `Client` object + /// - database: `Database` object + /// - packages: packages to be ingested + /// - Returns: future + static func ingest(client: Client, + database: Database, + packages: [Joined]) async { + Current.logger().debug("Ingesting \(packages.compactMap {$0.model.id})") + AppMetrics.ingestCandidatesCount?.set(packages.count) + + await withTaskGroup(of: Void.self) { group in + for pkg in packages { + group.addTask { + await ingest(client: client, database: database, package: pkg) + } } } } -} -extension Ingestion { static func ingest(client: Client, database: Database, package: Joined) async { let result = await Result { () async throws(Ingestion.Error) -> Joined in @Dependency(\.environment) var environment diff --git a/Sources/App/configure.swift b/Sources/App/configure.swift index 9bbd93778..a55b20295 100644 --- a/Sources/App/configure.swift +++ b/Sources/App/configure.swift @@ -354,7 +354,7 @@ public func configure(_ app: Application) async throws -> String { app.asyncCommands.use(Analyze.Command(), as: "analyze") app.asyncCommands.use(CreateRestfileCommand(), as: "create-restfile") app.asyncCommands.use(DeleteBuildsCommand(), as: "delete-builds") - app.asyncCommands.use(IngestCommand(), as: "ingest") + app.asyncCommands.use(Ingestion.Command(), as: "ingest") app.asyncCommands.use(ReconcileCommand(), as: "reconcile") app.asyncCommands.use(TriggerBuildsCommand(), as: "trigger-builds") app.asyncCommands.use(ReAnalyzeVersions.Command(), as: "re-analyze-versions") diff --git a/Tests/AppTests/ErrorReportingTests.swift b/Tests/AppTests/ErrorReportingTests.swift index 8bee183b5..a2e1fe703 100644 --- a/Tests/AppTests/ErrorReportingTests.swift +++ b/Tests/AppTests/ErrorReportingTests.swift @@ -31,7 +31,7 @@ class ErrorReportingTests: AppTestCase { } } - func test_Ingestor_error_reporting() async throws { + func test_Ingestion_error_reporting() async throws { // setup try await Package(id: .id0, url: "1", processingStage: .reconciliation).save(on: app.db) Current.fetchMetadata = { _, _, _ throws(Github.Error) in throw Github.Error.invalidURL("1") } @@ -40,7 +40,7 @@ class ErrorReportingTests: AppTestCase { $0.date.now = .now } operation: { // MUT - try await ingest(client: app.client, database: app.db, mode: .limit(10)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(10)) } // validation diff --git a/Tests/AppTests/IngestorTests.swift b/Tests/AppTests/IngestionTests.swift similarity index 96% rename from Tests/AppTests/IngestorTests.swift rename to Tests/AppTests/IngestionTests.swift index a2374419c..0599215f5 100644 --- a/Tests/AppTests/IngestorTests.swift +++ b/Tests/AppTests/IngestionTests.swift @@ -22,7 +22,7 @@ import S3Store import Vapor -class IngestorTests: AppTestCase { +class IngestionTests: AppTestCase { func test_ingest_basic() async throws { // setup @@ -38,7 +38,7 @@ class IngestorTests: AppTestCase { $0.date.now = .now } operation: { // MUT - try await ingest(client: app.client, database: app.db, mode: .limit(10)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(10)) } // validate @@ -77,7 +77,7 @@ class IngestorTests: AppTestCase { Current.fetchLicense = { _, _, _ in Github.License(htmlUrl: "license") } // MUT - await ingest(client: app.client, database: app.db, packages: packages) + await Ingestion.ingest(client: app.client, database: app.db, packages: packages) do { // validate the second package's license is updated @@ -310,7 +310,7 @@ class IngestorTests: AppTestCase { $0.date.now = .now } operation: { // MUT - try await ingest(client: app.client, database: app.db, mode: .limit(testUrls.count)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(testUrls.count)) } // validate @@ -337,7 +337,7 @@ class IngestorTests: AppTestCase { $0.date.now = .now } operation: { // MUT - try await ingest(client: app.client, database: app.db, mode: .limit(10)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(10)) } // validate @@ -389,7 +389,7 @@ class IngestorTests: AppTestCase { $0.date.now = .now } operation: { // MUT - try await ingest(client: app.client, database: app.db, mode: .limit(10)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(10)) } // validate repositories (single element pointing to the ingested package) @@ -490,7 +490,7 @@ class IngestorTests: AppTestCase { do { // first ingestion, no readme has been saved // MUT - try await ingest(client: app.client, database: app.db, mode: .limit(1)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(1)) // validate try await XCTAssertEqualAsync(await Repository.query(on: app.db).count(), 1) @@ -506,7 +506,7 @@ class IngestorTests: AppTestCase { try await pkg.save(on: app.db) // MUT - try await ingest(client: app.client, database: app.db, mode: .limit(1)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(1)) // validate try await XCTAssertEqualAsync(await Repository.query(on: app.db).count(), 1) @@ -522,7 +522,7 @@ class IngestorTests: AppTestCase { try await pkg.save(on: app.db) // MUT - try await ingest(client: app.client, database: app.db, mode: .limit(1)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(1)) // validate try await XCTAssertEqualAsync(await Repository.query(on: app.db).count(), 1) @@ -573,7 +573,7 @@ class IngestorTests: AppTestCase { $0.date.now = .now } operation: { // MUT - try await ingest(client: app.client, database: app.db, mode: .limit(1)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(1)) } // There should only be one call as `storeS3ReadmeImages` takes the array of images. @@ -604,7 +604,7 @@ class IngestorTests: AppTestCase { } operation: { // MUT let app = self.app! - try await ingest(client: app.client, database: app.db, mode: .limit(1)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(1)) } // validate diff --git a/Tests/AppTests/MastodonTests.swift b/Tests/AppTests/MastodonTests.swift index efe931557..ba08e9296 100644 --- a/Tests/AppTests/MastodonTests.swift +++ b/Tests/AppTests/MastodonTests.swift @@ -66,7 +66,7 @@ final class MastodonTests: AppTestCase { } operation: { // run first two processing steps try await reconcile(client: app.client, database: app.db) - try await ingest(client: app.client, database: app.db, mode: .limit(10)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(10)) // MUT - analyze, triggering the post try await Analyze.analyze(client: app.client, @@ -86,7 +86,7 @@ final class MastodonTests: AppTestCase { try await withDependencies { $0.date.now = .now.addingTimeInterval(Constants.reIngestionDeadtime) } operation: { - try await ingest(client: app.client, database: app.db, mode: .limit(10)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(10)) // MUT - analyze, triggering posts if any try await Analyze.analyze(client: app.client, @@ -104,7 +104,7 @@ final class MastodonTests: AppTestCase { // fast forward our clock by the deadtime interval again (*2) and re-ingest $0.date.now = .now.addingTimeInterval(Constants.reIngestionDeadtime * 2) } operation: { - try await ingest(client: app.client, database: app.db, mode: .limit(10)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(10)) // MUT - analyze again try await Analyze.analyze(client: app.client, database: app.db, diff --git a/Tests/AppTests/MetricsTests.swift b/Tests/AppTests/MetricsTests.swift index dddd889ab..bef5f18b6 100644 --- a/Tests/AppTests/MetricsTests.swift +++ b/Tests/AppTests/MetricsTests.swift @@ -120,7 +120,7 @@ class MetricsTests: AppTestCase { let pkg = try await savePackage(on: app.db, "1") // MUT - try await ingest(client: app.client, database: app.db, mode: .id(pkg.id!)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .id(pkg.id!)) // validation XCTAssert((AppMetrics.ingestDurationSeconds?.get()) ?? 0 > 0) diff --git a/Tests/AppTests/PackageTests.swift b/Tests/AppTests/PackageTests.swift index d1ab1f47a..d771c75bd 100644 --- a/Tests/AppTests/PackageTests.swift +++ b/Tests/AppTests/PackageTests.swift @@ -350,8 +350,8 @@ final class PackageTests: AppTestCase { } // run ingestion to progress package through pipeline - try await ingest(client: app.client, database: app.db, mode: .limit(10)) - + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(10)) + // MUT & validate do { let pkg = try await XCTUnwrapAsync(try await Package.query(on: app.db).first()) @@ -378,7 +378,7 @@ final class PackageTests: AppTestCase { try await withDependencies { $0.date.now = .now.addingTimeInterval(Constants.reIngestionDeadtime) } operation: { - try await ingest(client: app.client, database: app.db, mode: .limit(10)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(10)) do { let pkg = try await XCTUnwrapAsync(try await Package.query(on: app.db).first()) diff --git a/Tests/AppTests/PipelineTests.swift b/Tests/AppTests/PipelineTests.swift index 0895f80d0..f34940663 100644 --- a/Tests/AppTests/PipelineTests.swift +++ b/Tests/AppTests/PipelineTests.swift @@ -201,8 +201,8 @@ class PipelineTests: AppTestCase { } // MUT - second stage - try await ingest(client: app.client, database: app.db, mode: .limit(10)) - + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(10)) + do { // validate let packages = try await Package.query(on: app.db).sort(\.$url).all() XCTAssertEqual(packages.map(\.url), ["1", "2", "3"].asGithubUrls) @@ -240,7 +240,7 @@ class PipelineTests: AppTestCase { } // MUT - ingest again - try await ingest(client: app.client, database: app.db, mode: .limit(10)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(10)) do { // validate - only new package moves to .ingestion stage let packages = try await Package.query(on: app.db).sort(\.$url).all() @@ -270,7 +270,7 @@ class PipelineTests: AppTestCase { $0.date.now = .now.addingTimeInterval(Constants.reIngestionDeadtime) } operation: { // MUT - ingest yet again - try await ingest(client: app.client, database: app.db, mode: .limit(10)) + try await Ingestion.ingest(client: app.client, database: app.db, mode: .limit(10)) do { // validate - now all three packages should have been updated let packages = try await Package.query(on: app.db).sort(\.$url).all() From 2c386dc0933e867e9eb1b0ad0727d56081865e99 Mon Sep 17 00:00:00 2001 From: "Sven A. Schmidt" Date: Mon, 16 Dec 2024 16:57:47 +0100 Subject: [PATCH 2/2] Move remaining two top level functions into Ingestion namespace --- Sources/App/Commands/Ingestion.swift | 174 ++++++++++++++------------- Tests/AppTests/IngestionTests.swift | 58 ++++----- 2 files changed, 118 insertions(+), 114 deletions(-) diff --git a/Sources/App/Commands/Ingestion.swift b/Sources/App/Commands/Ingestion.swift index 8b84e6e5c..e68b83659 100644 --- a/Sources/App/Commands/Ingestion.swift +++ b/Sources/App/Commands/Ingestion.swift @@ -116,8 +116,8 @@ enum Ingestion { /// - mode: process a single `Package.Id` or a `limit` number of packages /// - Returns: future static func ingest(client: Client, - database: Database, - mode: SPICommand.Mode) async throws { + database: Database, + mode: SPICommand.Mode) async throws { let start = DispatchTime.now().uptimeNanoseconds defer { AppMetrics.ingestDurationSeconds?.time(since: start) } @@ -197,10 +197,10 @@ enum Ingestion { s3Readme = .error("\(error)") } - let fork = await getFork(on: database, parent: metadata.repository?.parent) + let fork = await Ingestion.getFork(on: database, parent: metadata.repository?.parent) try await run { () async throws(Ingestion.Error.UnderlyingError) in - try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme, fork: fork) + try await Ingestion.updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme, fork: fork) } rethrowing: { Ingestion.Error(packageId: package.model.id!, underlyingError: $0) } @@ -281,108 +281,111 @@ enum Ingestion { // but let's play it safe and not risk a server crash, unlikely as it may be. } } -} -/// Insert or update `Repository` of given `Package` with given `Github.Metadata`. -/// - Parameters: -/// - database: `Database` object -/// - package: package to update -/// - metadata: `Github.Metadata` with data for update -/// - Returns: future -func updateRepository(on database: Database, - for repository: Repository, - metadata: Github.Metadata, - licenseInfo: Github.License?, - readmeInfo: Github.Readme?, - s3Readme: S3Readme?, - fork: Fork? = nil) async throws(Ingestion.Error.UnderlyingError) { - @Dependency(\.environment) var environment - if environment.shouldFail(failureMode: .noRepositoryMetadata) { - throw .noRepositoryMetadata(owner: repository.owner, name: repository.name) - } - if environment.shouldFail(failureMode: .repositorySaveFailed) { - throw .repositorySaveFailed(owner: repository.owner, - name: repository.name, - details: "TestError") - } - if environment.shouldFail(failureMode: .repositorySaveUniqueViolation) { - throw .repositorySaveUniqueViolation(owner: repository.owner, - name: repository.name, - details: "TestError") - } - guard let repoMetadata = metadata.repository else { - throw .noRepositoryMetadata(owner: repository.owner, name: repository.name) - } + /// Insert or update `Repository` of given `Package` with given `Github.Metadata`. + /// - Parameters: + /// - database: `Database` object + /// - package: package to update + /// - metadata: `Github.Metadata` with data for update + /// - Returns: future + static func updateRepository(on database: Database, + for repository: Repository, + metadata: Github.Metadata, + licenseInfo: Github.License?, + readmeInfo: Github.Readme?, + s3Readme: S3Readme?, + fork: Fork? = nil) async throws(Ingestion.Error.UnderlyingError) { + @Dependency(\.environment) var environment + if environment.shouldFail(failureMode: .noRepositoryMetadata) { + throw .noRepositoryMetadata(owner: repository.owner, name: repository.name) + } + if environment.shouldFail(failureMode: .repositorySaveFailed) { + throw .repositorySaveFailed(owner: repository.owner, + name: repository.name, + details: "TestError") + } + if environment.shouldFail(failureMode: .repositorySaveUniqueViolation) { + throw .repositorySaveUniqueViolation(owner: repository.owner, + name: repository.name, + details: "TestError") + } + guard let repoMetadata = metadata.repository else { + throw .noRepositoryMetadata(owner: repository.owner, name: repository.name) + } + + repository.defaultBranch = repoMetadata.defaultBranch + repository.forks = repoMetadata.forkCount + repository.fundingLinks = repoMetadata.fundingLinks?.compactMap(FundingLink.init(from:)) ?? [] + repository.hasSPIBadge = readmeInfo?.containsSPIBadge() + repository.homepageUrl = repoMetadata.homepageUrl?.trimmed + repository.isArchived = repoMetadata.isArchived + repository.isInOrganization = repoMetadata.isInOrganization + repository.keywords = Set(repoMetadata.topics.map { $0.lowercased() }).sorted() + repository.lastIssueClosedAt = repoMetadata.lastIssueClosedAt + repository.lastPullRequestClosedAt = repoMetadata.lastPullRequestClosedAt + repository.license = .init(from: repoMetadata.licenseInfo) + repository.licenseUrl = licenseInfo?.htmlUrl + repository.name = repoMetadata.repositoryName + repository.openIssues = repoMetadata.openIssues.totalCount + repository.openPullRequests = repoMetadata.openPullRequests.totalCount + repository.owner = repoMetadata.repositoryOwner + repository.ownerName = repoMetadata.owner.name + repository.ownerAvatarUrl = repoMetadata.owner.avatarUrl + repository.s3Readme = s3Readme + repository.readmeHtmlUrl = readmeInfo?.htmlUrl + repository.releases = repoMetadata.releases.nodes.map(Release.init(from:)) + repository.stars = repoMetadata.stargazerCount + repository.summary = repoMetadata.description + repository.forkedFrom = fork - repository.defaultBranch = repoMetadata.defaultBranch - repository.forks = repoMetadata.forkCount - repository.fundingLinks = repoMetadata.fundingLinks?.compactMap(FundingLink.init(from:)) ?? [] - repository.hasSPIBadge = readmeInfo?.containsSPIBadge() - repository.homepageUrl = repoMetadata.homepageUrl?.trimmed - repository.isArchived = repoMetadata.isArchived - repository.isInOrganization = repoMetadata.isInOrganization - repository.keywords = Set(repoMetadata.topics.map { $0.lowercased() }).sorted() - repository.lastIssueClosedAt = repoMetadata.lastIssueClosedAt - repository.lastPullRequestClosedAt = repoMetadata.lastPullRequestClosedAt - repository.license = .init(from: repoMetadata.licenseInfo) - repository.licenseUrl = licenseInfo?.htmlUrl - repository.name = repoMetadata.repositoryName - repository.openIssues = repoMetadata.openIssues.totalCount - repository.openPullRequests = repoMetadata.openPullRequests.totalCount - repository.owner = repoMetadata.repositoryOwner - repository.ownerName = repoMetadata.owner.name - repository.ownerAvatarUrl = repoMetadata.owner.avatarUrl - repository.s3Readme = s3Readme - repository.readmeHtmlUrl = readmeInfo?.htmlUrl - repository.releases = repoMetadata.releases.nodes.map(Release.init(from:)) - repository.stars = repoMetadata.stargazerCount - repository.summary = repoMetadata.description - repository.forkedFrom = fork - - do { - try await repository.save(on: database) - } catch let error as PSQLError where error.isUniqueViolation { - let details = error.serverInfo?[.message] ?? "" - throw Ingestion.Error.UnderlyingError.repositorySaveUniqueViolation(owner: repository.owner, - name: repository.name, - details: details) - } catch let error as PSQLError { - let details = error.serverInfo?[.message] ?? "" - throw Ingestion.Error.UnderlyingError.repositorySaveFailed(owner: repository.owner, - name: repository.name, - details: details) - } catch { - throw Ingestion.Error.UnderlyingError.repositorySaveFailed(owner: repository.owner, - name: repository.name, - details: "\(error)") + do { + try await repository.save(on: database) + } catch let error as PSQLError where error.isUniqueViolation { + let details = error.serverInfo?[.message] ?? "" + throw Ingestion.Error.UnderlyingError.repositorySaveUniqueViolation(owner: repository.owner, + name: repository.name, + details: details) + } catch let error as PSQLError { + let details = error.serverInfo?[.message] ?? "" + throw Ingestion.Error.UnderlyingError.repositorySaveFailed(owner: repository.owner, + name: repository.name, + details: details) + } catch { + throw Ingestion.Error.UnderlyingError.repositorySaveFailed(owner: repository.owner, + name: repository.name, + details: "\(error)") + } } -} -func getFork(on database: Database, parent: Github.Metadata.Parent?) async -> Fork? { - guard let parentUrl = parent?.normalizedURL else { return nil } + static func getFork(on database: Database, parent: Github.Metadata.Parent?) async -> Fork? { + guard let parentUrl = parent?.normalizedURL else { return nil } - if let packageId = try? await Package.query(on: database) - .filter(\.$url, .custom("ilike"), parentUrl) - .first()?.id { - return .parentId(id: packageId, fallbackURL: parentUrl) - } else { - return .parentURL(parentUrl) + if let packageId = try? await Package.query(on: database) + .filter(\.$url, .custom("ilike"), parentUrl) + .first()?.id { + return .parentId(id: packageId, fallbackURL: parentUrl) + } else { + return .parentURL(parentUrl) + } } } + // Helper to ensure the canonical source for these critical fields is the same in all the places where we need them private extension Github.Metadata { var repositoryOwner: String? { repository?.repositoryOwner } var repositoryName: String? { repository?.repositoryName } } + // Helper to ensure the canonical source for these critical fields is the same in all the places where we need them private extension Github.Metadata.Repository { var repositoryOwner: String? { owner.login } var repositoryName: String? { name } } + private extension Github.Metadata.Parent { // Returns a normalized version of the URL. Adding a `.git` if not present. var normalizedURL: String? { @@ -394,6 +397,7 @@ private extension Github.Metadata.Parent { } } + private extension Ingestion.Error { static func invalidURL(packageId: Package.Id, url: String) -> Self { Ingestion.Error(packageId: packageId, underlyingError: .invalidURL(url)) diff --git a/Tests/AppTests/IngestionTests.swift b/Tests/AppTests/IngestionTests.swift index 0599215f5..a0b915530 100644 --- a/Tests/AppTests/IngestionTests.swift +++ b/Tests/AppTests/IngestionTests.swift @@ -97,12 +97,12 @@ class IngestionTests: AppTestCase { let repo = Repository(packageId: try pkg.requireID()) // MUT - try await updateRepository(on: app.db, - for: repo, - metadata: .mock(owner: "foo", repository: "bar"), - licenseInfo: .init(htmlUrl: ""), - readmeInfo: .init(html: "", htmlUrl: "", imagesToCache: []), - s3Readme: nil) + try await Ingestion.updateRepository(on: app.db, + for: repo, + metadata: .mock(owner: "foo", repository: "bar"), + licenseInfo: .init(htmlUrl: ""), + readmeInfo: .init(html: "", htmlUrl: "", imagesToCache: []), + s3Readme: nil) // validate do { @@ -154,16 +154,16 @@ class IngestionTests: AppTestCase { summary: "package desc") // MUT - try await updateRepository(on: app.db, - for: repo, - metadata: md, - licenseInfo: .init(htmlUrl: "license url"), - readmeInfo: .init(etag: "etag", - html: "readme html https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com", - htmlUrl: "readme html url", - imagesToCache: []), - s3Readme: .cached(s3ObjectUrl: "url", githubEtag: "etag"), - fork: .parentURL("https://github.com/foo/bar.git")) + try await Ingestion.updateRepository(on: app.db, + for: repo, + metadata: md, + licenseInfo: .init(htmlUrl: "license url"), + readmeInfo: .init(etag: "etag", + html: "readme html https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com", + htmlUrl: "readme html url", + imagesToCache: []), + s3Readme: .cached(s3ObjectUrl: "url", githubEtag: "etag"), + fork: .parentURL("https://github.com/foo/bar.git")) // validate do { @@ -229,14 +229,14 @@ class IngestionTests: AppTestCase { summary: "package desc") // MUT - try await updateRepository(on: app.db, - for: repo, - metadata: md, - licenseInfo: .init(htmlUrl: "license url"), - readmeInfo: .init(html: "readme html", - htmlUrl: "readme html url", - imagesToCache: []), - s3Readme: nil) + try await Ingestion.updateRepository(on: app.db, + for: repo, + metadata: md, + licenseInfo: .init(htmlUrl: "license url"), + readmeInfo: .init(html: "readme html", + htmlUrl: "readme html url", + imagesToCache: []), + s3Readme: nil) // validate do { @@ -660,23 +660,23 @@ class IngestionTests: AppTestCase { try await Package(url: "https://github.com/bar/forked.git", processingStage: .analysis).save(on: app.db) // test lookup when package is in the index - let fork = await getFork(on: app.db, parent: .init(url: "https://github.com/foo/parent.git")) + let fork = await Ingestion.getFork(on: app.db, parent: .init(url: "https://github.com/foo/parent.git")) XCTAssertEqual(fork, .parentId(id: .id0, fallbackURL: "https://github.com/foo/parent.git")) // test lookup when package is in the index but with different case in URL - let fork2 = await getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent.git")) + let fork2 = await Ingestion.getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent.git")) XCTAssertEqual(fork2, .parentId(id: .id0, fallbackURL: "https://github.com/Foo/Parent.git")) // test whem metadata repo url doesn't have `.git` at end - let fork3 = await getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent")) + let fork3 = await Ingestion.getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent")) XCTAssertEqual(fork3, .parentId(id: .id0, fallbackURL: "https://github.com/Foo/Parent.git")) // test lookup when package is not in the index - let fork4 = await getFork(on: app.db, parent: .init(url: "https://github.com/some/other.git")) + let fork4 = await Ingestion.getFork(on: app.db, parent: .init(url: "https://github.com/some/other.git")) XCTAssertEqual(fork4, .parentURL("https://github.com/some/other.git")) // test lookup when parent url is nil - let fork5 = await getFork(on: app.db, parent: nil) + let fork5 = await Ingestion.getFork(on: app.db, parent: nil) XCTAssertEqual(fork5, nil) } }