From 520e09e7e730f7e98410e6f66f9c4f78325d4e37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20R=C3=ADos?= Date: Mon, 28 Oct 2024 11:59:11 -0700 Subject: [PATCH] fix: handle `invalid` urls in `sbom` job (#2425) --- .../dlt_sources/github_repos/__init__.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/warehouse/oso_dagster/dlt_sources/github_repos/__init__.py b/warehouse/oso_dagster/dlt_sources/github_repos/__init__.py index a7c21cb18..524078a30 100644 --- a/warehouse/oso_dagster/dlt_sources/github_repos/__init__.py +++ b/warehouse/oso_dagster/dlt_sources/github_repos/__init__.py @@ -400,8 +400,21 @@ def oss_directory_github_sbom_resource( resolver = GithubRepositoryResolver(gh) all_github_urls = resolver.github_urls_from_df(projects_df) - valid_urls = [resolver.parse_url(url) for url in all_github_urls["url"] if url] - for url in valid_urls: - if url.type == GithubURLType.REPOSITORY and url.repository: - yield from resolver.get_sbom_for_repo(url.owner, url.repository) + for unparsed_url in all_github_urls["url"]: + if not unparsed_url: + continue + + try: + parsed_url = resolver.parse_url(unparsed_url) + + if parsed_url.type != GithubURLType.REPOSITORY or not parsed_url.repository: + continue + + yield from resolver.get_sbom_for_repo( + parsed_url.owner, parsed_url.repository + ) + + except InvalidGithubURL: + logger.warning("Skipping invalid github url: %s", unparsed_url) + continue