Skip to content

Commit

Permalink
fix: update parse_npm_git_url macro for improved URL parsing (#2525)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jabolol authored Nov 26, 2024
1 parent 7118352 commit 22a47b2
Showing 1 changed file with 62 additions and 42 deletions.
104 changes: 62 additions & 42 deletions warehouse/dbt/macros/models/parse_npm_git_url.sql
Original file line number Diff line number Diff line change
@@ -1,49 +1,69 @@
{% macro parse_npm_git_url(key, source) %}
{#-
  Derives normalised git-remote columns from a raw repository URL string.

  Args:
    key:    SQL expression (typically a column name) holding the raw
            repository URL, e.g. `git+ssh://git@host/ns/repo.git#ref`,
            `git@host:ns/repo.git`, `git+https://host/ns/repo`,
            `https://host/ns/repo` or a bare `host/ns/repo`.
    source: relation (or CTE name) to select from.

  Returns every column of `source` plus:
    remote_url        - https URL without any `#fragment`, always ending in
                        `.git`; NULL when the key matches no known form.
    remote_name       - last path segment, without `.git` or `#fragment`.
    remote_namespace  - first path segment after the host.
    remote_source_id  - 'GITHUB' | 'GITLAB' | 'BITBUCKET' | 'OTHER'.

  Written for BigQuery: relies on regexp_contains / regexp_extract /
  regexp_replace (RE2 syntax) and `select * except (...)`.
-#}

{#- The raw key with any trailing `#fragment` removed. Every derived column
    starts from this expression so that a fragment can never shadow the
    scheme or `.git` handling (CASE branches are mutually exclusive). -#}
{%- set bare_key -%}
regexp_replace({{ key }}, r'#.*$', '')
{%- endset %}

with parsed_data as (
  select
    *,

    -- Rewrite every supported form to a plain https URL.
    case
      when regexp_contains({{ bare_key }}, r'^git\+ssh://') then
        -- The `user@` part is optional, so user-less ssh URLs are rewritten too.
        regexp_replace({{ bare_key }}, r'^git\+ssh://(?:[^@/]+@)?', 'https://')
      when regexp_contains({{ bare_key }}, r'^git@') then
        regexp_replace({{ bare_key }}, r'^git@(.*?):', 'https://\\1/')
      when regexp_contains({{ bare_key }}, r'^git\+https://') then
        regexp_replace({{ bare_key }}, r'^git\+', '')
      when regexp_contains({{ bare_key }}, r'^https?://') then
        {{ bare_key }}
      when regexp_contains({{ bare_key }}, r'^[^:/]+\.[^:/]+/') then
        concat('https://', {{ bare_key }})
      else null
    end as remote_url,

    -- Strip a trailing `.git` first, then take the last path segment.
    regexp_extract(
      regexp_replace({{ bare_key }}, r'\.git$', ''),
      r'/([^/]+)$'
    ) as remote_name,

    -- Normalise scp-like and ssh forms so the `https?://host/ns/repo`
    -- pattern below can locate the first path segment.
    regexp_extract(
      case
        when regexp_contains({{ bare_key }}, r'^git@') then
          regexp_replace({{ bare_key }}, r'^git@(.*?):', 'https://\\1/')
        when regexp_contains({{ bare_key }}, r'^git\+ssh://') then
          regexp_replace({{ bare_key }}, r'^git\+ssh://', 'https://')
        else {{ bare_key }}
      end,
      r'https?:\/\/[^\/]+\/([^\/]+)\/[^\/]+'
    ) as remote_namespace,

    case
      when regexp_contains({{ key }}, r'github\.com') then 'GITHUB'
      when regexp_contains({{ key }}, r'gitlab\.com') then 'GITLAB'
      when regexp_contains({{ key }}, r'bitbucket\.org') then 'BITBUCKET'
      else 'OTHER'
    end as remote_source_id

  from {{ source }}
),

-- Guarantee a `.git` suffix on remote_url. A NULL remote_url stays NULL
-- because concat() with a NULL argument returns NULL in BigQuery.
final_data as (
  select
    * except (remote_url),
    case
      when regexp_contains(remote_url, r'\.git$') then remote_url
      else concat(remote_url, '.git')
    end as remote_url
  from parsed_data
)

select * from final_data

{% endmacro %}

0 comments on commit 22a47b2

Please sign in to comment.