Skip to content

Commit

Permalink
Add the additional queries from the manuscript.
Browse files Browse the repository at this point in the history
  • Loading branch information
Sheeba Samuel committed Nov 21, 2024
1 parent 0369564 commit 908b5ae
Show file tree
Hide file tree
Showing 25 changed files with 93 additions and 3 deletions.
2 changes: 2 additions & 0 deletions sparql_query/q1.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Research articles by research field

SELECT ?research_field (COUNT(DISTINCT ?article) AS ?number_of_articles)
WHERE {
?repository <http://purl.org/pav/retrievedFrom> ?article .
Expand Down
2 changes: 2 additions & 0 deletions sparql_query/q10.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Exceptions occurring in Jupyter notebooks in our corpus.

SELECT ?exception (COUNT(?exception) AS ?count)
WHERE {
?execution a <https://w3id.org/reproduceme/CellExecution> ;
Expand Down
2 changes: 2 additions & 0 deletions sparql_query/q11.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Jupyter notebook exceptions by research field, taking as a proxy the highest-level MeSH terms of the article associated with the notebook.

PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT DISTINCT ?research_field (COUNT(?exception) AS ?exception_count)
WHERE {
Expand Down
2 changes: 2 additions & 0 deletions sparql_query/q12.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Notebooks with successful executions with same and different results

PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT (COUNT(?processed_different_result) AS ?count_different_result) (COUNT(?processed_same_result) AS ?count_same_result) (?count_same_result + ?count_different_result AS ?count_successful_executions)
WHERE {
Expand Down
1 change: 1 addition & 0 deletions sparql_query/q13.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Notebooks with code style errors and their description
SELECT ?notebook ?error ?description
WHERE {
?error a <https://w3id.org/reproduceme/NotebookCodeStyleError> ;
Expand Down
2 changes: 2 additions & 0 deletions sparql_query/q14.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Notebooks by search term: 'immun' AND ('stem' OR 'differentiation')

SELECT DISTINCT ?notebook_url ?article_label ?keywords WHERE {
?article <https://w3id.org/reproduceme/keywords> ?keywords .
?article <http://www.w3.org/2000/01/rdf-schema#label> ?article_label .
Expand Down
1 change: 1 addition & 0 deletions sparql_query/q15.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Articles by keyword, e.g., 'open source'
SELECT DISTINCT ?article ?keywords WHERE {
?article <https://w3id.org/reproduceme/keywords> ?keywords .
FILTER (REGEX(LCASE(?keywords), "open(.)source"))
Expand Down
1 change: 1 addition & 0 deletions sparql_query/q16.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Most common errors in immunology
SELECT DISTINCT ?exception (COUNT(?exception) AS ?count) WHERE {
?execution a <https://w3id.org/reproduceme/CellExecution> ;
<https://w3id.org/reproduceme/exception> ?exception ;
Expand Down
2 changes: 2 additions & 0 deletions sparql_query/q17.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Most common errors in Nature journal

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?exception (COUNT(?exception) AS ?count) WHERE {
?execution a <https://w3id.org/reproduceme/CellExecution> ;
Expand Down
1 change: 1 addition & 0 deletions sparql_query/q18.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# MeSH terms ranked by 'ModuleNotFoundError' frequency
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT DISTINCT ?research_field (COUNT(?exception) AS ?exception_count)
WHERE {
Expand Down
2 changes: 2 additions & 0 deletions sparql_query/q19.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Repositories by their stargazers count

PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT DISTINCT ?repo ?stargazers_count WHERE {
?repo <https://w3id.org/reproduceme/stargazers_count> ?count.
Expand Down
2 changes: 2 additions & 0 deletions sparql_query/q2.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Research field (MeSH terms) by the number of GitHub repositories that contain at least one Jupyter notebook.

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX repr: <https://w3id.org/reproduceme/>
Expand Down
3 changes: 2 additions & 1 deletion sparql_query/q20.rq
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
# Match articles between FAIR Jupyter and Wikidata via DOI

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wikidata_wd: <http://www.wikidata.org/entity/>
PREFIX wikidata_wdt: <http://www.wikidata.org/prop/direct/>

Expand Down
3 changes: 2 additions & 1 deletion sparql_query/q21.rq
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
# Match articles between FAIR Jupyter and Wikidata via PMC ID

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wikidata_wd: <http://www.wikidata.org/entity/>
PREFIX wikidata_wdt: <http://www.wikidata.org/prop/direct/>

Expand Down
3 changes: 2 additions & 1 deletion sparql_query/q22.rq
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
# Match articles between FAIR Jupyter and Wikidata via MeSH terms in a different language, e.g., Malayalam

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wikidata_wd: <http://www.wikidata.org/entity/>
PREFIX wikidata_wdt: <http://www.wikidata.org/prop/direct/>

Expand Down
25 changes: 25 additions & 0 deletions sparql_query/q23.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Match articles between FAIR Jupyter and MaRDI via DOI and get co-used software
#
# For every article DOI found in the local graph, the MaRDI portal endpoint is
# asked for an item whose P27 value is that same DOI, the item's P159 value
# (returned as ?title), and the items it links to through P1463 together with
# their rdfs:label.
# NOTE(review): the heading speaks of "software" while P1463 is bound to
# ?method -- confirm the property's meaning on the MaRDI portal.
# NOTE(review): the join on ?doi is an exact RDF-term match, so it presumably
# requires both sides to store DOIs with the same case and datatype -- verify.

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX mardi_wd: <https://portal.mardi4nfdi.de/entity/>
PREFIX mardi_wdt: <https://portal.mardi4nfdi.de/prop/direct/>

# Declared once each; neither prefix is referenced by the query below.
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX wikibase: <http://wikiba.se/ontology#>

SELECT DISTINCT ?title ?doi ?method ?methodLabel

WHERE {
  # Local graph: DOI of each FAIR Jupyter article.
  ?fj_article <https://w3id.org/reproduceme/doi> ?doi .

  # Remote graph: evaluated at the MaRDI endpoint and joined on ?doi.
  SERVICE <http://query.portal.mardi4nfdi.de/proxy/wdqs/bigdata/namespace/wdq/sparql> {
    ?mardi_paper mardi_wdt:P27 ?doi .
    ?mardi_paper mardi_wdt:P159 ?title .

    ?mardi_paper mardi_wdt:P1463 ?method .
    ?method rdfs:label ?methodLabel .
  }

}
LIMIT 1000
12 changes: 12 additions & 0 deletions sparql_query/q24.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# GitHub repositories and their Software Heritage snapshot
#
# Lists the GitHub repositories covered by https://doi.org/10.1093/gigascience/giad113
# and, for each one, a Software Heritage browse link as per
# https://doi.org/10.5281/zenodo.12806151 .
# The browse link is the fixed Software Heritage prefix followed by the
# percent-encoded repository URL.

SELECT DISTINCT ?GitHub_URL ?SWH_URL
WHERE {
  ?repository <https://w3id.org/reproduceme/url> ?repo_url_base .

  # Repository URL turned into a clickable IRI.
  BIND(IRI(?repo_url_base) AS ?GitHub_URL)

  # The origin URL travels as a query-string value, hence ENCODE_FOR_URI.
  BIND(IRI(CONCAT("https://archive.softwareheritage.org/browse/origin/directory/?origin_url=", ENCODE_FOR_URI(STR(?repo_url_base)))) AS ?SWH_URL)
}
ORDER BY ?repo_url_base
18 changes: 18 additions & 0 deletions sparql_query/q25.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Articles and repositories with notebooks in Julia
#
# Returns, for every notebook whose language is "julia": the label of the
# associated article (?title), a PMC link built from the article's pmc value,
# the repository URL as an IRI, and a link to the notebook inside the
# repository.
# NOTE(review): the notebook link hard-codes the "master" branch and assumes
# the notebook's rdfs:label is its path within the repository -- links will
# presumably break for repositories with another default branch; confirm.

SELECT DISTINCT ?title
  (URI(CONCAT("https://www.ncbi.nlm.nih.gov/pmc/articles/PMC", STR(?pmcid))) AS ?PMC_URL)
  (URI(?repo_url_base) AS ?GitHub_URL)
  ?Notebook_URL
WHERE {
  ?notebook a <https://w3id.org/reproduceme/Notebook> ;
            <https://w3id.org/reproduceme/language> "julia" ;
            <http://purl.org/pav/retrievedFrom> ?repository ;
            <http://www.w3.org/2000/01/rdf-schema#label> ?notebook_label .

  # Same triple as the former inverse path "?article ^retrievedFrom ?repository".
  ?repository <http://purl.org/pav/retrievedFrom> ?article ;
              <https://w3id.org/reproduceme/url> ?repo_url_base .

  ?article <https://w3id.org/reproduceme/pmc> ?pmcid ;
           <http://www.w3.org/2000/01/rdf-schema#label> ?title .

  # STR() keeps CONCAT from raising a type error (leaving ?Notebook_URL
  # unbound) should the repository URL be stored as an IRI, not a literal.
  BIND(URI(CONCAT(STR(?repo_url_base), "/blob/master/", ?notebook_label)) AS ?Notebook_URL)
}
ORDER BY DESC(?Notebook_URL)
2 changes: 2 additions & 0 deletions sparql_query/q3.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Journals with the highest number of articles that had a valid GitHub repository and at least one Jupyter notebook.

SELECT ?journal_name (COUNT(?article) as ?article_count)
WHERE {
?article <https://w3id.org/reproduceme/publishedIn> ?journal .
Expand Down
2 changes: 2 additions & 0 deletions sparql_query/q4.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Journals by the number of GitHub repositories and by the number of GitHub repositories with at least one Jupyter notebook.

SELECT ?journal_name (COUNT(?repository) as ?repository_count) (COUNT(?repository_nb) as ?repositories_with_notebooks_count) WHERE
{
?article <https://w3id.org/reproduceme/publishedIn> ?journal .
Expand Down
2 changes: 2 additions & 0 deletions sparql_query/q5.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Journals by number of GitHub repositories with Jupyter notebooks.

SELECT ?journal_name (COUNT(?repository_nb) AS ?repositories_with_notebooks_count)
?max_notebooks_count
WHERE {
Expand Down
2 changes: 2 additions & 0 deletions sparql_query/q6.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Programming languages of the notebooks.

SELECT ?language (COUNT(?notebook) as ?notebook_count)
WHERE {
?notebook a <https://w3id.org/reproduceme/Notebook> ;
Expand Down
1 change: 1 addition & 0 deletions sparql_query/q7.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Python notebooks by Python version by year of first commit to the notebook’s GitHub repository.
SELECT ?created_year ?language (COUNT(?notebook) as ?notebook_count)
WHERE {
?notebook a <https://w3id.org/reproduceme/Notebook> ;
Expand Down
1 change: 1 addition & 0 deletions sparql_query/q8.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Python notebooks by minor Python version by year of first commit to the GitHub repository containing the notebook.
SELECT ?created_year ?minor_version (COUNT(?notebook) as ?count_minor_version)
WHERE {
?notebook a <https://w3id.org/reproduceme/Notebook> ;
Expand Down
2 changes: 2 additions & 0 deletions sparql_query/q9.rq
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Python notebooks by major Python version by year of first commit to the notebook’s GitHub repository.

SELECT ?created_year ?major_version (COUNT(?notebook) as ?count_major_version)
WHERE {
?notebook a <https://w3id.org/reproduceme/Notebook> ;
Expand Down

0 comments on commit 908b5ae

Please sign in to comment.