-
Notifications
You must be signed in to change notification settings - Fork 8
Web API Guide
Access the UniProt documentation here.
The requirements for fetching data are the gene taxon and symbol.
/**
 * Looks up a gene in UniProt in two steps: an ID-mapping request that turns the
 * gene symbol into a UniProt accession, followed by a request for that entry's XML.
 *
 * @param {Object} query
 * @param {string} query.taxon - Taxon ID, sent as the `taxon` request parameter.
 * @param {string} query.symbol - Gene symbol to map to an accession.
 * @returns {Object} The chained `$.get` promise. It resolves with the response of
 *     the entry request. When the mapping response contains no accession for the
 *     symbol, the `.then` callback throws a descriptive Error (which rejects the
 *     chain under jQuery 3+ promise semantics).
 */
let getUniProtInfo = function (query) {
    const taxon = query.taxon;
    const geneSymbol = query.symbol;

    return $.get({
        url: serviceRoot + "/uniprot/uploadlists/",
        data: {
            from: "GENENAME",
            to: "ACC",
            format: "tab",
            taxon: taxon,
            query: geneSymbol,
        },
        dataType: "text",
        timeout: 5000,
    }).then(function (data) {
        // Escape regex metacharacters so the symbol is matched literally instead of
        // being interpreted as a pattern (or throwing on e.g. an unbalanced "(").
        const escapedSymbol = String(geneSymbol).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        // The accession is the first run of [A-Z0-9] after the symbol and any whitespace.
        const regex = new RegExp(escapedSymbol + "[ \t\r\n\v\f]*([A-Z0-9]+)", "gm");
        const match = regex.exec(data);
        if (match === null) {
            // Fail with a meaningful message rather than a TypeError from indexing null.
            throw new Error("No UniProt accession found for gene symbol \"" + geneSymbol + "\"");
        }
        return $.get({
            url: serviceRoot + "/uniprot/uniprot/" + match[1] + ".xml",
            timeout: 5000,
        });
    }).fail(function () {
        // NOTE(review): this looks like a retry that re-issues the request using the
        // callback context as settings. The value returned from a `.fail` callback is
        // presumably not fed back into the chain, so callers would not see the retry's
        // result - confirm the intent before relying on it.
        return $.get(this);
    });
};
- UniProt ID
- Protein Sequence
- Protein Type
- Species Name
Access the NCBI Documentation here.
The requirements for fetching data are the gene symbol and species name.
/**
 * Looks up a gene in NCBI in two steps: an `esearch` request that resolves the
 * gene symbol and organism to a gene ID, followed by an `esummary` request for
 * that ID.
 *
 * @param {Object} query
 * @param {string} query.symbol - Gene symbol, used as the `[gene]` search term.
 * @param {string} query.species - Species name with words separated by underscores.
 * @returns {Object} The chained `$.get` promise. It resolves with the `esummary`
 *     response. When the search response contains no `<Id>` element, the `.then`
 *     callback throws a descriptive Error (which rejects the chain under jQuery 3+
 *     promise semantics).
 */
let getNCBIInfo = function (query) {
    const geneSymbol = query.symbol;
    // Convert every underscore, not just the first, so that names with more than
    // two words produce a well-formed "+"-separated search term.
    const organism = query.species.replace(/_/g, "+");

    return $.get({
        url: serviceRoot + "/ncbi/entrez/eutils/esearch.fcgi",
        data: {
            db: "gene",
            term: geneSymbol + "[gene]+" + organism + "[Organism]",
        },
        dataType: "text",
        timeout: 5000,
    }).then(function (data) {
        // Take the first <Id> in the response; require at least one digit so an
        // empty element is treated the same as a missing one.
        const match = /<Id>(\d+)<\/Id>/.exec(data);
        if (match === null) {
            // Fail with a meaningful message rather than a TypeError from indexing null.
            throw new Error(
                "No NCBI gene ID found for \"" + geneSymbol + "\" in \"" + query.species + "\""
            );
        }
        return $.get({
            url: serviceRoot + "/ncbi/entrez/eutils/esummary.fcgi?db=gene&id=" + match[1],
            dataType: "xml",
            timeout: 5000,
        });
    });
};
- NCBI ID
- Locus Tag
- Chromosome Sequence
After browsing the capabilities of the NCBI API, I conclude that it is not possible to retrieve gene data directly from the taxon ID alone.
According to the documentation, the way to access gene information through the NCBI database is by providing an Entrez Unique Identifier (UID).
This is the purpose of the first get() function:
This requires knowing both the gene name and the organism name in advance; at present, both are passed into the gene page when the page is created.
return $.get({
url: serviceRoot + "/ncbi/entrez/eutils/esearch.fcgi",
data: {
db: "gene",
term: geneSymbol + "[gene]+" + geneName + "[Organism]",
},
dataType: "text",
timeout: 5000,
}).
The result of this function is a page of XML data, which we use to get the UID.
Example query: YHP1
<eSearchResult>
<Count>1</Count>
<RetMax>1</RetMax>
<RetStart>0</RetStart>
<IdList>
<Id>852062</Id> <!-- We want this -->
</IdList>
<TranslationSet>
<Translation>
<From>+Saccharomyces+cerevisiae[Organism]</From>
<To>"Saccharomyces cerevisiae"[Organism]</To>
</Translation>
</TranslationSet>
<TranslationStack>
<TermSet>
<Term>YHP1[gene]</Term>
<Field>gene</Field>
<Count>1</Count>
<Explode>N</Explode>
</TermSet>
<TermSet>
<Term>"Saccharomyces cerevisiae"[Organism]</Term>
<Field>Organism</Field>
<Count>7062</Count>
<Explode>Y</Explode>
</TermSet>
<OP>AND</OP>
</TranslationStack>
<QueryTranslation>
YHP1[gene] AND "Saccharomyces cerevisiae"[Organism]
</QueryTranslation>
</eSearchResult>
We take that value, and put it into a SECOND get() function to retrieve the gene data we want:
return $.get({
url: serviceRoot + "/ncbi/entrez/eutils/esummary.fcgi?db=gene&id=" + id,
dataType: "xml",
timeout: 5000,
});
As stated in my previous comment, it is possible to retrieve the species name and the gene name by accessing the taxonomy database, which would give us the organism name. It would require a third get() function to be created, however, which isn't difficult but would slow the page down slightly.
Access the JASPAR API documentation here.
The requirements for the JASPAR API function are the gene symbol and taxon.
/**
 * Looks up a gene in JASPAR in two steps: a matrix search by taxon and gene
 * name, followed by a request for the first matching matrix.
 *
 * @param {Object} query
 * @param {string} query.symbol - Gene symbol; upper-cased before it is sent.
 * @returns {Object} The chained `$.get` promise. It resolves with the matrix
 *     response, or with an empty object when the search response has no results.
 */
let getJasparInfo = function (query) {
    const geneSymbol = query.symbol;
    // will eventually need to decide which taxon to use for JASPAR, for now this remains hardcoded
    const taxon = "4932";
    // Declared once so both requests send the same header.
    const setJsonContentType = function (xhr) {
        xhr.setRequestHeader("content-type", "application/json");
    };

    return $.get({
        url: serviceRoot + "/jaspar/api/v1/matrix/?tax_id=" + taxon + "&format=json&name=" +
            encodeURIComponent(geneSymbol.toUpperCase()),
        dataType: "json",
        beforeSend: setJsonContentType,
    }).then(function (data) {
        // Check that `results` exists before reading its length; reading the length
        // first would throw when the field is missing.
        const results = data && data.results;
        if (!Array.isArray(results) || results.length === 0) {
            return {};
        }
        return $.get({
            url: serviceRoot + "/jaspar/api/v1/matrix/" + encodeURIComponent(results[0].matrix_id),
            dataType: "json",
            beforeSend: setJsonContentType,
        });
    });
};
- JASPAR ID
- Class
- Family
- Sequence Logo
- Frequency Matrix
Within our API, the taxon is hardcoded as 4932. As of 1/28/19, the taxon for the rest of the APIs is 559292. When 559292 was tested as of 1/28/19 as a taxon, the API response lacked the data we seek.
Taxon ID 4932 is referring to the species Saccharomyces cerevisiae, see https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=4932, while taxon ID 559292 is a substrain of 4932, see https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=559292.
If you look at the first link, you will see that the strain S288C is included in the long list of substrains.
What is special about S288c is that it was the first strain to be sequenced completely, so it is the reference genome for yeast. However, now that many more substrains have been completely sequenced, the databases had to be able to distinguish between them.
So, in the case of our gene pages, all the data we are providing are still for the same species. JASPAR still uses the parent ID, 4932 (presumably because it pre-dates the separate sub-strain designations), which is why that value is hardcoded in getJasparInfo. However, the other APIs, such as UniProt, refer to the sequenced strain, 559292.
This leaves an open question for future GRNsight developers: which taxon should be used to pull data from JASPAR?
Access the YeastMine documentation here.
All of the YeastMine functions require the gene symbol.
/**
 * Requests the Gene Ontology details of a locus from the YeastMine backend.
 *
 * @param {Object} query
 * @param {string} query.symbol - Gene symbol placed in the request path.
 * @returns {Object} The `$.get` promise with a catch handler attached; if the
 *     request fails, the handler substitutes `defaultGeneOntology`.
 */
let getGeneOntologyInfo = function (query) {
    const { symbol } = query;
    const endpoint = serviceRoot + "/yeastmine/backend/locus/" + symbol + "/go_details";

    const request = $.get({
        url: endpoint,
        dataType: "json",
        beforeSend(xhr) {
            xhr.setRequestHeader("content-type", "application/json");
        },
    });

    // A failed lookup falls back to the default Gene Ontology data.
    return request.catch(() => defaultGeneOntology);
};
/**
 * Requests the regulation details of a locus from the YeastMine backend.
 *
 * @param {Object} query
 * @param {string} query.symbol - Gene symbol placed in the request path.
 * @returns {Object} The promise returned by `$.get`; no failure handler is attached.
 */
let getRegulationInfo = function (query) {
    const { symbol } = query;
    const settings = {
        url: serviceRoot + "/yeastmine/backend/locus/" + symbol + "/regulation_details",
        dataType: "json",
        beforeSend(xhr) {
            xhr.setRequestHeader("content-type", "application/json");
        },
    };

    return $.get(settings);
};
/**
 * Requests the locus record for a gene from the YeastMine web service.
 *
 * @param {Object} query
 * @param {string} query.symbol - Gene symbol appended to the locus URL.
 * @returns {Object} The promise returned by `$.get`; no failure handler is attached.
 */
let getYeastMineInfo = function (query) {
    const { symbol } = query;
    const locusUrl = serviceRoot + "/yeastmine/webservice/locus/" + symbol;
    const addJsonHeader = (xhr) => {
        xhr.setRequestHeader("content-type", "application/json");
    };

    return $.get({
        url: locusUrl,
        dataType: "json",
        beforeSend: addJsonHeader,
    });
};
Gene Ontology Data:
- Molecular Functions
- Biological Processes
- Cellular Components
Regulators Data:
- List of Regulators
- List of Targets
Description:
- SGD ID
- Gene Ontology Overview