Skip to content
This repository has been archived by the owner on Aug 5, 2020. It is now read-only.

Commit

Permalink
Initial write
Browse files Browse the repository at this point in the history
  • Loading branch information
jayrbolton committed May 29, 2019
1 parent e64788a commit 723b998
Show file tree
Hide file tree
Showing 4 changed files with 170 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
language: python
python:
- 3.6
script:
- python validate.py
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ This repo holds configuration files that can be used in other KBase search codeb
* ES type mappings for each index
* Mapping of KBase Workspace type names to index names in ES.

## Validate syntax

Run `python validate.py` to validate the yaml syntax in the config file.

## KBase Search Stack

* [Index Runner](https://github.com/kbaseIncubator/index_runner_deluxe) - Kafka consumer to construct indexes and documents.
Expand Down
156 changes: 156 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@

# Generic, global type mappings. These apply to all workspace objects, but not subobjects.
global_ws_mappings:
timestamp: {type: date}
obj_name: {type: keyword}
guid: {type: keyword}
creation_date: {type: date}
shared_users: {type: keyword}
access_group: {type: integer}
creator: {type: keyword}
version: {type: integer}
obj_id: {type: integer}
is_public: {type: boolean}
copied: {type: keyword}
tags: {type: keyword}
obj_type_version: {type: keyword}
obj_type_module: {type: keyword}
obj_type_name: {type: keyword}

# Mapping of KBase type names to index names
ws_type_to_indexes:
Narrative: "narrative"
PairedEndLibrary: "reads"
SingleEndLibrary: "reads"
Assembly: "assembly"
Genome: "genome"
Pangenome: "pangenome"
Taxon: "taxon"
Tree: "tree"

# Which indexes are considered "subobjects" (nested under workspace objects, such as genome features)
ws_subobjects:
- "genome_features:1"
- "pangenome_orthologfamily:1"

aliases:
"narrative:1": narrative
"reads:1": reads
"assembly:1": assembly
"genome:1": genome
"genome_features:1": genome_features
"pangenome:1": pangenome
"pangenome_orthologfamily:1": pangenome_orthologfamily
"taxon:1": taxon
"tree:1": tree

# All ES type mappings
mappings:

"narrative:1":
narrative_title: {type: text}
data_objects:
type: nested
properties:
name: {type: keyword}
obj_type: {type: keyword}
cells:
type: object
properties:
desc: {type: text}
cell_type: {type: keyword}
total_cells: {type: short}

"reads:1":
sequencing_tech: {type: keyword}
size: {type: integer}
interleaved: {type: boolean}
single_genome: {type: boolean}
provenance_services: {type: keyword}
phred_type: {type: text}
gc_content: {type: float}
mean_quality_score: {type: float}
mean_read_length: {type: float}

"assembly:1":
assembly_name: {type: keyword}
mean_contig_length: {type: float}
percent_complete_contigs: {type: float}
percent_circle_contigs: {type: float}
assembly_id: {type: keyword}
gc_content: {type: float}
size: {type: integer}
num_contigs: {type: integer}
taxon_ref: {type: keyword}
external_origination_date: {type: keyword} # should maybe be of type date?
external_source_id: {type: keyword}
external_source: {type: keyword}

"genome:1":
genome_id: {type: keyword}
scientific_name: {type: keyword}
size: {type: integer}
num_contigs: {type: integer}
genome_type: {type: keyword}
gc_content: {type: float}
taxonomy: {type: keyword}
mean_contig_length: {type: float}
external_origination_date: {type: keyword} # should maybe be of type date?
original_source_file_name: {type: keyword}
# new fields to include:
cds_count: {type: integer}
feature_count: {type: integer}
mrna_count: {type: integer}
non_coding_feature_count: {type: integer}
assembly_ref: {type: keyword}
source_id: {type: keyword}
feature_counts: {type: object}
source: {type: keyword}
warnings: {type: text}

"genome_features:1":
feature_type: {type: keyword}
functions: {type: keyword}
contig_ids: {type: keyword}
sequence_length: {type: integer}
id: {type: keyword}
# genome_upa: {type: keyword}
guid: {type: keyword}
genome_version: {type: integer}
# new fields to include:
assembly_ref: {type: keyword}
genome_feature_type: {type: keyword}
starts: {type: integer}
strands: {type: keyword}
stops: {type: integer}
aliases: {type: keyword}

"pangenome:1":
pangenome_id: {type: keyword}
pangenome_name: {type: keyword}
pangenome_type: {type: keyword}
genome_upas: {type: keyword}

"pangenome_orthologfamily:1":
ortholog_id: {type: keyword}
ortholog_type: {type: keyword}
function: {type: keyword}
gene_ids: {type: keyword}

"taxon:1":
scientific_name: {type: keyword}
scientific_lineage: {type: keyword}
domain: {type: keyword}
kingdom: {type: keyword}
parent_taxon_ref: {type: keyword}
genetic_code: {type: integer}
aliases: {type: keyword}

"tree:1":
tree_name: {type: keyword}
type: {type: keyword}
labels:
type: nested
properties:
node_id: {type: text}
label: {type: text}
5 changes: 5 additions & 0 deletions validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import yaml

with open('./config.yaml') as fd:
yaml.load(fd)
print('YAML successfully parsed')

0 comments on commit 723b998

Please sign in to comment.