Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 166 additions & 0 deletions _pipeline_data/configs/_include_project.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# This config file is required for running pipeline-utils and defines the necessary
# paths and configurations for the include_dbt_sandbox project.


paths:
pl_dir:
root: repo_root
template: dbt_project

# Study input data (CSV + DD)
src_data_dir:
root: repo_root
template: "{study_data_dir}"

# Static metadata root used for internal/export model generation
static_data_dir:
root: repo_root
template: "{pipeline_data_dir}/static"

# static_int_metadata_dir is hardcoded since it is necessary prior to loading the paths from this file. This path is set in factory_functions.py
# static_exp_metadata_dir is hardcoded since it is necessary prior to loading the paths from this file. This path is set in factory_functions.py

static_int_additions_dir:
root: repo_root
template: "{pipeline_data_dir}/static/common_data_models/internal/additions"

static_sp_exp_dir:
root: repo_root
template: "{pipeline_data_dir}/static/common_data_models/export/{exp_model_name}"

pl_exp_dir:
root: repo_root
template: dbt_project/models/export/{exp_model_name}

pl_access_dir:
root: repo_root
template: dbt_project/models/access

pl_combined_dir:
root: repo_root
template: dbt_project/models/combined

pl_macros_dir:
root: repo_root
template: dbt_project/macros

pl_profiles:
root: repo_root
template: dbt_project/profiles.yml

utils_files_dir:
root: repo_root
template: .

utils_macros_dir:
root: repo_root
template: dbt_project/macros

pl_dag_dir:
base: repo_root
template: dags/{project_id}

overridable_paths:
# Values come from the study YAML and are interpreted from repo root.
study_data_dir:
context_attr: study_data_dir
root: repo_root
template: _study_data/{study_id}
override_root: repo_root

pipeline_data_dir:
context_attr: pipeline_data_dir
root: repo_root
template: _pipeline_data
override_root: repo_root

derived_paths:
dbtp_src_study_dir:
base: pl_dir
template: models/include/{study_id}/src

dbtp_ftdc_study_dir:
base: pl_dir
template: models/include/{study_id}/int

dbtp_ftdc_study_docs_dir:
base: dbtp_ftdc_study_dir
template: docs

dbtp_ftdc_dir:
base: pl_dir
template: models/include/{study_id}

pl_scripts_dir:
base: pl_dir
template: scripts


stages:
sources:
models_dir: dbtp_src_study_dir
yml_dir: dbtp_src_study_dir
docs_dir: dbtp_src_study_dir

intermediate:
models_dir: dbtp_ftdc_study_dir
yml_dir: dbtp_ftdc_study_dir
docs_dir: dbtp_ftdc_study_docs_dir
macros_dir: pl_macros_dir

access:
models_dir: pl_access_dir
yml_dir: pl_access_dir
docs_dir: pl_access_dir
macros_dir: pl_macros_dir

combined:
models_dir: pl_combined_dir
yml_dir: pl_combined_dir
docs_dir: pl_combined_dir
macros_dir: pl_macros_dir

export:
models_dir: pl_exp_dir
yml_dir: pl_exp_dir
docs_dir: pl_exp_dir

dbt_projects:
- dir_key: pl_dir
name: "include_dbt_sandbox"

dbt_project_stages:
sources:
dir_key: pl_dir
dbtp_def: src
tables_attr: src_prefixed_tables

intermediate:
dir_key: pl_dir
dbtp_def: stb
tables_attr: stb_prefixed_tables

access:
dir_key: pl_dir
dbtp_def: access
tables_attr: int_prefixed_tables

combined:
dir_key: pl_dir
dbtp_def: combined
tables_attr: combined_prefixed_tables

export:
dir_key: pl_dir
dbtp_def: exp
tables_attr: exp_prefixed_tables

run_script_dir: pl_scripts_dir

study_sql_method: study_select_sql

features:
copy_export_dir: false
copy_macros_dir: false
copy_profiles: false
copy_import_macros: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
variable_name,description,data_type,min,max,units,enumerations,comment
access_policy_id,Access policy communicates the limitations and/or requirements that define how a user may gain access to a particular set of data.,string,,,,,
description,More details associated with the given resource,string,,,,,
data_access_type,Type of access restrictions on file downloads ( open | registered | controlled ),enumeration,,,,open=Open Access;registered=Registered;controlled=Controlled;gsr_restricted=GSR Restricted;gsr_allowed=GSR Allowed,
website,"URL describing the entity this represents. This can include a formal website, such as the Entity's website, or to an online document describing the entity.",string,,,,,
consent_scope,Which of the four areas this resource covers (extensible),enumeration,,,,adr=Advanced Care Directive;research=Research;patient_privacy=Patient Privacy;treatment=Treatment,
disease_limitation,Disease Use Limitations,string,,,,,
status,Indicates the state of the consent.,enumeration,,,,draft=Draft;proposed=Proposed;active=Active;rejected=Rejected;inactive=Inactive;entered_in_error=Entered in Error,
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
variable_name,description,data_type,min,max,units,enumerations,comment
AccessPolicy_access_policy_id,Autocreated FK slot,string,,,,,Foreign Key: AccessPolicy.access_policy_id
access_policy_code,A classification of the type of consents found in a consent statement.,enumeration,,,,"gru=General Research Use;hmb=Health/Medical/Biomedical;ds=Disease-Specific (Disease/Trait/Exposure);irb=IRB Approval Required;pub=Publication Required;col=Collaboration Required;npu=Not-for-profit use only;mds=Methods;gso=Genetic Studies only;gsr=Genomic Summary Results;rd=Related Diseases;duo_0000004=This data use permission indicates there is no restriction on use.;duo_0000042=This data use permission indicates that use is allowed for general research use for any research purpose.;duo_0000006=This data use permission indicates that use is allowed for health/medical/biomedical purposes; does not include the study of population origins or ancestry.;duo_0000007=This data use permission indicates that use is allowed provided it is related to the specified disease.;duo_0000011=This data use permission indicates that use of the data is limited to the study of population origins or ancestry.;duo_0000012=This data use modifier indicates that use is limited to studies of a certain research type.;duo_0000015=This data use modifier indicates that use does not allow methods development research (e.g., development of software or algorithms).;duo_0000016=This data use modifier indicates that use is limited to genetic studies only (i.e., studies that include genotype research alone or both genotype and phenotype research, but not phenotype research exclusively);duo_0000018=This data use modifier indicates that use of the data is limited to not-for-profit organizations and not-for-profit use, non-commercial use.;duo_0000019=This data use modifier indicates that requestor agrees to make results of studies using the data available to the larger scientific community.;duo_0000020=This data use modifier indicates that the requestor must agree to collaboration with the primary study investigator(s).;duo_0000021=This data use modifier indicates that the requestor must provide 
documentation of local IRB/ERB approval.;duo_0000022=This data use modifier indicates that use is limited to within a specific geographic region.;duo_0000024=This data use modifier indicates that requestor agrees not to publish results of studies until a specific date.;duo_0000025=This data use modifier indicates that use is approved for a specific number of months.;duo_0000026=This data use modifier indicates that use is limited to use by approved users.;duo_0000027=This data use modifier indicates that use is limited to use within an approved project.;duo_0000028=This data use modifier indicates that use is limited to use within an approved institution.;duo_0000029=This data use modifier indicates that the requestor must return derived/enriched data to the database/resource.;duo_0000043=This data use modifier indicates that use is allowed for clinical use and care.;duo_0000044=This data use modifier indicates use for purposes of population, origin, or ancestry research is prohibited.;duo_0000045=This data use modifier indicates that use of the data is limited to not-for-profit organisations.;duo_0000046=This data use modifier indicates that use of the data is limited to not-for-profit use.",
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
variable_name,description,data_type,min,max,units,enumerations,comment
value_type,Age Value Type,enumeration,,,,"age=Age (must also be annotated with age units);code=Age as Code (ages will be provided as coded values);age_range=Age expressed as a range (relative date/time);date=Rather than an age, we have an actual date for the event's occurrence",
relative_date_time_id,Age either numeric value or range,string,,,,,Foreign Key: RelativeDateTime.id
age_code,Age expressed as an enumerated value representing an age category,string,,,,,
as_date,Event Date (rather than age),string,,,,,
id,Unique Identifier for a table entry. This is probably not the Global ID,string,,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
variable_name,description,data_type,min,max,units,enumerations,comment
aliquot_id,Aliquot Global ID,string,,,,,
availability_status,Can this Sample be requested for further analysis?,enumeration,,,,available=Specimen is currently available;unavailable=Specimen is currently unavailable,
volume,What is the volume of the Aliquot?,number,,,,,
volume_units,Units associated with the volume (ucum),string,,,,,
concentration,What is the concentration of the analyte in the Aliquot?,string,,,,,
sample_id,Sample Global ID,string,,,,,Foreign Key: Sample.sample_id
access_policy_id,Access Policy Global ID,string,,,,,Foreign Key: AccessPolicy.access_policy_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
variable_name,description,data_type,min,max,units,enumerations,comment
Aliquot_aliquot_id,Autocreated FK slot,string,,,,,Foreign Key: Aliquot.aliquot_id
external_id,"Other identifiers for this entity, eg, from the submitting study or in systems like dbGaP",string,,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
variable_name,description,data_type,min,max,units,enumerations,comment
name,Name of the entity.,string,,,,,
role,Research Study Party Role,enumeration,,,,sponsor=sponsor;lead_sponsor=lead-sponsor;sponsor_investigator;primary_investigator;collaborator;funding_source;general_contact;recruitment_contact;sub_investigator;study_director;study_chair;irb=Institutional Review Board,
associated_party_practitioner_id,Associated Party (is Practitioner).,string,,,,,Foreign Key: Practitioner.practitioner_id
associated_party_practitioner_role_id,Associated Party (is Practitioner Role),string,,,,,Foreign Key: PractitionerRole.practitioner_role_id
associated_party_institution_id,Associated Party (is Institution),string,,,,,Foreign Key: Institution.institution_id
id,Unique Identifier for a table entry. This is probably not the Global ID,string,,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
variable_name,description,data_type,min,max,units,enumerations,comment
AssociatedParty_id,Autocreated FK slot,string,,,,,Foreign Key: AssociatedParty.id
classifier,Research Study Party Organization Type (what type of institution is party),enumeration,,,,nih=NIH;fda=FDA;academic=Academic;government=Government;nonprofit=Nonprofit;industry=Industry,
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
variable_name,description,data_type,min,max,units,enumerations,comment
AssociatedParty_id,Autocreated FK slot,string,,,,,Foreign Key: AssociatedParty.id
period_id_period_id,Reference to a time period which defines a Start and End datetime period.,string,,,,,Foreign Key: Period.period_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
variable_name,description,data_type,min,max,units,enumerations,comment
family_global_id,Family Global ID,string,,,,,
family_id,External ID common to all family members,string,,,,,
family_type,"Describes the 'type' of study family, eg, trio.",enumeration,,,,control_only=Control Only;duo=Duo;trio=Trio;trioplus=Trio+;proband_only=Proband Only;other=Other,
description,More details associated with the given resource,string,,,,,
consanguinity,Is there known or suspected consanguinity in this study family?,enumeration,,,,not_suspected=Not suspected;suspected=Suspected;known_present=Known present;unknown=Unknown,
family_focus,"What is this study family investigating? EG, a specific condition",string,,,,,
access_policy_id,Access Policy Global ID,string,,,,,Foreign Key: AccessPolicy.access_policy_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
variable_name,description,data_type,min,max,units,enumerations,comment
family_relationship_id,Family Relationship Global ID,string,,,,,
patient_id,The child from the parent-child relationship,string,,,,,Foreign Key: Participant.participant_id
relative_id,The parent from the parent-child relationship,string,,,,,Foreign Key: Participant.participant_id
relationship,The role the relative (parent) fills with respect to the patient (child) for this relationship.,enumeration,,,,"mother=The relative is the biological mother of the patient.;father=The relative is the biological father of the patient.;monozygotic_twin=The relative and patient are monozygotic twins;polyzygotic_twin=The relative and patient are polyzygotic twins;twin=The relative and patient are twins, but no further clarification is available (always use the more specific form when possible);full_sibling=The relative and child both share the same biological mother and father;half_sibling=The relative and child only share one biological parent;sibling=The relative and child share at least one biological parent, but there isn't enough information to confirm more than that.",
knowledge_source,The source for the relationship term,enumeration,,,,"traditional=The knowledge comes from traditional sources like a form filled out by a patient or information copied from an external traditional source like government records.;inferred=The knowledge is inferred from indirect evidence. For example, the existence of one patient's mother can be inferred from the existence of the patient.",
access_policy_id,Access Policy Global ID,string,,,,,Foreign Key: AccessPolicy.access_policy_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
variable_name,description,data_type,min,max,units,enumerations,comment
Family_family_global_id,Autocreated FK slot,string,,,,,Foreign Key: Family.family_global_id
external_id,"Other identifiers for this entity, eg, from the submitting study or in systems like dbGaP",string,,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
variable_name,description,data_type,min,max,units,enumerations,comment
file_location_id,Location details (this is not a global ID),string,,,,,
location_uri,The URI at which this data can be accessed,string,,,,,
file_name,The file's name (no path),string,,,,,
access_policy_id,Access Policy Global ID,string,,,,,Foreign Key: AccessPolicy.access_policy_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
variable_name,description,data_type,min,max,units,enumerations,comment
file_meta_data_id,Representation of file metadata for NCPI,string,,,,,
meta_data_type,Clarify which type of meta data this file has recorded,enumeration,,,,bam_cram=Bam or Cram file;fastq=FASTQ File;maf=MAF (Somatic Mutation);proteomics=Proteomics file;vcf=VCF or gVCF file,
assay_strategy,"e.g., Whole Genome Sequencing",string,,,,,
platform_instrument,"e.g., Illumina HiSeq2000",string,,,,,
library_prep,"e.g., polyA",string,,,,,
library_selection,...,string,,,,,
strandedness,"stranded, unstranded",string,,,,,
target_region,Target region,string,,,,,
is_paired_end,"True, False",string,,,,,
adaptor_trimmed,"True, False",string,,,,,
reference_genome,"GRCh37, GRCh38",string,,,,,
workflow_type,"e.g., alignment, somatic",string,,,,,
workflow_tool,"e.g., BWA-MEM, GATK-Haplotype Caller",string,,,,,
access_policy_id,Access Policy Global ID,string,,,,,Foreign Key: AccessPolicy.access_policy_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
variable_name,description,data_type,min,max,units,enumerations,comment
FileMetaData_file_meta_data_id,Autocreated FK slot,string,,,,,Foreign Key: FileMetaData.file_meta_data_id
samples_sample_id,"e.g., Reference(sample)",string,,,,,Foreign Key: Sample.sample_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
variable_name,description,data_type,min,max,units,enumerations,comment
HasExternalId_id,Autocreated FK slot,integer,,,,,Foreign Key: HasExternalId.id
external_id,"Other identifiers for this entity, eg, from the submitting study or in systems like dbGaP",string,,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
variable_name,description,data_type,min,max,units,enumerations,comment
institution_id,The institution this record is associated with.,string,,,,,
name,Name of the entity.,string,,,,,
access_policy_id,Access Policy Global ID,string,,,,,Foreign Key: AccessPolicy.access_policy_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
variable_name,description,data_type,min,max,units,enumerations,comment
Institution_institution_id,Autocreated FK slot,string,,,,,Foreign Key: Institution.institution_id
external_id,"Other identifiers for this entity, eg, from the submitting study or in systems like dbGaP",string,,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
variable_name,description,data_type,min,max,units,enumerations,comment
participant_id,The Global ID for the Participant,string,,,,,Foreign Key: Participant.participant_id
file_format,The file format used ([EDAM](http://edamontology.org) where possible),string,,,,,
file_size,"The size of the file, e.g., in bytes.",number,,,,,
file_size_unit,Units associated with the file_size value (ucum),string,,,,,
content_version,Version of the file content,string,,,,,
file_type,"The type of data contained in this file. Should be as detailed as possible, e.g., Whole Exome Variant Calls.",string,,,,,
file_hash,Value of hashing the file,string,,,,,
file_hash_type,"Algorithm used to calculate the hash (and size, where applicable)",string,,,,,
description,More details associated with the given resource,string,,,,,
file_global_id,File Global ID,string,,,,,
access_policy_id,Access Policy Global ID,string,,,,,Foreign Key: AccessPolicy.access_policy_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
variable_name,description,data_type,min,max,units,enumerations,comment
NCPIFile_file_global_id,Autocreated FK slot,string,,,,,Foreign Key: NCPIFile.file_global_id
external_id,"Other identifiers for this entity, eg, from the submitting study or in systems like dbGaP",string,,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
variable_name,description,data_type,min,max,units,enumerations,comment
NCPIFile_file_global_id,Autocreated FK slot,string,,,,,Foreign Key: NCPIFile.file_global_id
file_location_id_file_location_id,Location details (this is not a global ID),string,,,,,Foreign Key: FileLocation.file_location_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
variable_name,description,data_type,min,max,units,enumerations,comment
NCPIFile_file_global_id,Autocreated FK slot,string,,,,,Foreign Key: NCPIFile.file_global_id
file_meta_data_id_file_meta_data_id,Representation of file metadata for NCPI,string,,,,,Foreign Key: FileMetaData.file_meta_data_id
Loading