Skip to content

ipr.inputs

Read/Validate the variant input files

SPECIFICATION

SPECIFICATION = os.path.join(os.path.dirname(__file__), 'content.spec.json')

COPY_REQ

COPY_REQ = ['gene', 'kbCategory']

COPY_KEY

COPY_KEY = ['gene']

COPY_OPTIONAL

COPY_OPTIONAL = [
    'cnvState',
    'copyChange',
    'lohState',  # Loss of Heterozygosity state - informative detail to analyst
    'chromosomeBand',
    'start',
    'end',
    'size',
    'log2Cna',
    'cna',
]

SMALL_MUT_REQ

SMALL_MUT_REQ = ['gene', 'proteinChange']

SMALL_MUT_KEY

SMALL_MUT_KEY = SMALL_MUT_REQ + [
    'altSeq',
    'chromosome',
    'endPosition',
    'refSeq',
    'startPosition',
    'transcript',
]

SMALL_MUT_OPTIONAL

SMALL_MUT_OPTIONAL = [
    'altSeq',
    'chromosome',
    'endPosition',
    'germline',
    'hgvsCds',
    'hgvsGenomic',
    'hgvsProtein',
    'ncbiBuild',
    'normalAltCount',
    'normalDepth',
    'normalRefCount',
    'refSeq',
    'rnaAltCount',
    'rnaDepth',
    'rnaRefCount',
    'startPosition',
    'transcript',
    'tumourAltCount',
    'tumourDepth',
    'tumourRefCount',
    'zygosity',
]

EXP_REQ

EXP_REQ = ['gene', 'kbCategory']

EXP_KEY

EXP_KEY = ['gene']

EXP_OPTIONAL

EXP_OPTIONAL = [
    'biopsySiteFoldChange',
    'biopsySitePercentile',
    'biopsySiteQC',
    'biopsySiteZScore',
    'biopsySitekIQR',
    'diseaseFoldChange',
    'diseasekIQR',
    'diseasePercentile',
    'diseaseQC',
    'diseaseZScore',
    'expressionState',
    'histogramImage',
    'primarySiteFoldChange',
    'primarySitekIQR',
    'primarySitePercentile',
    'primarySiteQC',
    'primarySiteZScore',
    'rnaReads',
    'rpkm',
    'tpm',
]

SV_REQ

SV_REQ = [
    'eventType',
    'breakpoint',
    'gene1',  # prev: nterm_hugo
    'gene2',  # prev: cterm_hugo
    'exon1',  # n-terminal
    'exon2',  # c-terminal
]

SV_KEY

SV_KEY = SV_REQ[:]

SV_OPTIONAL

SV_OPTIONAL = [
    'ctermTranscript',
    'ntermTranscript',
    'ctermGene',  # combined hugo ensembl form
    'ntermGene',  # combined hugo ensembl form
    'detectedIn',
    'conventionalName',
    'svg',
    'svgTitle',
    'name',
    'frame',
    'omicSupport',
    'highQuality',
]

DefaultValidatingDraft7Validator

DefaultValidatingDraft7Validator = extend_with_default(jsonschema.Draft7Validator)

validate_variant_rows()

  • check that the required columns are present
  • check that a unique key can be formed for each row
  • drop any non-defined columns
def validate_variant_rows(
    rows: Iterable[Dict], required: List[str], optional: List[str], row_to_key: Callable
) -> List[IprVariant]:

Args

  • rows (Iterable[Dict]): the rows of the input file
  • required (List[str]): list of required column names
  • optional (List[str]): list of optional column names
  • row_to_key (Callable): function to generate a key for a given row

Returns

  • List[IprVariant]: the rows from the tab file as dictionaries

Raises

  • ValueError: row keys are not unique
  • ValueError: A required column is missing

preprocess_copy_variants()

Validate that the input rows contain the minimum required fields, and generate default values where possible.

def preprocess_copy_variants(rows: Iterable[Dict]) -> List[IprVariant]:

Args

  • rows (Iterable[Dict])

Returns

preprocess_small_mutations()

Validate that the input rows contain the minimum required fields, and generate default values where possible.

def preprocess_small_mutations(rows: Iterable[Dict]) -> List[IprGeneVariant]:

Args

  • rows (Iterable[Dict])

Returns

preprocess_expression_variants()

Validate that the input rows contain the minimum required fields, and generate default values where possible.

def preprocess_expression_variants(rows: Iterable[Dict]) -> List[IprGeneVariant]:

Args

  • rows (Iterable[Dict])

Returns

create_graphkb_sv_notation()

Generate GKB style structural variant notation from a structural variant input row

def create_graphkb_sv_notation(row: IprStructuralVariant) -> str:

Args

Returns

  • str

preprocess_structural_variants()

Validate that the input rows contain the minimum required fields, and generate default values where possible.

def preprocess_structural_variants(rows: Iterable[Dict]) -> List[IprVariant]:

Args

  • rows (Iterable[Dict])

Returns

Check matching information for any genes with variants. Warn about genes with only one experimental measure.

def check_variant_links(
    small_mutations: List[IprGeneVariant],
    expression_variants: List[IprGeneVariant],
    copy_variants: List[IprGeneVariant],
    structural_variants: List[IprStructuralVariant],
) -> Set[str]:

Args

Returns

  • Set[str]: set of gene names with variants (used for filtering before upload to IPR)

check_comparators()

Given the optional content dictionary, check that, based on the analyses present, the correct and sufficient comparators have also been specified.

def check_comparators(content: Dict, expresssionVariants: Iterable[Dict] = []) -> None:

Args

  • content (Dict)
  • expresssionVariants (Iterable[Dict])

validate_report_content()

Validate a report content input JSON object against the schema specification

Adds defaults as recommended by: https://python-jsonschema.readthedocs.io/en/latest/faq/#why-doesn-t-my-schema-s-default-property-set-the-default-on-my-instance

def validate_report_content(content: Dict, schema_file: str = SPECIFICATION) -> None:

Args

  • content (Dict)
  • schema_file (str)
Back to top