Skip to content

graphkb.match

Functions which return Variants from GraphKB which match some input variant definition

INPUT_COPY_CATEGORIES

INPUT_COPY_CATEGORIES = IterableNamespace(
    AMP='amplification',
    ANY_GAIN='copy gain',
    ANY_LOSS='copy loss',
    DEEP='deep deletion',
    GAIN='low level copy gain',
    LOSS='shallow deletion',
)

INPUT_EXPRESSION_CATEGORIES

INPUT_EXPRESSION_CATEGORIES = IterableNamespace(
    UP='increased expression', DOWN='reduced expression'
)

AMBIGUOUS_AA

AMBIGUOUS_AA = ['x', '?', 'X']

VARIANT_RETURN_PROPERTIES

VARIANT_RETURN_PROPERTIES = (
    BASE_RETURN_PROPERTIES
    + [f'type.{p}' for p in GENERIC_RETURN_PROPERTIES]
    + [f'reference1.{p}' for p in GENE_RETURN_PROPERTIES]
    + [f'reference2.{p}' for p in GENE_RETURN_PROPERTIES]
    + ['zygosity', 'germline', 'displayName']
)

POS_VARIANT_RETURN_PROPERTIES

POS_VARIANT_RETURN_PROPERTIES = VARIANT_RETURN_PROPERTIES + [
    'break1Start',
    'break1End',
    'break2Start',
    'break2End',
    'break1Repr',
    'break2Repr',
    'refSeq',
    'untemplatedSeq',
    'untemplatedSeqSize',
    'truncation',
    'assembly',
]

FEATURES_CACHE

FEATURES_CACHE: Set[str] = set()

get_equivalent_features()

Match an equivalent list of features given some input feature name (or ID)

def get_equivalent_features(
    conn: GraphKBConnection,
    gene_name: str,
    ignore_cache: bool = False,
    is_source_id: bool = False,
    source: str = '',
    source_id_version: str = '',
) -> List[Ontology]:

Args

  • conn (GraphKBConnection)
  • gene_name (str): the gene name to search features by
  • ignore_cache (bool): bypass the cache to always force a new request
  • is_source_id (bool): treat the gene_name as the gene ID from the source database (ex. ENSG001)
  • source (str): the name of the source database the gene definition is from (ex. ensembl)
  • source_id_version (str): the version of the source_id

Returns

  • List[Ontology]: equivalent feature records

Examples

get_equivalent_features(conn, 'KRAS')
get_equivalent_features(conn, 'ENSG001', source='ensembl', is_source_id=True)
get_equivalent_features(conn, 'ENSG001', source='ensembl', source_id_version='1')
get_equivalent_features(conn, '#3:44')

cache_missing_features()

Create a cache of features that exist to avoid repeatedly querying for missing features

def cache_missing_features(conn: GraphKBConnection) -> None:

Args

  • conn (GraphKBConnection)

match_category_variant()

Returns a list of variants matching the input variant

def match_category_variant(
    conn: GraphKBConnection,
    gene_name: str,
    category: str,
    root_exclude_term: str = '',
    gene_source: str = '',
    gene_is_source_id: bool = False,
    ignore_cache: bool = False,
) -> List[Variant]:

Args

  • conn (GraphKBConnection): the graphkb connection object
  • gene_name (str): the name of the gene the variant is in reference to
  • category (str): the variant category (ex. copy loss)
  • root_exclude_term (str)
  • gene_source (str): The source database the gene is defined by (ex. ensembl)
  • gene_is_source_id (bool): Indicates the gene name(s) input should be treated as sourceIds not names
  • ignore_cache (bool): bypass the cache to always force a new request

Returns

  • List[Variant]: List of variant records from GraphKB which match the input

Raises

match_copy_variant()

Returns a list of variants matching the input variant

def match_copy_variant(
    conn: GraphKBConnection, gene_name: str, category: str, drop_homozygous: bool = False, **kwargs
) -> List[Variant]:

Args

  • conn (GraphKBConnection): the graphkb connection object
  • gene_name (str): the name of the gene the variant is in reference to
  • category (str): the variant category (ex. copy loss)
  • drop_homozygous (bool): Drop homozygous matches from the result when true

Returns

  • List[Variant]: List of variant records from GraphKB which match the input

Raises

  • ValueError: The input copy category is not recognized

positions_overlap()

Check if 2 Position records from GraphKB indicate an overlap

def positions_overlap(
    pos_record: BasicPosition, range_start: BasicPosition, range_end: Optional[BasicPosition] = None
) -> bool:

Args

  • pos_record (BasicPosition): the record to compare
  • range_start (BasicPosition): the position record indicating the start of an uncertainty range
  • range_end (Optional[BasicPosition]): the position record indicating the end of an uncertainty range

Returns

  • bool: True if the positions overlap

Raises

  • NotImplementedError: if a cytoband type position is given

Note

Null values indicate not-specified or any.

compare_positional_variants()

Compare 2 variant records from GraphKB to determine if they are equivalent

def compare_positional_variants(
    variant: Union[PositionalVariant, ParsedVariant],
    reference_variant: Union[PositionalVariant, ParsedVariant],
) -> bool:

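Args

  • variant (Union[PositionalVariant, ParsedVariant]): the variant record to compare
  • reference_variant (Union[PositionalVariant, ParsedVariant]): the variant record to compare against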

Returns

  • bool: True if the records are equivalent

match_positional_variant()

Given the HGVS+ representation of some positional variant, parse it and match it to annotations in GraphKB

def match_positional_variant(
    conn: GraphKBConnection,
    variant_string: str,
    reference1: Optional[str] = None,
    reference2: Optional[str] = None,
    gene_is_source_id: bool = False,
    gene_source: str = '',
    ignore_cache: bool = False,
) -> List[Variant]:

Args

  • conn (GraphKBConnection)
  • variant_string (str): the HGVS+ annotation string
  • reference1 (Optional[str]): Explicitly specify the first reference link record (gene1)
  • reference2 (Optional[str]): Explicitly specify the second reference link record (gene2)
  • gene_is_source_id (bool): Indicates the gene name(s) input should be treated as sourceIds not names
  • gene_source (str): The source database the gene is defined by (ex. ensembl)
  • ignore_cache (bool): bypass the cache to always force a new request

Returns

  • List[Variant]: A list of matched variant records

Raises

  • NotImplementedError: raised for uncertain position input (ranges)
  • FeatureNotFoundError: One of the genes does not exist in GraphKB
  • ValueError: the gene names were given both in the variant_string and explicitly

Examples

match_positional_variant(conn, '(EWSR1,FLI1):fusion(e.1,e.2)')
match_positional_variant(conn, 'fusion(e.1,e.2)', 'EWSR1', 'FLI1')
match_positional_variant(conn, 'fusion(e.1,e.2)', '#3:4', '#4:5')
match_positional_variant(conn, 'fusion(e.1,e.2)', '123', '456', gene_is_source_id=True, gene_source='entrez gene')
match_positional_variant(conn, 'KRAS:p.G12D')
match_positional_variant(conn, 'p.G12D', 'KRAS')
Back to top