Source code for ce_detector.annotator

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""class for annotating junction reads
"""
from __future__ import annotations

from collections import defaultdict
from typing import Any

import gffutils
import numpy as np

from .utils import get_yaml
from .utils import timethis

# HardCode the information of chromosome because its name of two ref are not identical
CHROMS = get_yaml()["chr2hg38"]


[docs]class Annotator: """annotate junction reads :param junctionmap: instance return :class:`ce_detector.detector.JunctionMap` :type junctionmap: instance :param database: database of annotation files :type database: Any :param output: filename of annotated junction reads. Defaults to None :type output: TestIo """ def __init__(self, database: Any, output=None): """Constructor of Annotator""" self.database = gffutils.FeatureDB(database) self.output = output
[docs] @staticmethod def detect_property(start, end, junction_list): """detect type of slice, number of skipped donors and number of skipped acceptors type of slice including D A DA N NDA :param start: start of junction read :type start: int :param end: end of junction read :type end: int :param junction_list: gene list of junction reads :type junction_list: numpy.array :return: type of slice, number of skipped donors, number of skipped acceptors """ known_donors = junction_list[:, 0] known_acceptors = junction_list[:, 1] donors_skipped = ((start + 1 < known_donors) & (known_donors < end)).sum() acceptors_skipped = ( (start + 1 < known_acceptors) & (known_acceptors < end) ).sum() if [start + 1, end] in junction_list.tolist(): reads_type = "DA" elif (start + 1 in known_donors) and (end in known_acceptors): reads_type = "NDA" elif (start + 1 in known_donors) and (end not in known_acceptors): reads_type = "D" elif (start + 1 not in junction_list[:, 0]) and (end in junction_list[:, 1]): reads_type = "A" else: reads_type = "N" return reads_type, donors_skipped, acceptors_skipped
[docs] def annotate_junction(self, read, result, db): """annotate junction reads and write results to file :param read: junction read return :class:`ce_detector.detector.Read` :type read: instance :param result: gene list used for annotation :type result: defaultdict[Any, Any] :param db: database of annotation file :type db: instance of file """ chrom = read.chrom start, end = read.start, read.end region = f"{CHROMS[chrom]}:{start}-{end}" # change chromosome gene_list = [] for gene in db.features_of_type(("gene"), limit=region): gene_list.append(gene.id) # new gene if gene.id not in result: # transcript for transcript in db.children( gene, level=1, featuretype=("primary_transcript", "transcript", "mRNA"), ): # mRNA may drop out # find all position of introns for every gene known junctions for junction in db.interfeatures( db.children( transcript, level=1, featuretype="exon", order_by="start", ), new_featuretype="intron", ): result[gene.id].append([junction.start, junction.end]) if ( len(result[gene.id]) >= 1 ): # drop gene with only one exon in terms of number of intron result[gene.id] = np.unique(result[gene.id], axis=0) else: result.pop(gene.id) gene_list.pop() # annotate junctions reads for gene in gene_list: junction_list = result[gene] reads_type, donors_skipped, acceptors_skipped = self.detect_property( start, end, junction_list, ) read.information.append( [reads_type, donors_skipped, acceptors_skipped, gene], ) if self.output: self.output.write( f"{read}\t{reads_type}\t{donors_skipped}\t{acceptors_skipped}\t{gene}\n", )
[docs] @timethis(name="Junction Annotator", message=" ") def run(self, junctionmap, logger, verbose=False): """main function used to annotate junction reads pick all genes covered by one junction read and annotate all of them: type of slice, number of skipped donors and number of skipped acceptors """ result = defaultdict(list) # logger.info(f'Begin to annotate junctions!') for _index, read in enumerate(junctionmap): self.annotate_junction(read, result, self.database) if verbose and _index % 1000 == 0: logger.info(f"Chrom {junctionmap.chrom} {_index} Reads") return junctionmap