from . import configuration as conf
from .pyGenoObjectBases import *
import rabaDB.fields as rf
from .tools import UsefulFunctions as uf
from .Exon import *
from .SNP import SNP_INDEL
from .tools.BinarySequence import NucBinarySequence
[docs]class Transcript_Raba(pyGenoRabaObject) :
"""The wrapped Raba object that really holds the data"""
_raba_namespace = conf.pyGeno_RABA_NAMESPACE
id = rf.Primitive()
name = rf.Primitive()
length = rf.Primitive()
start = rf.Primitive()
end = rf.Primitive()
coding = rf.Primitive()
biotype = rf.Primitive()
selenocysteine = rf.RList()
genome = rf.RabaObject('Genome_Raba')
chromosome = rf.RabaObject('Chromosome_Raba')
gene = rf.RabaObject('Gene_Raba')
protein = rf.RabaObject('Protein_Raba')
exons = rf.Relation('Exon_Raba')
def _curate(self) :
if self.name != None :
self.name = self.name.upper()
self.length = abs(self.end - self.start)
have_CDS_start = False
have_CDS_end = False
for exon in self.exons :
if exon.CDS_start is not None :
have_CDS_start = True
if exon.CDS_end is not None :
have_CDS_end = True
if have_CDS_start and have_CDS_end :
self.coding = True
else :
self.coding = False
[docs]class Transcript(pyGenoRabaObjectWrapper) :
"""The wrapper for playing with Transcripts"""
_wrapped_class = Transcript_Raba
def __init__(self, *args, **kwargs) :
pyGenoRabaObjectWrapper.__init__(self, *args, **kwargs)
self.exons = RLWrapper(self, Exon, self.wrapped_object.exons)
self._load_sequencesTriggers = set(["UTR5", "UTR3", "cDNA", "sequence", "data"])
self.exonsDict = {}
def _makeLoadQuery(self, objectType, *args, **coolArgs) :
if issubclass(objectType, SNP_INDEL) :
# conf.db.enableDebug(True)
f = RabaQuery(objectType, namespace = self._wrapped_class._raba_namespace)
coolArgs['species'] = self.genome.species
coolArgs['chromosomeNumber'] = self.chromosome.number
coolArgs['start >='] = self.start
coolArgs['start <'] = self.end
if len(args) > 0 and type(args[0]) is list :
for a in args[0] :
if type(a) is dict :
f.addFilter(**a)
else :
f.addFilter(*args, **coolArgs)
return f
return pyGenoRabaObjectWrapper._makeLoadQuery(self, objectType, *args, **coolArgs)
def _load_data(self) :
def getV(k) :
return pyGenoRabaObjectWrapper.__getattribute__(self, k)
def setV(k, v) :
return pyGenoRabaObjectWrapper.__setattr__(self, k, v)
self.data = []
cDNA = []
UTR5 = []
UTR3 = []
exons = []
prime5 = True
for ee in self.wrapped_object.exons :
e = pyGenoRabaObjectWrapper_metaclass._wrappers[Exon_Raba](wrapped_object_and_bag = (ee, getV('bagKey')))
self.exonsDict[(e.start, e.end)] = e
exons.append(e)
self.data.extend(e.data)
if e.hasCDS() :
UTR5.append(''.join(e.UTR5))
if self.selenocysteine is not None:
for position in self.selenocysteine:
if e.CDS_start <= position <= e.CDS_end:
if e.strand == '+':
ajusted_position = position - e.CDS_start
else:
ajusted_position = e.CDS_end - position - 3
if e.CDS[ajusted_position] == 'T':
e.CDS = list(e.CDS)
e.CDS[ajusted_position] = '!'
if len(cDNA) == 0 and e.frame != 0 :
e.CDS = e.CDS[e.frame:]
if e.strand == '+':
e.CDS_start += e.frame
else:
e.CDS_end -= e.frame
if len(e.CDS):
cDNA.append(''.join(e.CDS))
UTR3.append(''.join(e.UTR3))
prime5 = False
else :
if prime5 :
UTR5.append(''.join(e.data))
else :
UTR3.append(''.join(e.data))
sequence = ''.join(self.data)
cDNA = ''.join(cDNA)
UTR5 = ''.join(UTR5)
UTR3 = ''.join(UTR3)
setV('exons', exons)
setV('sequence', sequence)
setV('cDNA', cDNA)
setV('UTR5', UTR5)
setV('UTR3', UTR3)
if len(cDNA) > 0 and len(cDNA) % 3 != 0 :
setV('flags', {'DUBIOUS' : True, 'cDNA_LEN_NOT_MULT_3': True})
else :
setV('flags', {'DUBIOUS' : False, 'cDNA_LEN_NOT_MULT_3': False})
def _load_bin_sequence(self) :
self.bin_sequence = NucBinarySequence(self.sequence)
self.bin_UTR5 = NucBinarySequence(self.UTR5)
self.bin_cDNA = NucBinarySequence(self.cDNA)
self.bin_UTR3 = NucBinarySequence(self.UTR3)
[docs] def getNucleotideCodon(self, cdnaX1) :
"""Returns the entire codon of the nucleotide at pos cdnaX1 in the cdna, and the position of that nocleotide in the codon"""
return uf.getNucleotideCodon(self.cDNA, cdnaX1)
[docs] def getCodon(self, i) :
"""returns the ith codon"""
return self.getNucleotideCodon(i*3)[0]
[docs] def iterCodons(self) :
"""iterates through the codons"""
for i in range(len(self.cDNA)/3) :
yield self.getCodon(i)
[docs] def find(self, sequence) :
"""return the position of the first occurance of sequence"""
return self.bin_sequence.find(sequence)
[docs] def findAll(self, sequence):
"""Returns a list of all positions where sequence was found"""
return self.bin_sequence.findAll(sequence)
[docs] def findIncDNA(self, sequence) :
"""return the position of the first occurance of sequence"""
return self.bin_cDNA.find(sequence)
[docs] def findAllIncDNA(self, sequence) :
"""Returns a list of all positions where sequence was found in the cDNA"""
return self.bin_cDNA.findAll(sequence)
[docs] def getcDNALength(self) :
"""returns the length of the cDNA"""
return len(self.cDNA)
[docs] def findInUTR5(self, sequence) :
"""return the position of the first occurance of sequence in the 5'UTR"""
return self.bin_UTR5.find(sequence)
[docs] def findAllInUTR5(self, sequence) :
"""Returns a list of all positions where sequence was found in the 5'UTR"""
return self.bin_UTR5.findAll(sequence)
[docs] def getUTR5Length(self) :
"""returns the length of the 5'UTR"""
return len(self.bin_UTR5)
[docs] def findInUTR3(self, sequence) :
"""return the position of the first occurance of sequence in the 3'UTR"""
return self.bin_UTR3.find(sequence)
[docs] def findAllInUTR3(self, sequence) :
"""Returns a lits of all positions where sequence was found in the 3'UTR"""
return self.bin_UTR3.findAll(sequence)
[docs] def getUTR3Length(self) :
"""returns the length of the 3'UTR"""
return len(self.bin_UTR3)
[docs] def getNbCodons(self) :
"""returns the number of codons in the transcript"""
return len(self.cDNA)/3
def __getattribute__(self, name) :
return pyGenoRabaObjectWrapper.__getattribute__(self, name)
def __getitem__(self, i) :
return self.sequence[i]
def __len__(self) :
return len(self.sequence)
def __str__(self) :
return """Transcript, id: %s, name: %s > %s""" %(self.id, self.name, str(self.gene))