import time, warnings
from . import configuration as conf
from rabaDB.rabaSetup import *
from rabaDB.Raba import *
from rabaDB.filters import RabaQuery
def nosave() :
    """Stand-in for a save method on objects that must not be persisted.

    Takes no arguments and never returns.

    Raises:
        ValueError: always.
    """
    raise ValueError('You can only save object that are linked to reference genomes')
class pyGenoRabaObject(Raba) :
    """Abstract base of pyGeno's persistent (rabaDB) classes.

    pyGeno uses rabaDB to persistently store data. Most persistent
    objects have classes that inherit from this one (Genome_Raba,
    Chromosome_Raba, Gene_Raba, Protein_Raba, Exon_Raba). These classes
    are not meant to be accessed directly. Users manipulate wrappers
    such as: Genome, Chromosome etc... pyGenoRabaObject extends
    the Raba class by adding a function _curate that is called just
    before saving. This class is to be considered abstract, and is not
    meant to be instantiated."""

    _raba_namespace = conf.pyGeno_RABA_NAMESPACE
    _raba_abstract = True # not saved in db by default

    def __init__(self) :
        """Refuse direct instantiation of the abstract base.

        Raises:
            TypeError: if the object being built is a pyGenoRabaObject
                itself rather than an instance of a subclass.
        """
        # Compare the *type* of the instance: `self` is an instance and can
        # never be identical to the class object, so `self is cls` never fires.
        if type(self) is pyGenoRabaObject :
            raise TypeError("This class is abstract")

    def _curate(self) :
        """Last operations performed before saving, must be implemented in child.

        Raises:
            TypeError: always, in this base implementation.
        """
        raise TypeError("This method is abstract and should be implemented in child")

    def save(self) :
        """Calls _curate() before performing a normal rabaDB lazy save
        (saving only occurs if the object has been modified)"""
        # Only curate objects that report a modification; the actual write is
        # delegated to Raba.save().
        if self.mutated() :
            self._curate()
        Raba.save(self)
class RLWrapper(object) :
    """A wrapper for rabalists that replaces raba objects by pyGeno Object.

    Indexing returns ``listObjectType`` instances built around the elements
    of ``rl``; any attribute not defined here is looked up on ``rl`` itself.

    Args:
        rabaObj: owner of the list; its ``bagKey`` attribute is handed to
            every object built by ``__getitem__``.
        listObjectType: callable accepting a ``wrapped_object_and_bag``
            keyword argument (a 2-tuple of element and bag key).
        rl: the underlying list being wrapped.
    """

    def __init__(self, rabaObj, listObjectType, rl) :
        self.rabaObj = rabaObj
        self.rl = rl
        self.listObjectType = listObjectType

    def __getitem__(self, i) :
        """Return element ``i`` of the underlying list, wrapped in ``listObjectType``."""
        return self.listObjectType(wrapped_object_and_bag = (self.rl[i], self.rabaObj.bagKey))

    def __len__(self) :
        """Return the length of the underlying list.

        Defined explicitly because ``len()`` looks special methods up on the
        type and therefore never reaches the ``__getattr__`` delegation
        below. As with the wrapped list, an empty wrapper is falsy.
        """
        return len(self.rl)

    def __getattr__(self, name) :
        """Delegate every attribute not found on the wrapper to the underlying list."""
        # Go through object.__getattribute__ so that a missing 'rl' raises
        # AttributeError instead of re-entering __getattr__.
        rl = object.__getattribute__(self, 'rl')
        return getattr(rl, name)
class pyGenoRabaObjectWrapper(object, metaclass=pyGenoRabaObjectWrapper_metaclass) :
    """All the wrapper classes such as Genome and Chromosome inherit
    from this class. It has most of what makes pyGeno useful, such as
    get(), count(), ensureGlobalIndex(). This class is to be considered
    abstract, and is not meant to be instantiated.

    Class attributes:
        _wrapped_class: the raba class wrapped by a concrete subclass.
        _bags: maps a bag key to a dict of {(raba class name, raba_id): wrapper}.
            Entries are added in __init__; nothing in this class removes them.
    """

    _wrapped_class = None
    _bags = {}

    # Instance attributes assigned in __init__ and read by __getattr__.
    _INTERNAL_ATTRIBUTES = (
        'wrapped_object', 'bagKey', '_load_sequencesTriggers',
        'loadSequences', 'loadData', 'loadBinarySequences',
    )

    def __init__(self, wrapped_object_and_bag = (), *args, **kwargs) :
        """Wrap an existing raba object, or create a new one.

        Args:
            wrapped_object_and_bag: either ``()`` or a pair
                ``(raba object, bag key)``. With a pair, the wrapper joins the
                existing bag. With ``()``, a new wrapped object is built from
                ``*args``/``**kwargs`` and a bag keyed by the current time is used.

        Raises:
            TypeError: if pyGenoRabaObjectWrapper itself is instantiated.
            AssertionError: if the given raba object's ``_rabaClass`` is not
                this wrapper's ``_wrapped_class``.
        """
        # `self` is an instance and is never identical to the class, so the
        # abstract guard has to compare the type.
        if type(self) is pyGenoRabaObjectWrapper :
            raise TypeError("This class is abstract")

        if wrapped_object_and_bag != () :
            assert wrapped_object_and_bag[0]._rabaClass is self._wrapped_class
            self.wrapped_object = wrapped_object_and_bag[0]
            self.bagKey = wrapped_object_and_bag[1]
            pyGenoRabaObjectWrapper._bags[self.bagKey][self._getObjBagKey(self.wrapped_object)] = self
        else :
            self.wrapped_object = self._wrapped_class(*args, **kwargs)
            self.bagKey = time.time()
            # setdefault: two root objects created on the same clock reading
            # share a bag instead of the second one emptying the first's.
            pyGenoRabaObjectWrapper._bags.setdefault(self.bagKey, {})
            pyGenoRabaObjectWrapper._bags[self.bagKey][self._getObjBagKey(self.wrapped_object)] = self

        self._load_sequencesTriggers = set()
        self.loadSequences = True
        self.loadData = True
        self.loadBinarySequences = True

    def _getObjBagKey(self, obj) :
        """pyGeno objects are kept in bags to ensure that reference
        objects are loaded only once. This function returns the key of
        ``obj`` inside a bag: ``(raba class name, raba_id)``"""
        return (obj._rabaClass.__name__, obj.raba_id)

    def _makeLoadQuery(self, objectType, *args, **coolArgs) :
        """Build the RabaQuery used by count(), get() and iterGet().

        The query targets ``objectType._wrapped_class``. A keyword filter named
        after this wrapper's raba class (without its last 5 characters) and
        valued with the wrapped object is added to ``coolArgs``.

        If the first positional argument is a list, each dict it contains is
        added as its own filter; otherwise ``*args`` and ``**coolArgs`` are
        added as a single filter.
        """
        # conf.db.enableDebug(True)
        f = RabaQuery(objectType._wrapped_class, namespace = self._wrapped_class._raba_namespace)
        coolArgs[self._wrapped_class.__name__[:-5]] = self.wrapped_object #[:-5] removes _Raba from class name

        if len(args) > 0 and type(args[0]) is list :
            # NOTE(review): on this path coolArgs (including the restriction to
            # the wrapped object set just above) is not passed to addFilter,
            # and non-dict list elements are skipped -- confirm this is intended.
            for a in args[0] :
                if type(a) is dict :
                    f.addFilter(**a)
        else :
            f.addFilter(*args, **coolArgs)

        return f

    def _iterQueryResults(self, objectType, *args, **coolArgs) :
        """Run the load query and yield its results one by one, wrapped in
        ``objectType`` when that is a pyGenoRabaObjectWrapper subclass."""
        for e in self._makeLoadQuery(objectType, *args, **coolArgs).run(gen=True) :
            if issubclass(objectType, pyGenoRabaObjectWrapper) :
                yield objectType(wrapped_object_and_bag = (e, self.bagKey))
            else :
                yield e

    def count(self, objectType, *args, **coolArgs) :
        """Returns the number of elements satisfying the query"""
        return self._makeLoadQuery(objectType, *args, **coolArgs).count()

    def get(self, objectType, *args, gen=False, **coolArgs) :
        """Raba Magic inside. This is the function that you use for
        querying pyGeno's DB.

        ``objectType`` must be a class exposing ``_wrapped_class`` (a wrapper
        class such as Gene), not its name as a string.

        Usage examples:

        * myGenome.get(Gene, name = 'TPST2')
        * myGene.get(Protein, id = 'ENSID...')
        * myGenome.get(Transcript, {'start >' : x, 'end <' : y})

        Args:
            gen: keyword only. If true, return the generator made by
                iterGet(); otherwise return a list.

        A DeprecationWarning is issued on every call.
        """
        warnings.warn("At some point in the future 'get' will be returning a generator by default (gen=True).\n" +\
            "If you want to use the output in a list format, you can just wrap your call with list().\n" +\
            "These changes are in conformity with python3 as opposed to the now-deprecated python2.", DeprecationWarning)

        if gen :
            return self.iterGet(objectType, *args, **coolArgs)
        return list(self._iterQueryResults(objectType, *args, **coolArgs))

    def iterGet(self, objectType, *args, **coolArgs) :
        """Same as get. But returns the elements one by one, much more efficient for large outputs.

        This is a generator: the DeprecationWarning is issued, and the query
        built, when iteration starts.
        """
        warnings.warn("At some point in the future 'iterGet' will stop existing and will get replaced by 'get'.\n" +\
            "If you have functions still using the iter method, you should start changing them now.\n" +\
            "These changes are in conformity with python3 as opposed to the now-deprecated python2.", DeprecationWarning)

        yield from self._iterQueryResults(objectType, *args, **coolArgs)

    #~ def ensureIndex(self, fields) :
        #~ """
        #~ Warning: May not work on some systems, see ensureGlobalIndex
        #~
        #~ Creates a partial index on self (if it does not exist).
        #~ Ex: myTranscript.ensureIndex('name')"""
        #~
        #~ where, whereValues = '%s=?' %(self._wrapped_class.__name__[:-5]), self.wrapped_object
        #~ self._wrapped_class.ensureIndex(fields, where, (whereValues,))

    #~ def dropIndex(self, fields) :
        #~ """Warning: May not work on some systems, see dropGlobalIndex
        #~
        #~ Drops a partial index on self. Ex: myTranscript.dropIndex('name')"""
        #~ where, whereValues = '%s=?' %(self._wrapped_class.__name__[:-5]), self.wrapped_object
        #~ self._wrapped_class.dropIndex(fields, where, (whereValues,))

    def __getattr__(self, name) :
        """If a wrapper does not have a specific field, pyGeno will
        look for it in the wrapped_object.

        Before doing so, names registered in ``_load_sequencesTriggers`` and
        names starting with ``bin_`` trigger the matching lazy loader once,
        after which the lookup is retried on the wrapper. Raba objects found
        on the wrapped object are returned wrapped, reusing the wrapper
        already stored in this object's bag when there is one.

        Raises:
            AttributeError: for 'save' and 'delete', and for the wrapper's own
                instance attributes when they have not been set.
        """
        # print "pyGenoObjectBases __getattr__ : " + name + " from " + str(type(self))
        # __getattr__ only runs when normal lookup failed. If one of the
        # attributes read below is itself missing, reading it here would
        # re-enter __getattr__ without end.
        if name in pyGenoRabaObjectWrapper._INTERNAL_ATTRIBUTES :
            raise AttributeError(name)

        if name == 'save' or name == 'delete' :
            raise AttributeError("You can't delete or save an object from wrapper, try .wrapped_object.delete()/save()")

        if name in self._load_sequencesTriggers and self.loadSequences :
            self.loadSequences = False
            self._load_sequences()
            return getattr(self, name)

        if name in self._load_sequencesTriggers and self.loadData :
            self.loadData = False
            self._load_data()
            return getattr(self, name)

        if name[:4] == 'bin_' and self.loadBinarySequences :
            # Clear the flag this branch tests; otherwise a loader that does
            # not set `name` makes the retry below recurse without bound.
            self.loadBinarySequences = False
            self.updateBinarySequence = False
            self._load_bin_sequence()
            return getattr(self, name)

        attr = getattr(self.wrapped_object, name)
        if isRabaObject(attr) :
            attrKey = self._getObjBagKey(attr)
            if attrKey in pyGenoRabaObjectWrapper._bags[self.bagKey] :
                retObj = pyGenoRabaObjectWrapper._bags[self.bagKey][attrKey]
            else :
                wCls = pyGenoRabaObjectWrapper_metaclass._wrappers[attr._rabaClass]
                retObj = wCls(wrapped_object_and_bag = (attr, self.bagKey))
            return retObj
        return attr

    @classmethod
    def getIndexes(cls) :
        """Returns a list of indexes attached to the object's class. Ex
        Transcript.getIndexes()"""
        return cls._wrapped_class.getIndexes()

    @classmethod
    def flushIndexes(cls) :
        """Drops all the indexes attached to the object's class. Ex
        Transcript.flushIndexes()"""
        return cls._wrapped_class.flushIndexes()

    @classmethod
    def help(cls) :
        """Returns the available fields for queries, as given by the wrapped
        class's help() with every "_Raba" removed. Ex Transcript.help()"""
        return cls._wrapped_class.help().replace("_Raba", "")

    @classmethod
    def ensureGlobalIndex(cls, fields) :
        """Add a GLOBAL index to the db to speed up lookups. Fields can be a
        list of fields for Multi-Column Indices or simply the name of a
        single field. A global index is an index on the entire type.
        A global index on 'Transcript' on field 'name', will index the names for all the transcripts in the database"""
        cls._wrapped_class.ensureIndex(fields)

    @classmethod
    def dropGlobalIndex(cls, fields) :
        """Drops an index, the opposite of ensureGlobalIndex()"""
        cls._wrapped_class.dropIndex(fields)

    def getSequencesData(self) :
        """Abstract: loads non binary sequences; raises NotImplementedError
        unless overridden in a child class."""
        raise NotImplementedError("This fct loads non binary sequences and should be implemented in child if needed")

    def _load_sequences(self) :
        """Lazy loader called by __getattr__ the first time a name in
        ``_load_sequencesTriggers`` is requested. Delegates to _load_data()."""
        self._load_data()

    def _load_data(self) :
        """Abstract lazy loader; raises NotImplementedError unless overridden
        in a child class."""
        raise NotImplementedError("This fct loads non binary sequences and should be implemented in child if needed")

    def _load_bin_sequence(self) :
        """Same as _load_sequences(), but loads binary sequences. Called by
        __getattr__ the first time a ``bin_*`` attribute is requested."""
        raise NotImplementedError("This fct loads binary sequences and should be implemented in child if needed")