Source code for tools.SegmentTree

import random, copy

[docs]def aux_insertTree(childTree, parentTree): """This a private (You shouldn't have to call it) recursive function that inserts a child tree into a parent tree.""" if childTree.x1 != None and childTree.x2 != None : parentTree.insert(childTree.x1, childTree.x2, childTree.name, childTree.referedObject) for c in childTree.children: aux_insertTree(c, parentTree)
[docs]def aux_moveTree(offset, tree): """This a private recursive (You shouldn't have to call it) function that translates tree(and it's children) to a given x1""" if tree.x1 != None and tree.x2 != None : tree.x1, tree.x2 = tree.x1+offset, tree.x2+offset for c in tree.children: aux_moveTree(offset, c)
[docs]class SegmentTree : """ Optimised genome annotations. A segment tree is an arborescence of segments. First position is inclusive, second exlusive, respectively refered to as x1 and x2. A segment tree has the following properties : * The root has no x1 or x2 (both set to None). * Segment are arrangend in an ascending order * For two segment S1 and S2 : [S2.x1, S2.x2[ C [S1.x1, S1.x2[ <=> S2 is a child of S1 Here's an example of a tree : * Root : 0-15 * ---->Segment : 0-12 * ------->Segment : 1-6 * ---------->Segment : 2-3 * ---------->Segment : 4-5 * ------->Segment : 7-8 * ------->Segment : 9-10 * ---->Segment : 11-14 * ------->Segment : 12-14 * ---->Segment : 13-15 Each segment can have a 'name' and a 'referedObject'. ReferedObject are objects are stored within the graph for future usage. These objects are always stored in lists. If referedObject is already a list it will be stored as is. """ def __init__(self, x1 = None, x2 = None, name = '', referedObject = [], father = None, level = 0) : if x1 > x2 : self.x1, self.x2 = x2, x1 else : self.x1, self.x2 = x1, x2 self.father = father self.level = level self.id = random.randint(0, 10**8) self.name = name self.children = [] self.referedObject = referedObject def __addChild(self, segmentTree, index = -1) : segmentTree.level = self.level + 1 if index < 0 : self.children.append(segmentTree) else : self.children.insert(index, segmentTree)
[docs] def insert(self, x1, x2, name = '', referedObject = []) : """Insert the segment in it's right place and returns it. If there's already a segment S as S.x1 == x1 and S.x2 == x2. S.name will be changed to 'S.name U name' and the referedObject will be appended to the already existing list""" if x1 > x2 : xx1, xx2 = x2, x1 else : xx1, xx2 = x1, x2 rt = None insertId = None childrenToRemove = [] for i in range(len(self.children)) : if self.children[i].x1 == xx1 and xx2 == self.children[i].x2 : self.children[i].name = self.children[i].name + ' U ' + name self.children[i].referedObject.append(referedObject) return self.children[i] if self.children[i].x1 <= xx1 and xx2 <= self.children[i].x2 : return self.children[i].insert(x1, x2, name, referedObject) elif xx1 <= self.children[i].x1 and self.children[i].x2 <= xx2 : if rt == None : if type(referedObject) is list : rt = SegmentTree(xx1, xx2, name, referedObject, self, self.level+1) else : rt = SegmentTree(xx1, xx2, name, [referedObject], self, self.level+1) insertId = i rt.__addChild(self.children[i]) self.children[i].father = rt childrenToRemove.append(self.children[i]) elif xx1 <= self.children[i].x1 and xx2 <= self.children[i].x2 : insertId = i break if rt != None : self.__addChild(rt, insertId) for c in childrenToRemove : self.children.remove(c) else : if type(referedObject) is list : rt = SegmentTree(xx1, xx2, name, referedObject, self, self.level+1) else : rt = SegmentTree(xx1, xx2, name, [referedObject], self, self.level+1) if insertId != None : self.__addChild(rt, insertId) else : self.__addChild(rt) return rt
[docs] def insertTree(self, childTree): """inserts childTree in the right position (regions will be rearanged to fit the organisation of self)""" aux_insertTree(childTree, self)
#~ def included_todo(self, x1, x2=None) : #~ "Returns all the segments where [x1, x2] is included""" #~ pass
[docs] def intersect(self, x1, x2 = None) : """Returns a list of all segments intersected by [x1, x2]""" def condition(x1, x2, tree) : #print self.id, tree.x1, tree.x2, x1, x2 if (tree.x1 != None and tree.x2 != None) and (tree.x1 <= x1 and x1 < tree.x2 or tree.x1 <= x2 and x2 < tree.x2) : return True return False if x2 == None : xx1, xx2 = x1, x1 elif x1 > x2 : xx1, xx2 = x2, x1 else : xx1, xx2 = x1, x2 c1 = self.__dichotomicSearch(xx1) c2 = self.__dichotomicSearch(xx2) if c1 == -1 or c2 == -1 : return [] if xx1 < self.children[c1].x1 : c1 -= 1 inter = self.__radiateDown(x1, x2, c1, condition) if self.children[c1].id == self.children[c2].id : inter.extend(self.__radiateUp(x1, x2, c2+1, condition)) else : inter.extend(self.__radiateUp(x1, x2, c2, condition)) ret = [] for c in inter : ret.extend(c.intersect(x1, x2)) inter.extend(ret) return inter
def __dichotomicSearch(self, x1) : r1 = 0 r2 = len(self.children)-1 pos = -1 while (r1 <= r2) : pos = (r1+r2)/2 val = self.children[pos].x1 if val == x1 : return pos elif x1 < val : r2 = pos -1 else : r1 = pos +1 return pos def __radiateDown(self, x1, x2, childId, condition) : "Radiates down: walks self.children downward until condition is no longer verifed or there's no childrens left " ret = [] i = childId while 0 <= i : if condition(x1, x2, self.children[i]) : ret.append(self.children[i]) else : break i -= 1 return ret def __radiateUp(self, x1, x2, childId, condition) : "Radiates uo: walks self.children upward until condition is no longer verifed or there's no childrens left " ret = [] i = childId while i < len(self.children): if condition(x1, x2, self.children[i]) : ret.append(self.children[i]) else : break i += 1 return ret
[docs] def emptyChildren(self) : """Kills of children""" self.children = []
[docs] def removeGaps(self) : """Remove all gaps between regions""" for i in range(1, len(self.children)) : if self.children[i].x1 > self.children[i-1].x2: aux_moveTree(self.children[i-1].x2-self.children[i].x1, self.children[i])
[docs] def getX1(self) : """Returns the starting position of the tree""" if self.x1 != None : return self.x1 return self.children[0].x1
[docs] def getX2(self) : """Returns the ending position of the tree""" if self.x2 != None : return self.x2 return self.children[-1].x2
[docs] def getIndexedLength(self) : """Returns the total length of indexed regions""" if self.x1 != None and self.x2 != None: return self.x2 - self.x1 else : if len(self.children) == 0 : return 0 else : l = self.children[0].x2 - self.children[0].x1 for i in range(1, len(self.children)) : l += self.children[i].x2 - self.children[i].x1 - max(0, self.children[i-1].x2 - self.children[i].x1) return l
[docs] def getFirstLevel(self) : """returns a list of couples (x1, x2) of all the first level indexed regions""" res = [] if len(self.children) > 0 : for c in self.children: res.append((c.x1, c.x2)) else : if self.x1 != None : res = [(self.x1, self.x2)] else : res = None return res
[docs] def flatten(self) : """Flattens the tree. The tree become a tree of depth 1 where overlapping regions have been merged together""" if len(self.children) > 1 : children = self.children self.emptyChildren() children[0].emptyChildren() x1 = children[0].x1 x2 = children[0].x2 refObjs = [children[0].referedObject] name = children[0].name for i in range(1, len(children)) : children[i].emptyChildren() if children[i-1] >= children[i] : x2 = children[i].x2 refObjs.append(children[i].referedObject) name += " U " + children[i].name else : if len(refObjs) == 1 : refObjs = refObjs[0] self.insert(x1, x2, name, refObjs) x1 = children[i].x1 x2 = children[i].x2 refObjs = [children[i].referedObject] name = children[i].name if len(refObjs) == 1 : refObjs = refObjs[0] self.insert(x1, x2, name, refObjs)
[docs] def move(self, newX1) : """Moves tree to a new starting position, updates x1s of children""" if self.x1 != None and self.x2 != None : offset = newX1-self.x1 aux_moveTree(offset, self) elif len(self.children) > 0 : offset = newX1-self.children[0].x1 aux_moveTree(offset, self)
def __str__(self) : strRes = self.__str() offset = '' for i in range(self.level+1) : offset += '\t' for c in self.children : strRes += '\n'+offset+'-->'+str(c) return strRes def __str(self) : if self.x1 == None : if len(self.children) > 0 : return "Root: %d-%d, name: %s, id: %d, obj: %s" %(self.children[0].x1, self.children[-1].x2, self.name, self.id, repr(self.referedObject)) else : return "Root: EMPTY , name: %s, id: %d, obj: %s" %(self.name, self.id, repr(self.referedObject)) else : return "Segment: %d-%d, name: %s, id: %d, father id: %d, obj: %s" %(self.x1, self.x2, self.name, self.id, self.father.id, repr(self.referedObject)) def __len__(self) : "returns the size of the complete indexed region" if self.x1 != None and self.x2 != None : return self.x2-self.x1 else : return self.children[-1].x2 - self.children[0].x1 def __repr__(self): return 'Segment Tree, id:%s, father id:%s, (x1, x2): (%s, %s)' %(self.id, self.father.id, self.x1, self.x2)
if __name__== "__main__" : s = SegmentTree() s.insert(5, 10, 'region 1') s.insert(8, 12, 'region 2') s.insert(5, 8, 'region 3') s.insert(34, 40, 'region 4') s.insert(35, 38, 'region 5') s.insert(36, 37, 'region 6', 'aaa') s.insert(36, 37, 'region 6', 'aaa2') print("Tree:") print(s) print("indexed length", s.getIndexedLength()) print("removing gaps and adding region 7 : [13-37[") s.removeGaps() #s.insert(13, 37, 'region 7') print(s) print("indexed length", s.getIndexedLength()) #print "intersections" #for c in [6, 10, 14, 1000] : # print c, s.intersect(c) print("Move") s.move(0) print(s) print("indexed length", s.getIndexedLength())