Source code for tools.parsers.CasavaTools

import gzip
import pyGeno.tools.UsefulFunctions as uf

[docs]class SNPsTxtEntry(object) : """A single entry in the casavas snps.txt file""" def __init__(self, lineNumber, snpsTxtFile) : self.snpsTxtFile = snpsTxtFile self.lineNumber = lineNumber self.values = {} sl = str(snpsTxtFile.data[lineNumber]).replace('\t\t', '\t').split('\t') self.values['chromosomeNumber'] = sl[0].upper().replace('CHR', '') #first column: chro, second first of range (identical to second column) self.values['start'] = int(sl[2]) self.values['end'] = int(sl[2])+1 self.values['bcalls_used'] = sl[3] self.values['bcalls_filt'] = sl[4] self.values['ref'] = sl[5] self.values['QSNP'] = int(sl[6]) self.values['alleles'] = uf.encodePolymorphicNucleotide(sl[7]) #max_gt self.values['Qmax_gt'] = int(sl[8]) self.values['max_gt_poly_site'] = sl[9] self.values['Qmax_gt_poly_site'] = int(sl[10]) self.values['A_used'] = int(sl[11]) self.values['C_used'] = int(sl[12]) self.values['G_used'] = int(sl[13]) self.values['T_used'] = int(sl[14]) def __getitem__(self, fieldName): """Returns the value of field 'fieldName'""" return self.values[fieldName] def __setitem__(self, fieldName, value) : """Sets the value of field 'fieldName' to 'value' """ self.values[fieldName] = value def __str__(self): return str(self.values)
[docs]class SNPsTxtFile(object) : """ Represents a whole casava's snps.txt file:: f = SNPsTxtFile('snps.txt') for line in f : print(line['ref']) """ def __init__(self, fil, gziped = False) : self.reset() if not gziped : f = open(fil) else : f = gzip.open(fil, 'rt') for l in f : if l[0] != '#' : self.data.append(l) f.close()
[docs] def reset(self) : """Frees the file""" self.data = [] self.currentPos = 0
def __iter__(self) : self.currentPos = 0 return self def __next__(self) : if self.currentPos >= len(self) : raise StopIteration() v = self[self.currentPos] self.currentPos += 1 return v def __getitem__(self, i) : if self.data[i].__class__ is not SNPsTxtEntry : self.data[i] = SNPsTxtEntry(i, self) return self.data[i] def __len__(self) : return len(self.data)