Source code for bootstrap

import pyGeno.importation.Genomes as PG
import pyGeno.importation.SNPs as PS
from pyGeno.tools.io import printf
import os, tempfile, json
import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse
import pyGeno.configuration as conf

# Directory and file name of this module. The directory is the anchor used
# below to locate the "bootstrap_data" folder.
this_dir = os.path.dirname(__file__)
this_filename = os.path.basename(__file__)



def listRemoteDatawraps(location=conf.pyGeno_REMOTE_LOCATION):
    """List all the datawraps available from a remote location.

    Fetches ``<location>/datawraps.json`` and returns its decoded content.

    :param location: base URL of the remote repository. Defaults to
        ``conf.pyGeno_REMOTE_LOCATION``.
    :returns: the object decoded from the remote ``datawraps.json``.
    :raises urllib.error.URLError: if the index cannot be fetched.
    :raises ValueError: if the payload is not valid JSON.
    """
    print(location)
    loc = location + "/datawraps.json"
    # Use the response as a context manager so the underlying connection is
    # closed even when reading or JSON decoding fails.
    with urllib.request.urlopen(loc) as response:
        return json.loads(response.read())
def printRemoteDatawraps(location=conf.pyGeno_REMOTE_LOCATION):
    """Print every datawrap available from a remote location.

    Only the ``"Ordered"`` section of the remote index is printed, as
    indented JSON with sorted keys. The location must serve a
    ``datawraps.json`` in the following format::

        {
            "Ordered": {
                "Reference genomes": {
                    "Human" : ["GRCh37.75", "GRCh38.78"],
                    "Mouse" : ["GRCm38.78"]
                },
                "SNPs": {
                }
            },
            "Flat": {
                "Reference genomes": {
                    "GRCh37.75": "Human.GRCh37.75.tar.gz",
                    "GRCh38.78": "Human.GRCh37.75.tar.gz",
                    "GRCm38.78": "Mouse.GRCm38.78.tar.gz"
                },
                "SNPs": {
                }
            }
        }

    :param location: base URL of the remote repository. Defaults to
        ``conf.pyGeno_REMOTE_LOCATION``.
    """
    catalogue = listRemoteDatawraps(location)
    printf("Available datawraps for bootstraping\n")
    rendered = json.dumps(
        catalogue["Ordered"],
        indent=4,
        separators=(',', ': '),
        sort_keys=True,
    )
    print(rendered)
def _DW(name, url):
    """Download a datawrap into a fresh temporary directory.

    :param name: file name given to the downloaded datawrap; also shown in
        the progress message.
    :param url: address the datawrap is retrieved from.
    :returns: normalised path of the downloaded file.
    """
    # Each download gets its own "pyGeno_remote_*" directory.
    download_dir = tempfile.mkdtemp(prefix="pyGeno_remote_")
    printf("~~~:>\n\tDownloading datawrap: %s..." % name)
    destination = os.path.normpath('%s/%s' % (download_dir, name))
    urllib.request.urlretrieve(url, destination)
    printf('\tdone.\n~~~:>')
    return destination
def importRemoteGenome(name, batchSize=100):
    """Import a genome available from http://pygeno.iric.ca (might work).

    Looks *name* up under ``"Flat" -> "Reference genomes"`` in the remote
    index, downloads the datawrap found at its ``"url"`` entry and imports
    it with ``PG.importGenome``.

    :param name: name of the remote genome datawrap.
    :param batchSize: passed through unchanged to ``PG.importGenome``.
    :raises KeyError: if the remote index has no genome datawrap called
        *name*.
    """
    index = listRemoteDatawraps()
    try:
        dw = index["Flat"]["Reference genomes"][name]
    except KeyError:
        # A missing dictionary key raises KeyError, not AttributeError, so
        # this is the exception to intercept for the explanatory message.
        raise KeyError(
            "There's no remote genome datawrap by the name of: '%s'" % name
        ) from None

    finalFile = _DW(name, dw["url"])
    PG.importGenome(finalFile, batchSize)
def importRemoteSNPs(name):
    """Import a SNP set available from http://pygeno.iric.ca (might work).

    Looks *name* up under ``"Flat" -> "SNPs"`` in the remote index,
    downloads the datawrap found at its ``"url"`` entry and imports it with
    ``PS.importSNPs``.

    :param name: name of the remote SNPs datawrap.
    :raises KeyError: if the remote index has no SNPs datawrap called
        *name*.
    """
    index = listRemoteDatawraps()
    try:
        # Select the entry for *name*; without this index the "url" lookup
        # below would be applied to the whole SNPs table.
        dw = index["Flat"]["SNPs"][name]
    except KeyError:
        # A missing dictionary key raises KeyError, not AttributeError.
        raise KeyError(
            "There's no remote SNPs datawrap by the name of: '%s'" % name
        ) from None

    finalFile = _DW(name, dw["url"])
    PS.importSNPs(finalFile)
def listDatawraps():
    """List all the datawraps pyGeno comes with.

    :returns: a dict with the keys ``"Genomes"`` and ``"SNPs"``, each mapped
        to the list of file names containing ``".tar.gz"`` found in the
        matching ``bootstrap_data`` sub-folder next to this module.
    """
    def _archives_in(subfolder):
        # Keep only entries whose name contains ".tar.gz".
        folder = os.path.join(this_dir, subfolder)
        return [entry for entry in os.listdir(folder) if ".tar.gz" in entry]

    return {
        "Genomes": _archives_in("bootstrap_data/genomes"),
        "SNPs": _archives_in("bootstrap_data/SNPs"),
    }
def printDatawraps():
    """Print all the datawraps available for bootstrapping.

    Each category returned by ``listDatawraps`` is printed as a heading
    underlined with ``~``, followed by one line per datawrap.
    """
    shipped = listDatawraps()
    printf("Available datawraps for boostraping\n")
    for category, archives in shipped.items():
        width = len(category)
        printf(category)
        printf("~" * width + "|")
        # Entries are indented to the heading's width so the "|" lines up.
        bullet = " " * width + "|" + "~~~:> "
        for archive in archives:
            printf(bullet + archive)
        printf('\n')
def importGenome(name, batchSize=100):
    """Import a genome shipped with pyGeno.

    Most of the datawraps only contain URLs towards data provided by third
    parties.

    :param name: file name of the datawrap inside ``bootstrap_data/genomes``.
    :param batchSize: passed through unchanged to ``PG.importGenome``.
    """
    relative_name = "genomes/" + name
    datawrap_path = os.path.join(this_dir, "bootstrap_data", relative_name)
    PG.importGenome(datawrap_path, batchSize)
def importSNPs(name):
    """Import a SNP set shipped with pyGeno.

    Most of the datawraps only contain URLs towards data provided by third
    parties.

    :param name: file name of the datawrap inside ``bootstrap_data/SNPs``.
    """
    relative_name = "SNPs/" + name
    datawrap_path = os.path.join(this_dir, "bootstrap_data", relative_name)
    PS.importSNPs(datawrap_path)