import pyGeno.importation.Genomes as PG
import pyGeno.importation.SNPs as PS
from pyGeno.tools.io import printf
import os, tempfile, json
import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse
import pyGeno.configuration as conf
this_dir, this_filename = os.path.split(__file__)
[docs]def listRemoteDatawraps(location = conf.pyGeno_REMOTE_LOCATION) :
"""Lists all the datawraps availabe from a remote a remote location."""
print(location)
loc = location + "/datawraps.json"
response = urllib.request.urlopen(loc)
js = json.loads(response.read())
return js
[docs]def printRemoteDatawraps(location = conf.pyGeno_REMOTE_LOCATION) :
"""
print all available datawraps from a remote location the location must have a datawraps.json in the following format::
{
"Ordered": {
"Reference genomes": {
"Human" : ["GRCh37.75", "GRCh38.78"],
"Mouse" : ["GRCm38.78"]
},
"SNPs":{
}
},
"Flat":{
"Reference genomes": {
"GRCh37.75": "Human.GRCh37.75.tar.gz",
"GRCh38.78": "Human.GRCh37.75.tar.gz",
"GRCm38.78": "Mouse.GRCm38.78.tar.gz"
},
"SNPs":{
}
}
"""
l = listRemoteDatawraps(location)
printf("Available datawraps for bootstraping\n")
print(json.dumps(l["Ordered"], sort_keys=True, indent=4, separators=(',', ': ')))
def _DW(name, url) :
packageDir = tempfile.mkdtemp(prefix = "pyGeno_remote_")
printf("~~~:>\n\tDownloading datawrap: %s..." % name)
finalFile = os.path.normpath('%s/%s' %(packageDir, name))
urllib.request.urlretrieve (url, finalFile)
printf('\tdone.\n~~~:>')
return finalFile
[docs]def importRemoteGenome(name, batchSize = 100) :
"""Import a genome available from http://pygeno.iric.ca (might work)."""
try :
dw = listRemoteDatawraps()["Flat"]["Reference genomes"][name]
except AttributeError :
raise AttributeError("There's no remote genome datawrap by the name of: '%s'" % name)
finalFile = _DW(name, dw["url"])
PG.importGenome(finalFile, batchSize)
[docs]def importRemoteSNPs(name) :
"""Import a SNP set available from http://pygeno.iric.ca (might work)."""
try :
dw = listRemoteDatawraps()["Flat"]["SNPs"]
except AttributeError :
raise AttributeError("There's no remote genome datawrap by the name of: '%s'" % name)
finalFile = _DW(name, dw["url"])
PS.importSNPs(finalFile)
[docs]def listDatawraps() :
"""Lists all the datawraps pyGeno comes with"""
l = {"Genomes" : [], "SNPs" : []}
for f in os.listdir(os.path.join(this_dir, "bootstrap_data/genomes")) :
if f.find(".tar.gz") > -1 :
l["Genomes"].append(f)
for f in os.listdir(os.path.join(this_dir, "bootstrap_data/SNPs")) :
if f.find(".tar.gz") > -1 :
l["SNPs"].append(f)
return l
[docs]def printDatawraps() :
"""print all available datawraps for bootstraping"""
l = listDatawraps()
printf("Available datawraps for boostraping\n")
for k, v in l.items() :
printf(k)
printf("~"*len(k) + "|")
for vv in v :
printf(" "*len(k) + "|" + "~~~:> " + vv)
printf('\n')
[docs]def importGenome(name, batchSize = 100) :
"""Import a genome shipped with pyGeno. Most of the datawraps only contain URLs towards data provided by third parties."""
path = os.path.join(this_dir, "bootstrap_data", "genomes/" + name)
PG.importGenome(path, batchSize)
[docs]def importSNPs(name) :
"""Import a SNP set shipped with pyGeno. Most of the datawraps only contain URLs towards data provided by third parties."""
path = os.path.join(this_dir, "bootstrap_data", "SNPs/" + name)
PS.importSNPs(path)