diff --git a/allel/io/fasta.py b/allel/io/fasta.py index 80321aa7..6e3fbbc8 100644 --- a/allel/io/fasta.py +++ b/allel/io/fasta.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import numpy as np +from allel.util import resolve_path def write_fasta(path, sequences, names, mode='w', width=80): @@ -35,8 +36,7 @@ def write_fasta(path, sequences, names, mode='w', width=80): # force binary mode mode = 'ab' if 'a' in mode else 'wb' - # write to file - with open(path, mode=mode) as fasta: + def save_as_fasta(fasta): for name, sequence in zip(names, sequences): # force bytes if isinstance(name, str): @@ -46,3 +46,11 @@ def write_fasta(path, sequences, names, mode='w', width=80): for i in range(0, sequence.size, width): line = sequence[i:i+width].tostring() + b'\n' fasta.write(line) + + # write to file + path = resolve_path(path) + if hasattr(path, 'write'): + save_as_fasta(path) + else: + with open(path, mode=mode) as f: + save_as_fasta(f) diff --git a/allel/io/gff.py b/allel/io/gff.py index cda4493f..c8961e1d 100644 --- a/allel/io/gff.py +++ b/allel/io/gff.py @@ -2,6 +2,7 @@ import subprocess import gzip from urllib.parse import unquote_plus +from allel.util import resolve_path import numpy as np @@ -31,7 +32,7 @@ def iter_gff3(path, attributes=None, region=None, score_fill=-1, Parameters ---------- - path : string + path : string, pathlib.Path or any file-like object Path to input file. attributes : list of strings, optional List of columns to extract from the "attributes" field. 
@@ -64,14 +65,19 @@ def iter_gff3(path, attributes=None, region=None, score_fill=-1, attributes_fill = [attributes_fill] * len(attributes) # open input stream - if region is not None: - cmd = [tabix, path, region] - buffer = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout - elif path.endswith('.gz') or path.endswith('.bgz'): - buffer = gzip.open(path, mode='rb') - else: - buffer = open(path, mode='rb') + # a str path is opened here; any other object is used directly as the input stream + path = resolve_path(path) + if isinstance(path, str): + if region is not None: + cmd = [tabix, path, region] + buffer = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout + elif path.endswith('.gz') or path.endswith('.bgz'): + buffer = gzip.open(path, mode='rb') + else: + buffer = open(path, mode='rb') + else: + buffer = path try: for line in buffer: if line[0] == b'>': @@ -123,7 +129,7 @@ def gff3_to_recarray(path, attributes=None, region=None, score_fill=-1, Parameters ---------- - path : string + path : string, pathlib.Path or any file-like object Path to input file. attributes : list of strings, optional List of columns to extract from the "attributes" field. @@ -180,7 +186,7 @@ def gff3_to_dataframe(path, attributes=None, region=None, score_fill=-1, Parameters ---------- - path : string + path : string, pathlib.Path or any file-like object Path to input file. attributes : list of strings, optional List of columns to extract from the "attributes" field. 
diff --git a/allel/io/vcf_read.py b/allel/io/vcf_read.py index 9eae0261..4677e7e5 100644 --- a/allel/io/vcf_read.py +++ b/allel/io/vcf_read.py @@ -15,8 +15,7 @@ import subprocess import textwrap from collections import OrderedDict - - +from allel.util import resolve_path import numpy as np @@ -434,8 +433,9 @@ def vcf_to_npz(input, output, """ + output = resolve_path(output) # guard condition - if not overwrite and os.path.exists(output): + if not overwrite and isinstance(output, str) and os.path.exists(output): raise ValueError('file exists at path %r; use overwrite=True to replace' % output) # read all data into memory @@ -1012,6 +1012,9 @@ def _setup_input_stream(input, region=None, tabix=None, buffer_size=DEFAULT_BUFF # obtain a file-like object close = False + + input = resolve_path(input) + if isinstance(input, str) and input.endswith('gz'): if region and tabix and os.name != 'nt': diff --git a/allel/io/vcf_write.py b/allel/io/vcf_write.py index 7c7cef44..8cdf4c56 100644 --- a/allel/io/vcf_write.py +++ b/allel/io/vcf_write.py @@ -5,10 +5,10 @@ from operator import itemgetter import logging - import numpy as np +from allel.util import resolve_path import allel @@ -50,12 +50,19 @@ def write_vcf(path, callset, rename=None, number=None, description=None, names, callset = normalize_callset(callset) - with open(path, 'w') as vcf_file: + def write_file(vcf_file): if write_header: write_vcf_header(vcf_file, names, callset=callset, rename=rename, number=number, description=description) write_vcf_data(vcf_file, names, callset=callset, rename=rename, fill=fill) + path = resolve_path(path) + if hasattr(path, 'write'): + write_file(path) + else: + with open(path, 'w') as f: + write_file(f) + def write_vcf_header(vcf_file, names, callset, rename, number, description): if rename is None: @@ -65,6 +72,7 @@ def write_vcf_header(vcf_file, names, callset, rename, number, description): if description is None: description = dict() + vcf_file = resolve_path(vcf_file) # write file 
format version print('##fileformat=VCFv4.1', file=vcf_file) @@ -153,6 +161,7 @@ def write_vcf_data(vcf_file, names, callset, rename, fill): if fill is None: fill = dict() + vcf_file = resolve_path(vcf_file) # find the fixed columns, allowing for case insensitive naming in the # input array col_chrom = None diff --git a/allel/util.py b/allel/util.py index fb85807e..7ca882d3 100644 --- a/allel/util.py +++ b/allel/util.py @@ -9,6 +9,20 @@ import numpy as np +def resolve_path(path): + """Normalise a path argument, leaving str, bytes and file-like objects untouched. + + :param path: str, bytes, os.PathLike (e.g. pathlib.Path) or a file-like object + :return: path unchanged if it is str, bytes or has a read or write attribute, + otherwise the result of os.fspath(path) + :raises TypeError: from os.fspath if path is none of the accepted kinds + """ + is_file_like = hasattr(path, 'read') or hasattr(path, 'write') + if is_file_like or isinstance(path, (str, bytes)): + return path + return os.fspath(path) + + @contextmanager def ignore_invalid(): err = np.seterr(invalid='ignore')