From b3a6abc2f22fcbfe65f3b5d5ee9ffe192ea04b10 Mon Sep 17 00:00:00 2001
From: Soma Mbadiwe <somasystemsng@gmail.com>
Date: Tue, 28 Jul 2020 00:41:28 -0400
Subject: [PATCH 1/2] Fixed Issue #271: Added support for functions taking a
 filepath

---
 allel/io/fasta.py     | 2 +-
 allel/io/gff.py       | 3 ++-
 allel/io/vcf_read.py  | 6 +++++-
 allel/io/vcf_write.py | 3 +--
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/allel/io/fasta.py b/allel/io/fasta.py
index 80321aa7..6c6a904c 100644
--- a/allel/io/fasta.py
+++ b/allel/io/fasta.py
@@ -36,7 +36,7 @@ def write_fasta(path, sequences, names, mode='w', width=80):
     mode = 'ab' if 'a' in mode else 'wb'
 
     # write to file
-    with open(path, mode=mode) as fasta:
+    with open(str(path), mode=mode) as fasta:
         for name, sequence in zip(names, sequences):
             # force bytes
             if isinstance(name, str):
diff --git a/allel/io/gff.py b/allel/io/gff.py
index cda4493f..1d455b0b 100644
--- a/allel/io/gff.py
+++ b/allel/io/gff.py
@@ -31,7 +31,7 @@ def iter_gff3(path, attributes=None, region=None, score_fill=-1,
 
     Parameters
     ----------
-    path : string
+    path : string or pathlib.Path
         Path to input file.
     attributes : list of strings, optional
         List of columns to extract from the "attributes" field.
@@ -64,6 +64,7 @@ def iter_gff3(path, attributes=None, region=None, score_fill=-1,
             attributes_fill = [attributes_fill] * len(attributes)
 
     # open input stream
+    path = str(path)
     if region is not None:
         cmd = [tabix, path, region]
         buffer = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
diff --git a/allel/io/vcf_read.py b/allel/io/vcf_read.py
index 9eae0261..1805f167 100644
--- a/allel/io/vcf_read.py
+++ b/allel/io/vcf_read.py
@@ -15,7 +15,7 @@
 import subprocess
 import textwrap
 from collections import OrderedDict
-
+from pathlib import Path
 
 import numpy as np
 
@@ -1012,6 +1012,10 @@ def _setup_input_stream(input, region=None, tabix=None, buffer_size=DEFAULT_BUFF
 
     # obtain a file-like object
     close = False
+
+    if isinstance(input, Path):
+        input = str(input)
+
     if isinstance(input, str) and input.endswith('gz'):
 
         if region and tabix and os.name != 'nt':
diff --git a/allel/io/vcf_write.py b/allel/io/vcf_write.py
index 7c7cef44..86651d18 100644
--- a/allel/io/vcf_write.py
+++ b/allel/io/vcf_write.py
@@ -5,7 +5,6 @@
 from operator import itemgetter
 import logging
 
-
 import numpy as np
 
 
@@ -50,7 +49,7 @@ def write_vcf(path, callset, rename=None, number=None, description=None,
 
     names, callset = normalize_callset(callset)
 
-    with open(path, 'w') as vcf_file:
+    with open(str(path), 'w') as vcf_file:
         if write_header:
             write_vcf_header(vcf_file, names, callset=callset, rename=rename,
                              number=number, description=description)

From 4432362fc2dea5706ad358f6b4bab4186fb70a60 Mon Sep 17 00:00:00 2001
From: Soma Mbadiwe <somasystemsng@gmail.com>
Date: Fri, 31 Jul 2020 02:02:45 -0400
Subject: [PATCH 2/2] Fixed Issue #271: Added support for functions taking a
 filepath. Changes to accept even more file options: IOBase, pathlib.Path,
 file path.

---
 allel/io/fasta.py     | 12 ++++++++++--
 allel/io/gff.py       | 27 ++++++++++++++++-----------
 allel/io/vcf_read.py  |  9 ++++-----
 allel/io/vcf_write.py | 12 +++++++++++-
 allel/util.py         | 11 +++++++++++
 5 files changed, 52 insertions(+), 19 deletions(-)

diff --git a/allel/io/fasta.py b/allel/io/fasta.py
index 6c6a904c..6e3fbbc8 100644
--- a/allel/io/fasta.py
+++ b/allel/io/fasta.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 import numpy as np
+from allel.util import resolve_path
 
 
 def write_fasta(path, sequences, names, mode='w', width=80):
@@ -35,8 +36,7 @@ def write_fasta(path, sequences, names, mode='w', width=80):
     # force binary mode
     mode = 'ab' if 'a' in mode else 'wb'
 
-    # write to file
-    with open(str(path), mode=mode) as fasta:
+    def save_as_fasta(fasta):
         for name, sequence in zip(names, sequences):
             # force bytes
             if isinstance(name, str):
@@ -46,3 +46,11 @@ def write_fasta(path, sequences, names, mode='w', width=80):
             for i in range(0, sequence.size, width):
                 line = sequence[i:i+width].tostring() + b'\n'
                 fasta.write(line)
+
+    # write to file
+    path = resolve_path(path)
+    if hasattr(path, 'write'):
+        save_as_fasta(path)
+    else:
+        with open(path, mode=mode) as f:
+            save_as_fasta(f)
diff --git a/allel/io/gff.py b/allel/io/gff.py
index 1d455b0b..c8961e1d 100644
--- a/allel/io/gff.py
+++ b/allel/io/gff.py
@@ -2,6 +2,7 @@
 import subprocess
 import gzip
 from urllib.parse import unquote_plus
+from allel.util import resolve_path
 
 
 import numpy as np
@@ -31,7 +32,7 @@ def iter_gff3(path, attributes=None, region=None, score_fill=-1,
 
     Parameters
     ----------
-    path : string or pathlib.Path
+    path : string, pathlib.Path or any file-like object
         Path to input file.
     attributes : list of strings, optional
         List of columns to extract from the "attributes" field.
@@ -64,15 +65,19 @@ def iter_gff3(path, attributes=None, region=None, score_fill=-1,
             attributes_fill = [attributes_fill] * len(attributes)
 
     # open input stream
-    path = str(path)
-    if region is not None:
-        cmd = [tabix, path, region]
-        buffer = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
-    elif path.endswith('.gz') or path.endswith('.bgz'):
-        buffer = gzip.open(path, mode='rb')
-    else:
-        buffer = open(path, mode='rb')
 
+    # resolve input: a path string is opened below, a file-like object is read as-is
+    path = resolve_path(path)
+    if isinstance(path, str):
+        if region is not None:
+            cmd = [tabix, path, region]
+            buffer = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
+        elif path.endswith('.gz') or path.endswith('.bgz'):
+            buffer = gzip.open(path, mode='rb')
+        else:
+            buffer = open(path, mode='rb')
+    else:
+        buffer = path
     try:
         for line in buffer:
             if line[0] == b'>':
@@ -124,7 +129,7 @@ def gff3_to_recarray(path, attributes=None, region=None, score_fill=-1,
 
     Parameters
     ----------
-    path : string
+    path : string, pathlib.Path or any file-like object
         Path to input file.
     attributes : list of strings, optional
         List of columns to extract from the "attributes" field.
@@ -181,7 +186,7 @@ def gff3_to_dataframe(path, attributes=None, region=None, score_fill=-1,
 
     Parameters
     ----------
-    path : string
+    path : string, pathlib.Path or any file-like object
         Path to input file.
     attributes : list of strings, optional
         List of columns to extract from the "attributes" field.
diff --git a/allel/io/vcf_read.py b/allel/io/vcf_read.py
index 1805f167..4677e7e5 100644
--- a/allel/io/vcf_read.py
+++ b/allel/io/vcf_read.py
@@ -15,8 +15,7 @@
 import subprocess
 import textwrap
 from collections import OrderedDict
-from pathlib import Path
-
+from allel.util import resolve_path
 import numpy as np
 
 
@@ -434,8 +433,9 @@ def vcf_to_npz(input, output,
 
     """
 
+    output = resolve_path(output)
     # guard condition
-    if not overwrite and os.path.exists(output):
+    if not overwrite and isinstance(output, str) and os.path.exists(output):
         raise ValueError('file exists at path %r; use overwrite=True to replace' % output)
 
     # read all data into memory
@@ -1013,8 +1013,7 @@ def _setup_input_stream(input, region=None, tabix=None, buffer_size=DEFAULT_BUFF
     # obtain a file-like object
     close = False
 
-    if isinstance(input, Path):
-        input = str(input)
+    input = resolve_path(input)
 
     if isinstance(input, str) and input.endswith('gz'):
 
diff --git a/allel/io/vcf_write.py b/allel/io/vcf_write.py
index 86651d18..8cdf4c56 100644
--- a/allel/io/vcf_write.py
+++ b/allel/io/vcf_write.py
@@ -8,6 +8,7 @@
 import numpy as np
 
 
+from allel.util import resolve_path
 import allel
 
 
@@ -49,12 +50,19 @@ def write_vcf(path, callset, rename=None, number=None, description=None,
 
     names, callset = normalize_callset(callset)
 
-    with open(str(path), 'w') as vcf_file:
+    def write_file(vcf_file):
         if write_header:
             write_vcf_header(vcf_file, names, callset=callset, rename=rename,
                              number=number, description=description)
         write_vcf_data(vcf_file, names, callset=callset, rename=rename, fill=fill)
 
+    path = resolve_path(path)
+    if hasattr(path, 'write'):
+        write_file(path)
+    else:
+        with open(path, 'w') as f:
+            write_file(f)
+
 
 def write_vcf_header(vcf_file, names, callset, rename, number, description):
     if rename is None:
@@ -64,6 +72,7 @@ def write_vcf_header(vcf_file, names, callset, rename, number, description):
     if description is None:
         description = dict()
 
+    vcf_file = resolve_path(vcf_file)
     # write file format version
     print('##fileformat=VCFv4.1', file=vcf_file)
 
@@ -152,6 +161,7 @@ def write_vcf_data(vcf_file, names, callset, rename, fill):
     if fill is None:
         fill = dict()
 
+    vcf_file = resolve_path(vcf_file)
     # find the fixed columns, allowing for case insensitive naming in the
     # input array
     col_chrom = None
diff --git a/allel/util.py b/allel/util.py
index fb85807e..7ca882d3 100644
--- a/allel/util.py
+++ b/allel/util.py
@@ -9,6 +9,17 @@
 import numpy as np
 
 
+def resolve_path(path):
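+    """Pass through str, bytes and file-like objects; convert others via ``os.fspath``.
+
+    :param path: str, bytes, path-like object, or object with a ``read`` attribute
+    :return: ``path`` itself, or the result of ``os.fspath(path)``
+    """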
+    if hasattr(path, 'read') or isinstance(path, (str, bytes)):
+        return path
+    return os.fspath(path)
+
+
 @contextmanager
 def ignore_invalid():
     err = np.seterr(invalid='ignore')