Refactor VECTORIZED support using time_axis in BaseRawIO

Kkuntal990 · Kkuntal990 · commit d214b4aadd51 · 2025-11-04T18:26:30.000-08:00
Moved the VECTORIZED orientation logic into BaseRawIO, as suggested by @samuelgarcia:

- Added a time_axis entry to buffer_description (0 = MULTIPLEXED, 1 = VECTORIZED)
- Extended BaseRawIO._get_analogsignal_chunk() to handle time_axis=1 for raw buffers
- Removed the custom _get_analogsignal_chunk() override from BrainVisionRawIO
- Fixed _get_signal_size() to correctly handle raw buffers with time_axis=1

Benefits:

- Cleaner, more general solution applicable to other readers
- Consistent with the existing HDF5 time_axis pattern
- Reduced code duplication
- All tests pass with identical MNE-Python validation
diff --git a/neo/rawio/baserawio.py b/neo/rawio/baserawio.py
@@ -1577,9 +1577,14 @@ def __init__(self, *arg, **kwargs):
     def _get_signal_size(self, block_index, seg_index, stream_index):
         buffer_id = self.header["signal_streams"][stream_index]["buffer_id"]
         buffer_desc = self.get_analogsignal_buffer_description(block_index, seg_index, buffer_id)
-        # some hdf5 revert teh buffer
-        time_axis = buffer_desc.get("time_axis", 0)
-        return buffer_desc["shape"][time_axis]
+        # For "raw" type, shape is always (time, channels) regardless of file layout
+        # For "hdf5" type, shape can be (time, channels) or (channels, time) based on time_axis
+        if buffer_desc["type"] == "raw":
+            return buffer_desc["shape"][0]
+        else:
+            # some hdf5 files store the buffer transposed, i.e. (channels, time)
+            time_axis = buffer_desc.get("time_axis", 0)
+            return buffer_desc["shape"][time_axis]
 
     def _get_analogsignal_chunk(
         self,
@@ -1603,29 +1608,61 @@ def _get_analogsignal_chunk(
 
         if buffer_desc["type"] == "raw":
 
-            # open files on demand and keep reference to opened file
-            if not hasattr(self, "_memmap_analogsignal_buffers"):
-                self._memmap_analogsignal_buffers = {}
-            if block_index not in self._memmap_analogsignal_buffers:
-                self._memmap_analogsignal_buffers[block_index] = {}
-            if seg_index not in self._memmap_analogsignal_buffers[block_index]:
-                self._memmap_analogsignal_buffers[block_index][seg_index] = {}
-            if buffer_id not in self._memmap_analogsignal_buffers[block_index][seg_index]:
-                fid = open(buffer_desc["file_path"], mode="rb")
-                self._memmap_analogsignal_buffers[block_index][seg_index][buffer_id] = fid
-            else:
-                fid = self._memmap_analogsignal_buffers[block_index][seg_index][buffer_id]
+            time_axis = buffer_desc.get("time_axis", 0)
 
-            num_channels = buffer_desc["shape"][1]
+            if time_axis == 0:
+                # MULTIPLEXED: time_axis=0 means (time, channels) layout
+                # open files on demand and keep reference to opened file
+                if not hasattr(self, "_memmap_analogsignal_buffers"):
+                    self._memmap_analogsignal_buffers = {}
+                if block_index not in self._memmap_analogsignal_buffers:
+                    self._memmap_analogsignal_buffers[block_index] = {}
+                if seg_index not in self._memmap_analogsignal_buffers[block_index]:
+                    self._memmap_analogsignal_buffers[block_index][seg_index] = {}
+                if buffer_id not in self._memmap_analogsignal_buffers[block_index][seg_index]:
+                    fid = open(buffer_desc["file_path"], mode="rb")
+                    self._memmap_analogsignal_buffers[block_index][seg_index][buffer_id] = fid
+                else:
+                    fid = self._memmap_analogsignal_buffers[block_index][seg_index][buffer_id]
+
+                num_channels = buffer_desc["shape"][1]
+
+                raw_sigs = get_memmap_chunk_from_opened_file(
+                    fid,
+                    num_channels,
+                    i_start,
+                    i_stop,
+                    np.dtype(buffer_desc["dtype"]),
+                    file_offset=buffer_desc["file_offset"],
+                )
 
-            raw_sigs = get_memmap_chunk_from_opened_file(
-                fid,
-                num_channels,
-                i_start,
-                i_stop,
-                np.dtype(buffer_desc["dtype"]),
-                file_offset=buffer_desc["file_offset"],
-            )
+            elif time_axis == 1:
+                # VECTORIZED: time_axis=1 means (channels, time) layout
+                # Data is stored as [all_samples_ch1, all_samples_ch2, ...]
+                dtype = np.dtype(buffer_desc["dtype"])
+                num_channels = buffer_desc["shape"][1]
+                num_samples = i_stop - i_start
+                total_samples_per_channel = buffer_desc["shape"][0]
+
+                # Determine which channels to read
+                if channel_indexes is None:
+                    chan_indices = np.arange(num_channels)
+                else:
+                    chan_indices = np.asarray(channel_indexes)
+
+                raw_sigs = np.empty((num_samples, len(chan_indices)), dtype=dtype)
+
+                for i, chan_idx in enumerate(chan_indices):
+                    offset = buffer_desc["file_offset"] + chan_idx * total_samples_per_channel * dtype.itemsize
+                    channel_data = np.memmap(buffer_desc["file_path"], dtype=dtype, mode='r',
+                                            offset=offset, shape=(total_samples_per_channel,))
+                    raw_sigs[:, i] = channel_data[i_start:i_stop]
+
+                # Channel slicing already done above, so skip later channel_indexes slicing
+                channel_indexes = None
+
+            else:
+                raise ValueError(f"time_axis must be 0 or 1, got {time_axis}")
 
         elif buffer_desc["type"] == "hdf5":
 
diff --git a/neo/rawio/brainvisionrawio.py b/neo/rawio/brainvisionrawio.py
@@ -91,13 +91,10 @@ def _parse_header(self):
         self._buffer_descriptions = {0: {0: {}}}
         self._stream_buffer_slice = {}
 
-        # Calculate the shape based on orientation
-        if self._data_orientation == "MULTIPLEXED":
-            shape = get_memmap_shape(binary_filename, sig_dtype, num_channels=nb_channel, offset=0)
-        else:  # VECTORIZED
-            # For VECTORIZED, data is stored as [all_samples_ch1, all_samples_ch2, ...]
-            # We still report shape as (num_samples, num_channels) for compatibility
-            shape = get_memmap_shape(binary_filename, sig_dtype, num_channels=nb_channel, offset=0)
+        shape = get_memmap_shape(binary_filename, sig_dtype, num_channels=nb_channel, offset=0)
+
+        # time_axis indicates data layout: 0 for MULTIPLEXED (time, channels), 1 for VECTORIZED (channels, time)
+        time_axis = 0 if self._data_orientation == "MULTIPLEXED" else 1
 
         self._buffer_descriptions[0][0][buffer_id] = {
             "type": "raw",
@@ -106,12 +103,10 @@ def _parse_header(self):
             "order": "C",
             "file_offset": 0,
             "shape": shape,
+            "time_axis": time_axis,
         }
         self._stream_buffer_slice[stream_id] = None
 
-        # Store number of channels for VECTORIZED reading
-        self._nb_channel = nb_channel
-
         signal_buffers = np.array([("Signals", "0")], dtype=_signal_buffer_dtype)
         signal_streams = np.array([("Signals", "0", "0")], dtype=_signal_stream_dtype)
 
@@ -253,42 +248,6 @@ def _rescale_event_timestamp(self, event_timestamps, dtype, event_channel_index)
     def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id):
         return self._buffer_descriptions[block_index][seg_index][buffer_id]
 
-    def _get_analogsignal_chunk(
-        self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes
-    ):
-        """
-        Override to handle VECTORIZED orientation.
-        VECTORIZED: all samples for ch1, then all samples for ch2, etc.
-        """
-        if self._data_orientation == "MULTIPLEXED":
-            return super()._get_analogsignal_chunk(
-                block_index, seg_index, i_start, i_stop, stream_index, channel_indexes
-            )
-
-        # VECTORIZED: use memmap to read each channel's data block
-        buffer_id = self.header["signal_streams"][stream_index]["buffer_id"]
-        buffer_desc = self.get_analogsignal_buffer_description(block_index, seg_index, buffer_id)
-
-        i_start = i_start or 0
-        i_stop = i_stop or buffer_desc["shape"][0]
-
-        if channel_indexes is None:
-            channel_indexes = np.arange(self._nb_channel)
-
-        dtype = np.dtype(buffer_desc["dtype"])
-        num_samples = i_stop - i_start
-        total_samples_per_channel = buffer_desc["shape"][0]
-
-        raw_sigs = np.empty((num_samples, len(channel_indexes)), dtype=dtype)
-
-        for i, chan_idx in enumerate(channel_indexes):
-            offset = buffer_desc["file_offset"] + chan_idx * total_samples_per_channel * dtype.itemsize
-            channel_data = np.memmap(buffer_desc["file_path"], dtype=dtype, mode='r',
-                                    offset=offset, shape=(total_samples_per_channel,))
-            raw_sigs[:, i] = channel_data[i_start:i_stop]
-
-        return raw_sigs
-
     def _ensure_filename(self, filename, kind, entry_name):
         if not os.path.exists(filename):
             # file not found, subsequent import stage would fail