Reader

openseize.file_io.edf.Reader

Bases: bases.Reader

A reader of European Data Format (EDF/EDF+) files.

This reader supports reading EEG data and metadata from an EDF file with or without context management (see Introduction). If opened outside of context management, close the Reader manually by calling its 'close' method to release the open file resources when you have finished processing the file.
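
For example, the same read shown in the Examples below can be performed without context management, provided 'close' is called when finished (a sketch using the demo recording bundled with openseize):

>>> from openseize.demos import paths
>>> from openseize.file_io.edf import Reader
>>> filepath = paths.locate('recording_001.edf')
>>> reader = Reader(filepath)
>>> x = reader.read(start=0, stop=120)
>>> reader.close()  # manually release the open file resources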

Attributes:

header (dict):
    A dictionary representation of the EDF's header.
shape (tuple):
    A (channels, samples) shape tuple.
channels (Sequence):
    The channels to be returned from the 'read' method call.

Examples:

>>> from openseize.demos import paths
>>> filepath = paths.locate('recording_001.edf')
>>> from openseize.file_io.edf import Reader
>>> # open a reader using context management and read 120 samples
>>> # from all 4 channels
>>> with Reader(filepath) as infile:
...     x = infile.read(start=0, stop=120)
>>> print(x.shape)
(4, 120)
Source code in openseize/file_io/edf.py
class Reader(bases.Reader):
    """A reader of European Data Format (EDF/EDF+) files.

    This reader supports reading EEG data and metadata from an EDF file with
    or without context management (see Introduction). If opened outside of
    context management, close the Reader manually by calling its 'close'
    method to release the open file resources when you have finished
    processing the file.

    Attributes:
        header (dict):
            A dictionary representation of the EDF's header.
        shape (tuple):
            A (channels, samples) shape tuple.
        channels (Sequence):
            The channels to be returned from the 'read' method call.

    Examples:
        >>> from openseize.demos import paths
        >>> filepath = paths.locate('recording_001.edf')
        >>> from openseize.file_io.edf import Reader
        >>> # open a reader using context management and read 120 samples
        >>> # from all 4 channels
        >>> with Reader(filepath) as infile:
        ...     x = infile.read(start=0, stop=120)
        >>> print(x.shape)
        (4, 120)
    """

    def __init__(self, path: Union[str, Path]) -> None:
        """Extends the Reader ABC with a header attribute."""

        super().__init__(path, mode='rb')
        self.header = Header(path)
        self._channels = self.header.channels

    @property
    def channels(self) -> Sequence[int]:
        """Returns the channels that this Reader will read."""

        return self._channels

    @channels.setter
    def channels(self, values: Sequence[int]):
        """Sets the channels that this Reader will read.

        Args:
            values:
                Sets the channels this Reader's 'read' method will return
                data from.
        """

        if not isinstance(values, Sequence):
            msg = 'Channels must be type Sequence not {}'
            raise ValueError(msg.format(type(values)))

        self._channels = values

    @property
    def shape(self) -> Tuple[int, ...]:
        """Returns a 2-tuple containing the number of channels and
        number of samples in this EDF."""

        return len(self.channels), max(self.header.samples)

    def _decipher(self,
                  arr: np.ndarray,
                  channels: Sequence[int],
                  axis: int = -1,
    ):
        """Converts decoded data record integers to float voltages.

        Physical voltage values 'p' are linearly mapped from the
        decoded integer values 'd' according to:

            p = slope * d + offset
            slope = (pmax - pmin) / (dmax - dmin)
            offset = p - slope * d for any (p,d)

        The EDF header contains pmax, pmin, dmax and dmin for each channel.

        Args:
            arr:
                An array of integer values decoded from the EDF.
            channels:
                The channel indices that were decoded. Each channel may have
                a unique slope and offset.
            axis:
                The samples axis of arr.

        Returns:
            A float64 ndarray of voltages with the same shape as 'arr'.
        """

        slopes = np.array(self.header.slopes[channels])
        offsets = np.array(self.header.offsets[channels])
        #expand to 2-D for broadcasting
        slopes = np.expand_dims(slopes, axis=axis)
        offsets = np.expand_dims(offsets, axis=axis)
        result = arr * slopes
        result += offsets
        return cast(np.ndarray, result)

    def _find_records(self,
                      start: int,
                      stop: int,
                      channels: Sequence[int],
    ) -> Sequence[Tuple[int, int]]:
        """Returns the first and last record indices that include start to
        stop samples for each channel in channels.

        Notes:
            The number of samples for each channel will be different if the
            sample rates are unequal. Thus, this method returns a first and
            last record number for each channel.

        Args:
            start:
                The start sample used to locate the first record.
            stop:
                The stop sample (exclusive) used to locate the last record.
            channels:
                The channel indices to read.

        Returns:
            A list of (first, last) record numbers for each channel.
        """

        spr = np.array(self.header.samples_per_record)[channels]
        starts = start // spr
        stops = np.ceil(stop / spr).astype('int')
        return list(zip(starts, stops))

    def _records(self, a: int, b: int):
        """Reads samples in the ath to bth record.

        If b exceeds the number of records in the EDF, then samples up to the
        end of file are returned. If a exceeds the number of records, an
        empty array is returned.

        Args:
            a:
                The first record to read.
            b:
                The last record to be read (exclusive).

        Returns:
            A ndarray of shape (b-a) * sum(samples_per_record)
        """

        if a >= self.header.num_records:
            return np.empty((1,0))
        b = min(b, self.header.num_records)
        cnt = b - a

        self._fobj.seek(0)
        #EDF samples are 2-byte little endian integers
        bytes_per_record = sum(self.header.samples_per_record) * 2
        #get offset in bytes & num samples spanning a to b
        offset = self.header.header_bytes + a * bytes_per_record
        nsamples = cnt * sum(self.header.samples_per_record)
        #read records and reshape to num_records x sum(samples_per_record)
        recs = np.fromfile(self._fobj, '<i2', nsamples, offset=offset)
        arr = recs.reshape(cnt, sum(self.header.samples_per_record))
        return arr

    def _padstack(self,
                  arrs: Sequence[np.ndarray],
                  value: float,
                  axis: int = 0
    ):
        """Returns a 2-D array from a ragged sequence of 1-D arrays.

        Args:
            arrs:
                A ragged sequence of 1-D arrays to combine.
            value:
                Padding value used to lengthen 1-D arrays.
            axis:
                The axis along which to stack the padded 1-D arrays.

        Returns:
            A 2-D array.
        """

        longest = max(len(arr) for arr in arrs)
        pad_sizes = np.array([longest - len(arr) for arr in arrs])

        if all(pad_sizes == 0):
            return np.stack(arrs, axis=0)

        x = [np.pad(arr.astype(float), (0, pad), constant_values=value)
                for arr, pad in zip(arrs, pad_sizes)]
        return np.stack(x, axis=axis)

    def _read_array(self,
                    start: int,
                    stop: int,
                    channels: Sequence[int],
                    padvalue: float,
    ):
        """Reads samples between start & stop indices for each channel index
        in channels.

        Args:
            start:
                The start sample index to read.
            stop:
                The stop sample index to read (exclusive).
            channels:
                Sequence of channel indices to read from EDF.
            padvalue:
                Value to pad to channels that run out of data to return.
                Only applicable if sample rates of channels differ.

        Returns:
            A float64 2-D array of shape len(channels) x (stop-start).
        """

        # Locate record tuples that include start & stop samples for
        # each channel but only perform reads over unique record tuples.
        rec_tuples = self._find_records(start, stop, channels)
        uniq_tuples = set(rec_tuples)
        reads = {tup: self._records(*tup) for tup in uniq_tuples}

        result=[]
        for ch, rec_tup in zip(channels, rec_tuples):

            #get preread array and extract samples for this ch
            arr = reads[rec_tup]
            arr = arr[:, self.header.record_map[ch]].flatten()

            #adjust start & stop relative to records start pt
            a = start - rec_tup[0] * self.header.samples_per_record[ch]
            b = a + (stop - start)
            result.append(arr[a:b])

        res = self._padstack(result, padvalue)
        return self._decipher(res, channels)

    def read(self,
             start: int,
             stop: Optional[int] = None,
             padvalue: float = np.NaN
    ) -> npt.NDArray[np.float64]:
        """Reads samples from this EDF from this Reader's channels.

        Args:
            start:
                The start sample index to read.
            stop:
                The stop sample index to read (exclusive). If None, samples
                will be read until the end of file.
            padvalue:
                Value to pad to channels that run out of samples to return.
                Only applicable if sample rates of channels differ.

        Returns:
            A float64 array of shape len(chs) x (stop-start) samples.
        """

        if start > max(self.header.samples):
            return np.empty((len(self.channels), 0))

        if not stop:
            stop = max(self.header.samples)

        arr = self._read_array(start, stop, self.channels, padvalue)
        # use cast to indicate ndarray type for docs
        return cast(np.ndarray, arr)
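
The calibration applied in '_decipher' can be illustrated with a small numeric sketch; the physical and digital extrema below are hypothetical and not taken from any particular EDF header:

>>> # hypothetical calibration values for a single channel
>>> pmin, pmax = -500.0, 500.0    # physical (voltage) range
>>> dmin, dmax = -32768, 32767    # digital (integer) range
>>> slope = (pmax - pmin) / (dmax - dmin)
>>> offset = pmin - slope * dmin
>>> # a decoded integer of 0 maps close to the middle of the voltage range
>>> round(slope * 0 + offset, 3)
0.008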

__init__(path)

Extends the Reader ABC with a header attribute.

Source code in openseize/file_io/edf.py
def __init__(self, path: Union[str, Path]) -> None:
    """Extends the Reader ABC with a header attribute."""

    super().__init__(path, mode='rb')
    self.header = Header(path)
    self._channels = self.header.channels

channels property writable

Returns the channels that this Reader will read.

Source code in openseize/file_io/edf.py
@property
def channels(self) -> Sequence[int]:
    """Returns the channels that this Reader will read."""

    return self._channels
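
As a sketch, continuing from the 4-channel demo recording in the Examples above, restricting 'channels' changes the rows returned by 'read':

>>> reader = Reader(filepath)
>>> reader.channels = [0, 2]
>>> x = reader.read(start=0, stop=120)
>>> x.shape
(2, 120)
>>> reader.close()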

shape property

Returns a 2-tuple containing the number of channels and number of samples in this EDF.

Source code in openseize/file_io/edf.py
@property
def shape(self) -> Tuple[int, ...]:
    """Returns a 2-tuple containing the number of channels and
    number of samples in this EDF."""

    return len(self.channels), max(self.header.samples)

read(start, stop=None, padvalue=np.NaN)

Reads samples from this EDF for this Reader's channels.

Parameters:

start (int):
    The start sample index to read. Required.
stop (Optional[int]):
    The stop sample index to read (exclusive). If None, samples will be
    read until the end of file. Default: None.
padvalue (float):
    Value to pad to channels that run out of samples to return. Only
    applicable if sample rates of channels differ. Default: np.NaN.

Returns:

npt.NDArray[np.float64]:
    A float64 array of shape len(chs) x (stop-start) samples.

Source code in openseize/file_io/edf.py
def read(self,
         start: int,
         stop: Optional[int] = None,
         padvalue: float = np.NaN
) -> npt.NDArray[np.float64]:
    """Reads samples from this EDF from this Reader's channels.

    Args:
        start:
            The start sample index to read.
        stop:
            The stop sample index to read (exclusive). If None, samples
            will be read until the end of file.
        padvalue:
            Value to pad to channels that run out of samples to return.
            Only applicable if sample rates of channels differ.

    Returns:
        A float64 array of shape len(chs) x (stop-start) samples.
    """

    if start > max(self.header.samples):
        return np.empty((len(self.channels), 0))

    if not stop:
        stop = max(self.header.samples)

    arr = self._read_array(start, stop, self.channels, padvalue)
    # use cast to indicate ndarray type for docs
    return cast(np.ndarray, arr)
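
When 'stop' is omitted the read runs to the end of the file; a sketch using the demo recording from the Examples above:

>>> with Reader(filepath) as infile:
...     x = infile.read(start=0)  # stop=None reads to the end of file
...     full_shape = infile.shape
>>> x.shape == full_shape
True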

Bases and Mixins

Reader Base

openseize.file_io.bases.Reader

Bases: abc.ABC, mixins.ViewInstance

Abstract base class for reading EEG data.

This ABC defines a protocol for reading EEG data from any file type. Specifically, all EEG readers support opening EEG files under context management or as an open file whose resources should be closed when processing is finished. Inheritors must override the abstract 'read' method and the abstract 'channels' and 'shape' properties.
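
As an illustration, a minimal hypothetical subclass reading a whitespace-delimited text file with one row of samples per channel might look like the sketch below. It is not part of openseize; it only shows one way to satisfy this protocol.

import numpy as np
from openseize.file_io import bases

class TextReader(bases.Reader):
    """A hypothetical reader for a plain-text file with one row of samples
    per channel."""

    def __init__(self, path):
        # the underlying file is plain text, so open with mode 'r'
        super().__init__(path, mode='r')
        self._data = np.loadtxt(self._fobj, ndmin=2)
        self._channels = list(range(self._data.shape[0]))

    @property
    def channels(self):
        return self._channels

    @channels.setter
    def channels(self, values):
        self._channels = list(values)

    @property
    def shape(self):
        return len(self._channels), self._data.shape[-1]

    def read(self, start, stop=None):
        stop = self._data.shape[-1] if stop is None else stop
        return self._data[self._channels, start:stop].astype(np.float64)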

Attributes:

path:
    Python path instance to EEG file.
mode:
    String file mode option for 'open' builtin. Must be 'r' for plain text
    files or 'rb' for binary file types.
kwargs:
    Additional kwargs needed for opening the file at path.

Source code in openseize/file_io/bases.py
class Reader(abc.ABC, mixins.ViewInstance):
    """Abstract base class for reading EEG data.

    This ABC defines a protocol for reading EEG data from any file type.
    Specifically, all EEG readers support opening EEG files under context
    management or as an open file whose resources should be closed when
    processing is finished. Inheritors must override the abstract 'read'
    method and the abstract 'channels' and 'shape' properties.

    Attributes:
        path:
            Python path instance to EEG file.
        mode:
            String file mode option for 'open' builtin. Must be 'r' for
            plain text files or 'rb' for binary file types.
        kwargs:
            Additional kwargs needed for opening the file at path.
    """

    def __init__(self, path: typing.Union[str, Path], mode: str, **kwargs: str
    ) -> None:
        """Initialize this reader.

        Args:
            path:
                Python path instance to an EEG data file.
            mode:
                A mode for reading the eeg file. Must be 'r' for plain
                text files and 'rb' for binary files.
            kwargs:
                Any additional kwargs are routed to the open method.
        """

        self.path = Path(path)
        self.mode = mode
        self.kwargs = kwargs
        self._fobj = None
        self.open()

    def open(self):
        """Opens the file at path for reading & stores the file descriptor to
        this Reader's '_fobj' attribute."""

        # allow Readers to read with or without context management
        # pylint: disable-next=consider-using-with, unspecified-encoding
        self._fobj = open(self.path, self.mode, **self.kwargs)

    @property
    @abc.abstractmethod
    def channels(self):
        """Returns the channels that this Reader will read."""

    @channels.setter
    @abc.abstractmethod
    def channels(self, val: int):
        """Sets the channels that this Reader will read."""

    @property
    @abc.abstractmethod
    def shape(self):
        """Returns the summed shape of all arrays the Reader will read."""

    @abc.abstractmethod
    def read(self, start: int, stop: int) -> npt.NDArray[np.float64]:
        """Returns a numpy array of sample values between start and stop for
        each channel in channels.

        Args:
            start:
                Start sample index of file read.
            stop:
                Stop sample index of file read (exclusive).

        Returns:
            A channels x (stop-start) array of sample values.
        """

    def __enter__(self):
        """Return reader instance as target variable of this context."""

        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """On context exit, close this reader's file object and propagate
        errors by returning None."""

        self.close()

    def close(self):
        """Close this reader instance's opened file object and destroy the
        reference to the file object.

        File descriptors whether opened or closed are not serializable. To
        support concurrent processing we close & remove all references to the
        file descriptor on close.
        """

        if self._fobj:
            self._fobj.close()
            self._fobj = None