ProcessData.py

import numpy as np
import math

def extract_digital_data(header, raw_data, extracted_data):
    """ Extract digital i/o from a 1D raw array to a 2D array with separate channels
    
    Parameters
    ----------
    header : dict
        Dict containing previously read header information
    raw_data : 
        Populated 1D array from which channel-specific data must be extracted
    extracted_data : 
        Previously allocated 2D array to which extracted data is written
        
    Returns
    -------
    None
    """
    # Apply channel-specific masks to raw digin data to determine each channel's samples as 1 or 0
    for channel in range(header['num_board_dig_in_channels']):
        channel_mask = 1 << header['board_dig_in_channels'][channel]['native_order']
        extracted_data[channel, :] = np.not_equal(np.bitwise_and(raw_data, channel_mask), 0)

def extract_stim_data(header, data):
    """ Extract raw stim data containing multiple fields in a 2D array of uint16 to multiple 2D arrays for each field
    
    Parameters
    ----------
    header : dict
        Dict containing previously read header information
    data :
        Dict containing both previously read 'raw' data, and previously allocated data fields to which extracted data is written
    
    Returns
    -------
    None
    """
    data['compliance_limit_data'] = (np.bitwise_and(data['stim_data_raw'], 32768) >= 1).astype(int) # get 2^15 bit, interpret as True or False
    data['charge_recovery_data'] = (np.bitwise_and(data['stim_data_raw'], 16384) >= 1).astype(int) # get 2^14 bit, interpret as True or False
    data['amp_settle_data'] = (np.bitwise_and(data['stim_data_raw'], 8192) >= 1).astype(int) # get 2^13 bit, interpret as True or False
    data['stim_polarity'] = (1 - (2*(np.bitwise_and(data['stim_data_raw'], 256) >> 8))).astype(int) # get 2^8 bit, interpret as +1 for 0_bit or -1 for 1_bit
    
    curr_amp = np.bitwise_and(data['stim_data_raw'], 255) # get least-significant 8 bits corresponding to the current amplitude
    data['stim_data'] = curr_amp * data['stim_polarity'] # multiply current amplitude by the correct sign

def check_for_gaps(t_amplifier, previous_num_gaps, previous_timestamp, chunk_idx):
    """ Check for gaps in timestamp data
    
    Parameters
    ----------
    t_amplifier : numpy.ndarray
        1D numpy array containing previously read timestamp data
    previous_num_gaps : int
        After this function call, how many gaps have been found 
    previous_timestamp : int
        Last timestamp of the previous chunk
    chunk_idx : int
        Index of which chunk is currently being converted (if this is 0, the first chunk has no previous data to consult)
    
    Returns
    -------
    previous_timestamp : int
        Last timestamp of this chunk to pass along to the next chunk for continuity between chunks
    num_gaps : int
        After this function call, how many gaps have been found
    """
    # Check for gaps across this whole chunk
    num_gaps = previous_num_gaps + np.sum(np.not_equal(t_amplifier[1:]-t_amplifier[:-1], 1))
    
    # Handle seam case between the previous chunk and this one
    if chunk_idx > 0:
        if t_amplifier[0] - previous_timestamp != 1:
            num_gaps = num_gaps + 1
            
    # Save this chunk's last timestamp for the next iteration
    previous_timestamp = t_amplifier[-1]
    
    return previous_timestamp, num_gaps

def scale(header, data, file_format):
    """ Scale data arrays from the read integer values to appropriate SI units
    
    Parameters
    ----------
    header : dict
        Dict containing previously read header information
    data : dict
        Dict with fields containing data that needs to be scaled
    file_format : str
        Which file format this read is following - 'traditional', 'per_signal_type', or 'per_channel'
    
    Returns
    -------
    None
    """
    scale_timestamps(header, data)
    scale_data(header, data, file_format)

def scale_timestamps(header, data):
    """ Scale all timestamps arrays in data to seconds, with the correct sample rate for each signal type.
    
    Parameters
    ----------
    header : dict
        Dict containing previously read header information
    data : dict
        Dict with fields containing data. In this case, timestamp data like data['t_amplifier'] can be written to
        
    Returns
    -------
    None
    """
    # Divide int timestamp data by the sample rate in Hz to get timestamp data in seconds
    t_key = 't_amplifier' if header['filetype'] == 'rhd' else 't'
    base_timestamps = data[t_key] / header['sample_rate']
    
    # Amplifiers are sampled at the base sample rate, so all timestamps should be included
    data[t_key] = base_timestamps
    
    # Only for .rhd files are multiple timestamp vectors used
    if header['filetype'] == 'rhs':
        return
    
    # Aux inputs are sampled 4x slower than the base sample rate, so every 4th timestamp should be included
    t_aux_range = range(0, len(base_timestamps), 4)
    data['t_aux_input'] = base_timestamps[t_aux_range]
    
    # Supply voltages are sampled 60x or 128x slower than the base sample rate, so only one timestamp per data block should be included
    t_supply_range = range(0, len(base_timestamps), header['num_samples_per_data_block'])
    data['t_supply_voltage'] = base_timestamps[t_supply_range]
    
    # Analog inputs are sampled at the base sample rate, so all timestamps should be included
    data['t_board_adc'] = base_timestamps
    
    # Digital inputs/outputs are sampled at the base sample rate, so all timestamps should be included
    data['t_dig'] = base_timestamps
    
    # Temp sensors are sampled at the same rate as supply voltages
    data['t_temp_sensor'] = data['t_supply_voltage']
    
def scale_data(header, data, file_format):
    """ Scale data arrays from the read integer values to appropriate units
    
    Parameters
    ----------
    header: dict
        Dict containing previously read header information
    data : dict
        Dict with fields containing data that must be scaled
    file_format : str
        Which file format this read is following - 'traditional', 'per_signal_type', or 'per_channel'
    
    Returns
    -------
    None
    """
    # Scale amplifier data to Volts
    if file_format == 'traditional':
        data['amplifier_data'] = 1.95e-7 * (data['amplifier_data'].astype('float32') - 32768)
    else:
        data['amplifier_data'] = 1.95e-7 * data['amplifier_data'].astype('float32')
        
    if header['lowpass_present']:
        data['lowpass_data'] = 1.95e-7 * data['lowpass_data'].astype('float32')
        
    if header['highpass_present']:
        data['highpass_data'] = 1.95e-7 * data['highpass_data'].astype('float32')
        
    if header['filetype'] == 'rhd':

        # Scale aux input data to Volts
        data['aux_input_data'] = 37.4e-6 * data['aux_input_data']

        # Scale supply voltage data to Volts
        data['supply_voltage_data'] = 74.8e-6 * data['supply_voltage_data']
            
        # Scale temp sensor data to deg C
        data['temp_sensor_data'] = data['temp_sensor_data'] / 100
        
        # Scale analog input data to Volts
        if header['board_mode'] == 1:
            data['board_adc_data'] = 152.59e-6 * (data['board_adc_data'].astype(np.int32) - 32768)
        elif header['board_mode'] == 13:
            data['board_adc_data'] = 312.5e-6 * (data['board_adc_data'].astype(np.int32) - 32768)
        else:
            data['board_adc_data'] = 50.354e-6 * data['board_adc_data']
        
    else:
        
        # Scale stim data to Amps
        data['stim_data'] = header['stim_step_size'] * data['stim_data']
        
        # If present, scale DC amp data to Volts
        if header['dc_amplifier_data_saved']:
            data['dc_amplifier_data'] = -0.01923 * (data['dc_amplifier_data'].astype(np.int32) - 512)
            
        data['board_adc_data'] = 312.5e-6 * (data['board_adc_data'].astype(np.int32) - 32768) # units = volts
        data['board_dac_data'] = 312.5e-6 * (data['board_dac_data'].astype(np.int32) - 32768) # units = volts
    
def process_wideband(header, chunk_idx, data, previous_samples):
    """ Process wideband data prior to final write, applying a notch filter if appropriate
    
    Parameters
    ----------
    header : dict
        Dict containing previously read header information
    chunk_idx : int
        Index of which chunk is currently being converted (if this is 0, the first chunk has no previous data to consult)
    data : dict
        Dict with fields containing data that must be processed
    previous_samples : list
        List of last samples of previous chunk, used for allowing notch filter to be continuous across chunks
    Returns
    -------
    wideband_filter_string :
        String describing how the wideband data has been filtered, used for writing to NWB later
    previous_samples : list
        List of last samples of this chunk, used for allowing notch filter to be continuous across chunks
    """
    wideband_filter_string = 'Wideband data'
    # If the software notch filter was selected during recording, apply the same notch filter to amplifier data here.
    # But don't do this for v3.0+ files (from Intan RHX software) because RHX saves notch-filtered data.
    if header['notch_filter_frequency'] > 0 and header['version']['major'] < 3:
        for channel in range(header['num_amplifier_channels']):
            continue_previous = False if chunk_idx == 0 else True
            data['amplifier_data'][channel,:] = notch_filter(data['amplifier_data'][channel,:],
                                                             header['sample_rate'],
                                                             header['notch_filter_frequency'],
                                                             10,
                                                             continue_previous,
                                                             previous_samples[channel * 2],
                                                             previous_samples[channel * 2 + 1]
                                                            )
            previous_samples[channel * 2] = data['amplifier_data'][channel, -2]
            previous_samples[channel * 2 + 1] = data['amplifier_data'][channel, -1]
        wideband_filter_string = 'Wideband data, filtered through a ' + str(header['notch_filter_frequency']) + ' Hz IIR notch filter'
    return wideband_filter_string, previous_samples

def notch_filter(in_array, f_sample, f_notch, bandwidth, continue_previous, second_to_last, last):
    """ Implement a notch filter (e.g., for 50 or 60 Hz) on input vector.
    
    Example:  If neural data was sampled at 30 kSamples/sec and you wish to implement a 60 Hz notch filter:
    out_array = notch_filter(in_array, 3000, 60, 10, false, None, None)
    
    Parameters
    ----------
    in_array : numpy.ndarray
        1D array containing unfiltered data that should have a notch filter applied to it
    f_sample : float
        Sample rate of data (Hz or Samples/sec)
    f_notch : float or int
        Filter notch frequency (Hz)
    bandwidth : float or int
        Notch 3-dB bandwidth (Hz). A bandwidth of 10 Hz is recommended for 50 or 60 Hz notch filters;
        narrower bandwidths lead to poor time-domain properties with an extended ringing response to transient disturbances.
    continue_previous : bool
        Whether this filter is continuous with earlier data, which should be stored in previous_samples
    second_to_last : float
        Second to last sample used for continuous filtering if continue_previous is True
    last : float
        Last sample used for continuous filtering if continue_previous is True
        
    Returns
    -------
    out_array : numpy.ndarray
        1D array containing notch-filtered data
    """
    t_step = 1.0/f_sample
    f_c = f_notch*t_step
    
    L = len(in_array)
    
    # Calculate IIR filter parameters
    d = math.exp(-2.0*math.pi*(bandwidth/2.0)*t_step)
    b = (1.0 + d*d) * math.cos(2.0*math.pi*f_c)
    a0 = 1.0
    a1 = -b
    a2 = d*d
    a = (1.0 + d*d)/2.0
    b0 = 1.0
    b1 = -2.0 * math.cos(2.0*math.pi*f_c)
    b2 = 1.0
    
    out_array = np.zeros(len(in_array))
    if continue_previous:
        out_array[0] = second_to_last
        out_array[1] = last
    else:
        out_array[0] = in_array[0]
        out_array[1] = in_array[1]
    # (If filtering a continuous data stream, change out_array[0:1] to the
    #  previous final two values of out_array.)

    # Run filter
    for i in range(2,L):
        out_array[i] = (a*b2*in_array[i-2] + a*b1*in_array[i-1] + a*b0*in_array[i] - a2*out_array[i-2] - a1*out_array[i-1])/a0

    return out_array