sd.py

"""
Import python libraries
"""
from dataclasses import dataclass, field
from typing import Dict, List, Tuple
from itertools import groupby
from operator import itemgetter

"""
Import other libraries
"""
import numpy as np
import numpy.typing as npt
import scipy.interpolate as interp
from scipy.signal import find_peaks, argrelmin
from astropy.convolution import Gaussian1DKernel
from lmfit import Model                                 # Necessary for curve fitting
from lmfit.models import LinearModel   # Necessary for curve fitting

"""
Import user libraries
"""
from fit import lmfit_exp_gaus_single, FitVar
from utils import (bin, date_to_data, get_flare_class, get_snr, merge_ranges,
                   remove_ranges, nan_ranges, smoothen, nan_helper, isSetupCorrect)

globdate = 'date'

"""
Define (external / exposed) parameters
Intended to be modified by user to determine best fit by use-case
"""
@dataclass
class Config:
    date: str       # Lightcurve date for initialisation
    gen: bool       # Is file generated by user (yes: define file path in utils.py
                    # no: code will use standard 1s cadence lightcurves
                    # f'$PWD/data/XSM_Extracted_LightCurve/ch2_xsm_{date}_v1_level2.lc')
    super_smoothening_kernel: List[int] = \
        field(default_factory = lambda: [10,10])
                    # @ashwin
    threshold_mergepeaks: int = 120     # To merge neightbouring flare regions
                                        # Must be higher than post-binning cadence
    threshold_duration: int = 120       # Minimum duration of region to be 
                                        # acknowledged as a region
    threshold_snr_region: int = 1       # Minimum SNR of flare to be acknowledged
                                        # as a region
    threshold_upturns: int = 3          # Number of upturns allowed in region
                                        # identification step
    nsigma: float = 0.3                 # n-value in n-sigma estimation of 
                                        # peak significance

"""
Define internal parameters
Not intended to be modified without understanding code functionalities
"""
@dataclass
class InternalConfig:
    binning_size: int = 12              # Keep it even! Size of datapoint bins
    smoothening_kernel: int = 2         # Size of Gaussian Kernel for 
                                        # smoothening lightcurve
    init_prominence: int = 10           # Prominence argument for initial peak
                                        # detection using `scipy.signal.find_peaks`
    bgest_prominence: List[int] = \
        field(default_factory = lambda: [10, 10])   # @aswhin renamed quantity for prominence in `MultiIterBg`
    fit_binning_size: int = 1
    threshold_redchisq: List[float] = field(default_factory = lambda: [5.0,30.0])    
                                        # Maximum numerical value of reduced chi square
                                        # for terminating addition of more peaks to 
                                        # flare fit function
                                        # [0] -> for single flare, [1] -> for multi flare
    threshold_rsquared: float = 0.8     # Minimum numerical value of R-squared fit
                                        # for terminating addition of more peaks to 
                                        # flare fit function
    m1: List[float] = field(default_factory = lambda: [0.01, 0.01])           # @ashwin
    m2: List[float] = field(default_factory = lambda: [0.01, 0.01])           # @ashwin
    aggression: float = 1               # Default set to 1. For more aggression to try and 
                                        # detect sub-A class flares, set < 1
                                        # Meant to be equal to least count of detector

"""
Flare class definition
Used in creation of object instance of a flare in a lightcurve
Inputs:
    1. Guess for initial fit
    2. Flare timespan
    3. Flare counts in timespan
    4. Flare count errors in timespace
    5. Flare background (either linear or constant array (not a scalar))
        estimate from curve fitting
Inputs (optional):
    6. Multi flare region flag @devansh
    7. Multi flare flag @devansh
    8. Internal config file to be used
Outputs: 
    None
"""
@dataclass
class Flare:
    fit: FitVar                 # @devansh
    snr: float                  # Flare SNR
    complex_region: int
    multi_flare_region: bool    # @devansh
    multi_flare_flag: bool      # @devansh
    iconfig: InternalConfig = field(repr=False) # Importing internal config values

    flare_class_with_bg: str = field(init=False)
    flare_class_without_bg: str = field(init=False)

    complex_flag: bool = False

    """
    Flare class init function
    """
    def __init__(self, fit: FitVar, time: npt.NDArray[np.int32], counts: npt.NDArray[np.float64], cnterror: npt.NDArray[np.float64], background: npt.NDArray[np.float64], multi_flare_region: bool = False, multi_flare_flag: bool = False, complex_region: int = -1, iconfig: InternalConfig = InternalConfig()) -> None:
        self.fit = fit
        self.time = time
        self.counts = counts
        self.cnterror = cnterror
        self.background = background        

        self.multi_flare_region = multi_flare_region
        self.multi_flare_flag = multi_flare_flag
        self.complex_region = complex_region
        self.iconfig = iconfig
        self.date = globdate

        # Array indices of start and end times
        idx_start = np.where(self.time == self.fit.post_fit_start_time)[0][0]
        idx_end = np.where(self.time == self.fit.post_fit_end_time)[0][0]

        # Calculate flare SNR
        self.snr = get_snr(self.counts[idx_start:idx_end], self.background[idx_start:idx_end])
        self.snr = np.nansum(self.snr)

        # Calculate flare class (using fitted curve, not original data points)
        self.flare_class_with_bg = get_flare_class(self.fit.post_fit_peak_count + np.mean(self.background)) # Flare object comes in with background separated
        self.flare_class_without_bg = get_flare_class(self.fit.post_fit_peak_count)

    """
    @devansh
    """
    def dump(self) -> Dict[str, np.int32 | np.float64 | str | bool]:
        data = {
            'date': globdate,

            'post_fit_start_time': self.fit.post_fit_start_time,
            'post_fit_start_count': self.fit.post_fit_start_count.round(2),
            'post_fit_peak_time': self.fit.post_fit_peak_time,
            'post_fit_peak_count': self.fit.post_fit_peak_count.round(2),
            'post_fit_end_time': self.fit.post_fit_end_time,
            'post_fit_end_count': self.fit.post_fit_end_count.round(2),

            'complex_region': self.complex_region,  # len(idx) from find_peaks with prom=0.05
            'active_region': self.complex_flag,
            'multi_flare_region_flag': self.multi_flare_region, # Is it a multi flare region?
            'multi_flare_unresolved_flag': self.multi_flare_flag, # Did flare pass r-squared and redchisq requirements?
            'flare_class_with_bg': self.flare_class_with_bg,
            'flare_class_without_bg': self.flare_class_without_bg,
            'mean_background': np.mean(self.background).round(2),
            
            'flare amplitude': self.fit.A.round(2),
            'fit_param_Aprime': self.fit.pfp.Aprime.round(2), # A = A' * sigma / tau * sqrt(pi / 2)
            'fit_param_mu': self.fit.pfp.mu.round(2),
            'fit_param_sigma': self.fit.pfp.sigma.round(2),
            'fit_param_tau': self.fit.pfp.tau.round(2),
            'redchisq': self.fit.redchisq.round(3), 
            'rsquared': self.fit.rsquared.round(3),
            'snr': self.snr.round(2),

            'pre_fit_start_time': self.fit.pre_fit_start_time,
            'pre_fit_start_count': self.fit.pre_fit_start_count.round(2),
            'pre_fit_peak_time': self.fit.pre_fit_peak_time,
            'pre_fit_peak_count': self.fit.pre_fit_peak_count.round(2),
            'pre_fit_end_time': self.fit.pre_fit_end_time,
            'pre_fit_end_count': self.fit.pre_fit_end_count.round(2),

            'flag': self.fit.flag, # What is this @devansh?
        }
        return data

"""
Region class definition (Paper refers to 'regions' as 'flaring durations')
Code uses 'region' throughout to be self-consistent. May be updated in 
    future revisions
Used in creation of object instance of a region in a lightcurve
Inputs:
    1. Region timespan
    2. Region counts in timespan
    3. Region count errors in timespace
    4. Region background (constant scalar (note the difference between 
        flare and region background))
Input (optional):
    5. Internal config file to be used
Outputs: 
    None
"""
@dataclass
class Region:
    iconfig: InternalConfig = field(repr=False)
    background: float

    time: npt.NDArray[np.int32] = field(repr=False)
    counts: npt.NDArray[np.float64] = field(repr=False)

    nonancounts: npt.NDArray[np.float64] = field(repr=False, init=False)

    start_time: int = field(init=False)
    peak_time: int = field(init=False)
    end_time: int = field(init=False)
    snr: float = field(init=False)
    numflares: int = 0

    complex_flag: bool = False

    flares: List[Flare] = field(init=False)

    def __init__(self, time: npt.NDArray[np.int32], counts: npt.NDArray[np.float64], cnterror: npt.NDArray[np.float64], complex_flag: bool = False, iconfig: InternalConfig = InternalConfig()) -> None:
        self.iconfig = iconfig
        self.time = bin(time, self.iconfig.fit_binning_size)
        self.counts = bin(counts, self.iconfig.fit_binning_size)
        self.cnterror = bin(cnterror, self.iconfig.fit_binning_size)
        self.complex_flag = complex_flag
        self.date = globdate

        # Preprocessing the region has been overriden in favour of 
        # mathematically accurate curve-fitting
        # TODO: Region codes will be updated to remove all self.ppcounts (now self.nonancounts)
        # Using np.array to copy by value and not copy by reference
        self.nonancounts = np.array(self.counts)

        try:
            self.start_time = time[0]   # Region start time
            self.peak_time = self.time[np.argmax(self.counts)]  # Region peak time
            self.end_time = time[-1]    # Region end time
        except IndexError:
            self.start_time = -1
            self.peak_time = -1
            self.end_time = -1
            print('Region initialisation has an IndexError')
            return
        
        # Function to remove nan values in region counts
        nans, tmp_x = nan_helper(self.nonancounts)
        if (np.sum(~nans) <= 2):
            print('Region has too few non-NaNs')
            return
        self.nonancounts[nans] = np.interp(tmp_x(nans), tmp_x(~nans), self.nonancounts[~nans])    # TODO: This is where nans are linearly interpolated
        self.wherenans = nans
        # Multiflare fitting and flare list creation
        self.flares = self.FlareDecomposition()
        if (complex_flag == True):
            for flare in self.flares:
                flare.complex_flag = True

        # Extract background from lmfit
        # And calculate SNR
        if self.fit_result != None:
            self.background = (self.fit_result.eval_components())['bg_']
            self.snr = get_snr((self.nonancounts - self.background)[~self.wherenans], self.background[~self.wherenans])    # Region SNR
        else:
            self.background = np.mean(self.nonancounts)
            self.snr = get_snr(self.nonancounts[~self.wherenans] - self.background, self.background)    # Region SNR
        
        self.snr = np.nansum(self.snr)

    """
    Function to add extra peaks based on heuristics
    (Maybe put it under `utils.py`)
    Inputs:
        1. Flare prefix for unique identification
        2. Initial amplitude estimate
        3. Initial peak time estimate
    Inputs (optional):
        4. Initial flare width estimate
        5. Initial flare decay constant estimate
    Outputs:
        1. `lmfit` Model associated with ExpGaus fitting
        2. Initial parameter values encapsulated within `lmfit` Model
    """
    def add_peak(self, prefix, A, mu, sigma=20.0, tau=128.0):
        p = Model(lmfit_exp_gaus_single, prefix=prefix)
        pars = p.make_params()
        pars[prefix + 'Aprime'].set(A, min=0)
        pars[prefix + 'mu'].set(value=mu, vary=True, min=mu-600, max=mu+300)
        pars[prefix + 'sigma'].set(sigma, min=0)
        pars[prefix + 'tau'].set(tau, min=0) # 28.05.23 Removed tau max value of 600. I think strongly overlapping flares are a problem
                                             # My fault that I don't remember why I added it in the first place
        return p, pars

    """
    Multiflare fitting using `lmfit` and some heuristics
    Inputs: 
        None
    Output: 
        1. List of flare objects
    """
    def FlareDecomposition(self) -> List[Flare]:
        flares = []     # Initialise empty list of flare objects

        # Find and sort 'significant' peaks within region by prominence
        # Significance is thresholded by `prominence` argument
        peaks, prom = find_peaks(self.nonancounts / np.max(self.nonancounts),
                                 prominence=0.05, distance=10)      # This prominence is another parameter that can be toggled to 
                                                                    # make the code more aggressive TODO
        idx = peaks[np.argsort(-prom['prominences'])]               # Negate prominences to make argsort perform descending sort

        # Initialise `lmfit` Model with (effectively) constant background (See 3 lines below for details)
        bg = LinearModel(prefix='bg_')
        # params = bg.guess(self.counts, x=self.time) # (Guesses and) Creates Model parameters so that they can be user-initialised
        params = bg.make_params()
        params['bg_intercept'].set(np.mean(self.nonancounts), min=0)
        params['bg_slope'].set(value=0, vary=False) # Background model is effectively a constant as slope parameter is (currently) 
                                                    # not allowed to vary

        model = bg      # Initialising region fit model with the background model
        result = None   # Initialising lmfit curve-fitting result to None (to workaround edge case: failure to detect any flares)

        # Loop to add heuristically-determined #flares within region
        # Maximum number of flares = 4; (i = 0,1,2,3)
        for i, index in enumerate(idx):
            prev_result = result
            p, pars = self.add_peak(prefix=f'p{i+1}_', A=self.nonancounts[index], mu=self.time[index])
            params.update(pars)
            model_tmp = model + p # Adding flare to (constant) background model

            # Attempt to fit current model (with (i+1) #flares) to region
            try:
                result = model_tmp.fit(self.nonancounts, params, x=self.time)
            # If fit fails, exception handling
            except ValueError:
                # If it fails at first peak
                if i == 0:
                    print("First peak causing issues. Modifying sigma guess")
                    # This larger sigma value helps in cases where big flares are close to each other such that a single flare fit
                    # is a bad choice. The ideal "start" condition itself would be a double flare fit, hence we try to 
                    # encapsulate both flares into one larger envelope instead of modifying the initial 'numflares' guess
                    p, pars = self.add_peak(prefix=f'p{i+1}_', A=self.nonancounts[index], mu=self.time[index], sigma=300.0)
                    params.update(pars)
                    # Attempt peak fit with modified sigma value
                    try:
                        result = model_tmp.fit(self.nonancounts, params, x=self.time) 
                        continue # If modified sigma guess works, continue instead of break
                        # TODO: this should not continue and go back to top of loop. It should ideally go through the statements
                        # after the `except ValueError` block
                    # If fit fails again, break
                    except ValueError:
                        print("Modifying sigma guess is not enough")
                # If fit fails for second or more peaks, break with i #flares
                else:
                    print("Extra peak causing issues")
                    # result = prev_result        # Works around a subset of situations when new peak fit parameters are initialised
                                                # but the fitting process fails. TODO: There are still other situations this may 
                                                # happen in
                break
            
            # TODO: Brainstorm ideal add_peaks termination heuristics
            # If resulting amplitude is 'too small', break from loop 1. to stop adding more peaks 
            # 2. before assigning model_tmp (Updated #flares) to model (from previous loop iteration)
            # if result.values[f'p{i+1}_Aprime'] * result.values[f'p{i+1}_sigma'] / result.values[f'p{i+1}_tau'] * np.sqrt(np.pi/2) < 3:#np.nanmax(self.cnterror[index-5 : index+5]): 
                                            # TODO: Change amplitude threshold to something based on cnterror (still consult Abhilash)
            if (result.values[f'p{i+1}_Aprime'] < 2.0) or (result.values[f'p{i+1}_tau'] > 1e7):# or \
                # (result.params[f'p{i+1}_sigma'].value * 1000 < result.params[f'p{i+1}_sigma'].stderr): # self.cnterror[np.round(, -1)]
                if (i == 0) and (result.values[f'p{i+1}_Aprime'] > 0.1) and (result.values[f'p{i+1}_tau'] < 1e7):
                    print(f"Not removing small amplitude but single peak. Amp: {result.values[f'p{i+1}_Aprime'].round(2)}")
                    model = model_tmp   # TODO: something is going wrong here. recheck logic
                    break
                print(f"Removing small amplitude peak. Amp: {result.values[f'p{i+1}_Aprime'].round(2)}, tau: {result.values[f'p{i+1}_tau'].round(2)}")
                result = prev_result
                break
            model = model_tmp
            print(i+1)
            # If #flares exceeds 4 or (reduced chi square *and* r-squared thresholds are satisfied), 
            # or (#flares is 1 and 'short' highest count value), break from loop
            # to stop adding more peaks
            if i > 2:
                print("Exited after adding 4 peaks")
                break
            elif i == 0:
                if (np.max(self.nonancounts) < 10):  # i.e., don't try to decompose further
                    print("Short region. Exited after adding 1 peak")
                    break
                if (result.redchi < self.iconfig.threshold_redchisq[0] and result.rsquared > self.iconfig.threshold_rsquared):
                    print(f"Exited as first redchisq threshold is satisfied. Redchisq: {result.redchi}")
                    break
            else:
                if (result.redchi < self.iconfig.threshold_redchisq[1] and result.rsquared > self.iconfig.threshold_rsquared):
                    print(f"Exited as second redchisq threshold is satisfied. Redchisq: {result.redchi}")
                    break

        # If fit_result is None, then no peaks have been added to model!
        if result != None:
            # Attempt linear background model fit
            try:
                params['bg_slope'].set(value=0, vary=True)
                result_tmp = model.fit(self.nonancounts, params, x=self.time)
                if result_tmp.rsquared > result.rsquared and result_tmp.values['bg_slope'] > 1e-4: # If linear BG model fit is both better and has a 'significant' slope
                                                                                                   # TODO: Find appropriate slope significance value
                    result = result_tmp
            # If linear background fit fails, revert to using constant background model
            except ValueError:
                print("Linear background causing issues")

        complex_region = len(idx)
        # Flags variables @devansh (verify if this is correct)
        multi_flare_region = False      # is it a multiflare region?
        multi_flare_flag = False        # are all flares properly decomposed (i.e., do they meet redchisq and rsquared thresholds)

        # Save fit_result to instance of Region class
        self.fit_result = result        # TODO: Correct result is being added to self.fit_result. Sometimes, excess peak variables (which were probably removed) are still present
        # Calculate number of flares in given region
        self.numflares = len(model.components) - 1

        # Equivalent to checking if fit_result is None
        if self.numflares < 1:
            # assert False, "No flares found in region"
            print(f"No flares found in region {self.time[0]} using lmfit")    # Update: If no flares are found, it is mostly because
                                                                                # the amplitude threshold was not satisfied. The flare
                                                                                # fit using lmfit itself typically passes, so there is
                                                                                # no need to try out scipy.optimize.curve_fit
            return flares
        elif self.numflares > 1:
            multi_flare_region = True   # Set multiflare region flag
        if not(result.redchi < self.iconfig.threshold_redchisq[1] and result.rsquared > self.iconfig.threshold_rsquared):
            multi_flare_flag = True     # Set multiflare (properly fitted) flag

        # Populate the flares list
        # i is for indexing individual flares (having its own expgaus fit) in the region 
        # i == 0 corresponds to background
        comps = result.eval_components()
        for i, (name, comp) in enumerate(comps.items()):
            # Skip the background model
            if (i == 0):
                continue
            # TODO: The following two lines only exist to make the code backcompatible. We will remove perfitparam in later commit
            popt = np.array([result.values[name+'Aprime'], result.values[name+'mu'], result.values[name+'sigma'], result.values[name+'tau']])
            var: FitVar = FitVar(True, popt, self.time[~self.wherenans], (self.nonancounts - result.best_fit + comp)[~self.wherenans], self.cnterror[~self.wherenans])
            flares.append(Flare(fit=var, time=self.time[~self.wherenans], counts=(self.nonancounts - result.best_fit + comp)[~self.wherenans], 
                                cnterror=self.cnterror[~self.wherenans], background=comps['bg_'], multi_flare_region=multi_flare_region, 
                                multi_flare_flag=multi_flare_flag, complex_region=complex_region, iconfig=self.iconfig))
        return flares


"""
Lightcurve class definition (Paper refers to 'fmarks' as 'flaring sectors')
Used in creation of object instance of a region in a lightcurve
Input:
    1. Config file to be used
Input (optional):
    2. Internal config file to be used
Outputs: 
    None
    (Creates a lightcurve object (Use <object_name>.`tab` to see all features / attributes))
"""
@dataclass
class LightCurve:

    config: Config
    iconfig: InternalConfig = field(repr=False)

    time: npt.NDArray[np.int32] = field(repr=False, init=False)
    counts: npt.NDArray[np.float64] = field(repr=False, init=False)

    pptime: npt.NDArray[np.int32] = field(repr=False, init=False)     # Preprocessed time after binning
    ppcounts: npt.NDArray[np.float64] = field(repr=False, init=False) # Preprocessed time after binning followed by smoothening

    scpeaks: npt.NDArray[np.int32] = field(repr=False, init=False)    # Preliminary 'flaring sector' peaks identified by scipy.
                                                                      # signal.find_peaks with prominence=config.init_prominence

    regions: List[Region] = field(init=False)   # List of regions in lightcurve
    llist: List[List[int]] = field(init=False)  # List of lists of merged region pptime indices

    numflares: int = 0  # Number of solar flares in lightcurve

    def __init__(self, config: Config, iconfig: InternalConfig = InternalConfig()) -> None:
        # @ashwin Ensure that bgest_prominence, m1 and m2 lists are of the same length as super_smoothening_kernel
        self.config = config
        self.iconfig = iconfig
        global globdate
        globdate = self.config.date

        # Verify correctness of config and iconfig
        # isSetupCorrect(self.config, self.iconfig)

        # Inputs light curve data corresponding to given date
        self.time, self.counts, self.cnterror, self.added_days, self.len_added_days = date_to_data(config.date, config.gen)

        # Preprocessing input data
        self.pptime = bin(self.time, self.iconfig.binning_size)
        self.ppcounts = smoothen(bin(self.counts, self.iconfig.binning_size), self.iconfig.smoothening_kernel).astype(np.float64)
        self.ppcnterror = bin(self.cnterror, self.iconfig.binning_size)

        print(f'Lightcurve {self.config.date}')

        # If there is a large contiguous section of NaNs (data-taking gaps),
        # this function identifies and fixes them
        self.IdGaps()

        self.scpeaks, _ = find_peaks(self.ppcounts, prominence=self.iconfig.init_prominence) # Find preliminary peaks
        self.MultiIterBg()  # Perform iterative background estimation
        self.UnstitchLc()    # Undo light curve stitching (but retain intended background estimate)
        self.scpeaks, _ = find_peaks(self.ppcounts, prominence=self.iconfig.init_prominence) # Calling this once again to ensure that peaks 
                                                                                             # are only within current date
        self.llist = self.IdRegion()    # List of lists of merged region pptime indices
        
        # Calculate total number of flares in lightcurve
        # TODO: Can add lightcurve related properties like max flare SNR in lc,...
        for i in range(len(self.regions)):
            self.numflares += self.regions[i].numflares

    # Gap identifier: https://stackoverflow.com/questions/2154249/identify-groups-of-consecutive-numbers-in-a-list
    def IdGaps(self):
        idx = np.where(np.isnan(self.counts))[0]
        ranges = []
        bin_size = self.iconfig.binning_size
        min_range = len(np.array(Gaussian1DKernel(self.iconfig.smoothening_kernel))) * bin_size

        for k,g in groupby(enumerate(idx),lambda x:x[0]-x[1]):
            group = (map(itemgetter(1),g))
            group = list(map(int,group))
            
            if group[-1] - group[0] > min_range:
                ranges.append((group[0],group[-1]+1))
        
        if ranges == [] or \
            np.all(np.isnan(self.counts[ranges[0][0] : ranges[0][-1]])) or \
            np.abs(ranges[0][-1] - ranges[0][0] - len(np.where(np.isnan(self.counts))[0])) <= 1 :
            return

        ranges = np.array(ranges) // 12
        # Linearly interpolate big data taking gaps
        for i in range(len(ranges)):
            sub = 1
            nans, tmp_x = nan_helper(self.ppcounts[ranges[i][0] - sub : ranges[i][-1] + sub])
            (self.ppcounts[ranges[i][0] - sub : ranges[i][-1] + sub])[nans] = np.interp(tmp_x(nans), tmp_x(~nans), \
                (self.ppcounts[ranges[i][0] - sub : ranges[i][-1] + sub])[~nans])
        return

    # NOAA-based lightcurve region finder
    # @ashwin (replicate input/output comments similar to other function definitions)
    """
    <FourPeaks one-line description>
    Inputs:
        1. time
        2. counts
        3. peaks
    Inputs (Optional):
        1. m1
        2. m2
    Outputs:
        1. pmarks
        2. fmarks
    """
    def FourPeaks(self, time, counts, peaks, m1=0.003, m2=0.003):
        pmarks = np.array([], dtype=int)  # range
        fmarks = []  # start and end tuples
        for peak in peaks:
            pos = peak
            while ((pos > 2) and counts[pos] > counts[pos - 1] > counts[pos - 2] and (counts[pos] - counts[pos - 2]) / (time[pos] - time[pos - 2]) > m1):
                pos -= 1
            st_pos = pos - 2  # -2 is part of algorithm
            pos = peak
            while ((pos < len(counts) - 3) and counts[pos + 2] < counts[pos + 1] < counts[pos] and (counts[pos] - counts[pos + 2]) / (time[pos + 2] - time[pos]) > m2):
                pos += 1
            end_pos = pos + 2
            pmarks = np.append(pmarks, np.arange(st_pos, end_pos))
            fmarks.append([st_pos, end_pos])
        return pmarks, np.array(fmarks)

    # Lightcurve background finder
    # @ashwin (replicate input/output comments similar to other function definitions)
    """
    <FindBg one-line description>
    Inputs:
        1. time
        2. counts
        3. peaks
        4. iteration_num
        5. sskernel
    Outputs:
        1. bgtime
        2. bgcounts
        3. nobgcounts
    """
    def FindBg(self, time: npt.NDArray[np.int32], counts: npt.NDArray[np.float64], peaks: npt.NDArray[np.int32], iteration_num : int, sskernel: int) -> Tuple[npt.NDArray[np.int32], npt.NDArray[np.float64], npt.NDArray[np.float64]]:
        # Identify Ranges (Flare Durations) to prepare for background estimation
        index, fmarks = self.FourPeaks(time, counts, peaks, m1=self.iconfig.m1[iteration_num], m2 = self.iconfig.m2[iteration_num])
        #ASHWIN COMMENT - Slope thresholds are varying with iteration number.. the exact function should be experimented with.
        index = merge_ranges(index, fmarks, time, self.config.threshold_mergepeaks)

        # Remove Ranges (Flare Durations) to proceed to background estimation
        # Large flares will act as contaminants to background estimation
        # Mark `index` elements in time and counts as np.nan so that they can be interpolated
        noregiontime, noregioncounts = nan_ranges(index, time, counts)

        # Setting bgtime as np.nan(sskernel//2)__remove_ranges__np.nan(-sskernel//2)
        # Using np.array to copy by value and not copy by reference
        bgtime = np.array(noregiontime)
        bgcounts = np.zeros(len(bgtime))
        bgcounts[sskernel:-sskernel] = smoothen(noregioncounts, sskernel)[sskernel:-sskernel]
        
        bgcounts[:2*sskernel] = np.nanmean(noregioncounts[:2*sskernel])
        bgcounts[-2*sskernel:] = np.nanmean(noregioncounts[-2*sskernel - 1])

        if len(np.where(bgcounts > 0)[0]) == 0:
            raise Exception(f'No Background Detected on {self.config.date}')
        # Interpolating Removed Ranges (Flare Durations) 
        # Also after this step, bgcounts and nobgcounts will have no more NaNs
        ## Turns out, bgcountsinterp + bgest and modified bgcounts give the exact same result
        bgcountsinterp = interp.interp1d(np.arange(bgcounts.size), bgcounts)
        bgest = bgcountsinterp(np.linspace(0, bgcounts.size - 1, self.ppcounts.size))
        nans, tmp_x = nan_helper(bgcounts)
        bgcounts[nans] = np.interp(tmp_x(nans), tmp_x(~nans), bgcounts[~nans])

        # Subtracting interpolated background counts from preprocessed counts
        nobgcounts = self.ppcounts - bgcounts
        nobgcounts[np.where(nobgcounts < self.iconfig.aggression)] = 0
        # TODO: To make the detection rate more aggressive, can set < 1 to < 0.1.
        # Right now the idea stems from Poissonian error associated with 1 count is 1
        
        return bgtime, bgcounts, nobgcounts

    # @ashwin (replicate input/output comments similar to other function definitions)
    """
    <MultiIterBg one-line description>
    Inputs:
        None
    Outputs:
        None
    """
    def MultiIterBg(self) -> None:
        # Using np.array to copy by value and not copy by reference
        time = np.array(self.pptime)
        counts = np.array(self.ppcounts)
        peaks = np.array(self.scpeaks)
        bgtime = np.array([])
        bgcounts = np.array([])
        nobgcounts = np.array([])

        for iteration_num, sskernel in enumerate(self.config.super_smoothening_kernel):
            bgtime, bgcounts, nobgcounts = self.FindBg(time, counts, peaks, iteration_num ,sskernel=sskernel)
            # Next two lines are required for paper_plots.ipynb
            if iteration_num == 0:
                self.tmp_bgcounts = np.array(bgcounts)
            time = bgtime
            counts = bgcounts
            # Updated with enumerate
            peaks, _ = find_peaks(counts, prominence=self.iconfig.init_prominence) #ASHWIN COMMENT - Over iterations, prominence parameter for find peaks is increasing
                                                                                    # TODO: Why?

        # Note: nobgcounts has the same number of data points as pptime
        self.bgtime = bgtime
        self.bgcounts = bgcounts
        self.nobgcounts = nobgcounts

    ## ASHWIN FUNCTION to reduce the day to a single day after the bg estimation process.
    # @ashwin (replicate input/output comments similar to other function definitions)
    """
    <reduce_len_to_one_day one-line description>
    Inputs:
        None
    Outputs:
        None
    """
    def UnstitchLc(self) -> None:
        allowed_indices_og = np.where((self.time >= 0) & (self.time < 86400))
        allowed_indices_pp = np.where((self.pptime >= 0) & (self.pptime < 86400))
        self.time = self.time[allowed_indices_og]
        self.counts = self.counts[allowed_indices_og]
        self.cnterror = self.cnterror[allowed_indices_og]

        # scpeaks are not adapted according to one day. Recalculate the 
        # peaks if they need to be used

        self.pptime = self.pptime[allowed_indices_pp]
        self.ppcounts = self.ppcounts[allowed_indices_pp]
        self.ppcnterror = self.ppcnterror[allowed_indices_pp]

        self.bgcounts = self.bgcounts[allowed_indices_pp]
        self.bgtime = self.bgtime[allowed_indices_pp]
        self.nobgcounts = self.nobgcounts[allowed_indices_pp]

    """
    Find contiguous lists of indices (after walk down) of regions present in light curve
    Inputs:
        None
    Output:
        1. List of lists of merged region pptime indices
    """
    def IdRegion(self) -> List[List[int]]:
        sigma = float(np.std(self.bgcounts))
        index = np.where(self.nobgcounts > max(self.config.nsigma * sigma, self.iconfig.aggression))  # Calculating pptime indices which exceed nsigma*sigma threshold
        # TODO: To make the detection more aggressive, can set max of threshold and 0.1. 
        # Right now the idea stems from Poissonian error associated with 1 count is 1
        llist = []      # List of lists of region indices
        slist = []      # temporary list to store list of contiguous numbers in variable `index`
        self.regions = []
        i = 1
        while (i < np.size(index)):
            if (index[0][i] == index[0][i - 1] + 1) and (i != np.size(index) - 1):
                slist.append(index[0][i - 1])
            else:
                if len(slist) > 0:
                    left = slist[0]     # start pptime index where current potential region exceeds nsigma*sigma threshold
                    right = slist[-1]   # end pptime endex where current potential region exceeds nsigma*sigma threshold
                    flag = 0            # Number of upturns so far in the walkdown (variable is reset to zero after left walkdown)
                    # Performing left walk down
                    for j in range(left, 1, -1):
                        if (self.ppcounts[j] < self.ppcounts[j + 1]):   # If ppcounts is continuously increasing (as we go leftwards), 
                                                                        # keep adding index to slist
                            if (self.ppcounts[j] <= self.iconfig.aggression):     # If ppcounts touches base level, break from walk down
                                break
                            slist.append(j)
                        else:                               # Upturn detected
                            flag += 1                       # Increment number of upturns variable
                            if flag == self.config.threshold_upturns:   # Break if number of upturns exceeds upturns threshold
                                flag = 0
                                break
                    # Performing right walk down
                    for j in range(right, np.size(self.pptime) - 1, 1): # If ppcounts is continuously increasing (as we go rightwards), 
                                                                        # keep adding index to slist
                        if (self.ppcounts[j] > self.ppcounts[j + 1]):
                            if (self.ppcounts[j] <= self.iconfig.aggression):     # If ppcounts touches base level, break from walk down
                                break
                            slist.append(j)
                        else:                               # Upturn detected
                            flag += 1                       # Increment number of upturns variable
                            if flag == self.config.threshold_upturns:   # Break if number of upturns exceeds upturns threshold
                                break
                    slist.sort()                            # Because we used slist.append earlier, we need to sort the list of indices
                    left = slist[0]     # update the start pptime index to reflect start time of candidate region
                    right = slist[-1]   # update the end pptime index to reflect end time of candidate region
                    if (self.pptime[right] - self.pptime[left]) > self.config.threshold_duration:   # Does duration of candidate region exceed duration threshold?
                                                                                                    # If not, discard slist
                        if len(self.regions) != 0:
                            if llist[-1][-1] in slist:      # If last element of slist of previous candidate lies in current slist
                                                            # then merge the previous slist with current slist and pop the 
                                                            # (previous?) candidate region # TODO: Is it popping current or previous?
                                llist[-1].extend([*set(slist)]) # Merge with previous slist
                                self.regions.pop()          # Pop double counted candidate
                                pptstartindex = llist[-1][0]
                                pptendindex = llist[-1][-1]
                            else:
                                llist.append([*set(slist)]) # Same as previous except that a new candidate region is added llist
                                pptstartindex = slist[0]
                                pptendindex = slist[-1]
                        else:
                            llist.append([*set(slist)])     # Same as previous else clause. TODO: merge the two else blocks through a different if condition (if possible)
                            pptstartindex = slist[0]
                            pptendindex = slist[-1]
                    
                        tstartindex = np.argmin(np.abs(self.pptime[pptstartindex] - self.time))  # Find start lc.time closest to start lc.pptime
                        tendindex = np.argmin(np.abs(self.pptime[pptendindex] - self.time)) + 1   # Find end lc.time closest to end lc.pptime

                        print(f'\nCandidate Region tstart = {self.time[tstartindex]}s')
                        # Create region object
                        if (self.time[tendindex] - self.time[tstartindex] > 14400):#or ((np.max(self.counts) > 1e4) and (self.time[tendindex] - 
                                                                                   #                                     self.time[tstartindex] > 7200)):
                            print(f"We are in this {self.time[tendindex] - self.time[tstartindex]}")
                            localminima = argrelmin(self.ppcounts[pptstartindex:pptendindex])[0]   # Find indices of local minima in long region
                            localminima = (pptstartindex + localminima) * 12 + 5          # Convert (cropped) pptime indices to time indices
                            print(f'Length of localminima {len(localminima)}')

                            if len(localminima) != 0:
                                #################################################
                                # Append new Region objects to self.regions lists
                                #################################################
                                self.regions.append(Region(time=self.time[tstartindex:localminima[0]],
                                                        counts=self.counts[tstartindex:localminima[0]],
                                                        cnterror=self.cnterror[tstartindex:localminima[0]],
                                                        complex_flag=True,
                                                        iconfig=self.iconfig
                                                        ))
                                # If new region has no flares, delete it
                                if (self.regions[-1].numflares == 0):
                                    print(f'No flares detected at t={int(self.regions[-1].start_time)}, hence, removed')
                                    self.regions.pop()
                                    ## Don't pop llist
                                # If new region has very low SNR, delete it. Update: 11.6.23 Don't delete it for now. nanmedian(get_snr()) may delete good flare like in 20200311
                                elif (self.regions[-1].snr < self.config.threshold_snr_region):
                                    print(f'Low SNR region at t={int(self.regions[-1].start_time)}. SNR: {self.regions[-1].snr.round(2)}')
                                ###
                                for lmiter in range(len(localminima) - 1):
                                    self.regions.append(Region(time=self.time[localminima[lmiter]:localminima[lmiter+1]],
                                                                counts=self.counts[localminima[lmiter]:localminima[lmiter+1]],
                                                                cnterror=self.cnterror[localminima[lmiter]:localminima[lmiter+1]],
                                                                complex_flag=True,
                                                                iconfig=self.iconfig
                                                                ))
                                    # If new region has no flares, delete it
                                    if (self.regions[-1].numflares == 0):
                                        print(f'No flares detected at t={int(self.regions[-1].start_time)}, hence, removed')
                                        self.regions.pop()
                                        ## Don't pop llist
                                    # If new region has very low SNR, delete it. Update: 11.6.23 Don't delete it for now. nanmedian(get_snr()) may delete good flare like in 20200311
                                    elif (self.regions[-1].snr < self.config.threshold_snr_region):
                                        print(f'Low SNR region at t={int(self.regions[-1].start_time)}. SNR: {self.regions[-1].snr.round(2)}')
                                ###
                                self.regions.append(Region(time=self.time[localminima[-1]:tendindex],
                                                        counts=self.counts[localminima[-1]:tendindex],
                                                        cnterror=self.cnterror[localminima[-1]:tendindex],
                                                        complex_flag=True,
                                                        iconfig=self.iconfig
                                                        ))
                                # If new region has no flares, delete it
                                if (self.regions[-1].numflares == 0):
                                    print(f'No flares detected at t={int(self.regions[-1].start_time)}, hence, removed')
                                    self.regions.pop()
                                    ## Don't pop llist
                                # If new region has very low SNR, delete it. Update: 11.6.23 Don't delete it for now. nanmedian(get_snr()) may delete good flare like in 20200311
                                elif (self.regions[-1].snr < self.config.threshold_snr_region):
                                    print(f'Low SNR region at t={int(self.regions[-1].start_time)}. SNR: {self.regions[-1].snr.round(2)}')
                                ################################################
                            else:
                                self.regions.append(Region(time=self.time[tstartindex:tendindex],
                                                        counts=self.counts[tstartindex:tendindex],
                                                        cnterror=self.cnterror[tstartindex:tendindex],
                                                        iconfig=self.iconfig
                                                        ))
                                # If new region has no flares, delete it
                                if (self.regions[-1].numflares == 0):
                                    print(f'No flares detected at t={int(self.regions[-1].start_time)}, hence, removed')
                                    self.regions.pop()
                                    ## Don't pop llist
                                # If new region has very low SNR, delete it. Update: 11.6.23 Don't delete it for now. nanmedian(get_snr()) may delete good flare like in 20200311
                                elif (self.regions[-1].snr < self.config.threshold_snr_region):
                                    print(f'Low SNR region at t={int(self.regions[-1].start_time)}. SNR: {self.regions[-1].snr.round(2)}')

                        else:
                            self.regions.append(Region(time=self.time[tstartindex:tendindex],
                                                    counts=self.counts[tstartindex:tendindex],
                                                    cnterror=self.cnterror[tstartindex:tendindex],
                                                    iconfig=self.iconfig
                                                    ))
                            # If new region has no flares, delete it
                            if (self.regions[-1].numflares == 0):
                                print(f'No flares detected at t={int(self.regions[-1].start_time)}, hence, removed')
                                self.regions.pop()
                                llist.pop(-1)
                            # If new region has very low SNR, delete it. Update: 11.6.23 Don't delete it for now. nanmedian(get_snr()) may delete good flare like in 20200311
                            elif (self.regions[-1].snr < self.config.threshold_snr_region):
                                print(f'Low SNR region at t={int(self.regions[-1].start_time)}. SNR: {self.regions[-1].snr.round(2)}')
                slist = []              # Reset slist to prepare to evaluate next candidate region
            i += 1
            # Go back and check while loop condition
        return llist

    def dump(self) -> List[Dict[str, np.int32 | np.float64 | str | bool]]:
        data = []
        for region in self.regions:
            for flare in region.flares:
                data.append(flare.dump())
        return data