From 1b836dedc5da1dbbc7987c9674c0f5cc4d087e73 Mon Sep 17 00:00:00 2001
From: Sharon Fitzpatrick
Date: Fri, 25 Oct 2024 14:35:28 -0700
Subject: [PATCH] update merge_overlapping_images

---
 src/coastsat/SDS_download.py | 303 ++++++++++++-----------------------
 1 file changed, 105 insertions(+), 198 deletions(-)

diff --git a/src/coastsat/SDS_download.py b/src/coastsat/SDS_download.py
index 0f128c7..723a88b 100644
--- a/src/coastsat/SDS_download.py
+++ b/src/coastsat/SDS_download.py
@@ -2021,8 +2021,7 @@ def filter_S2_collection(im_list):

     return im_list_flt

-
-def merge_overlapping_images(metadata, inputs):
+def merge_overlapping_images(metadata,inputs):
     """
     Merge simultaneous overlapping images that cover the area of interest.
     When the area of interest is located at the boundary between 2 images, there
@@ -2069,22 +2068,20 @@
     """
     # only for Sentinel-2 at this stage (not sure if this is needed for Landsat images)
-    sat = "S2"
-    filepath = os.path.join(inputs["filepath"], inputs["sitename"])
-    filenames = metadata[sat]["filenames"]
+    sat = 'S2'
+    filepath = os.path.join(inputs['filepath'], inputs['sitename'])
+    filenames = metadata[sat]['filenames']
     total_images = len(filenames)
-
     # nested function
     def duplicates_dict(lst):
         "return duplicates and indices"
-
         def duplicates(lst, item):
-            return [i for i, x in enumerate(lst) if x == item]
+            return [i for i, x in enumerate(lst) if x == item]
-
-        return dict((x, duplicates(lst, x)) for x in set(lst) if lst.count(x) > 1)
+        return dict((x, duplicates(lst, x)) for x in set(lst) if lst.count(x) > 1)

     # first pass on images that have the exact same timestamp
-    duplicates = duplicates_dict([_.split("_")[0] for _ in filenames])
+    duplicates = duplicates_dict([_.split('_')[0] for _ in filenames])
     # {"S2-2029-2020": [0,1,2,3]}
     # {"duplicate_filename": [indices of duplicated files]"}
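# Illustration (not from the patch): what the duplicates_dict helper above
# returns. It maps every value that occurs more than once to the indices where
# it occurs; here it is applied to the timestamp prefixes of hypothetical S2
# filenames, mirroring the first pass on exact-timestamp duplicates.
def duplicates_dict(lst):
    "return duplicates and indices"
    def duplicates(lst, item):
        return [i for i, x in enumerate(lst) if x == item]
    return dict((x, duplicates(lst, x)) for x in set(lst) if lst.count(x) > 1)

names = ['2020-05-10-18-46-01_S2_site_10m.tif',   # hypothetical filenames
         '2020-05-10-18-46-01_S2_site_10m.tif',
         '2020-06-19-18-46-05_S2_site_10m.tif']
print(duplicates_dict([n.split('_')[0] for n in names]))
# -> {'2020-05-10-18-46-01': [0, 1]}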
@@ -2097,57 +2094,31 @@ def duplicates(lst, item):
         fn_im, polygons, im_epsg = [], [], []
         for index in range(len(idx_dup)):
             # image names
-            fn_im.append(
-                [
-                    os.path.join(filepath, "S2", "10m", filenames[idx_dup[index]]),
-                    os.path.join(
-                        filepath,
-                        "S2",
-                        "20m",
-                        filenames[idx_dup[index]].replace("10m", "20m"),
-                    ),
-                    os.path.join(
-                        filepath,
-                        "S2",
-                        "60m",
-                        filenames[idx_dup[index]].replace("10m", "60m"),
-                    ),
-                    os.path.join(
-                        filepath,
-                        "S2",
-                        "meta",
-                        filenames[idx_dup[index]]
-                        .replace("_10m", "")
-                        .replace(".tif", ".txt"),
-                    ),
-                ]
-            )
-            try:
+            fn_im.append([os.path.join(filepath, 'S2', '10m', filenames[idx_dup[index]]),
+                          os.path.join(filepath, 'S2', '20m', filenames[idx_dup[index]].replace('10m','20m')),
+                          os.path.join(filepath, 'S2', '60m', filenames[idx_dup[index]].replace('10m','60m')),
+                          os.path.join(filepath, 'S2', 'meta', filenames[idx_dup[index]].replace('_10m','').replace('.tif','.txt'))])
+            try:
                 # bounding polygons
                 polygons.append(SDS_tools.get_image_bounds(fn_im[index][0]))
-                im_epsg.append(metadata[sat]["epsg"][idx_dup[index]])
+                im_epsg.append(metadata[sat]['epsg'][idx_dup[index]])
             except AttributeError:
-                print(
-                    "\n Error getting the TIF. Skipping this iteration of the loop"
-                )
+                print("\n Error getting the TIF. Skipping this iteration of the loop")
                 continue
             except FileNotFoundError:
-                print(f"\n The file {fn_im[index][0]} did not exist")
+                print(f"\n The file {fn_im[index][0]} did not exist")
                 continue
-    # check if epsg are the same, print a warning message
     if len(np.unique(im_epsg)) > 1:
-        print(
-            "WARNING: there was an error as two S2 images do not have the same epsg,"
-            + " please open an issue on Github at https://github.com/kvos/CoastSat/issues"
-            + " and include your script so I can find out what happened."
-        )
+        print('WARNING: there was an error as two S2 images do not have the same epsg,'+
+              ' please open an issue on Github at https://github.com/kvos/CoastSat/issues'+
+              ' and include your script so I can find out what happened.')
     # find which images contain other images
     contain_bools_list = []
-    for i, poly1 in enumerate(polygons):
+    for i,poly1 in enumerate(polygons):
         contain_bools = []
-        for k, poly2 in enumerate(polygons):
-            if k == i:
+        for k,poly2 in enumerate(polygons):
+            if k == i:
                 contain_bools.append(True)
                 # print('%d*: '%k+str(poly1.contains(poly2)))
             else:
@@ -2162,21 +2133,21 @@ def duplicates(lst, item):
         for i in [_ for _ in range(len(idx_dup)) if not _ == idx_keep]:
             # print('removed %s'%(fn_im[i][-1]))
             # remove the 3 .tif files + the .txt file
-            for k in range(4):
+            for k in range(4):
                 os.chmod(fn_im[i][k], 0o777)
                 os.remove(fn_im[i][k])
             total_removed_step1 += 1
     # load metadata again and update filenames
-    metadata = get_metadata(inputs)
-    filenames = metadata[sat]["filenames"]
-
+    metadata = get_metadata(inputs)
+    filenames = metadata[sat]['filenames']
+
     # find the pairs of images that are within 5 minutes of each other and merge them
-    time_delta = 5 * 60  # 5 minutes in seconds
-    dates = metadata[sat]["dates"].copy()
+    time_delta = 5*60 # 5 minutes in seconds
+    dates = metadata[sat]['dates'].copy()
     pairs = []
-    for i, date in enumerate(metadata[sat]["dates"]):
+    for i,date in enumerate(metadata[sat]['dates']):
         # dummy value so it does not match it again
-        dates[i] = pytz.utc.localize(datetime(1, 1, 1) + timedelta(days=i + 1))
+        dates[i] = pytz.utc.localize(datetime(1,1,1) + timedelta(days=i+1))
         # calculate time difference
         time_diff = np.array([np.abs((date - _).total_seconds()) for _ in dates])
         # find the matching times and add to pairs list
@@ -2185,14 +2156,14 @@ def duplicates(lst, item):
             continue
         else:
             idx_dup = np.where(boolvec)[0][0]
-            pairs.append([i, idx_dup])
-    total_merged_step2 = len(pairs)
+            pairs.append([i,idx_dup])
+    total_merged_step2 = len(pairs)
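# Illustration (not from the patch): the 5-minute pairing step above on
# hypothetical timestamps. Each visited date is overwritten with a dummy value
# so an image never matches itself and is not matched again as a pair's
# second element.
from datetime import datetime, timedelta
import numpy as np
import pytz

dates = [pytz.utc.localize(datetime(2020, 5, 10, 18, 46, 1)),
         pytz.utc.localize(datetime(2020, 5, 10, 18, 48, 30)),  # 149 s after the first
         pytz.utc.localize(datetime(2020, 6, 19, 18, 46, 5))]
time_delta = 5 * 60  # 5 minutes in seconds
remaining = dates.copy()
pairs = []
for i, date in enumerate(dates):
    remaining[i] = pytz.utc.localize(datetime(1, 1, 1) + timedelta(days=i + 1))
    time_diff = np.array([np.abs((date - d).total_seconds()) for d in remaining])
    boolvec = time_diff <= time_delta
    if np.sum(boolvec) == 0:
        continue
    pairs.append([i, int(np.where(boolvec)[0][0])])
print(pairs)  # -> [[0, 1]]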
"20m") - ), - os.path.join( - filepath, "S2", "60m", filenames[idx_last].replace("10m", "60m") - ), - os.path.join( - filepath, - "S2", - "meta", - filenames[idx_last].replace("_10m", "").replace(".tif", ".txt"), - ), - ] - for k in range(4): + fn_im = [os.path.join(filepath, 'S2', '10m', filenames[idx_last]), + os.path.join(filepath, 'S2', '20m', filenames[idx_last].replace('10m','20m')), + os.path.join(filepath, 'S2', '60m', filenames[idx_last].replace('10m','60m')), + os.path.join(filepath, 'S2', 'meta', filenames[idx_last].replace('_10m','').replace('.tif','.txt'))] + for k in range(4): os.chmod(fn_im[k], 0o777) - os.remove(fn_im[k]) + os.remove(fn_im[k]) # store the index of the pair to remove it outside the loop idx_remove_pair.append(np.where(pair_first == idx)[0][i]) # remove quadruplicates from list of pairs pairs = [i for j, i in enumerate(pairs) if j not in idx_remove_pair] - + # for each pair of image, first check if one image completely contains the other # in that case keep the larger image. Otherwise merge the two images. - for i, pair in enumerate(pairs): + for i,pair in enumerate(pairs): # get filenames of all the files corresponding to the each image in the pair fn_im = [] for index in range(len(pair)): - fn_im.append( - [ - os.path.join(filepath, "S2", "10m", filenames[pair[index]]), - os.path.join( - filepath, - "S2", - "20m", - filenames[pair[index]].replace("10m", "20m"), - ), - os.path.join( - filepath, - "S2", - "60m", - filenames[pair[index]].replace("10m", "60m"), - ), - os.path.join( - filepath, - "S2", - "meta", - filenames[pair[index]] - .replace("_10m", "") - .replace(".tif", ".txt"), - ), - ] - ) + fn_im.append([os.path.join(filepath, 'S2', '10m', filenames[pair[index]]), + os.path.join(filepath, 'S2', '20m', filenames[pair[index]].replace('10m','20m')), + os.path.join(filepath, 'S2', '60m', filenames[pair[index]].replace('10m','60m')), + os.path.join(filepath, 'S2', 'meta', filenames[pair[index]].replace('_10m','').replace('.tif','.txt'))]) # get polygon for first image - try: + try: polygon0 = SDS_tools.get_image_bounds(fn_im[0][0]) - im_epsg0 = metadata[sat]["epsg"][pair[0]] + im_epsg0 = metadata[sat]['epsg'][pair[0]] except AttributeError: - print("\n Error getting the TIF. Skipping this iteration of the loop") + print("\n Error getting the TIF. Skipping this iteration of the loop") continue except FileNotFoundError: - print(f"\n The file {fn_im[index][0]} did not exist") + print(f"\n The file {fn_im[index][0]} did not exist") continue # get polygon for second image - try: + try: polygon1 = SDS_tools.get_image_bounds(fn_im[1][0]) - im_epsg1 = metadata[sat]["epsg"][pair[1]] + im_epsg1 = metadata[sat]['epsg'][pair[1]] except AttributeError: - print("\n Error getting the TIF. Skipping this iteration of the loop") + print("\n Error getting the TIF. Skipping this iteration of the loop") continue except FileNotFoundError: - print(f"\n The file {fn_im[index][0]} did not exist") - continue + print(f"\n The file {fn_im[index][0]} did not exist") + continue # check if epsg are the same if not im_epsg0 == im_epsg1: - print( - "WARNING: there was an error as two S2 images do not have the same epsg," - + " please open an issue on Github at https://github.com/kvos/CoastSat/issues" - + " and include your script so we can find out what happened." 
        # check if one image contains the other one
-        if polygon0.contains(polygon1):
+        if polygon0.contains(polygon1):
            # if polygon0 contains polygon1, remove files for polygon1
            for k in range(4):  # remove the 3 .tif files + the .txt file
                os.chmod(fn_im[1][k], 0o777)
@@ -2295,66 +2232,46 @@ def duplicates(lst, item):
            continue
        elif polygon1.contains(polygon0):
            # if polygon1 contains polygon0, remove image0
-            for k in range(4):  # remove the 3 .tif files + the .txt file
+            for k in range(4): # remove the 3 .tif files + the .txt file
                os.chmod(fn_im[0][k], 0o777)
                os.remove(fn_im[0][k])
            # print('removed 0')
            # adjust the order in case of triplicates
-            if i + 1 < len(pairs):
-                if pairs[i + 1][0] == pair[0]:
-                    pairs[i + 1][0] = pairs[i][1]
+            if i+1 < len(pairs):
+                if pairs[i+1][0] == pair[0]: pairs[i+1][0] = pairs[i][1]
            continue
        # otherwise merge the two images after masking the nodata values
        else:
            for index in range(len(pair)):
                # read image
-                (
-                    im_ms,
-                    georef,
-                    cloud_mask,
-                    im_extra,
-                    im_QA,
-                    im_nodata,
-                ) = SDS_preprocess.preprocess_single(fn_im[index], sat, False, "C02")
+                im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata = SDS_preprocess.preprocess_single(fn_im[index], sat, False, 'C02')
                # in Sentinel2 images close to the edge of the image there are some artefacts,
                # that are squares with constant pixel intensities. They need to be masked in the
                # raster (GEOTIFF). It can be done using the image standard deviation, which
                # indicates values close to 0 for the artefacts.
                if len(im_ms) > 0:
                    # calculate image std for the first 10m band
-                    im_std = SDS_tools.image_std(im_ms[:, :, 0], 1)
+                    im_std = SDS_tools.image_std(im_ms[:,:,0],1)
                    # convert to binary
                    im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
                    # dilate to fill the edges (which have high std)
                    mask10 = morphology.dilation(im_binary, morphology.square(3))
                    # mask the 10m .tif file (add no_data where mask is True)
-                    SDS_tools.mask_raster(fn_im[index][0], mask10)
-                    # for the newer versions just resample the mask for the 10m bands
-                else:
-                    # create mask for the 20m band (SWIR1) by resampling the 10m one
-                    mask20 = ndimage.zoom(mask10, zoom=1 / 2, order=0)
-                    mask20 = transform.resize(
-                        mask20,
-                        im_extra.shape,
-                        mode="constant",
-                        order=0,
-                        preserve_range=True,
-                    )
-                    mask20 = mask20.astype(bool)
+                    SDS_tools.mask_raster(fn_im[index][0], mask10)
+                    # create mask for the 20m band (SWIR1) by resampling the 10m one
+                    mask20 = ndimage.zoom(mask10,zoom=1/2,order=0)
+                    mask20 = transform.resize(mask20, im_extra.shape, mode='constant',
+                                              order=0, preserve_range=True)
+                    mask20 = mask20.astype(bool)
                    # mask the 20m .tif file (im_extra)
                    SDS_tools.mask_raster(fn_im[index][1], mask20)
                    # create a mask for the 60m QA band by resampling the 20m one
-                    mask60 = ndimage.zoom(mask20, zoom=1 / 3, order=0)
-                    mask60 = transform.resize(
-                        mask60,
-                        im_QA.shape,
-                        mode="constant",
-                        order=0,
-                        preserve_range=True,
-                    )
+                    mask60 = ndimage.zoom(mask20,zoom=1/3,order=0)
+                    mask60 = transform.resize(mask60, im_QA.shape, mode='constant',
+                                              order=0, preserve_range=True)
                    mask60 = mask60.astype(bool)
                    # mask the 60m .tif file (im_QA)
-                    SDS_tools.mask_raster(fn_im[index][2], mask60)
+                    SDS_tools.mask_raster(fn_im[index][2], mask60)
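# Illustration (not from the patch): the edge-artefact masking above on a
# synthetic band. A constant square has near-zero local standard deviation, so
# a low-std binary mask, dilated to cover its high-std rim, flags it; the
# coarser bands reuse that mask resampled to their grid. ndimage.generic_filter
# is a slow stand-in for SDS_tools.image_std.
import numpy as np
from scipy import ndimage
from skimage import morphology, transform

band10 = np.random.rand(60, 60)
band10[:10, :10] = 0.5  # constant square mimicking an S2 edge artefact
im_std = ndimage.generic_filter(band10, np.std, size=3)
im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
mask10 = morphology.dilation(im_binary, morphology.square(3))
mask20 = ndimage.zoom(mask10, zoom=1 / 2, order=0)  # nearest-neighbour to the 20m grid
mask20 = transform.resize(mask20, (30, 30), mode='constant', order=0,
                          preserve_range=True).astype(bool)
print(mask10.sum(), mask20.sum())  # artefact pixels flagged at each resolution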
                    # make a figure for quality control/debugging
                    # im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9)
                    # fig,ax= plt.subplots(2,3,tight_layout=True)
@@ -2369,69 +2286,59 @@ def duplicates(lst, item):
                    # ax[0,2].imshow(im_QA)
                    # ax[0,2].set_title('Im QA')
                    # ax[1,2].imshow(im_nodata)
-                    # ax[1,2].set_title('Im nodata')
+                    # ax[1,2].set_title('Im nodata')
                else:
                    continue
-
+
            # once all the pairs of .tif files have been masked with no_data, merge the using gdal_merge
-            fn_merged = os.path.join(filepath, "merged.tif")
-            for k in range(3):
+            fn_merged = os.path.join(filepath, 'merged.tif')
+            for k in range(3):
                # merge masked bands
-                gdal_merge.main(
-                    ["", "-o", fn_merged, "-n", "0", fn_im[0][k], fn_im[1][k]]
-                )
+                gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][k], fn_im[1][k]])
                # remove old files
                os.chmod(fn_im[0][k], 0o777)
                os.remove(fn_im[0][k])
                os.chmod(fn_im[1][k], 0o777)
                os.remove(fn_im[1][k])
                # rename new file
-                fn_new = fn_im[0][k].split(".")[0] + "_merged.tif"
+                fn_new = fn_im[0][k].split('.')[0] + '_merged.tif'
                os.chmod(fn_merged, 0o777)
                os.rename(fn_merged, fn_new)
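# Illustration (not from the patch): the merge call above passes '-n 0' so the
# zeroed (masked) pixels of each input count as nodata and are filled from the
# other image. Paths are hypothetical; the module lives in osgeo_utils on
# GDAL >= 3.2, while older installs ship gdal_merge.py as a standalone script.
from osgeo_utils import gdal_merge

fn_a = 'S2/10m/img_a_10m.tif'  # hypothetical masked pair
fn_b = 'S2/10m/img_b_10m.tif'
fn_merged = 'merged.tif'
# the first argv slot is the program name, exactly as in the code above
gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_a, fn_b])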
-
+
            # open both metadata files
            metadict0 = dict([])
-            with open(fn_im[0][3], "r") as f:
-                metadict0["filename"] = f.readline().split("\t")[1].replace("\n", "")
-                metadict0["acc_georef"] = float(
-                    f.readline().split("\t")[1].replace("\n", "")
-                )
-                metadict0["epsg"] = int(f.readline().split("\t")[1].replace("\n", ""))
+            with open(fn_im[0][3], 'r') as f:
+                metadict0['filename'] = f.readline().split('\t')[1].replace('\n','')
+                metadict0['acc_georef'] = float(f.readline().split('\t')[1].replace('\n',''))
+                metadict0['epsg'] = int(f.readline().split('\t')[1].replace('\n',''))
            metadict1 = dict([])
-            with open(fn_im[1][3], "r") as f:
-                metadict1["filename"] = f.readline().split("\t")[1].replace("\n", "")
-                metadict1["acc_georef"] = float(
-                    f.readline().split("\t")[1].replace("\n", "")
-                )
-                metadict1["epsg"] = int(f.readline().split("\t")[1].replace("\n", ""))
+            with open(fn_im[1][3], 'r') as f:
+                metadict1['filename'] = f.readline().split('\t')[1].replace('\n','')
+                metadict1['acc_georef'] = float(f.readline().split('\t')[1].replace('\n',''))
+                metadict1['epsg'] = int(f.readline().split('\t')[1].replace('\n',''))
            # check if both images have the same georef accuracy
-            if np.any(
-                np.array([metadict0["acc_georef"], metadict1["acc_georef"]]) == -1
-            ):
-                metadict0["georef"] = -1
+            if np.any(np.array([metadict0['acc_georef'],metadict1['acc_georef']]) == -1):
+                metadict0['georef'] = -1
            # add new name
-            metadict0["filename"] = metadict0["filename"].split(".")[0] + "_merged.tif"
+            metadict0['filename'] = metadict0['filename'].split('.')[0] + '_merged.tif'
            # remove the old metadata.txt files
            os.chmod(fn_im[0][3], 0o777)
            os.remove(fn_im[0][3])
            os.chmod(fn_im[1][3], 0o777)
-            os.remove(fn_im[1][3])
+            os.remove(fn_im[1][3])
            # rewrite the .txt file with a new metadata file
-            fn_new = fn_im[0][3].split(".")[0] + "_merged.txt"
-            with open(fn_new, "w") as f:
+            fn_new = fn_im[0][3].split('.')[0] + '_merged.txt'
+            with open(fn_new, 'w') as f:
                for key in metadict0.keys():
-                    f.write("%s\t%s\n" % (key, metadict0[key]))
-
+                    f.write('%s\t%s\n'%(key,metadict0[key]))
+
            # update filenames list (in case there are triplicates)
-            filenames[pair[0]] = metadict0["filename"]
-
-    print(
-        "%d out of %d Sentinel-2 images were merged (overlapping or duplicate)"
-        % (total_removed_step1 + total_merged_step2, total_images)
-    )
+            filenames[pair[0]] = metadict0['filename']
+
+    print('%d out of %d Sentinel-2 images were merged (overlapping or duplicate)'%(total_removed_step1+total_merged_step2,
+                                                                                   total_images))
    # update the metadata dict
    metadata_updated = get_metadata(inputs)
-    return metadata_updated
+    return metadata_updated
\ No newline at end of file
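# Illustration (not from the patch): the meta .txt files handled above hold one
# tab-separated key/value pair per line (filename, acc_georef, epsg). This
# round-trip mirrors the read/rename/rewrite logic on a hypothetical file.
fn_meta = 'example_meta.txt'  # hypothetical path
with open(fn_meta, 'w') as f:
    f.write('filename\timg_a_10m.tif\n')
    f.write('acc_georef\t5.0\n')
    f.write('epsg\t32610\n')

metadict = dict([])
with open(fn_meta, 'r') as f:
    metadict['filename'] = f.readline().split('\t')[1].replace('\n', '')
    metadict['acc_georef'] = float(f.readline().split('\t')[1].replace('\n', ''))
    metadict['epsg'] = int(f.readline().split('\t')[1].replace('\n', ''))
metadict['filename'] = metadict['filename'].split('.')[0] + '_merged.tif'
with open('example_meta_merged.txt', 'w') as f:
    for key in metadict.keys():
        f.write('%s\t%s\n' % (key, metadict[key]))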