Skip to content

Commit

Permalink
Merge pull request #352 from WilhelmusLab/350-pairing-extra-feats
Browse files Browse the repository at this point in the history
350 pairing extra feats
  • Loading branch information
cpaniaguam authored Nov 28, 2023
2 parents 290d7bd + ec0c674 commit 03eb811
Show file tree
Hide file tree
Showing 4 changed files with 208 additions and 94 deletions.
1 change: 1 addition & 0 deletions src/IceFloeTracker.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ using HDF5
export HDF5, PyCall
export DataFrames, DataFrame, nrow, Not, select!
export Dates, Time, Date, DateTime, @dateformat_str
export addlatlon!, getlatlon, convertcentroid!, converttounits!, dropcols!

include("utils.jl")
include("persist.jl")
Expand Down
75 changes: 65 additions & 10 deletions src/tracker/tracker-funcs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ struct MatchingProps
end

"""
Container for matched pairs of floes. `props1` and `props2` are dataframes with the same column names as the input dataframes. `ratios` is a dataframe with column names `area`, `majoraxis`, `minoraxis`, `convex_area`, `area_under`, and `corr` for similarity ratios. `dist` is a vector of (pixel) distances between paired floes.
Container for matched pairs of floes. `props1` and `props2` are dataframes with the same column names as the input dataframes. `ratios` is a dataframe with column names `area`, `majoraxis`, `minoraxis`, `convex_area`, `area_mismatch`, and `corr` for similarity ratios. `dist` is a vector of (pixel) distances between paired floes.
"""
struct MatchedPairs
props1::DataFrame
Expand All @@ -23,7 +23,7 @@ end
"""
MatchedPairs(df)
Return an object of type `MatchedPairs` with an empty dataframe with the same column names as `df`, an empty dataframe with column names `area`, `majoraxis`, `minoraxis`, `convex_area`, `area_under`, and `corr` for similarity ratios, and an empty vector for distances.
Return an object of type `MatchedPairs` with an empty dataframe with the same column names as `df`, an empty dataframe with column names `area`, `majoraxis`, `minoraxis`, `convex_area`, `area_mismatch`, and `corr` for similarity ratios, and an empty vector for distances.
"""
function MatchedPairs(df)
emptypropsdf = similar(df, 0)
Expand Down Expand Up @@ -163,7 +163,7 @@ end
"""
makeemptydffrom(df::DataFrame)
Return an object with an empty dataframe with the same column names as `df` and an empty dataframe with column names `area`, `majoraxis`, `minoraxis`, `convex_area`, `area_under`, and `corr` for similarity ratios.
Return an object with an empty dataframe with the same column names as `df` and an empty dataframe with column names `area`, `majoraxis`, `minoraxis`, `convex_area`, `area_mismatch`, and `corr` for similarity ratios.
"""
function makeemptydffrom(df::DataFrame)
return MatchingProps(
Expand All @@ -174,15 +174,15 @@ end
"""
makeemptyratiosdf()
Return an empty dataframe with column names `area`, `majoraxis`, `minoraxis`, `convex_area`, `area_under`, and `corr` for similarity ratios.
Return an empty dataframe with column names `area`, `majoraxis`, `minoraxis`, `convex_area`, `area_mismatch`, and `corr` for similarity ratios.
"""
function makeemptyratiosdf()
return DataFrame(;
area=Float64[],
majoraxis=Float64[],
minoraxis=Float64[],
convex_area=Float64[],
area_under=Float64[],
area_mismatch=Float64[],
corr=Float64[],
)
end
Expand Down Expand Up @@ -258,19 +258,19 @@ function callmatchcorr(conditions)
end

"""
isfloegoodmatch(conditions, mct, area_under, corr)
isfloegoodmatch(conditions, mct, area_mismatch, corr)
Return `true` if the floes are a good match as per the set thresholds. Return `false` otherwise.
# Arguments
- `conditions`: tuple of booleans for evaluating the conditions
- `mct`: tuple of thresholds for the match correlation test
- `area_under` and `corr`: values returned by `match_corr`
- `area_mismatch` and `corr`: values returned by `match_corr`
"""
function isfloegoodmatch(conditions, mct, area_under, corr)
function isfloegoodmatch(conditions, mct, area_mismatch, corr)
return (
(conditions.cond3 && area_under < mct.area3) ||
(conditions.cond2 && area_under < mct.area2)
(conditions.cond3 && area_mismatch < mct.area3) ||
(conditions.cond2 && area_mismatch < mct.area2)
) && corr > mct.corr
end

Expand Down Expand Up @@ -497,3 +497,58 @@ function addfloemasks!(props, imgs)
end
return nothing
end

## LatLon functions originally from IFTPipeline.jl

"""
    convertcentroid!(propdf, latlondata, colstodrop)

Add centroid location columns to `propdf` in place, then remove the columns listed in
`colstodrop`.

The values in `propdf.row_centroid` and `propdf.col_centroid` are used directly as
indices into the entries of `latlondata`:

- `latitude` and `longitude` are read from `latlondata["latitude"]` and
  `latlondata["longitude"]` at `[row_centroid, col_centroid]`.
- `x` is read from `latlondata["Y"]` at `row_centroid`, and `y` from `latlondata["X"]`
  at `col_centroid`.

Additionally, all four lookups are completed before any column is written to `propdf`.

# Arguments
- `propdf`: table of floe properties with `row_centroid` and `col_centroid` columns.
- `latlondata`: collection indexable by the keys `"latitude"`, `"longitude"`, `"X"`,
  and `"Y"`.
- `colstodrop`: columns to remove from `propdf` once the new columns are in place.

Returns `nothing`.
"""
function convertcentroid!(propdf, latlondata, colstodrop)
    rows = propdf.row_centroid
    cols = propdf.col_centroid

    # Two-index lookups on the latitude/longitude grids, one value per floe.
    lats = [latlondata["latitude"][r, c] for (r, c) in zip(rows, cols)]
    lons = [latlondata["longitude"][r, c] for (r, c) in zip(rows, cols)]

    # NOTE(review): `x` is taken from the "Y" entry (indexed by row) and `y` from the
    # "X" entry (indexed by column) -- confirm this pairing is the intended convention.
    xs = [latlondata["Y"][r] for r in rows]
    ys = [latlondata["X"][c] for c in cols]

    propdf.latitude = lats
    propdf.longitude = lons
    propdf.x = xs
    propdf.y = ys

    dropcols!(propdf, colstodrop)
    return nothing
end

"""
    converttounits!(propdf, latlondata, colstodrop)

Rescale the floe properties in `propdf` from pixel units to kilometers and square
kilometers where appropriate, add centroid location columns via `convertcentroid!`, and
drop the columns listed in `colstodrop`.

The grid spacing `dx` is the absolute difference between the first two entries of
`latlondata["X"]`. The columns `area` and `convex_area` are multiplied by `dx^2` and
divided by `1e6`; `minor_axis_length`, `major_axis_length`, and `perimeter` are
multiplied by `dx` and divided by `1e3`.
(Assumes the entries of `latlondata["X"]` are in meters -- TODO confirm.)

If `propdf` has no rows, only the columns in `colstodrop` are removed: no location
columns are added and nothing is rescaled.

Returns `nothing`.
"""
function converttounits!(propdf, latlondata, colstodrop)
    # Empty table: just drop the unwanted columns and stop.
    if nrow(propdf) == 0
        dropcols!(propdf, colstodrop)
        return nothing
    end

    # Adds latitude/longitude/x/y and already removes `colstodrop`.
    convertcentroid!(propdf, latlondata, colstodrop)

    xgrid = latlondata["X"]
    spacing = abs(xgrid[2] - xgrid[1])

    # Local scalers; each is applied to a whole column at once.
    tosquarekm(pixels) = pixels * spacing^2 / 1e6
    tokm(pixels) = pixels * spacing / 1e3

    propdf.area .= tosquarekm(propdf.area)
    propdf.convex_area .= tosquarekm(propdf.convex_area)
    propdf.minor_axis_length .= tokm(propdf.minor_axis_length)
    propdf.major_axis_length .= tokm(propdf.major_axis_length)
    propdf.perimeter .= tokm(propdf.perimeter)
    return nothing
end

"""
    dropcols!(df, colstodrop)

Remove the columns `colstodrop` from `df` in place by selecting every column except
those listed. Returns `nothing`.
"""
function dropcols!(df, colstodrop)
    kept = Not(colstodrop)
    select!(df, kept)
    return nothing
end
202 changes: 129 additions & 73 deletions src/tracker/tracker.jl
Original file line number Diff line number Diff line change
@@ -1,41 +1,58 @@
"""
pairfloes(
segmented_imgs::Vector{BitMatrix},
props::Vector{DataFrame},
passtimes::Vector{DateTime},
dt::Vector{Float64},
condition_thresholds,
mc_thresholds,
)
addlatlon!(pairedfloesdf::DataFrame, refimage::AbstractString)
Pair floes in `props[k]` to floes in `props[k+1]` for `k=1:length(props)-1`.
Add columns `latitude`, `longitude`, and pixel coordinates `x`, `y` to `pairedfloesdf`.
The main steps of the algorithm are as follows:
# Arguments
- `pairedfloesdf`: dataframe containing floe tracking data.
- `refimage`: path to reference image.
"""
function addlatlon!(pairedfloesdf::DataFrame, refimage::AbstractString)
    # Centroid and bounding-box index columns are removed once the location columns
    # have been derived from them.
    indexcols = [:row_centroid, :col_centroid, :min_row, :min_col, :max_row, :max_col]

    # `getlatlon` supplies the reference data that `converttounits!` reads from.
    converttounits!(pairedfloesdf, getlatlon(refimage), indexcols)
    return nothing
end

1. Crop floes from `segmented_imgs` using bounding box data in `props`. Floes in the edges are removed.
2. For each floe_k_r in `props[k]`, compare to floe_k+1_s in `props[k+1]` by computing similarity ratios, set of `conditions`, and drift distance `dist`. If the conditions are met, compute the area mismatch `mm` and psi-s correlation `c` for this pair of floes. Pair these two floes if `mm` and `c` satisfy the thresholds in `mc_thresholds`.
3. If there are collisions (i.e. floe `s` in `props[k+1]` is paired to more than one floe in `props[k]`), then the floe in `props[k]` with the best match is paired to floe `s` in `props[k+1]`.
4. Drop paired floes from `props[k]` and `props[k+1]` and repeat steps 2 and 3 until there are no more floes to match in `props[k]`.
5. Repeat steps 2-4 for `k=2:length(props)-1`.
"""
add_passtimes!(props, passtimes)
Add a column `passtime` to each DataFrame in `props` containing the time of the image in which the floes were captured.
# Arguments
- `segmented_imgs`: array of images with segmented floes.
- `props`: array of dataframes containing floe properties.
- `props`: array of DataFrames containing floe properties.
- `passtimes`: array of `DateTime` objects containing the time of the image in which the floes were captured.
- `dt`: array of time elapsed between images in `segmented_imgs`.
- `condition_thresholds`: 3-tuple of thresholds (each a named tuple) for deciding whether to match floe `i` from day `k` to floe j from day `k+1`.
- `mc_thresholds`: thresholds for area mismatch and psi-s shape correlation.
Returns a tuple `(props, trackdata)` where `props` is a long dataframe containing floe ID's, passtimes, the original set of physical properties, and their masks and `trackdata` is a dataframe containing the floe tracking data.
"""
function pairfloes(
function add_passtimes!(props, passtimes)
    # The k-th pass time is broadcast over the `passtime` column of the k-th table.
    # Tables beyond `length(passtimes)` are left untouched.
    foreach(enumerate(passtimes)) do (k, stamp)
        props[k].passtime .= stamp
    end
    return nothing
end

"""
    sort_floes_by_area!(props)

Sort every table in `props` in place by its `area` column, largest area first.
Returns `nothing`.
"""
function sort_floes_by_area!(props)
    foreach(props) do table
        # descending order of area
        DataFrames.sort!(table, :area; rev=true)
    end
    return nothing
end

function _pairfloes(
segmented_imgs::Vector{BitMatrix},
props::Vector{DataFrame},
passtimes::Vector{DateTime},
dt::Vector{Float64},
condition_thresholds,
mc_thresholds,
mc_thresholds
)
dt = diff(passtimes) ./ Minute(1)

sort_floes_by_area!(props)

# Assign a unique ID to each floe in each image
Expand Down Expand Up @@ -74,17 +91,17 @@ function pairfloes(
)

if callmatchcorr(conditions)
(area_under, corr) = matchcorr(
(area_mismatch, corr) = matchcorr(
props1.mask[r], props2.mask[s], Δt; mc_thresholds.comp...
)

if isfloegoodmatch(
conditions, mc_thresholds.goodness, area_under, corr
conditions, mc_thresholds.goodness, area_mismatch, corr
)
appendrows!(
matching_floes,
props2[s, :],
(ratios..., area_under, 1 - corr),
(ratios..., area_mismatch, corr),
s,
dist,
)
Expand Down Expand Up @@ -118,72 +135,111 @@ function pairfloes(
sort!(tracked)
_pairs = tracked.data

# Concatenate horizontally props1, props2, tracked, and add dist as the last column for each item in _pairs
_pairs = [hcat(hcat(p.props1, p.props2, makeunique=true), p.ratios[:, ["area_mismatch", "corr"]]) for p in _pairs]

# Make a dict with keys in _pairs[i].props2.uuid and values in _pairs[i-1].props1.uuid
mappings = [Dict(pair.props2.uuid .=> pair.props1.uuid) for pair in _pairs]
mappings = [Dict(zip(p.uuid_1, p.uuid)) for p in _pairs]

# Convert mappings to functions
funcsfrommappings = [x -> get(mapping, x, x) for mapping in mappings]

# Compose functions in reverse order to push uuids forward
mapuuid = foldr((f, g) -> x -> f(g(x)), funcsfrommappings)

for prop in props[2:end]
prop.uuid = mapuuid.(prop.uuid)
end
# Apply mapuuid to uuid_1 in each set of props in _pairs => get consolidated uuids
[prop.uuid_0 = mapuuid.(prop.uuid_1) for prop in _pairs]

# Collect all unique uuids in props[i] to label as simple ints starting from 1
uuids = unique([uuid for prop in props for uuid in prop.uuid])

# create mapping from uuids to index
uuid2index = Dict(uuid => i for (i, uuid) in enumerate(uuids))
# Reshape _pairs to a long df
propsvert = vcat(_pairs...)
DataFrames.sort!(propsvert, [:uuid_0, :passtime])
rightcolnames = vcat([name for name in names(propsvert) if all([!(name in ["uuid_1", "psi_1", "mask_1"]), endswith(name, "_1")])], ["uuid_0"])
leftcolnames = [split(name, "_1")[1] for name in rightcolnames]
matchcolnames = ["area_mismatch", "corr", "uuid_0", "passtime", "passtime_1",]

# apply the uuid2index mapping to props
for prop in props
prop.uuid .= [uuid2index[uuid] for uuid in prop.uuid]
end
leftdf = propsvert[:, leftcolnames]
rightdf = propsvert[:, rightcolnames]
matchdf = propsvert[:, matchcolnames]
rename!(rightdf, Dict(zip(rightcolnames, leftcolnames)))

# Merge all props into one long DataFrame
propsvert = vcat(props...)
_pairs = vcat(leftdf, rightdf)

# rename uuid to ID
rename!(propsvert, :uuid => :ID)
# sort by uuid_0, passtime and keep unique rows
_pairs = DataFrames.sort!(_pairs, [:uuid_0, :passtime]) |> unique

# 2. Sort propsvert by uuid and then by passtime
DataFrames.sort!(propsvert, [:ID, :passtime])

# 3. Move ID, passtime columns to the front
propsvert = propsvert[:, unique(["ID", "passtime", names(propsvert)...])]
_pairs = leftjoin(_pairs, matchdf, on=[:uuid_0, :passtime])
DataFrames.sort!(_pairs, [:uuid_0, :passtime])

return (props = propsvert[:, names(propsvert)[1:15]], trackdata = _pairs)
# create mapping from uuids to index as ID
uuids = unique(_pairs.uuid_0)
uuid2index = Dict(uuid => i for (i, uuid) in enumerate(uuids))
_pairs.ID = [uuid2index[uuid] for uuid in _pairs.uuid_0]
_pairs = _pairs[:, [name for name in names(_pairs) if name != "uuid_0"]]
return _pairs
end


"""
add_passtimes!(props, passtimes)
Add a column `passtime` to each DataFrame in `props` containing the time of the image in which the floes were captured.
pairfloes(
segmented_imgs::Vector{BitMatrix},
props::Vector{DataFrame},
passtimes::Vector{DateTime},
latlonrefimage::AbstractString,
condition_thresholds,
mc_thresholds,
)
# Arguments
- `props`: array of DataFrames containing floe properties.
- `passtimes`: array of `DateTime` objects containing the time of the image in which the floes were captured.
Pair floes in `props[k]` to floes in `props[k+1]` for `k=1:length(props)-1`.
"""
function add_passtimes!(props, passtimes)
for (i, passtime) in enumerate(passtimes)
props[i].passtime .= passtime
end
nothing
end
The main steps of the algorithm are as follows:
"""
sort_floes_by_area!(props)
1. Crop floes from `segmented_imgs` using bounding box data in `props`. Floes in the edges are removed.
2. For each floe_k_r in `props[k]`, compare to floe_k+1_s in `props[k+1]` by computing similarity ratios, set of `conditions`, and drift distance `dist`. If the conditions are met, compute the area mismatch `mm` and psi-s correlation `c` for this pair of floes. Pair these two floes if `mm` and `c` satisfy the thresholds in `mc_thresholds`.
3. If there are collisions (i.e. floe `s` in `props[k+1]` is paired to more than one floe in `props[k]`), then the floe in `props[k]` with the best match is paired to floe `s` in `props[k+1]`.
4. Drop paired floes from `props[k]` and `props[k+1]` and repeat steps 2 and 3 until there are no more floes to match in `props[k]`.
5. Repeat steps 2-4 for `k=2:length(props)-1`.
Sort floes in `props` by area in descending order.
# Arguments
- `segmented_imgs`: array of images with segmented floes
- `props`: array of dataframes containing floe properties
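- `passtimes`: array of `DateTime` objects containing the time of the image in which the floes were captured
- `latlonrefimage`: path to the reference image used to add the `latitude`, `longitude`, `x`, and `y` columns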
- `condition_thresholds`: 3-tuple of thresholds (each a named tuple) for deciding whether to match floe `i` from day `k` to floe j from day `k+1`
- `mc_thresholds`: thresholds for area mismatch and psi-s shape correlation
Returns a dataframe containing the following columns:
- `ID`: unique ID for each floe pairing.
- `passtime`: time of the image in which the floes were captured.
- `area`: area of the floe in sq. kilometers
- `convex_area`: area of the convex hull of the floe in sq. kilometers
- `major_axis_length`: length of the major axis of the floe in kilometers
- `minor_axis_length`: length of the minor axis of the floe in kilometers
- `orientation`: angle between the major axis and the x-axis in radians
- `perimeter`: perimeter of the floe in kilometers
- `latitude`: latitude of the floe centroid
- `longitude`: longitude of the floe centroid
- `x`: x-coordinate of the floe centroid
- `y`: y-coordinate of the floe centroid
- `area_mismatch`: area mismatch between the two floes in row_i and row_i+1 after registration
- `corr`: psi-s shape correlation between the two floes in row_i and row_i+1
"""
function sort_floes_by_area!(props)
for prop in props
# sort by area in descending order
DataFrames.sort!(prop, :area; rev=true)
nothing
end
function pairfloes(
    segmented_imgs::Vector{BitMatrix},
    props::Vector{DataFrame},
    passtimes::Vector{DateTime},
    latlonrefimage::AbstractString,
    condition_thresholds,
    mc_thresholds,
)
    # Pairing itself is delegated to `_pairfloes`.
    tracked = _pairfloes(
        segmented_imgs, props, passtimes, condition_thresholds, mc_thresholds
    )

    # Adds location columns and rescales properties in place, using the reference image.
    addlatlon!(tracked, latlonrefimage)

    # Columns of the returned table, in output order.
    outputcols = [
        :ID,
        :passtime,
        :area,
        :convex_area,
        :major_axis_length,
        :minor_axis_length,
        :orientation,
        :perimeter,
        :latitude,
        :longitude,
        :x,
        :y,
        :area_mismatch,
        :corr,
    ]
    return tracked[:, outputcols]
end
Loading

0 comments on commit 03eb811

Please sign in to comment.