From 63a6d36154be642832cdcdc2345a490af13bbb55 Mon Sep 17 00:00:00 2001 From: Simon Teixidor Date: Tue, 12 Mar 2024 18:28:05 +0100 Subject: [PATCH 01/12] Enable playlists based on multiple songs This commit introduces functionality for generating playlist based on a set of songs. For good performance, I also introduce a new distance measure, extended isolation distance. While the previous distance metrics, euclidean distance and cosine distance, could be made to measure distance to a set of songs, the performance will not be as good. --- Cargo.lock | 30 ++++++++++++++++++++ Cargo.toml | 3 +- src/library.rs | 45 ++++++++++++++++++++++++++++++ src/playlist.rs | 73 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 150 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 52f4773..c24a4d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,6 +125,7 @@ dependencies = [ "bliss-audio-aubio-rs", "clap", "dirs", + "extended-isolation-forest", "ffmpeg-next", "ffmpeg-sys-next", "glob", @@ -396,6 +397,18 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "extended-isolation-forest" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db5193a74618ae2f7ea9c7feda2772192e0e3c04d9cbd2beb5ee9b0916d7eb3f" +dependencies = [ + "num-traits", + "rand 0.8.5", + "rand_distr", + "serde", +] + [[package]] name = "fallible-iterator" version = "0.2.0" @@ -672,6 +685,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + [[package]] name = "libredox" version = "0.0.1" @@ -889,6 +908,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -1096,6 +1116,16 @@ dependencies = [ 
"getrandom", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + [[package]] name = "rawpointer" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index 401e17a..a142a95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ library = [ "dep:anyhow", "dep:serde_ini", "dep:serde_json", "dep:indicatif", ] -serde = ["dep:serde"] +serde = ["dep:serde", "extended-isolation-forest/serde"] [dependencies] # Until https://github.com/aubio/aubio/issues/336 is somehow solved @@ -54,6 +54,7 @@ thiserror = "1.0.40" strum = "0.24.1" strum_macros = "0.24.3" rcue = "0.1.3" +extended-isolation-forest = { version = "0.2.3", default-features = false } # Deps for the library feature serde = { version = "1.0", optional = true, features = ["derive"] } diff --git a/src/library.rs b/src/library.rs index 3fa8efa..3de0079 100644 --- a/src/library.rs +++ b/src/library.rs @@ -116,6 +116,7 @@ use crate::playlist::dedup_playlist_by_key; use crate::playlist::dedup_playlist_custom_distance_by_key; use crate::playlist::euclidean_distance; use crate::playlist::DistanceMetric; +use crate::playlist::PreTrainedSetDistanceMetric; use anyhow::{bail, Context, Result}; #[cfg(not(test))] use dirs::data_local_dir; @@ -554,6 +555,50 @@ impl Library { Ok(songs) } + /// Build a playlist of `playlist_length` items from a set of already analyzed + /// song in the library at `song_paths`, using distance metric `distance`, + /// the sorting function `sort_by` and deduplicating if `dedup` is set to + /// `true`. + /// + /// You can use ready to use distance metrics such as + /// [extended_isolation_forest], and ready to use sorting functions like + /// [closest_to_selected_songs_by_key]. 
+ pub fn playlist_from_many_custom( + &self, + song_paths: &[&str], + playlist_length: usize, + distance: G, + mut sort_by: F, + dedup: bool, + ) -> Result>> + where + F: FnMut(&[LibrarySong], &mut Vec>, G, fn(&LibrarySong) -> Song), + G: PreTrainedSetDistanceMetric, + { + let initial_songs: Vec> = song_paths + .iter() + .map(|s| { + self.song_from_path(s).map_err(|_| { + BlissError::ProviderError(format!("song '{s}' has not been analyzed")) + }) + }) + .collect::, BlissError>>()?; + let mut songs = self.songs_from_library()?; + sort_by( + &initial_songs, + &mut songs, + distance, + |s: &LibrarySong| s.bliss_song.to_owned(), + ); + if dedup { + dedup_playlist_by_key(&mut songs, None, |s: &LibrarySong| { + s.bliss_song.to_owned() + }); + } + songs.truncate(playlist_length); + Ok(songs) + } + /// Make a playlist of `number_albums` albums closest to the album /// with title `album_title`. /// The playlist starts with the album with `album_title`, and contains diff --git a/src/playlist.rs b/src/playlist.rs index 93477c1..e33d696 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -10,6 +10,7 @@ // TODO on the `by_key` functions: maybe Fn(&T) -> &Song is enough? Compared // to -> Song use crate::{BlissError, BlissResult, Song, NUMBER_FEATURES}; +use extended_isolation_forest::{Forest, ForestOptions}; use ndarray::{Array, Array1, Array2, Axis}; use ndarray_stats::QuantileExt; use noisy_float::prelude::*; @@ -19,6 +20,15 @@ use std::collections::HashMap; pub trait DistanceMetric: Fn(&Array1, &Array1) -> f32 {} impl DistanceMetric for F where F: Fn(&Array1, &Array1) -> f32 {} +/// Pre trained set distance metrics. +pub trait PreTrainedSetDistanceMetric { + /// Train this distance metric on the set of vectors that it should measure distance from. + fn train(&mut self, vectors: &[Array1]); + /// Return the distance from the set of vectors that this metric was trained on. Must not be + /// called before train. 
+ fn distance(&self, vector: &Array1) -> f32; +} + /// Return the [euclidean /// distance](https://en.wikipedia.org/wiki/Euclidean_distance#Higher_dimensions) /// between two vectors. @@ -39,6 +49,48 @@ pub fn cosine_distance(a: &Array1, b: &Array1) -> f32 { 1. - similarity } +fn feature_array1_to_array(f: &Array1) -> [f32; NUMBER_FEATURES] { + f.as_slice() + .expect("Couldn't convert feature vector to slice") + .try_into() + .expect("Couldn't convert slice to array") +} + +/// Return the [extended isolation forest](https://ieeexplore.ieee.org/document/8888179) +/// score between a set of vectors and a single vector. +pub struct ExtendedIsolationForest { + forest: Option>, +} + +impl Default for ExtendedIsolationForest { + /// Create an ExtendedIsolationForest with an empty state. + fn default() -> ExtendedIsolationForest { + ExtendedIsolationForest { forest: None } + } +} + +impl PreTrainedSetDistanceMetric for ExtendedIsolationForest { + fn train(&mut self, vectors: &[Array1]) { + let opts = ForestOptions { + n_trees: 100, + sample_size: vectors.len().min(256), + max_tree_depth: None, + extension_level: 1, + }; + let a = &*vectors + .iter() + .map(feature_array1_to_array) + .collect::>(); + self.forest = Some(Forest::from_slice(a, &opts).unwrap()); + } + fn distance(&self, vector: &Array1) -> f32 { + self.forest + .as_ref() + .expect("distance() called before train()") + .score(&feature_array1_to_array(vector)) as f32 + } +} + /// Sort `songs` in place by putting songs close to `first_song` first /// using the `distance` metric. pub fn closest_to_first_song( @@ -66,6 +118,27 @@ pub fn closest_to_first_song_by_key( songs.sort_by_cached_key(|song| n32(first_song.custom_distance(&key_fn(song), &distance))); } +/// Sort `all_songs` in place by putting songs close to `selected_songs` first +/// using the `distance` metric. 
+/// +/// Sort songs with a key extraction function, useful for when you have a +/// structure like `CustomSong { bliss_song: Song, something_else: bool }` +pub fn closest_to_selected_songs_by_key( + selected_songs: &[T], + all_songs: &mut [T], + mut metric: impl PreTrainedSetDistanceMetric, + key_fn: F, +) where + F: Fn(&T) -> Song, +{ + let selected_songs = selected_songs + .iter() + .map(|c| key_fn(c).analysis.as_arr1()) + .collect::>(); + metric.train(&selected_songs); + all_songs.sort_by_cached_key(|song| n32(metric.distance(&key_fn(song).analysis.as_arr1()))); +} + /// Sort `songs` in place using the `distance` metric and ordering by /// the smallest distance between each song. /// From a7be203a12dcffdbdfb5538caadfb7b892e7aa79 Mon Sep 17 00:00:00 2001 From: Simon Teixidor Date: Tue, 12 Mar 2024 20:27:04 +0100 Subject: [PATCH 02/12] Remove *_by_key family of functions Remove code duplication by making these functions generic over AsRef instead of having separate versions. --- src/library.rs | 81 +++++------- src/playlist.rs | 331 +++++++++--------------------------------------- src/song.rs | 6 + 3 files changed, 97 insertions(+), 321 deletions(-) diff --git a/src/library.rs b/src/library.rs index 3de0079..de81152 100644 --- a/src/library.rs +++ b/src/library.rs @@ -110,10 +110,10 @@ //! [Library] to implement bliss for a MPD player. 
use crate::analyze_paths_with_cores; use crate::cue::CueInfo; -use crate::playlist::closest_album_to_group_by_key; -use crate::playlist::closest_to_first_song_by_key; -use crate::playlist::dedup_playlist_by_key; -use crate::playlist::dedup_playlist_custom_distance_by_key; +use crate::playlist::closest_album_to_group; +use crate::playlist::closest_to_first_song; +use crate::playlist::dedup_playlist; +use crate::playlist::dedup_playlist_custom_distance; use crate::playlist::euclidean_distance; use crate::playlist::DistanceMetric; use crate::playlist::PreTrainedSetDistanceMetric; @@ -341,6 +341,12 @@ pub struct LibrarySong { pub extra_info: T, } +impl AsRef for LibrarySong { + fn as_ref(&self) -> &Song { + &self.bliss_song + } +} + // TODO add logging statement // TODO concrete examples // TODO example LibrarySong without any extra_info @@ -493,16 +499,14 @@ impl Library { ) -> Result>> { let first_song: LibrarySong = self.song_from_path(song_path)?; let mut songs = self.songs_from_library()?; - closest_to_first_song_by_key( - &first_song, - &mut songs, - euclidean_distance, - |s: &LibrarySong| s.bliss_song.to_owned(), - ); - songs.sort_by_cached_key(|song| n32(first_song.bliss_song.distance(&song.bliss_song))); - dedup_playlist_by_key(&mut songs, None, |s: &LibrarySong| { - s.bliss_song.to_owned() + closest_to_first_song(&first_song, &mut songs, euclidean_distance); + songs.sort_by_cached_key(|song| { + n32(euclidean_distance( + &first_song.bliss_song.analysis.as_arr1(), + &song.bliss_song.analysis.as_arr1(), + )) }); + dedup_playlist(&mut songs, None); songs.truncate(playlist_length); Ok(songs) } @@ -514,7 +518,7 @@ impl Library { /// /// You can use ready to use distance metrics such as /// [euclidean_distance], and ready to use sorting functions like - /// [closest_to_first_song_by_key]. + /// [closest_to_first_song]. /// /// In most cases, you just want to use [Library::playlist_from]. 
/// Use `playlist_from_custom` if you want to experiment with different @@ -522,7 +526,7 @@ impl Library { /// /// Example: /// `library.playlist_from_song_custom(song_path, 20, euclidean_distance, - /// closest_to_first_song_by_key, true)`. + /// closest_to_first_song, true)`. /// TODO path here too pub fn playlist_from_custom( &self, @@ -533,23 +537,16 @@ impl Library { dedup: bool, ) -> Result>> where - F: FnMut(&LibrarySong, &mut Vec>, G, fn(&LibrarySong) -> Song), + F: FnMut(&LibrarySong, &mut [LibrarySong], G), G: DistanceMetric + Copy, { let first_song: LibrarySong = self.song_from_path(song_path).map_err(|_| { BlissError::ProviderError(format!("song '{song_path}' has not been analyzed")) })?; let mut songs = self.songs_from_library()?; - sort_by(&first_song, &mut songs, distance, |s: &LibrarySong| { - s.bliss_song.to_owned() - }); + sort_by(&first_song, &mut songs, distance); if dedup { - dedup_playlist_custom_distance_by_key( - &mut songs, - None, - distance, - |s: &LibrarySong| s.bliss_song.to_owned(), - ); + dedup_playlist_custom_distance(&mut songs, None, distance); } songs.truncate(playlist_length); Ok(songs) @@ -562,7 +559,7 @@ impl Library { /// /// You can use ready to use distance metrics such as /// [extended_isolation_forest], and ready to use sorting functions like - /// [closest_to_selected_songs_by_key]. + /// [closest_to_selected_songs]. 
pub fn playlist_from_many_custom( &self, song_paths: &[&str], @@ -572,7 +569,7 @@ impl Library { dedup: bool, ) -> Result>> where - F: FnMut(&[LibrarySong], &mut Vec>, G, fn(&LibrarySong) -> Song), + F: FnMut(&[LibrarySong], &mut Vec>, G), G: PreTrainedSetDistanceMetric, { let initial_songs: Vec> = song_paths @@ -584,16 +581,9 @@ impl Library { }) .collect::, BlissError>>()?; let mut songs = self.songs_from_library()?; - sort_by( - &initial_songs, - &mut songs, - distance, - |s: &LibrarySong| s.bliss_song.to_owned(), - ); + sort_by(&initial_songs, &mut songs, distance); if dedup { - dedup_playlist_by_key(&mut songs, None, |s: &LibrarySong| { - s.bliss_song.to_owned() - }); + dedup_playlist(&mut songs, None); } songs.truncate(playlist_length); Ok(songs) @@ -613,7 +603,7 @@ impl Library { let album = self.songs_from_album(&album_title)?; // Every song should be from the same album. Hopefully... let songs = self.songs_from_library()?; - let playlist = closest_album_to_group_by_key(album, songs, |s| s.bliss_song.to_owned())?; + let playlist = closest_album_to_group(album, songs)?; let mut album_count = 0; let mut index = 0; @@ -1983,7 +1973,7 @@ mod test { "/path/to/song2001", 20, distance, - closest_to_first_song_by_key, + closest_to_first_song, true, ) .unwrap(); @@ -2004,15 +1994,12 @@ mod test { ) } - fn custom_sort( + fn custom_sort( _: &LibrarySong, - songs: &mut Vec>, + songs: &mut [LibrarySong], _distance: impl DistanceMetric, - key_fn: F, - ) where - F: Fn(&LibrarySong) -> Song, - { - songs.sort_by_key(|song| key_fn(song).path); + ) { + songs.sort_by(|s1, s2| s1.bliss_song.path.cmp(&s2.bliss_song.path)); } #[test] @@ -2055,7 +2042,7 @@ mod test { "/path/to/song2001", 20, distance, - closest_to_first_song_by_key, + closest_to_first_song, true, ) .unwrap(); @@ -2078,7 +2065,7 @@ mod test { "/path/to/song2001", 20, distance, - closest_to_first_song_by_key, + closest_to_first_song, false, ) .unwrap(); diff --git a/src/playlist.rs b/src/playlist.rs index 
e33d696..3f011d1 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -7,8 +7,6 @@ //! They will yield different styles of playlists, so don't hesitate to //! experiment with them if the default (euclidean distance for now) doesn't //! suit you. -// TODO on the `by_key` functions: maybe Fn(&T) -> &Song is enough? Compared -// to -> Song use crate::{BlissError, BlissResult, Song, NUMBER_FEATURES}; use extended_isolation_forest::{Forest, ForestOptions}; use ndarray::{Array, Array1, Array2, Axis}; @@ -93,29 +91,16 @@ impl PreTrainedSetDistanceMetric for ExtendedIsolationForest { /// Sort `songs` in place by putting songs close to `first_song` first /// using the `distance` metric. -pub fn closest_to_first_song( - first_song: &Song, - #[allow(clippy::ptr_arg)] songs: &mut Vec, - distance: impl DistanceMetric, -) { - songs.sort_by_cached_key(|song| n32(first_song.custom_distance(song, &distance))); -} - -/// Sort `songs` in place by putting songs close to `first_song` first -/// using the `distance` metric. 
-/// -/// Sort songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` -pub fn closest_to_first_song_by_key( +pub fn closest_to_first_song>( first_song: &T, - #[allow(clippy::ptr_arg)] songs: &mut Vec, + songs: &mut [T], distance: impl DistanceMetric, - key_fn: F, -) where - F: Fn(&T) -> Song, -{ - let first_song = key_fn(first_song); - songs.sort_by_cached_key(|song| n32(first_song.custom_distance(&key_fn(song), &distance))); +) { + songs.sort_by_cached_key(|song| { + n32(first_song + .as_ref() + .custom_distance(song.as_ref(), &distance)) + }); } /// Sort `all_songs` in place by putting songs close to `selected_songs` first @@ -123,44 +108,17 @@ pub fn closest_to_first_song_by_key( /// /// Sort songs with a key extraction function, useful for when you have a /// structure like `CustomSong { bliss_song: Song, something_else: bool }` -pub fn closest_to_selected_songs_by_key( +pub fn closest_to_selected_songs>( selected_songs: &[T], all_songs: &mut [T], mut metric: impl PreTrainedSetDistanceMetric, - key_fn: F, -) where - F: Fn(&T) -> Song, -{ +) { let selected_songs = selected_songs .iter() - .map(|c| key_fn(c).analysis.as_arr1()) + .map(|c| c.as_ref().analysis.as_arr1()) .collect::>(); metric.train(&selected_songs); - all_songs.sort_by_cached_key(|song| n32(metric.distance(&key_fn(song).analysis.as_arr1()))); -} - -/// Sort `songs` in place using the `distance` metric and ordering by -/// the smallest distance between each song. -/// -/// If the generated playlist is `[song1, song2, song3, song4]`, it means -/// song2 is closest to song1, song3 is closest to song2, and song4 is closest -/// to song3. -/// -/// Note that this has a tendency to go from one style to the other very fast, -/// and it can be slow on big libraries. 
-pub fn song_to_song(first_song: &Song, songs: &mut Vec, distance: impl DistanceMetric) { - let mut new_songs = Vec::with_capacity(songs.len()); - let mut song = first_song.to_owned(); - - while !songs.is_empty() { - let distances: Array1 = - Array::from_shape_fn(songs.len(), |i| song.custom_distance(&songs[i], &distance)); - let idx = distances.argmin().unwrap(); - song = songs[idx].to_owned(); - new_songs.push(song.to_owned()); - songs.retain(|s| s != &song); - } - *songs = new_songs; + all_songs.sort_by_cached_key(|song| n32(metric.distance(&song.as_ref().analysis.as_arr1()))); } /// Sort `songs` in place using the `distance` metric and ordering by @@ -172,32 +130,23 @@ pub fn song_to_song(first_song: &Song, songs: &mut Vec, distance: impl Dis /// /// Note that this has a tendency to go from one style to the other very fast, /// and it can be slow on big libraries. -/// -/// Sort songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` -// TODO: maybe Clone is not needed? 
-pub fn song_to_song_by_key( +pub fn song_to_song>( first_song: &T, - songs: &mut Vec, + songs: &mut [T], distance: impl DistanceMetric, - key_fn: F, -) where - F: Fn(&T) -> Song, -{ - let mut new_songs: Vec = Vec::with_capacity(songs.len()); - let mut bliss_song = key_fn(&first_song.to_owned()); +) { + let mut song = first_song; - while !songs.is_empty() { - let distances: Array1 = Array::from_shape_fn(songs.len(), |i| { - bliss_song.custom_distance(&key_fn(&songs[i]), &distance) + for i in 0..songs.len() { + let remaining_songs = &songs[i..]; + let distances: Array1 = Array::from_shape_fn(remaining_songs.len(), |j| { + song.as_ref() + .custom_distance(remaining_songs[j].as_ref(), &distance) }); let idx = distances.argmin().unwrap(); - let song = songs[idx].to_owned(); - bliss_song = key_fn(&songs[idx]).to_owned(); - new_songs.push(song.to_owned()); - songs.retain(|s| s != &song); + songs.swap(idx + i, i); + song = &songs[i]; } - *songs = new_songs; } /// Remove duplicate songs from a playlist, in place. @@ -211,33 +160,10 @@ pub fn song_to_song_by_key( /// * `songs`: The playlist to remove duplicates from. /// * `distance_threshold`: The distance threshold under which two songs are /// considered identical. If `None`, a default value of 0.05 will be used. -pub fn dedup_playlist(songs: &mut Vec, distance_threshold: Option) { +pub fn dedup_playlist>(songs: &mut Vec, distance_threshold: Option) { dedup_playlist_custom_distance(songs, distance_threshold, euclidean_distance); } -/// Remove duplicate songs from a playlist, in place. -/// -/// Two songs are considered duplicates if they either have the same, -/// non-empty title and artist name, or if they are close enough in terms -/// of distance. -/// -/// Dedup songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` you -/// want to deduplicate. -/// -/// # Arguments -/// -/// * `songs`: The playlist to remove duplicates from. 
-/// * `distance_threshold`: The distance threshold under which two songs are -/// considered identical. If `None`, a default value of 0.05 will be used. -/// * `key_fn`: A function used to retrieve the bliss [Song] from `T`. -pub fn dedup_playlist_by_key(songs: &mut Vec, distance_threshold: Option, key_fn: F) -where - F: Fn(&T) -> Song, -{ - dedup_playlist_custom_distance_by_key(songs, distance_threshold, euclidean_distance, key_fn); -} - /// Remove duplicate songs from a playlist, in place, using a custom distance /// metric. /// @@ -251,12 +177,14 @@ where /// * `distance_threshold`: The distance threshold under which two songs are /// considered identical. If `None`, a default value of 0.05 will be used. /// * `distance`: A custom distance metric. -pub fn dedup_playlist_custom_distance( - songs: &mut Vec, +pub fn dedup_playlist_custom_distance>( + songs: &mut Vec, distance_threshold: Option, distance: impl DistanceMetric, ) { songs.dedup_by(|s1, s2| { + let s1 = s1.as_ref(); + let s2 = s2.as_ref(); n32(s1.custom_distance(s2, &distance)) < distance_threshold.unwrap_or(0.05) || (s1.title.is_some() && s2.title.is_some() @@ -267,45 +195,6 @@ pub fn dedup_playlist_custom_distance( }); } -/// Remove duplicate songs from a playlist, in place, using a custom distance -/// metric. -/// -/// Two songs are considered duplicates if they either have the same, -/// non-empty title and artist name, or if they are close enough in terms -/// of distance. -/// -/// Dedup songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` -/// you want to deduplicate. -/// -/// # Arguments -/// -/// * `songs`: The playlist to remove duplicates from. -/// * `distance_threshold`: The distance threshold under which two songs are -/// considered identical. If `None`, a default value of 0.05 will be used. -/// * `distance`: A custom distance metric. 
-/// * `key_fn`: A function used to retrieve the bliss [Song] from `T`. -pub fn dedup_playlist_custom_distance_by_key( - songs: &mut Vec, - distance_threshold: Option, - distance: impl DistanceMetric, - key_fn: F, -) where - F: Fn(&T) -> Song, -{ - songs.dedup_by(|s1, s2| { - let s1 = key_fn(s1); - let s2 = key_fn(s2); - n32(s1.custom_distance(&s2, &distance)) < distance_threshold.unwrap_or(0.05) - || (s1.title.is_some() - && s2.title.is_some() - && s1.artist.is_some() - && s2.artist.is_some() - && s1.title == s2.title - && s1.artist == s2.artist) - }); -} - /// Return a list of albums in a `pool` of songs that are similar to /// songs in `group`, discarding songs that don't belong to an album. /// It basically makes an "album" playlist from the `pool` of songs. @@ -326,33 +215,36 @@ pub fn dedup_playlist_custom_distance_by_key( /// A vector of songs, including `group` at the beginning, that you /// most likely want to plug in your audio player by using something like /// `ret.map(|song| song.path.to_owned()).collect::>()`. -pub fn closest_album_to_group(group: Vec, pool: Vec) -> BlissResult> { +pub fn closest_album_to_group + Clone>( + group: Vec, + pool: Vec, +) -> BlissResult> { let mut albums_analysis: HashMap<&str, Array2> = HashMap::new(); let mut albums = Vec::new(); // Remove songs from the group from the pool. 
let pool = pool .into_iter() - .filter(|s| !group.contains(s)) + .filter(|s| !group.iter().any(|gs| gs.as_ref() == s.as_ref())) .collect::>(); for song in &pool { - if let Some(album) = &song.album { + if let Some(album) = &song.as_ref().album { if let Some(analysis) = albums_analysis.get_mut(album as &str) { analysis - .push_row(song.analysis.as_arr1().view()) + .push_row(song.as_ref().analysis.as_arr1().view()) .map_err(|e| { BlissError::ProviderError(format!("while computing distances: {e}")) })?; } else { - let mut array = Array::zeros((1, song.analysis.as_arr1().len())); - array.assign(&song.analysis.as_arr1()); + let mut array = Array::zeros((1, song.as_ref().analysis.as_arr1().len())); + array.assign(&song.as_ref().analysis.as_arr1()); albums_analysis.insert(album, array); } } } let mut group_analysis = Array::zeros((group.len(), NUMBER_FEATURES)); for (song, mut column) in group.iter().zip(group_analysis.axis_iter_mut(Axis(0))) { - column.assign(&song.analysis.as_arr1()); + column.assign(&song.as_ref().analysis.as_arr1()); } let first_analysis = group_analysis .mean_axis(Axis(0)) @@ -370,123 +262,17 @@ pub fn closest_album_to_group(group: Vec, pool: Vec) -> BlissResult< for (album, _) in albums { let mut al = pool .iter() - .filter(|s| s.album.is_some() && s.album.as_ref().unwrap() == &album.to_string()) - .map(|s| s.to_owned()) - .collect::>(); - al.sort_by(|s1, s2| { - let track_number1 = s1 - .track_number - .to_owned() - .unwrap_or_else(|| String::from("")); - let track_number2 = s2 - .track_number - .to_owned() - .unwrap_or_else(|| String::from("")); - if let Ok(x) = track_number1.parse::() { - if let Ok(y) = track_number2.parse::() { - return x.cmp(&y); - } - } - s1.track_number.cmp(&s2.track_number) - }); - playlist.extend_from_slice(&al); - } - Ok(playlist) -} - -/// Return a list of albums in a `pool` of songs that are similar to -/// songs in `group`, discarding songs that don't belong to an album. 
-/// It basically makes an "album" playlist from the `pool` of songs. -/// -/// `group` should be ordered by track number. -/// -/// Songs from `group` would usually just be songs from an album, but not -/// necessarily - they are discarded from `pool` no matter what. -/// -/// Order songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` -/// you want to order. -/// -/// # Arguments -/// -/// * `group` - A small group of songs, e.g. an album. -/// * `pool` - A pool of songs to find similar songs in, e.g. a user's song -/// library. -/// * `key_fn`: A function used to retrieve the bliss [Song] from `T`. -/// -/// # Returns -/// -/// A vector of T, including `group` at the beginning, that you -/// most likely want to plug in your audio player by using something like -/// `ret.map(|song| song.path.to_owned()).collect::>()`. -// TODO: maybe Clone is not needed? -pub fn closest_album_to_group_by_key( - group: Vec, - pool: Vec, - key_fn: F, -) -> BlissResult> -where - F: Fn(&T) -> Song, -{ - let mut albums_analysis: HashMap> = HashMap::new(); - let mut albums = Vec::new(); - - // Remove songs from the group from the pool. 
- let pool = pool - .into_iter() - .filter(|s| !group.contains(s)) - .collect::>(); - for song in &pool { - let song = key_fn(song); - if let Some(album) = song.album { - if let Some(analysis) = albums_analysis.get_mut(&album as &str) { - analysis - .push_row(song.analysis.as_arr1().view()) - .map_err(|e| { - BlissError::ProviderError(format!("while computing distances: {e}")) - })?; - } else { - let mut array = Array::zeros((1, song.analysis.as_arr1().len())); - array.assign(&song.analysis.as_arr1()); - albums_analysis.insert(album.to_owned(), array); - } - } - } - let mut group_analysis = Array::zeros((group.len(), NUMBER_FEATURES)); - for (song, mut column) in group.iter().zip(group_analysis.axis_iter_mut(Axis(0))) { - let song = key_fn(song); - column.assign(&song.analysis.as_arr1()); - } - let first_analysis = group_analysis - .mean_axis(Axis(0)) - .ok_or_else(|| BlissError::ProviderError(String::from("Mean of empty slice")))?; - for (album, analysis) in albums_analysis.iter() { - let mean_analysis = analysis - .mean_axis(Axis(0)) - .ok_or_else(|| BlissError::ProviderError(String::from("Mean of empty slice")))?; - let album = album.to_owned(); - albums.push((album, mean_analysis.to_owned())); - } - - albums.sort_by_key(|(_, analysis)| n32(euclidean_distance(&first_analysis, analysis))); - let mut playlist = group; - for (album, _) in albums { - let mut al = pool - .iter() - .filter(|s| { - let s = key_fn(s); - s.album.is_some() && s.album.as_ref().unwrap() == &album.to_string() - }) - .map(|s| s.to_owned()) + .filter(|s| s.as_ref().album.as_deref() == Some(album)) + .cloned() .collect::>(); al.sort_by(|s1, s2| { - let s1 = key_fn(s1); - let s2 = key_fn(s2); let track_number1 = s1 + .as_ref() .track_number .to_owned() .unwrap_or_else(|| String::from("")); let track_number2 = s2 + .as_ref() .track_number .to_owned() .unwrap_or_else(|| String::from("")); @@ -495,9 +281,9 @@ where return x.cmp(&y); } } - s1.track_number.cmp(&s2.track_number) + 
s1.as_ref().track_number.cmp(&s2.as_ref().track_number) }); - playlist.extend_from_slice(&al); + playlist.extend(al); } Ok(playlist) } @@ -515,6 +301,12 @@ mod test { bliss_song: Song, } + impl AsRef for CustomSong { + fn as_ref(&self) -> &Song { + &self.bliss_song + } + } + #[test] fn test_dedup_playlist_custom_distance() { let first_song = Song { @@ -656,9 +448,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance_by_key(&mut playlist, None, euclidean_distance, |s| { - s.bliss_song.to_owned() - }); + dedup_playlist_custom_distance(&mut playlist, None, euclidean_distance); assert_eq!( playlist, vec![ @@ -675,9 +465,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance_by_key(&mut playlist, Some(20.), cosine_distance, |s| { - s.bliss_song.to_owned() - }); + dedup_playlist_custom_distance(&mut playlist, Some(20.), cosine_distance); assert_eq!(playlist, vec![first_song.to_owned()]); let mut playlist = vec![ first_song.to_owned(), @@ -687,7 +475,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_by_key(&mut playlist, Some(20.), |s| s.bliss_song.to_owned()); + dedup_playlist(&mut playlist, Some(20.)); assert_eq!(playlist, vec![first_song.to_owned()]); let mut playlist = vec![ first_song.to_owned(), @@ -697,7 +485,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_by_key(&mut playlist, None, |s| s.bliss_song.to_owned()); + dedup_playlist(&mut playlist, None); assert_eq!( playlist, vec![ @@ -794,9 +582,7 @@ mod test { second_song.to_owned(), ]; - song_to_song_by_key(&first_song, &mut songs, euclidean_distance, |s| { - s.bliss_song.to_owned() - }); + song_to_song(&first_song, &mut songs, euclidean_distance); assert_eq!( songs, @@ -901,9 +687,7 @@ mod test { fifth_song.to_owned(), ]; - closest_to_first_song_by_key(&first_song, &mut songs, euclidean_distance, |s| { - s.bliss_song.to_owned() - }); + 
closest_to_first_song(&first_song, &mut songs, euclidean_distance); assert_eq!( songs, @@ -1050,8 +834,7 @@ mod test { fourth_song.to_owned(), second_song.to_owned() ], - closest_album_to_group_by_key(group, pool.to_owned(), |s| s.bliss_song.to_owned()) - .unwrap(), + closest_album_to_group(group, pool.to_owned()).unwrap(), ); } } diff --git a/src/song.rs b/src/song.rs index 673c7bc..4a38692 100644 --- a/src/song.rs +++ b/src/song.rs @@ -79,6 +79,12 @@ pub struct Song { pub cue_info: Option, } +impl AsRef for Song { + fn as_ref(&self) -> &Song { + self + } +} + #[derive(Debug, EnumIter, EnumCount)] /// Indexes different fields of an [Analysis](Song::analysis). /// From b76275031f730fee8083cc312e3868e937811402 Mon Sep 17 00:00:00 2001 From: Simon Teixidor Date: Wed, 13 Mar 2024 20:46:20 +0100 Subject: [PATCH 03/12] Get rid of circular dependency between playlist and song The playlist module depends on the song module, which in turn depends on the playlist module. This is confusing, and in this case also completely unnecessary. --- examples/distance.rs | 4 +- src/lib.rs | 7 +- src/playlist.rs | 16 ++-- src/song.rs | 218 ------------------------------------------- 4 files changed, 17 insertions(+), 228 deletions(-) diff --git a/examples/distance.rs b/examples/distance.rs index 0fa2ac8..870eacd 100644 --- a/examples/distance.rs +++ b/examples/distance.rs @@ -1,4 +1,4 @@ -use bliss_audio::Song; +use bliss_audio::{playlist::euclidean_distance, Song}; use std::env; /** @@ -20,7 +20,7 @@ fn main() -> Result<(), String> { "d({:?}, {:?}) = {}", song1.path, song2.path, - song1.distance(&song2) + euclidean_distance(&song1.analysis.as_arr1(), &song2.analysis.as_arr1()) ); Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index abfc207..1dc673b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,13 +27,16 @@ //! //! ### Analyze & compute the distance between two songs //! ```no_run -//! use bliss_audio::{BlissResult, Song}; +//! 
use bliss_audio::{BlissResult, Song, playlist::euclidean_distance}; //! //! fn main() -> BlissResult<()> { //! let song1 = Song::from_path("/path/to/song1")?; //! let song2 = Song::from_path("/path/to/song2")?; //! -//! println!("Distance between song1 and song2 is {}", song1.distance(&song2)); +//! println!( +//! "Distance between song1 and song2 is {}", +//! euclidean_distance(&song1.analysis.as_arr1(), &song2.analysis.as_arr1()) +//! ); //! Ok(()) //! } //! ``` diff --git a/src/playlist.rs b/src/playlist.rs index 3f011d1..79e1b5d 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -97,9 +97,10 @@ pub fn closest_to_first_song>( distance: impl DistanceMetric, ) { songs.sort_by_cached_key(|song| { - n32(first_song - .as_ref() - .custom_distance(song.as_ref(), &distance)) + n32(distance( + &first_song.as_ref().analysis.as_arr1(), + &song.as_ref().analysis.as_arr1(), + )) }); } @@ -140,8 +141,10 @@ pub fn song_to_song>( for i in 0..songs.len() { let remaining_songs = &songs[i..]; let distances: Array1 = Array::from_shape_fn(remaining_songs.len(), |j| { - song.as_ref() - .custom_distance(remaining_songs[j].as_ref(), &distance) + distance( + &song.as_ref().analysis.as_arr1(), + &remaining_songs[j].as_ref().analysis.as_arr1(), + ) }); let idx = distances.argmin().unwrap(); songs.swap(idx + i, i); @@ -185,7 +188,8 @@ pub fn dedup_playlist_custom_distance>( songs.dedup_by(|s1, s2| { let s1 = s1.as_ref(); let s2 = s2.as_ref(); - n32(s1.custom_distance(s2, &distance)) < distance_threshold.unwrap_or(0.05) + n32(distance(&s1.analysis.as_arr1(), &s2.analysis.as_arr1())) + < distance_threshold.unwrap_or(0.05) || (s1.title.is_some() && s2.title.is_some() && s1.artist.is_some() diff --git a/src/song.rs b/src/song.rs index 4a38692..33f8363 100644 --- a/src/song.rs +++ b/src/song.rs @@ -13,9 +13,6 @@ extern crate ndarray; use crate::chroma::ChromaDesc; use crate::cue::CueInfo; use crate::misc::LoudnessDesc; -#[cfg(doc)] -use crate::playlist; -use 
crate::playlist::{closest_to_first_song, dedup_playlist, euclidean_distance, DistanceMetric}; use crate::temporal::BPMDesc; use crate::timbral::{SpectralDesc, ZeroCrossingRateDesc}; use crate::{BlissError, BlissResult, SAMPLE_RATE}; @@ -194,96 +191,9 @@ impl Analysis { pub fn as_vec(&self) -> Vec { self.internal_analysis.to_vec() } - - /// Compute distance between two analysis using a user-provided distance - /// metric. You most likely want to use `song.custom_distance` directly - /// rather than this function. - /// - /// For this function to be integrated properly with the rest - /// of bliss' parts, it should be a valid distance metric, i.e.: - /// 1. For X, Y real vectors, d(X, Y) = 0 ⇔ X = Y - /// 2. For X, Y real vectors, d(X, Y) >= 0 - /// 3. For X, Y real vectors, d(X, Y) = d(Y, X) - /// 4. For X, Y, Z real vectors d(X, Y) ≤ d(X + Z) + d(Z, Y) - /// - /// Note that almost all distance metrics you will find obey these - /// properties, so don't sweat it too much. - pub fn custom_distance(&self, other: &Self, distance: impl DistanceMetric) -> f32 { - distance(&self.as_arr1(), &other.as_arr1()) - } } impl Song { - #[allow(dead_code)] - /// Compute the distance between the current song and any given - /// Song. - /// - /// The smaller the number, the closer the songs; usually more useful - /// if compared between several songs - /// (e.g. if song1.distance(song2) < song1.distance(song3), then song1 is - /// closer to song2 than it is to song3. - /// - /// Currently uses the euclidean distance, but this can change in an - /// upcoming release if another metric performs better. - pub fn distance(&self, other: &Self) -> f32 { - self.analysis - .custom_distance(&other.analysis, euclidean_distance) - } - - /// Compute distance between two songs using a user-provided distance - /// metric. - /// - /// For this function to be integrated properly with the rest - /// of bliss' parts, it should be a valid distance metric, i.e.: - /// 1. 
For X, Y real vectors, d(X, Y) = 0 ⇔ X = Y - /// 2. For X, Y real vectors, d(X, Y) >= 0 - /// 3. For X, Y real vectors, d(X, Y) = d(Y, X) - /// 4. For X, Y, Z real vectors d(X, Y) ≤ d(X + Z) + d(Z, Y) - /// - /// Note that almost all distance metrics you will find obey these - /// properties, so don't sweat it too much. - pub fn custom_distance(&self, other: &Self, distance: impl DistanceMetric) -> f32 { - self.analysis.custom_distance(&other.analysis, distance) - } - - /// Orders songs in `pool` by proximity to `self`, using the distance - /// metric `distance` to compute the order. - /// Basically return a playlist from songs in `pool`, starting - /// from `self`, using `distance` (some distance metrics can - /// be found in the [playlist] module). - /// - /// Note that contrary to [Song::closest_from_pool], `self` is NOT added - /// to the beginning of the returned vector. - /// - /// No deduplication is ran either; if you're looking for something easy - /// that works "out of the box", use [Song::closest_from_pool]. - pub fn closest_from_pool_custom( - &self, - pool: Vec, - distance: impl DistanceMetric, - ) -> Vec { - let mut pool = pool; - closest_to_first_song(self, &mut pool, distance); - pool - } - - /// Order songs in `pool` by proximity to `self`. - /// Convenience method to return a playlist from songs in `pool`, - /// starting from `self`. - /// - /// The distance is already chosen, deduplication is ran, and the first song - /// is added to the top of the playlist, to make everything easier. - /// - /// If you want more control over which distance metric is chosen, - /// run deduplication manually, etc, use [Song::closest_from_pool_custom]. 
- pub fn closest_from_pool(&self, pool: Vec) -> Vec { - let mut playlist = vec![self.to_owned()]; - playlist.extend_from_slice(&pool); - closest_to_first_song(self, &mut playlist, euclidean_distance); - dedup_playlist(&mut playlist, None); - playlist - } - /// Returns a decoded [Song] given a file path, or an error if the song /// could not be analyzed for some reason. /// @@ -903,34 +813,6 @@ mod tests { assert!(sample_array.len() as f32 / (sample_array.capacity() as f32) < 1.); } - #[test] - fn test_analysis_distance() { - let mut a = Song::default(); - a.analysis = Analysis::new([ - 0.16391512, 0.11326739, 0.96868552, 0.8353934, 0.49867523, 0.76532606, 0.63448005, - 0.82506196, 0.71457147, 0.62395476, 0.69680329, 0.9855766, 0.41369333, 0.13900452, - 0.68001012, 0.11029723, 0.97192943, 0.57727861, 0.07994821, 0.88993185, - ]); - - let mut b = Song::default(); - b.analysis = Analysis::new([ - 0.5075758, 0.36440256, 0.28888011, 0.43032829, 0.62387977, 0.61894916, 0.99676086, - 0.11913155, 0.00640396, 0.15943407, 0.33829514, 0.34947174, 0.82927523, 0.18987604, - 0.54437275, 0.22076826, 0.91232151, 0.29233168, 0.32846024, 0.04522147, - ]); - assert_eq!(a.distance(&b), 1.9469079) - } - - #[test] - fn test_analysis_distance_indiscernible() { - let mut a = Song::default(); - a.analysis = Analysis::new([ - 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., - 20., - ]); - assert_eq!(a.distance(&a), 0.) - } - #[test] fn test_decode_errors() { assert_eq!( @@ -979,106 +861,6 @@ mod tests { format!("{:?}", song.analysis), ); } - - fn dummy_distance(_: &Array1, _: &Array1) -> f32 { - 0. 
- } - - #[test] - fn test_custom_distance() { - let mut a = Song::default(); - a.analysis = Analysis::new([ - 0.16391512, 0.11326739, 0.96868552, 0.8353934, 0.49867523, 0.76532606, 0.63448005, - 0.82506196, 0.71457147, 0.62395476, 0.69680329, 0.9855766, 0.41369333, 0.13900452, - 0.68001012, 0.11029723, 0.97192943, 0.57727861, 0.07994821, 0.88993185, - ]); - - let mut b = Song::default(); - b.analysis = Analysis::new([ - 0.5075758, 0.36440256, 0.28888011, 0.43032829, 0.62387977, 0.61894916, 0.99676086, - 0.11913155, 0.00640396, 0.15943407, 0.33829514, 0.34947174, 0.82927523, 0.18987604, - 0.54437275, 0.22076826, 0.91232151, 0.29233168, 0.32846024, 0.04522147, - ]); - assert_eq!(a.custom_distance(&b, dummy_distance), 0.); - } - - #[test] - fn test_closest_from_pool() { - let song = Song { - path: Path::new("path-to-first").to_path_buf(), - analysis: Analysis::new([ - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - ]), - ..Default::default() - }; - let first_song_dupe = Song { - path: Path::new("path-to-dupe").to_path_buf(), - analysis: Analysis::new([ - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - ]), - ..Default::default() - }; - - let second_song = Song { - path: Path::new("path-to-second").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1.9, 1., 1., 1., - ]), - ..Default::default() - }; - let third_song = Song { - path: Path::new("path-to-third").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.5, 1., 1., 1., - ]), - ..Default::default() - }; - let fourth_song = Song { - path: Path::new("path-to-fourth").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 1., 1., 1., - ]), - ..Default::default() - }; - let fifth_song = Song { - path: Path::new("path-to-fifth").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 
2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 1., 1., 1., - ]), - ..Default::default() - }; - - let songs = vec![ - song.to_owned(), - first_song_dupe.to_owned(), - second_song.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), - fifth_song.to_owned(), - ]; - let playlist = song.closest_from_pool(songs.to_owned()); - assert_eq!( - playlist, - vec![ - song.to_owned(), - second_song.to_owned(), - fourth_song.to_owned(), - third_song.to_owned(), - ], - ); - let playlist = song.closest_from_pool_custom(songs, euclidean_distance); - assert_eq!( - playlist, - vec![ - song, - first_song_dupe, - second_song, - fourth_song, - fifth_song, - third_song - ], - ); - } } #[cfg(all(feature = "bench", test))] From e0ffd31cd09cfbd3783b373d39d088eb61d86029 Mon Sep 17 00:00:00 2001 From: Simon Teixidor Date: Thu, 14 Mar 2024 19:07:40 +0100 Subject: [PATCH 04/12] Unify distance metrics Allow euclidean distance and cosine distance to handle a set of songs as input. In doing so, we can also remove the ability to generate playlists from a single song, as that is just a special case of generating a playlist from many songs anyway. 
--- examples/library.rs | 2 +- examples/library_extra_info.rs | 2 +- examples/playlist.rs | 4 +- src/lib.rs | 4 +- src/library.rs | 148 +++++++------------ src/playlist.rs | 256 +++++++++++++++++---------------- 6 files changed, 187 insertions(+), 229 deletions(-) diff --git a/examples/library.rs b/examples/library.rs index f6e61d7..1cabb09 100644 --- a/examples/library.rs +++ b/examples/library.rs @@ -190,7 +190,7 @@ fn main() -> Result<()> { .unwrap_or("20") .parse::()?; let library: Library = Library::from_config_path(config_path)?; - let songs = library.playlist_from::<()>(song_path, playlist_length)?; + let songs = library.playlist_from::<()>(&[song_path], playlist_length)?; let song_paths = songs .into_iter() .map(|s| s.bliss_song.path.to_string_lossy().to_string()) diff --git a/examples/library_extra_info.rs b/examples/library_extra_info.rs index ec84cfd..a0d3167 100644 --- a/examples/library_extra_info.rs +++ b/examples/library_extra_info.rs @@ -208,7 +208,7 @@ fn main() -> Result<()> { .unwrap_or("20") .parse::()?; let library: Library = Library::from_config_path(config_path)?; - let songs = library.playlist_from::(song_path, playlist_length)?; + let songs = library.playlist_from::(&[song_path], playlist_length)?; let playlist = songs .into_iter() .map(|s| { diff --git a/examples/playlist.rs b/examples/playlist.rs index 86e118f..c106937 100644 --- a/examples/playlist.rs +++ b/examples/playlist.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use bliss_audio::playlist::{closest_to_first_song, dedup_playlist, euclidean_distance}; +use bliss_audio::playlist::{closest_to_songs, dedup_playlist, euclidean_distance}; use bliss_audio::{analyze_paths, Song}; use clap::{App, Arg}; use glob::glob; @@ -77,7 +77,7 @@ fn main() -> Result<()> { .into_iter() .filter(|x| x == &first_song || paths.contains(&x.path.to_string_lossy().to_string())) .collect(); - closest_to_first_song(&first_song, &mut songs_to_chose_from, euclidean_distance); + closest_to_songs(&[first_song], &mut 
songs_to_chose_from, &euclidean_distance); dedup_playlist(&mut songs_to_chose_from, None); fs::write(analysis_path, serialized)?; diff --git a/src/lib.rs b/src/lib.rs index 1dc673b..2e6a51b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,7 +45,7 @@ //! ```no_run //! use bliss_audio::{ //! analyze_paths, -//! playlist::{closest_to_first_song, euclidean_distance}, +//! playlist::{closest_to_songs, euclidean_distance}, //! BlissResult, Song, //! }; //! @@ -56,7 +56,7 @@ //! // Assuming there is a first song //! let first_song = songs.first().unwrap().to_owned(); //! -//! closest_to_first_song(&first_song, &mut songs, euclidean_distance); +//! closest_to_songs(&[first_song], &mut songs, &euclidean_distance); //! //! println!("Playlist is:"); //! for song in songs { diff --git a/src/library.rs b/src/library.rs index de81152..795c20e 100644 --- a/src/library.rs +++ b/src/library.rs @@ -111,18 +111,15 @@ use crate::analyze_paths_with_cores; use crate::cue::CueInfo; use crate::playlist::closest_album_to_group; -use crate::playlist::closest_to_first_song; -use crate::playlist::dedup_playlist; +use crate::playlist::closest_to_songs; use crate::playlist::dedup_playlist_custom_distance; use crate::playlist::euclidean_distance; -use crate::playlist::DistanceMetric; -use crate::playlist::PreTrainedSetDistanceMetric; +use crate::playlist::DistanceMetricBuilder; use anyhow::{bail, Context, Result}; #[cfg(not(test))] use dirs::data_local_dir; use indicatif::{ProgressBar, ProgressStyle}; use log::warn; -use noisy_float::prelude::*; use rusqlite::params; use rusqlite::params_from_iter; use rusqlite::Connection; @@ -487,69 +484,23 @@ impl Library { Self::new(config) } - /// Build a playlist of `playlist_length` items from an already analyzed - /// song in the library at `song_path`. + /// Build a playlist of `playlist_length` items from a set of already analyzed + /// songs in the library at `song_path`. 
/// - /// It uses a simple euclidean distance between songs, and deduplicates songs - /// that are too close. + /// It uses the ExentedIsolationForest score as a distance between songs, and deduplicates + /// songs that are too close. pub fn playlist_from( &self, - song_path: &str, + song_paths: &[&str], playlist_length: usize, ) -> Result>> { - let first_song: LibrarySong = self.song_from_path(song_path)?; - let mut songs = self.songs_from_library()?; - closest_to_first_song(&first_song, &mut songs, euclidean_distance); - songs.sort_by_cached_key(|song| { - n32(euclidean_distance( - &first_song.bliss_song.analysis.as_arr1(), - &song.bliss_song.analysis.as_arr1(), - )) - }); - dedup_playlist(&mut songs, None); - songs.truncate(playlist_length); - Ok(songs) - } - - /// Build a playlist of `playlist_length` items from an already analyzed - /// song in the library at `song_path`, using distance metric `distance`, - /// the sorting function `sort_by` and deduplicating if `dedup` is set to - /// `true`. - /// - /// You can use ready to use distance metrics such as - /// [euclidean_distance], and ready to use sorting functions like - /// [closest_to_first_song]. - /// - /// In most cases, you just want to use [Library::playlist_from]. - /// Use `playlist_from_custom` if you want to experiment with different - /// distance metrics / sorting functions. - /// - /// Example: - /// `library.playlist_from_song_custom(song_path, 20, euclidean_distance, - /// closest_to_first_song, true)`. 
- /// TODO path here too - pub fn playlist_from_custom( - &self, - song_path: &str, - playlist_length: usize, - distance: G, - mut sort_by: F, - dedup: bool, - ) -> Result>> - where - F: FnMut(&LibrarySong, &mut [LibrarySong], G), - G: DistanceMetric + Copy, - { - let first_song: LibrarySong = self.song_from_path(song_path).map_err(|_| { - BlissError::ProviderError(format!("song '{song_path}' has not been analyzed")) - })?; - let mut songs = self.songs_from_library()?; - sort_by(&first_song, &mut songs, distance); - if dedup { - dedup_playlist_custom_distance(&mut songs, None, distance); - } - songs.truncate(playlist_length); - Ok(songs) + self.playlist_from_custom( + song_paths, + playlist_length, + &euclidean_distance, + &mut closest_to_songs, + true, + ) } /// Build a playlist of `playlist_length` items from a set of already analyzed @@ -558,20 +509,19 @@ impl Library { /// `true`. /// /// You can use ready to use distance metrics such as - /// [extended_isolation_forest], and ready to use sorting functions like - /// [closest_to_selected_songs]. - pub fn playlist_from_many_custom( + /// [ExtendedIsolationForest], and ready to use sorting functions like + /// [closest_to_songs]. 
+ pub fn playlist_from_custom< + T: Serialize + DeserializeOwned, + F: FnMut(&[LibrarySong], &mut [LibrarySong], &dyn DistanceMetricBuilder), + >( &self, song_paths: &[&str], playlist_length: usize, - distance: G, - mut sort_by: F, + distance: &dyn DistanceMetricBuilder, + sort_by: &mut F, dedup: bool, - ) -> Result>> - where - F: FnMut(&[LibrarySong], &mut Vec>, G), - G: PreTrainedSetDistanceMetric, - { + ) -> Result>> { let initial_songs: Vec> = song_paths .iter() .map(|s| { @@ -583,7 +533,7 @@ impl Library { let mut songs = self.songs_from_library()?; sort_by(&initial_songs, &mut songs, distance); if dedup { - dedup_playlist(&mut songs, None); + dedup_playlist_custom_distance(&mut songs, None, distance); } songs.truncate(playlist_length); Ok(songs) @@ -1925,11 +1875,19 @@ mod test { } } + fn first_factor_divided_by_30_distance(a: &Array1, b: &Array1) -> f32 { + ((a[1] - b[1]).abs() / 30.).floor() + } + + fn first_factor_distance(a: &Array1, b: &Array1) -> f32 { + (a[1] - b[1]).abs() + } + #[test] fn test_library_playlist_song_not_existing() { let (library, _temp_dir, _) = setup_test_library(); assert!(library - .playlist_from::("not-existing", 2) + .playlist_from::(&["not-existing"], 2) .is_err()); } @@ -1937,7 +1895,7 @@ mod test { fn test_library_playlist_crop() { let (library, _temp_dir, _) = setup_test_library(); let songs: Vec> = - library.playlist_from("/path/to/song2001", 2).unwrap(); + library.playlist_from(&["/path/to/song2001"], 2).unwrap(); assert_eq!(2, songs.len()); } @@ -1945,7 +1903,7 @@ mod test { fn test_library_simple_playlist() { let (library, _temp_dir, _) = setup_test_library(); let songs: Vec> = - library.playlist_from("/path/to/song2001", 20).unwrap(); + library.playlist_from(&["/path/to/song2001"], 20).unwrap(); assert_eq!( vec![ "/path/to/song2001", @@ -1966,14 +1924,12 @@ mod test { #[test] fn test_library_custom_playlist_distance() { let (library, _temp_dir, _) = setup_test_library(); - let distance = - |a: &Array1, b: &Array1| 
(a.get(1).unwrap() - b.get(1).unwrap()).abs(); let songs: Vec> = library .playlist_from_custom( - "/path/to/song2001", + &["/path/to/song2001"], 20, - distance, - closest_to_first_song, + &first_factor_distance, + &mut closest_to_songs, true, ) .unwrap(); @@ -1995,9 +1951,9 @@ mod test { } fn custom_sort( - _: &LibrarySong, + _: &[LibrarySong], songs: &mut [LibrarySong], - _distance: impl DistanceMetric, + _distance: &dyn DistanceMetricBuilder, ) { songs.sort_by(|s1, s2| s1.bliss_song.path.cmp(&s2.bliss_song.path)); } @@ -2007,10 +1963,10 @@ mod test { let (library, _temp_dir, _) = setup_test_library(); let songs: Vec> = library .playlist_from_custom( - "/path/to/song2001", + &["/path/to/song2001"], 20, - euclidean_distance, - custom_sort, + &euclidean_distance, + &mut custom_sort, true, ) .unwrap(); @@ -2034,15 +1990,13 @@ mod test { #[test] fn test_library_custom_playlist_dedup() { let (library, _temp_dir, _) = setup_test_library(); - let distance = |a: &Array1, b: &Array1| { - ((a.get(1).unwrap() - b.get(1).unwrap()).abs() / 30.).floor() - }; + let songs: Vec> = library .playlist_from_custom( - "/path/to/song2001", + &["/path/to/song2001"], 20, - distance, - closest_to_first_song, + &first_factor_divided_by_30_distance, + &mut closest_to_songs, true, ) .unwrap(); @@ -2058,14 +2012,12 @@ mod test { .collect::>(), ); - let distance = - |a: &Array1, b: &Array1| ((a.get(1).unwrap() - b.get(1).unwrap()).abs()); let songs: Vec> = library .playlist_from_custom( - "/path/to/song2001", + &["/path/to/song2001"], 20, - distance, - closest_to_first_song, + &first_factor_distance, + &mut closest_to_songs, false, ) .unwrap(); diff --git a/src/playlist.rs b/src/playlist.rs index 79e1b5d..569ac87 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -14,19 +14,46 @@ use ndarray_stats::QuantileExt; use noisy_float::prelude::*; use std::collections::HashMap; -/// Convenience trait for user-defined distance metrics. 
-pub trait DistanceMetric: Fn(&Array1, &Array1) -> f32 {} -impl DistanceMetric for F where F: Fn(&Array1, &Array1) -> f32 {} - -/// Pre trained set distance metrics. -pub trait PreTrainedSetDistanceMetric { - /// Train this distance metric on the set of vectors that it should measure distance from. - fn train(&mut self, vectors: &[Array1]); - /// Return the distance from the set of vectors that this metric was trained on. Must not be - /// called before train. +/// Trait for creating a distance metric, measuring the distance to a set of vectors. If this +/// metric requires any kind of training, this should be done in the build function so that the +/// returned DistanceMetric instance is already trained and ready to use. +pub trait DistanceMetricBuilder { + /// Build a distance metric that measures the distance to vectors. + fn build<'a>(&'a self, vectors: &[Array1]) -> Box; +} + +/// Measure the distance to a vector, from the vector(s) in the internal state of this metric. +pub trait DistanceMetric { + /// Return the distance from the set of vectors that this metric was built from. fn distance(&self, vector: &Array1) -> f32; } +/// Convenience struct used for implementing DistanceMetric for plain functions. +pub struct FunctionDistanceMetric<'a, F: Fn(&Array1, &Array1) -> f32> { + func: &'a F, + state: Vec>, +} + +impl DistanceMetricBuilder for F +where + F: Fn(&Array1, &Array1) -> f32 + 'static, +{ + fn build<'a>(&'a self, vectors: &[Array1]) -> Box { + Box::new(FunctionDistanceMetric { + func: self, + state: vectors.iter().map(|s| s.to_owned()).collect(), + }) + } +} + +impl<'a, F: Fn(&Array1, &Array1) -> f32 + 'static> DistanceMetric + for FunctionDistanceMetric<'a, F> +{ + fn distance(&self, vector: &Array1) -> f32 { + self.state.iter().map(|v| (self.func)(v, vector)).sum() + } +} + /// Return the [euclidean /// distance](https://en.wikipedia.org/wiki/Euclidean_distance#Higher_dimensions) /// between two vectors. 
@@ -54,72 +81,46 @@ fn feature_array1_to_array(f: &Array1) -> [f32; NUMBER_FEATURES] { .expect("Couldn't convert slice to array") } -/// Return the [extended isolation forest](https://ieeexplore.ieee.org/document/8888179) -/// score between a set of vectors and a single vector. -pub struct ExtendedIsolationForest { - forest: Option>, -} - -impl Default for ExtendedIsolationForest { - /// Create an ExtendedIsolationForest with an empty state. - fn default() -> ExtendedIsolationForest { - ExtendedIsolationForest { forest: None } - } -} - -impl PreTrainedSetDistanceMetric for ExtendedIsolationForest { - fn train(&mut self, vectors: &[Array1]) { - let opts = ForestOptions { - n_trees: 100, - sample_size: vectors.len().min(256), - max_tree_depth: None, - extension_level: 1, - }; +impl DistanceMetricBuilder for ForestOptions { + fn build(&self, vectors: &[Array1]) -> Box { let a = &*vectors .iter() .map(feature_array1_to_array) .collect::>(); - self.forest = Some(Forest::from_slice(a, &opts).unwrap()); - } - fn distance(&self, vector: &Array1) -> f32 { - self.forest - .as_ref() - .expect("distance() called before train()") - .score(&feature_array1_to_array(vector)) as f32 + + if self.sample_size > vectors.len() { + let mut opts = self.clone(); + opts.sample_size = self.sample_size.min(vectors.len()); + Box::new(Forest::from_slice(a, &opts).unwrap()) + } else { + Box::new(Forest::from_slice(a, self).unwrap()) + } } } -/// Sort `songs` in place by putting songs close to `first_song` first -/// using the `distance` metric. 
-pub fn closest_to_first_song>( - first_song: &T, - songs: &mut [T], - distance: impl DistanceMetric, -) { - songs.sort_by_cached_key(|song| { - n32(distance( - &first_song.as_ref().analysis.as_arr1(), - &song.as_ref().analysis.as_arr1(), - )) - }); +impl DistanceMetric for Forest { + fn distance(&self, vector: &Array1) -> f32 { + self.score(&feature_array1_to_array(vector)) as f32 + } } -/// Sort `all_songs` in place by putting songs close to `selected_songs` first +/// Sort `candidate_songs` in place by putting songs close to `selected_songs` first /// using the `distance` metric. /// /// Sort songs with a key extraction function, useful for when you have a /// structure like `CustomSong { bliss_song: Song, something_else: bool }` -pub fn closest_to_selected_songs>( +pub fn closest_to_songs>( selected_songs: &[T], - all_songs: &mut [T], - mut metric: impl PreTrainedSetDistanceMetric, + candidate_songs: &mut [T], + metric_builder: &dyn DistanceMetricBuilder, ) { let selected_songs = selected_songs .iter() .map(|c| c.as_ref().analysis.as_arr1()) .collect::>(); - metric.train(&selected_songs); - all_songs.sort_by_cached_key(|song| n32(metric.distance(&song.as_ref().analysis.as_arr1()))); + let metric = metric_builder.build(&selected_songs); + candidate_songs + .sort_by_cached_key(|song| n32(metric.distance(&song.as_ref().analysis.as_arr1()))); } /// Sort `songs` in place using the `distance` metric and ordering by @@ -132,23 +133,27 @@ pub fn closest_to_selected_songs>( /// Note that this has a tendency to go from one style to the other very fast, /// and it can be slow on big libraries. 
pub fn song_to_song>( - first_song: &T, + from: &[T], songs: &mut [T], - distance: impl DistanceMetric, + metric_builder: &dyn DistanceMetricBuilder, ) { - let mut song = first_song; + let mut vectors = from + .iter() + .map(|s| s.as_ref().analysis.as_arr1()) + .collect::>(); for i in 0..songs.len() { - let remaining_songs = &songs[i..]; - let distances: Array1 = Array::from_shape_fn(remaining_songs.len(), |j| { - distance( - &song.as_ref().analysis.as_arr1(), - &remaining_songs[j].as_ref().analysis.as_arr1(), - ) - }); - let idx = distances.argmin().unwrap(); - songs.swap(idx + i, i); - song = &songs[i]; + { + let metric = metric_builder.build(&vectors); + let remaining_songs = &songs[i..]; + let distances: Array1 = Array::from_shape_fn(remaining_songs.len(), |j| { + metric.distance(&remaining_songs[j].as_ref().analysis.as_arr1()) + }); + let idx = distances.argmin().unwrap(); + songs.swap(idx + i, i); + } + vectors.clear(); + vectors.push(songs[i].as_ref().analysis.as_arr1()); } } @@ -164,7 +169,7 @@ pub fn song_to_song>( /// * `distance_threshold`: The distance threshold under which two songs are /// considered identical. If `None`, a default value of 0.05 will be used. 
pub fn dedup_playlist>(songs: &mut Vec, distance_threshold: Option) { - dedup_playlist_custom_distance(songs, distance_threshold, euclidean_distance); + dedup_playlist_custom_distance(songs, distance_threshold, &euclidean_distance); } /// Remove duplicate songs from a playlist, in place, using a custom distance @@ -183,13 +188,14 @@ pub fn dedup_playlist>(songs: &mut Vec, distance_threshold: Op pub fn dedup_playlist_custom_distance>( songs: &mut Vec, distance_threshold: Option, - distance: impl DistanceMetric, + metric_builder: &dyn DistanceMetricBuilder, ) { songs.dedup_by(|s1, s2| { let s1 = s1.as_ref(); let s2 = s2.as_ref(); - n32(distance(&s1.analysis.as_arr1(), &s2.analysis.as_arr1())) - < distance_threshold.unwrap_or(0.05) + let vector = [s1.analysis.as_arr1()]; + let metric = metric_builder.build(&vector); + n32(metric.distance(&s2.analysis.as_arr1())) < distance_threshold.unwrap_or(0.05) || (s1.title.is_some() && s2.title.is_some() && s1.artist.is_some() @@ -371,7 +377,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance(&mut playlist, None, euclidean_distance); + dedup_playlist_custom_distance(&mut playlist, None, &euclidean_distance); assert_eq!( playlist, vec![ @@ -388,7 +394,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance(&mut playlist, Some(20.), cosine_distance); + dedup_playlist_custom_distance(&mut playlist, Some(20.), &euclidean_distance); assert_eq!(playlist, vec![first_song.to_owned()]); let mut playlist = vec![ first_song.to_owned(), @@ -452,7 +458,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance(&mut playlist, None, euclidean_distance); + dedup_playlist_custom_distance(&mut playlist, None, &euclidean_distance); assert_eq!( playlist, vec![ @@ -469,7 +475,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance(&mut playlist, Some(20.), cosine_distance); + 
dedup_playlist_custom_distance(&mut playlist, Some(20.), &cosine_distance); assert_eq!(playlist, vec![first_song.to_owned()]); let mut playlist = vec![ first_song.to_owned(), @@ -539,21 +545,21 @@ mod test { ..Default::default() }; let mut songs = vec![ - first_song.to_owned(), - third_song.to_owned(), - first_song_dupe.to_owned(), - second_song.to_owned(), - fourth_song.to_owned(), + &first_song, + &third_song, + &first_song_dupe, + &second_song, + &fourth_song, ]; - song_to_song(&first_song, &mut songs, euclidean_distance); + song_to_song(&[&first_song], &mut songs, &euclidean_distance); assert_eq!( songs, vec![ - first_song.to_owned(), - first_song_dupe.to_owned(), - second_song.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), + &first_song, + &first_song_dupe, + &second_song, + &third_song, + &fourth_song, ], ); @@ -578,30 +584,30 @@ mod test { something: true, }; - let mut songs: Vec = vec![ - first_song.to_owned(), - first_song_dupe.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), - second_song.to_owned(), + let mut songs: Vec<&CustomSong> = vec![ + &first_song, + &first_song_dupe, + &third_song, + &fourth_song, + &second_song, ]; - song_to_song(&first_song, &mut songs, euclidean_distance); + song_to_song(&[&first_song], &mut songs, &euclidean_distance); assert_eq!( songs, vec![ - first_song, - first_song_dupe, - second_song, - third_song, - fourth_song, + &first_song, + &first_song_dupe, + &second_song, + &third_song, + &fourth_song, ], ); } #[test] - fn test_sort_closest_to_first_song() { + fn test_sort_closest_to_songs() { let first_song = Song { path: Path::new("path-to-first").to_path_buf(), analysis: Analysis::new([ @@ -646,15 +652,15 @@ mod test { ..Default::default() }; - let mut songs = vec![ - first_song.to_owned(), - first_song_dupe.to_owned(), - second_song.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), - fifth_song.to_owned(), + let mut songs = [ + &first_song, + &first_song_dupe, + &second_song, + 
&third_song, + &fourth_song, + &fifth_song, ]; - closest_to_first_song(&first_song, &mut songs, euclidean_distance); + closest_to_songs(&[&first_song], &mut songs, &&euclidean_distance); let first_song = CustomSong { bliss_song: first_song, @@ -682,26 +688,26 @@ mod test { something: true, }; - let mut songs: Vec = vec![ - first_song.to_owned(), - first_song_dupe.to_owned(), - second_song.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), - fifth_song.to_owned(), + let mut songs = [ + &first_song, + &first_song_dupe, + &second_song, + &third_song, + &fourth_song, + &fifth_song, ]; - closest_to_first_song(&first_song, &mut songs, euclidean_distance); + closest_to_songs(&[&first_song], &mut songs, &&euclidean_distance); assert_eq!( songs, - vec![ - first_song, - first_song_dupe, - second_song, - fourth_song, - fifth_song, - third_song + [ + &first_song, + &first_song_dupe, + &second_song, + &fourth_song, + &fifth_song, + &third_song ], ); } From 1898ba84260f074b45636d425da580a0704bc88c Mon Sep 17 00:00:00 2001 From: Simon Teixidor Date: Thu, 28 Mar 2024 20:32:37 +0100 Subject: [PATCH 05/12] Review comment: Improve documentation for single song use case --- src/library.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/library.rs b/src/library.rs index 795c20e..79b1f1a 100644 --- a/src/library.rs +++ b/src/library.rs @@ -489,6 +489,9 @@ impl Library { /// /// It uses the ExentedIsolationForest score as a distance between songs, and deduplicates /// songs that are too close. + /// + /// Generating a playlist from a single song is also possible, and is just the special case + /// where song_paths is a slice of length 1. pub fn playlist_from( &self, song_paths: &[&str], @@ -511,6 +514,9 @@ impl Library { /// You can use ready to use distance metrics such as /// [ExtendedIsolationForest], and ready to use sorting functions like /// [closest_to_songs]. 
+ /// + /// Generating a playlist from a single song is also possible, and is just the special case + /// where song_paths is a slice of length 1. pub fn playlist_from_custom< T: Serialize + DeserializeOwned, F: FnMut(&[LibrarySong], &mut [LibrarySong], &dyn DistanceMetricBuilder), From d62009c5350a5c534d45a195c1b93cd69d06fd02 Mon Sep 17 00:00:00 2001 From: Simon Teixidor Date: Fri, 29 Mar 2024 20:07:39 +0100 Subject: [PATCH 06/12] Add test for ForestOptions as DistanceMetricBuilder. --- src/playlist.rs | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/playlist.rs b/src/playlist.rs index 569ac87..45ce01a 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -847,4 +847,61 @@ mod test { closest_album_to_group(group, pool.to_owned()).unwrap(), ); } + + #[test] + fn test_forest_options() { + let first_song = Song { + path: Path::new("path-to-first").to_path_buf(), + analysis: Analysis::new([ + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + ]), + ..Default::default() + }; + let first_song_dupe = Song { + path: Path::new("path-to-dupe").to_path_buf(), + analysis: Analysis::new([ + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + ]), + ..Default::default() + }; + + let second_song = Song { + path: Path::new("path-to-second").to_path_buf(), + analysis: Analysis::new([ + 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1.9, 1., 1., 1., + ]), + ..Default::default() + }; + let third_song = Song { + path: Path::new("path-to-third").to_path_buf(), + analysis: Analysis::new([ + 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.5, 1., 1., 1., + ]), + ..Default::default() + }; + let fourth_song = Song { + path: Path::new("path-to-fourth").to_path_buf(), + analysis: Analysis::new([ + 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 1., 1., 1., + ]), + ..Default::default() + }; + let fifth_song = Song { + path:
Path::new("path-to-fifth").to_path_buf(), + analysis: Analysis::new([ + 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 1., 1., 1., + ]), + ..Default::default() + }; + + let mut songs = [ + &first_song, + &first_song_dupe, + &second_song, + &third_song, + &fourth_song, + &fifth_song, + ]; + closest_to_songs(&[&first_song], &mut songs, &ForestOptions::default()); + } } From 5cafbcc4b3b56d355c1420eb4e48e04514029ef1 Mon Sep 17 00:00:00 2001 From: Simon Teixidor Date: Fri, 29 Mar 2024 20:08:26 +0100 Subject: [PATCH 07/12] Remove unnecessary double-reference --- src/playlist.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/playlist.rs b/src/playlist.rs index 45ce01a..bfabe8c 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -660,7 +660,7 @@ mod test { &fourth_song, &fifth_song, ]; - closest_to_songs(&[&first_song], &mut songs, &&euclidean_distance); + closest_to_songs(&[&first_song], &mut songs, &euclidean_distance); let first_song = CustomSong { bliss_song: first_song, @@ -697,7 +697,7 @@ mod test { &fifth_song, ]; - closest_to_songs(&[&first_song], &mut songs, &&euclidean_distance); + closest_to_songs(&[&first_song], &mut songs, &euclidean_distance); assert_eq!( songs, From 16763650613a5a31d5ce1ca49987c8dc77acd078 Mon Sep 17 00:00:00 2001 From: Simon Teixidor Date: Sun, 31 Mar 2024 20:50:54 +0200 Subject: [PATCH 08/12] Add missing test assertion.
--- src/playlist.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/playlist.rs b/src/playlist.rs index bfabe8c..f39d4b8 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -903,5 +903,16 @@ mod test { &fifth_song, ]; closest_to_songs(&[&first_song], &mut songs, &ForestOptions::default()); + assert_eq!( + songs, + [ + &first_song, + &first_song_dupe, + &second_song, + &third_song, + &fourth_song, + &fifth_song, + ], + ); } } From 6b336bf1dad6fcf37ffff9b1ed6da713e299e965 Mon Sep 17 00:00:00 2001 From: Simon Teixidor Date: Sun, 31 Mar 2024 20:53:19 +0200 Subject: [PATCH 09/12] Remove comment that is no longer relevant. --- src/playlist.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/playlist.rs b/src/playlist.rs index f39d4b8..4627828 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -106,9 +106,6 @@ impl DistanceMetric for Forest { /// Sort `candidate_songs` in place by putting songs close to `selected_songs` first /// using the `distance` metric. -/// -/// Sort songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` pub fn closest_to_songs>( selected_songs: &[T], candidate_songs: &mut [T], From e57ca4c6290e96cf32de9f05ab5b3b411b140350 Mon Sep 17 00:00:00 2001 From: Simon Teixidor Date: Mon, 1 Apr 2024 16:53:52 +0200 Subject: [PATCH 10/12] Fix extended isolation forest test case --- src/playlist.rs | 128 ++++++++++++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 53 deletions(-) diff --git a/src/playlist.rs b/src/playlist.rs index 4627828..014d551 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -845,71 +845,93 @@ mod test { ); } + // This test case is non-deterministic and could fail in rare cases. #[test] fn test_forest_options() { - let first_song = Song { + // These songs contains analysis of actual music. Recordings of Mozart's piano concerto no. + // 19, Mozart's piano concerto no. 
23, and tracks Miles Davis' "Kind Of Blue". + let mozart_piano_19 = [Song { path: Path::new("path-to-first").to_path_buf(), - analysis: Analysis::new([ - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - ]), - ..Default::default() - }; - let first_song_dupe = Song { - path: Path::new("path-to-dupe").to_path_buf(), - analysis: Analysis::new([ - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - ]), + analysis: Analysis::new([0.5522649, -0.8664422, -0.81236243, -0.9475107, -0.76129013, -0.90520144, -0.8474938, -0.8924977, 0.4956385, 0.5076021, -0.5037869, -0.61038315, -0.47157913, -0.48194122, -0.36397678, -0.6443357, -0.9713509, -0.9781786, -0.98285836, -0.983834] +), ..Default::default() - }; - - let second_song = Song { + }, + Song { path: Path::new("path-to-second").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1.9, 1., 1., 1., - ]), + analysis: Analysis::new([0.28091776, -0.86352056, -0.8175835, -0.9497457, -0.77833027, -0.91656536, -0.8477104, -0.889485, 0.41879785, 0.45311546, -0.6252063, -0.6838323, -0.5326821, -0.63320035, -0.5573063, -0.7433087, -0.9815542, -0.98570454, -0.98824924, -0.9903612] +), ..Default::default() - }; - let third_song = Song { + }, + Song { path: Path::new("path-to-third").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.5, 1., 1., 1., - ]), + analysis: Analysis::new([0.5978223, -0.84076107, -0.7841455, -0.886415, -0.72486377, -0.8015111, -0.79157853, -0.7739525, 0.517207, 0.535398, -0.30007458, -0.3972137, -0.41319674, -0.40709, -0.32283908, -0.5261506, -0.9656949, -0.9715169, -0.97524375, -0.9756616] +), ..Default::default() - }; - let fourth_song = Song { + }]; + + let kind_of_blue = [ + Song { path: Path::new("path-to-fourth").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 1., 1., 
1., - ]), + analysis: Analysis::new([0.35871255, -0.8679545, -0.6833263, -0.87800264, -0.7235142, -0.73546195, -0.48577756, -0.7732977, 0.51237035, 0.5379869, -0.00649637, -0.534671, -0.5743973, -0.5706258, -0.43162197, -0.6356183, -0.97918683, -0.98091763, -0.9845511, -0.98359185] +), ..Default::default() - }; - let fifth_song = Song { + }, + Song { path: Path::new("path-to-fifth").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 1., 1., 1., - ]), + analysis: Analysis::new([0.2806753, -0.85013694, -0.66921043, -0.8938313, -0.6848732, -0.75377, -0.48747814, -0.793482, 0.44880342, 0.461563, -0.115760505, -0.535959, -0.5749081, -0.55055845, -0.37976396, -0.538705, -0.97972554, -0.97890633, -0.98290455, -0.98231846] +), ..Default::default() - }; + }, + Song { + path: Path::new("path-to-sixth").to_path_buf(), + analysis: Analysis::new([0.1545173, -0.8991263, -0.79770947, -0.87425447, -0.77811325, -0.71051484, -0.7369138, -0.8515074, 0.387398, 0.42035806, -0.30229717, -0.624056, -0.6458885, -0.66208386, -0.5866134, -0.7613628, -0.98656195, -0.98821944, -0.99072844, -0.98729765] +), + ..Default::default() + }, + Song { + path: Path::new("path-to-seventh").to_path_buf(), + analysis: Analysis::new([0.3853314, -0.8475499, -0.64330614, -0.85917395, -0.6624141, -0.6356613, -0.40988427, -0.7480691, 0.45981812, 0.47096932, -0.19245929, -0.5228787, -0.42246288, -0.52656835, -0.45702273, -0.569838, -0.97620565, -0.97741324, -0.9776932, -0.98088175] +), + ..Default::default() + }, + Song { + path: Path::new("path-to-eight").to_path_buf(), + analysis: Analysis::new([0.18926656, -0.86667925, -0.7294189, -0.856192, -0.7180501, -0.66697484, -0.6093149, -0.82118326, 0.3888924, 0.42430043, -0.4414854, -0.6957753, -0.7092425, -0.68237424, -0.55543846, -0.77678657, -0.98610276, -0.98707336, -0.99165493, -0.99011236] +), + ..Default::default() + }]; + + let mozart_piano_23 = [ + Song { + path: 
Path::new("path-to-ninth").to_path_buf(), + analysis: Analysis::new( + [0.38328362, -0.8752751, -0.8165319, -0.948534, -0.77668643, -0.9051969, -0.8473458, -0.88643366, 0.49641085, 0.5132351, -0.41367024, -0.5279201, -0.46787983, -0.49218357, -0.42164963, -0.6597451, -0.97317076, -0.9800342, -0.9832096, -0.98385316] +), + ..Default::default() + }, + Song { + path: Path::new("path-to-tenth").to_path_buf(), + analysis: Analysis::new( + [0.4301988, -0.89864063, -0.84993315, -0.9518692, -0.8329567, -0.9293889, -0.8605237, -0.8901016, 0.35011983, 0.3822446, -0.6384951, -0.7537949, -0.5867439, -0.57371, -0.5662942, -0.76130676, -0.9845436, -0.9833387, -0.9902381, -0.9905396] +), + ..Default::default() + }, + Song { + path: Path::new("path-to-eleventh").to_path_buf(), + analysis: Analysis::new( + [0.42334664, -0.8632808, -0.80268145, -0.91918564, -0.7522441, -0.8721291, -0.81877685, -0.8166921, 0.53626525, 0.540933, -0.34771818, -0.45362264, -0.35523874, -0.4072432, -0.25506926, -0.553644, -0.9624399, -0.9706371, -0.9753268, -0.9764576] +), + ..Default::default() + }]; - let mut songs = [ - &first_song, - &first_song_dupe, - &second_song, - &third_song, - &fourth_song, - &fifth_song, - ]; - closest_to_songs(&[&first_song], &mut songs, &ForestOptions::default()); - assert_eq!( - songs, - [ - &first_song, - &first_song_dupe, - &second_song, - &third_song, - &fourth_song, - &fifth_song, - ], - ); + let mut songs: Vec<&Song> = mozart_piano_19.iter().chain(kind_of_blue.iter()).chain(mozart_piano_23.iter()).collect(); + + // We train the algorithm on one of the Mozart concertos, and the expectation is that the + // tracks from the Miles Davis record will end up last. 
+ let opts = ForestOptions { n_trees: 1000, sample_size: 200, max_tree_depth: None, extension_level: 10 }; + closest_to_songs(&mozart_piano_19.iter().collect::>(), &mut songs, &opts); + for e in &kind_of_blue { + assert!(songs[songs.len()-5..].contains(&e)); + } } } From 0a5029e1b8359d622d6587b264599edc3b5e4195 Mon Sep 17 00:00:00 2001 From: Simon Teixidor Date: Mon, 1 Apr 2024 20:02:16 +0200 Subject: [PATCH 11/12] Document suggestions and limitations for distance metric --- src/playlist.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/playlist.rs b/src/playlist.rs index 014d551..423c08b 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -17,6 +17,11 @@ use std::collections::HashMap; /// Trait for creating a distance metric, measuring the distance to a set of vectors. If this /// metric requires any kind of training, this should be done in the build function so that the /// returned DistanceMetric instance is already trained and ready to use. +/// +/// Currently, the best metric for measuring the distance to a set of songs is the extended +/// isolation forest (implemented on [ForestOptions]). For measuring the distance to a single song, +/// extended isolation forest doesn't work and [euclidean_distance] or [cosine_distance] are good +/// options. pub trait DistanceMetricBuilder { /// Build a distance metric that measures the distance to vectors. fn build<'a>(&'a self, vectors: &[Array1]) -> Box; From 43361d9956b4af9e1970d00a334ca7d6a37bf688 Mon Sep 17 00:00:00 2001 From: Simon Teixidor Date: Mon, 1 Apr 2024 20:30:14 +0200 Subject: [PATCH 12/12] cargo fmt --- src/playlist.rs | 380 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 307 insertions(+), 73 deletions(-) diff --git a/src/playlist.rs b/src/playlist.rs index 423c08b..b905862 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -855,88 +855,322 @@ mod test { fn test_forest_options() { // These songs contains analysis of actual music. Recordings of Mozart's piano concerto no. 
// 19, Mozart's piano concerto no. 23, and tracks Miles Davis' "Kind Of Blue". - let mozart_piano_19 = [Song { - path: Path::new("path-to-first").to_path_buf(), - analysis: Analysis::new([0.5522649, -0.8664422, -0.81236243, -0.9475107, -0.76129013, -0.90520144, -0.8474938, -0.8924977, 0.4956385, 0.5076021, -0.5037869, -0.61038315, -0.47157913, -0.48194122, -0.36397678, -0.6443357, -0.9713509, -0.9781786, -0.98285836, -0.983834] -), - ..Default::default() - }, - Song { - path: Path::new("path-to-second").to_path_buf(), - analysis: Analysis::new([0.28091776, -0.86352056, -0.8175835, -0.9497457, -0.77833027, -0.91656536, -0.8477104, -0.889485, 0.41879785, 0.45311546, -0.6252063, -0.6838323, -0.5326821, -0.63320035, -0.5573063, -0.7433087, -0.9815542, -0.98570454, -0.98824924, -0.9903612] -), - ..Default::default() - }, - Song { - path: Path::new("path-to-third").to_path_buf(), - analysis: Analysis::new([0.5978223, -0.84076107, -0.7841455, -0.886415, -0.72486377, -0.8015111, -0.79157853, -0.7739525, 0.517207, 0.535398, -0.30007458, -0.3972137, -0.41319674, -0.40709, -0.32283908, -0.5261506, -0.9656949, -0.9715169, -0.97524375, -0.9756616] -), - ..Default::default() - }]; + let mozart_piano_19 = [ + Song { + path: Path::new("path-to-first").to_path_buf(), + analysis: Analysis::new([ + 0.5522649, + -0.8664422, + -0.81236243, + -0.9475107, + -0.76129013, + -0.90520144, + -0.8474938, + -0.8924977, + 0.4956385, + 0.5076021, + -0.5037869, + -0.61038315, + -0.47157913, + -0.48194122, + -0.36397678, + -0.6443357, + -0.9713509, + -0.9781786, + -0.98285836, + -0.983834, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-second").to_path_buf(), + analysis: Analysis::new([ + 0.28091776, + -0.86352056, + -0.8175835, + -0.9497457, + -0.77833027, + -0.91656536, + -0.8477104, + -0.889485, + 0.41879785, + 0.45311546, + -0.6252063, + -0.6838323, + -0.5326821, + -0.63320035, + -0.5573063, + -0.7433087, + -0.9815542, + -0.98570454, + -0.98824924, + -0.9903612, + ]), + 
..Default::default() + }, + Song { + path: Path::new("path-to-third").to_path_buf(), + analysis: Analysis::new([ + 0.5978223, + -0.84076107, + -0.7841455, + -0.886415, + -0.72486377, + -0.8015111, + -0.79157853, + -0.7739525, + 0.517207, + 0.535398, + -0.30007458, + -0.3972137, + -0.41319674, + -0.40709, + -0.32283908, + -0.5261506, + -0.9656949, + -0.9715169, + -0.97524375, + -0.9756616, + ]), + ..Default::default() + }, + ]; let kind_of_blue = [ - Song { - path: Path::new("path-to-fourth").to_path_buf(), - analysis: Analysis::new([0.35871255, -0.8679545, -0.6833263, -0.87800264, -0.7235142, -0.73546195, -0.48577756, -0.7732977, 0.51237035, 0.5379869, -0.00649637, -0.534671, -0.5743973, -0.5706258, -0.43162197, -0.6356183, -0.97918683, -0.98091763, -0.9845511, -0.98359185] -), - ..Default::default() - }, - Song { - path: Path::new("path-to-fifth").to_path_buf(), - analysis: Analysis::new([0.2806753, -0.85013694, -0.66921043, -0.8938313, -0.6848732, -0.75377, -0.48747814, -0.793482, 0.44880342, 0.461563, -0.115760505, -0.535959, -0.5749081, -0.55055845, -0.37976396, -0.538705, -0.97972554, -0.97890633, -0.98290455, -0.98231846] -), - ..Default::default() - }, - Song { - path: Path::new("path-to-sixth").to_path_buf(), - analysis: Analysis::new([0.1545173, -0.8991263, -0.79770947, -0.87425447, -0.77811325, -0.71051484, -0.7369138, -0.8515074, 0.387398, 0.42035806, -0.30229717, -0.624056, -0.6458885, -0.66208386, -0.5866134, -0.7613628, -0.98656195, -0.98821944, -0.99072844, -0.98729765] -), - ..Default::default() - }, - Song { - path: Path::new("path-to-seventh").to_path_buf(), - analysis: Analysis::new([0.3853314, -0.8475499, -0.64330614, -0.85917395, -0.6624141, -0.6356613, -0.40988427, -0.7480691, 0.45981812, 0.47096932, -0.19245929, -0.5228787, -0.42246288, -0.52656835, -0.45702273, -0.569838, -0.97620565, -0.97741324, -0.9776932, -0.98088175] -), - ..Default::default() - }, - Song { - path: Path::new("path-to-eight").to_path_buf(), - analysis: 
Analysis::new([0.18926656, -0.86667925, -0.7294189, -0.856192, -0.7180501, -0.66697484, -0.6093149, -0.82118326, 0.3888924, 0.42430043, -0.4414854, -0.6957753, -0.7092425, -0.68237424, -0.55543846, -0.77678657, -0.98610276, -0.98707336, -0.99165493, -0.99011236] -), - ..Default::default() - }]; + Song { + path: Path::new("path-to-fourth").to_path_buf(), + analysis: Analysis::new([ + 0.35871255, + -0.8679545, + -0.6833263, + -0.87800264, + -0.7235142, + -0.73546195, + -0.48577756, + -0.7732977, + 0.51237035, + 0.5379869, + -0.00649637, + -0.534671, + -0.5743973, + -0.5706258, + -0.43162197, + -0.6356183, + -0.97918683, + -0.98091763, + -0.9845511, + -0.98359185, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-fifth").to_path_buf(), + analysis: Analysis::new([ + 0.2806753, + -0.85013694, + -0.66921043, + -0.8938313, + -0.6848732, + -0.75377, + -0.48747814, + -0.793482, + 0.44880342, + 0.461563, + -0.115760505, + -0.535959, + -0.5749081, + -0.55055845, + -0.37976396, + -0.538705, + -0.97972554, + -0.97890633, + -0.98290455, + -0.98231846, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-sixth").to_path_buf(), + analysis: Analysis::new([ + 0.1545173, + -0.8991263, + -0.79770947, + -0.87425447, + -0.77811325, + -0.71051484, + -0.7369138, + -0.8515074, + 0.387398, + 0.42035806, + -0.30229717, + -0.624056, + -0.6458885, + -0.66208386, + -0.5866134, + -0.7613628, + -0.98656195, + -0.98821944, + -0.99072844, + -0.98729765, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-seventh").to_path_buf(), + analysis: Analysis::new([ + 0.3853314, + -0.8475499, + -0.64330614, + -0.85917395, + -0.6624141, + -0.6356613, + -0.40988427, + -0.7480691, + 0.45981812, + 0.47096932, + -0.19245929, + -0.5228787, + -0.42246288, + -0.52656835, + -0.45702273, + -0.569838, + -0.97620565, + -0.97741324, + -0.9776932, + -0.98088175, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-eight").to_path_buf(), + analysis: 
Analysis::new([ + 0.18926656, + -0.86667925, + -0.7294189, + -0.856192, + -0.7180501, + -0.66697484, + -0.6093149, + -0.82118326, + 0.3888924, + 0.42430043, + -0.4414854, + -0.6957753, + -0.7092425, + -0.68237424, + -0.55543846, + -0.77678657, + -0.98610276, + -0.98707336, + -0.99165493, + -0.99011236, + ]), + ..Default::default() + }, + ]; let mozart_piano_23 = [ - Song { - path: Path::new("path-to-ninth").to_path_buf(), - analysis: Analysis::new( - [0.38328362, -0.8752751, -0.8165319, -0.948534, -0.77668643, -0.9051969, -0.8473458, -0.88643366, 0.49641085, 0.5132351, -0.41367024, -0.5279201, -0.46787983, -0.49218357, -0.42164963, -0.6597451, -0.97317076, -0.9800342, -0.9832096, -0.98385316] -), - ..Default::default() - }, - Song { - path: Path::new("path-to-tenth").to_path_buf(), - analysis: Analysis::new( - [0.4301988, -0.89864063, -0.84993315, -0.9518692, -0.8329567, -0.9293889, -0.8605237, -0.8901016, 0.35011983, 0.3822446, -0.6384951, -0.7537949, -0.5867439, -0.57371, -0.5662942, -0.76130676, -0.9845436, -0.9833387, -0.9902381, -0.9905396] -), - ..Default::default() - }, - Song { - path: Path::new("path-to-eleventh").to_path_buf(), - analysis: Analysis::new( - [0.42334664, -0.8632808, -0.80268145, -0.91918564, -0.7522441, -0.8721291, -0.81877685, -0.8166921, 0.53626525, 0.540933, -0.34771818, -0.45362264, -0.35523874, -0.4072432, -0.25506926, -0.553644, -0.9624399, -0.9706371, -0.9753268, -0.9764576] -), - ..Default::default() - }]; + Song { + path: Path::new("path-to-ninth").to_path_buf(), + analysis: Analysis::new([ + 0.38328362, + -0.8752751, + -0.8165319, + -0.948534, + -0.77668643, + -0.9051969, + -0.8473458, + -0.88643366, + 0.49641085, + 0.5132351, + -0.41367024, + -0.5279201, + -0.46787983, + -0.49218357, + -0.42164963, + -0.6597451, + -0.97317076, + -0.9800342, + -0.9832096, + -0.98385316, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-tenth").to_path_buf(), + analysis: Analysis::new([ + 0.4301988, + -0.89864063, + -0.84993315, 
+ -0.9518692, + -0.8329567, + -0.9293889, + -0.8605237, + -0.8901016, + 0.35011983, + 0.3822446, + -0.6384951, + -0.7537949, + -0.5867439, + -0.57371, + -0.5662942, + -0.76130676, + -0.9845436, + -0.9833387, + -0.9902381, + -0.9905396, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-eleventh").to_path_buf(), + analysis: Analysis::new([ + 0.42334664, + -0.8632808, + -0.80268145, + -0.91918564, + -0.7522441, + -0.8721291, + -0.81877685, + -0.8166921, + 0.53626525, + 0.540933, + -0.34771818, + -0.45362264, + -0.35523874, + -0.4072432, + -0.25506926, + -0.553644, + -0.9624399, + -0.9706371, + -0.9753268, + -0.9764576, + ]), + ..Default::default() + }, + ]; - let mut songs: Vec<&Song> = mozart_piano_19.iter().chain(kind_of_blue.iter()).chain(mozart_piano_23.iter()).collect(); + let mut songs: Vec<&Song> = mozart_piano_19 + .iter() + .chain(kind_of_blue.iter()) + .chain(mozart_piano_23.iter()) + .collect(); // We train the algorithm on one of the Mozart concertos, and the expectation is that the // tracks from the Miles Davis record will end up last. - let opts = ForestOptions { n_trees: 1000, sample_size: 200, max_tree_depth: None, extension_level: 10 }; - closest_to_songs(&mozart_piano_19.iter().collect::>(), &mut songs, &opts); + let opts = ForestOptions { + n_trees: 1000, + sample_size: 200, + max_tree_depth: None, + extension_level: 10, + }; + closest_to_songs( + &mozart_piano_19.iter().collect::>(), + &mut songs, + &opts, + ); for e in &kind_of_blue { - assert!(songs[songs.len()-5..].contains(&e)); + assert!(songs[songs.len() - 5..].contains(&e)); } } }