diff --git a/Cargo.lock b/Cargo.lock index 52f4773..c24a4d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,6 +125,7 @@ dependencies = [ "bliss-audio-aubio-rs", "clap", "dirs", + "extended-isolation-forest", "ffmpeg-next", "ffmpeg-sys-next", "glob", @@ -396,6 +397,18 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "extended-isolation-forest" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db5193a74618ae2f7ea9c7feda2772192e0e3c04d9cbd2beb5ee9b0916d7eb3f" +dependencies = [ + "num-traits", + "rand 0.8.5", + "rand_distr", + "serde", +] + [[package]] name = "fallible-iterator" version = "0.2.0" @@ -672,6 +685,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + [[package]] name = "libredox" version = "0.0.1" @@ -889,6 +908,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -1096,6 +1116,16 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + [[package]] name = "rawpointer" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index 401e17a..a142a95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ library = [ "dep:anyhow", "dep:serde_ini", "dep:serde_json", "dep:indicatif", ] -serde = ["dep:serde"] +serde = ["dep:serde", "extended-isolation-forest/serde"] [dependencies] # Until https://github.com/aubio/aubio/issues/336 is somehow solved @@ -54,6 +54,7 @@ thiserror = "1.0.40" strum = "0.24.1" strum_macros = "0.24.3" rcue = "0.1.3" 
+extended-isolation-forest = { version = "0.2.3", default-features = false } # Deps for the library feature serde = { version = "1.0", optional = true, features = ["derive"] } diff --git a/examples/distance.rs b/examples/distance.rs index 0fa2ac8..870eacd 100644 --- a/examples/distance.rs +++ b/examples/distance.rs @@ -1,4 +1,4 @@ -use bliss_audio::Song; +use bliss_audio::{playlist::euclidean_distance, Song}; use std::env; /** @@ -20,7 +20,7 @@ fn main() -> Result<(), String> { "d({:?}, {:?}) = {}", song1.path, song2.path, - song1.distance(&song2) + euclidean_distance(&song1.analysis.as_arr1(), &song2.analysis.as_arr1()) ); Ok(()) } diff --git a/examples/library.rs b/examples/library.rs index f6e61d7..1cabb09 100644 --- a/examples/library.rs +++ b/examples/library.rs @@ -190,7 +190,7 @@ fn main() -> Result<()> { .unwrap_or("20") .parse::()?; let library: Library = Library::from_config_path(config_path)?; - let songs = library.playlist_from::<()>(song_path, playlist_length)?; + let songs = library.playlist_from::<()>(&[song_path], playlist_length)?; let song_paths = songs .into_iter() .map(|s| s.bliss_song.path.to_string_lossy().to_string()) diff --git a/examples/library_extra_info.rs b/examples/library_extra_info.rs index ec84cfd..a0d3167 100644 --- a/examples/library_extra_info.rs +++ b/examples/library_extra_info.rs @@ -208,7 +208,7 @@ fn main() -> Result<()> { .unwrap_or("20") .parse::()?; let library: Library = Library::from_config_path(config_path)?; - let songs = library.playlist_from::(song_path, playlist_length)?; + let songs = library.playlist_from::(&[song_path], playlist_length)?; let playlist = songs .into_iter() .map(|s| { diff --git a/examples/playlist.rs b/examples/playlist.rs index 86e118f..c106937 100644 --- a/examples/playlist.rs +++ b/examples/playlist.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use bliss_audio::playlist::{closest_to_first_song, dedup_playlist, euclidean_distance}; +use bliss_audio::playlist::{closest_to_songs, dedup_playlist, 
euclidean_distance}; use bliss_audio::{analyze_paths, Song}; use clap::{App, Arg}; use glob::glob; @@ -77,7 +77,7 @@ fn main() -> Result<()> { .into_iter() .filter(|x| x == &first_song || paths.contains(&x.path.to_string_lossy().to_string())) .collect(); - closest_to_first_song(&first_song, &mut songs_to_chose_from, euclidean_distance); + closest_to_songs(&[first_song], &mut songs_to_chose_from, &euclidean_distance); dedup_playlist(&mut songs_to_chose_from, None); fs::write(analysis_path, serialized)?; diff --git a/src/lib.rs b/src/lib.rs index abfc207..2e6a51b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,13 +27,16 @@ //! //! ### Analyze & compute the distance between two songs //! ```no_run -//! use bliss_audio::{BlissResult, Song}; +//! use bliss_audio::{BlissResult, Song, playlist::euclidean_distance}; //! //! fn main() -> BlissResult<()> { //! let song1 = Song::from_path("/path/to/song1")?; //! let song2 = Song::from_path("/path/to/song2")?; //! -//! println!("Distance between song1 and song2 is {}", song1.distance(&song2)); +//! println!( +//! "Distance between song1 and song2 is {}", +//! euclidean_distance(&song1.analysis.as_arr1(), &song2.analysis.as_arr1()) +//! ); //! Ok(()) //! } //! ``` @@ -42,7 +45,7 @@ //! ```no_run //! use bliss_audio::{ //! analyze_paths, -//! playlist::{closest_to_first_song, euclidean_distance}, +//! playlist::{closest_to_songs, euclidean_distance}, //! BlissResult, Song, //! }; //! @@ -53,7 +56,7 @@ //! // Assuming there is a first song //! let first_song = songs.first().unwrap().to_owned(); //! -//! closest_to_first_song(&first_song, &mut songs, euclidean_distance); +//! closest_to_songs(&[first_song], &mut songs, &euclidean_distance); //! //! println!("Playlist is:"); //! for song in songs { diff --git a/src/library.rs b/src/library.rs index 3fa8efa..79b1f1a 100644 --- a/src/library.rs +++ b/src/library.rs @@ -110,18 +110,16 @@ //! [Library] to implement bliss for a MPD player. 
use crate::analyze_paths_with_cores; use crate::cue::CueInfo; -use crate::playlist::closest_album_to_group_by_key; -use crate::playlist::closest_to_first_song_by_key; -use crate::playlist::dedup_playlist_by_key; -use crate::playlist::dedup_playlist_custom_distance_by_key; +use crate::playlist::closest_album_to_group; +use crate::playlist::closest_to_songs; +use crate::playlist::dedup_playlist_custom_distance; use crate::playlist::euclidean_distance; -use crate::playlist::DistanceMetric; +use crate::playlist::DistanceMetricBuilder; use anyhow::{bail, Context, Result}; #[cfg(not(test))] use dirs::data_local_dir; use indicatif::{ProgressBar, ProgressStyle}; use log::warn; -use noisy_float::prelude::*; use rusqlite::params; use rusqlite::params_from_iter; use rusqlite::Connection; @@ -340,6 +338,12 @@ pub struct LibrarySong { pub extra_info: T, } +impl AsRef for LibrarySong { + fn as_ref(&self) -> &Song { + &self.bliss_song + } +} + // TODO add logging statement // TODO concrete examples // TODO example LibrarySong without any extra_info @@ -480,75 +484,62 @@ impl Library { Self::new(config) } - /// Build a playlist of `playlist_length` items from an already analyzed - /// song in the library at `song_path`. + /// Build a playlist of `playlist_length` items from a set of already analyzed + /// songs in the library at `song_paths`. /// - /// It uses a simple euclidean distance between songs, and deduplicates songs - /// that are too close. + /// It uses the euclidean distance between songs, and deduplicates + /// songs that are too close. + /// + /// Generating a playlist from a single song is also possible, and is just the special case + /// where `song_paths` is a slice of length 1. 
pub fn playlist_from( &self, - song_path: &str, + song_paths: &[&str], playlist_length: usize, ) -> Result>> { - let first_song: LibrarySong = self.song_from_path(song_path)?; - let mut songs = self.songs_from_library()?; - closest_to_first_song_by_key( - &first_song, - &mut songs, - euclidean_distance, - |s: &LibrarySong| s.bliss_song.to_owned(), - ); - songs.sort_by_cached_key(|song| n32(first_song.bliss_song.distance(&song.bliss_song))); - dedup_playlist_by_key(&mut songs, None, |s: &LibrarySong| { - s.bliss_song.to_owned() - }); - songs.truncate(playlist_length); - Ok(songs) + self.playlist_from_custom( + song_paths, + playlist_length, + &euclidean_distance, + &mut closest_to_songs, + true, + ) } - /// Build a playlist of `playlist_length` items from an already analyzed - /// song in the library at `song_path`, using distance metric `distance`, + /// Build a playlist of `playlist_length` items from a set of already analyzed + /// songs in the library at `song_paths`, using distance metric `distance`, /// the sorting function `sort_by` and deduplicating if `dedup` is set to /// `true`. /// /// You can use ready to use distance metrics such as - /// [euclidean_distance], and ready to use sorting functions like - /// [closest_to_first_song_by_key]. - /// - /// In most cases, you just want to use [Library::playlist_from]. - /// Use `playlist_from_custom` if you want to experiment with different - /// distance metrics / sorting functions. + /// [ExtendedIsolationForest], and ready to use sorting functions like + /// [closest_to_songs]. /// - /// Example: - /// `library.playlist_from_song_custom(song_path, 20, euclidean_distance, - /// closest_to_first_song_by_key, true)`. - /// TODO path here too - pub fn playlist_from_custom( + /// Generating a playlist from a single song is also possible, and is just the special case + /// where `song_paths` is a slice of length 1. 
+ pub fn playlist_from_custom< + T: Serialize + DeserializeOwned, + F: FnMut(&[LibrarySong], &mut [LibrarySong], &dyn DistanceMetricBuilder), + >( &self, - song_path: &str, + song_paths: &[&str], playlist_length: usize, - distance: G, - mut sort_by: F, + distance: &dyn DistanceMetricBuilder, + sort_by: &mut F, dedup: bool, - ) -> Result>> - where - F: FnMut(&LibrarySong, &mut Vec>, G, fn(&LibrarySong) -> Song), - G: DistanceMetric + Copy, - { - let first_song: LibrarySong = self.song_from_path(song_path).map_err(|_| { - BlissError::ProviderError(format!("song '{song_path}' has not been analyzed")) - })?; + ) -> Result>> { + let initial_songs: Vec> = song_paths + .iter() + .map(|s| { + self.song_from_path(s).map_err(|_| { + BlissError::ProviderError(format!("song '{s}' has not been analyzed")) + }) + }) + .collect::, BlissError>>()?; let mut songs = self.songs_from_library()?; - sort_by(&first_song, &mut songs, distance, |s: &LibrarySong| { - s.bliss_song.to_owned() - }); + sort_by(&initial_songs, &mut songs, distance); if dedup { - dedup_playlist_custom_distance_by_key( - &mut songs, - None, - distance, - |s: &LibrarySong| s.bliss_song.to_owned(), - ); + dedup_playlist_custom_distance(&mut songs, None, distance); } songs.truncate(playlist_length); Ok(songs) @@ -568,7 +559,7 @@ impl Library { let album = self.songs_from_album(&album_title)?; // Every song should be from the same album. Hopefully... 
let songs = self.songs_from_library()?; - let playlist = closest_album_to_group_by_key(album, songs, |s| s.bliss_song.to_owned())?; + let playlist = closest_album_to_group(album, songs)?; let mut album_count = 0; let mut index = 0; @@ -1890,11 +1881,19 @@ mod test { } } + fn first_factor_divided_by_30_distance(a: &Array1, b: &Array1) -> f32 { + ((a[1] - b[1]).abs() / 30.).floor() + } + + fn first_factor_distance(a: &Array1, b: &Array1) -> f32 { + (a[1] - b[1]).abs() + } + #[test] fn test_library_playlist_song_not_existing() { let (library, _temp_dir, _) = setup_test_library(); assert!(library - .playlist_from::("not-existing", 2) + .playlist_from::(&["not-existing"], 2) .is_err()); } @@ -1902,7 +1901,7 @@ mod test { fn test_library_playlist_crop() { let (library, _temp_dir, _) = setup_test_library(); let songs: Vec> = - library.playlist_from("/path/to/song2001", 2).unwrap(); + library.playlist_from(&["/path/to/song2001"], 2).unwrap(); assert_eq!(2, songs.len()); } @@ -1910,7 +1909,7 @@ mod test { fn test_library_simple_playlist() { let (library, _temp_dir, _) = setup_test_library(); let songs: Vec> = - library.playlist_from("/path/to/song2001", 20).unwrap(); + library.playlist_from(&["/path/to/song2001"], 20).unwrap(); assert_eq!( vec![ "/path/to/song2001", @@ -1931,14 +1930,12 @@ mod test { #[test] fn test_library_custom_playlist_distance() { let (library, _temp_dir, _) = setup_test_library(); - let distance = - |a: &Array1, b: &Array1| (a.get(1).unwrap() - b.get(1).unwrap()).abs(); let songs: Vec> = library .playlist_from_custom( - "/path/to/song2001", + &["/path/to/song2001"], 20, - distance, - closest_to_first_song_by_key, + &first_factor_distance, + &mut closest_to_songs, true, ) .unwrap(); @@ -1959,15 +1956,12 @@ mod test { ) } - fn custom_sort( - _: &LibrarySong, - songs: &mut Vec>, - _distance: impl DistanceMetric, - key_fn: F, - ) where - F: Fn(&LibrarySong) -> Song, - { - songs.sort_by_key(|song| key_fn(song).path); + fn custom_sort( + _: &[LibrarySong], 
+ songs: &mut [LibrarySong], + _distance: &dyn DistanceMetricBuilder, + ) { + songs.sort_by(|s1, s2| s1.bliss_song.path.cmp(&s2.bliss_song.path)); } #[test] @@ -1975,10 +1969,10 @@ mod test { let (library, _temp_dir, _) = setup_test_library(); let songs: Vec> = library .playlist_from_custom( - "/path/to/song2001", + &["/path/to/song2001"], 20, - euclidean_distance, - custom_sort, + &euclidean_distance, + &mut custom_sort, true, ) .unwrap(); @@ -2002,15 +1996,13 @@ mod test { #[test] fn test_library_custom_playlist_dedup() { let (library, _temp_dir, _) = setup_test_library(); - let distance = |a: &Array1, b: &Array1| { - ((a.get(1).unwrap() - b.get(1).unwrap()).abs() / 30.).floor() - }; + let songs: Vec> = library .playlist_from_custom( - "/path/to/song2001", + &["/path/to/song2001"], 20, - distance, - closest_to_first_song_by_key, + &first_factor_divided_by_30_distance, + &mut closest_to_songs, true, ) .unwrap(); @@ -2026,14 +2018,12 @@ mod test { .collect::>(), ); - let distance = - |a: &Array1, b: &Array1| ((a.get(1).unwrap() - b.get(1).unwrap()).abs()); let songs: Vec> = library .playlist_from_custom( - "/path/to/song2001", + &["/path/to/song2001"], 20, - distance, - closest_to_first_song_by_key, + &first_factor_distance, + &mut closest_to_songs, false, ) .unwrap(); diff --git a/src/playlist.rs b/src/playlist.rs index 93477c1..b905862 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -7,17 +7,57 @@ //! They will yield different styles of playlists, so don't hesitate to //! experiment with them if the default (euclidean distance for now) doesn't //! suit you. -// TODO on the `by_key` functions: maybe Fn(&T) -> &Song is enough? Compared -// to -> Song use crate::{BlissError, BlissResult, Song, NUMBER_FEATURES}; +use extended_isolation_forest::{Forest, ForestOptions}; use ndarray::{Array, Array1, Array2, Axis}; use ndarray_stats::QuantileExt; use noisy_float::prelude::*; use std::collections::HashMap; -/// Convenience trait for user-defined distance metrics. 
-pub trait DistanceMetric: Fn(&Array1, &Array1) -> f32 {} -impl DistanceMetric for F where F: Fn(&Array1, &Array1) -> f32 {} +/// Trait for creating a distance metric, measuring the distance to a set of vectors. If this +/// metric requires any kind of training, this should be done in the build function so that the +/// returned DistanceMetric instance is already trained and ready to use. +/// +/// Currently, the best metric for measuring the distance to a set of songs is the extended +/// isolation forest (implemented on [ForestOptions]). For measuring the distance to a single song, +/// extended isolation forest doesn't work and [euclidean_distance] or [cosine_distance] are good +/// options. +pub trait DistanceMetricBuilder { + /// Build a distance metric that measures the distance to vectors. + fn build<'a>(&'a self, vectors: &[Array1]) -> Box; +} + +/// Measure the distance to a vector, from the vector(s) in the internal state of this metric. +pub trait DistanceMetric { + /// Return the distance from the set of vectors that this metric was built from. + fn distance(&self, vector: &Array1) -> f32; +} + +/// Convenience struct used for implementing DistanceMetric for plain functions. +pub struct FunctionDistanceMetric<'a, F: Fn(&Array1, &Array1) -> f32> { + func: &'a F, + state: Vec>, +} + +impl DistanceMetricBuilder for F +where + F: Fn(&Array1, &Array1) -> f32 + 'static, +{ + fn build<'a>(&'a self, vectors: &[Array1]) -> Box { + Box::new(FunctionDistanceMetric { + func: self, + state: vectors.iter().map(|s| s.to_owned()).collect(), + }) + } +} + +impl<'a, F: Fn(&Array1, &Array1) -> f32 + 'static> DistanceMetric + for FunctionDistanceMetric<'a, F> +{ + fn distance(&self, vector: &Array1) -> f32 { + self.state.iter().map(|v| (self.func)(v, vector)).sum() + } +} /// Return the [euclidean /// distance](https://en.wikipedia.org/wiki/Euclidean_distance#Higher_dimensions) @@ -39,55 +79,50 @@ pub fn cosine_distance(a: &Array1, b: &Array1) -> f32 { 1. 
- similarity } -/// Sort `songs` in place by putting songs close to `first_song` first -/// using the `distance` metric. -pub fn closest_to_first_song( - first_song: &Song, - #[allow(clippy::ptr_arg)] songs: &mut Vec, - distance: impl DistanceMetric, -) { - songs.sort_by_cached_key(|song| n32(first_song.custom_distance(song, &distance))); +fn feature_array1_to_array(f: &Array1) -> [f32; NUMBER_FEATURES] { + f.as_slice() + .expect("Couldn't convert feature vector to slice") + .try_into() + .expect("Couldn't convert slice to array") } -/// Sort `songs` in place by putting songs close to `first_song` first -/// using the `distance` metric. -/// -/// Sort songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` -pub fn closest_to_first_song_by_key( - first_song: &T, - #[allow(clippy::ptr_arg)] songs: &mut Vec, - distance: impl DistanceMetric, - key_fn: F, -) where - F: Fn(&T) -> Song, -{ - let first_song = key_fn(first_song); - songs.sort_by_cached_key(|song| n32(first_song.custom_distance(&key_fn(song), &distance))); +impl DistanceMetricBuilder for ForestOptions { + fn build(&self, vectors: &[Array1]) -> Box { + let a = &*vectors + .iter() + .map(feature_array1_to_array) + .collect::>(); + + if self.sample_size > vectors.len() { + let mut opts = self.clone(); + opts.sample_size = self.sample_size.min(vectors.len()); + Box::new(Forest::from_slice(a, &opts).unwrap()) + } else { + Box::new(Forest::from_slice(a, self).unwrap()) + } + } } -/// Sort `songs` in place using the `distance` metric and ordering by -/// the smallest distance between each song. -/// -/// If the generated playlist is `[song1, song2, song3, song4]`, it means -/// song2 is closest to song1, song3 is closest to song2, and song4 is closest -/// to song3. -/// -/// Note that this has a tendency to go from one style to the other very fast, -/// and it can be slow on big libraries. 
-pub fn song_to_song(first_song: &Song, songs: &mut Vec, distance: impl DistanceMetric) { - let mut new_songs = Vec::with_capacity(songs.len()); - let mut song = first_song.to_owned(); - - while !songs.is_empty() { - let distances: Array1 = - Array::from_shape_fn(songs.len(), |i| song.custom_distance(&songs[i], &distance)); - let idx = distances.argmin().unwrap(); - song = songs[idx].to_owned(); - new_songs.push(song.to_owned()); - songs.retain(|s| s != &song); +impl DistanceMetric for Forest { + fn distance(&self, vector: &Array1) -> f32 { + self.score(&feature_array1_to_array(vector)) as f32 } - *songs = new_songs; +} + +/// Sort `candidate_songs` in place by putting songs close to `selected_songs` first +/// using the `distance` metric. +pub fn closest_to_songs>( + selected_songs: &[T], + candidate_songs: &mut [T], + metric_builder: &dyn DistanceMetricBuilder, +) { + let selected_songs = selected_songs + .iter() + .map(|c| c.as_ref().analysis.as_arr1()) + .collect::>(); + let metric = metric_builder.build(&selected_songs); + candidate_songs + .sort_by_cached_key(|song| n32(metric.distance(&song.as_ref().analysis.as_arr1()))); } /// Sort `songs` in place using the `distance` metric and ordering by @@ -99,47 +134,29 @@ pub fn song_to_song(first_song: &Song, songs: &mut Vec, distance: impl Dis /// /// Note that this has a tendency to go from one style to the other very fast, /// and it can be slow on big libraries. -/// -/// Sort songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` -// TODO: maybe Clone is not needed? 
-pub fn song_to_song_by_key( - first_song: &T, - songs: &mut Vec, - distance: impl DistanceMetric, - key_fn: F, -) where - F: Fn(&T) -> Song, -{ - let mut new_songs: Vec = Vec::with_capacity(songs.len()); - let mut bliss_song = key_fn(&first_song.to_owned()); +pub fn song_to_song>( + from: &[T], + songs: &mut [T], + metric_builder: &dyn DistanceMetricBuilder, +) { + let mut vectors = from + .iter() + .map(|s| s.as_ref().analysis.as_arr1()) + .collect::>(); - while !songs.is_empty() { - let distances: Array1 = Array::from_shape_fn(songs.len(), |i| { - bliss_song.custom_distance(&key_fn(&songs[i]), &distance) - }); - let idx = distances.argmin().unwrap(); - let song = songs[idx].to_owned(); - bliss_song = key_fn(&songs[idx]).to_owned(); - new_songs.push(song.to_owned()); - songs.retain(|s| s != &song); + for i in 0..songs.len() { + { + let metric = metric_builder.build(&vectors); + let remaining_songs = &songs[i..]; + let distances: Array1 = Array::from_shape_fn(remaining_songs.len(), |j| { + metric.distance(&remaining_songs[j].as_ref().analysis.as_arr1()) + }); + let idx = distances.argmin().unwrap(); + songs.swap(idx + i, i); + } + vectors.clear(); + vectors.push(songs[i].as_ref().analysis.as_arr1()); } - *songs = new_songs; -} - -/// Remove duplicate songs from a playlist, in place. -/// -/// Two songs are considered duplicates if they either have the same, -/// non-empty title and artist name, or if they are close enough in terms -/// of distance. -/// -/// # Arguments -/// -/// * `songs`: The playlist to remove duplicates from. -/// * `distance_threshold`: The distance threshold under which two songs are -/// considered identical. If `None`, a default value of 0.05 will be used. -pub fn dedup_playlist(songs: &mut Vec, distance_threshold: Option) { - dedup_playlist_custom_distance(songs, distance_threshold, euclidean_distance); } /// Remove duplicate songs from a playlist, in place. 
@@ -148,21 +165,13 @@ pub fn dedup_playlist(songs: &mut Vec, distance_threshold: Option) { /// non-empty title and artist name, or if they are close enough in terms /// of distance. /// -/// Dedup songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` you -/// want to deduplicate. -/// /// # Arguments /// /// * `songs`: The playlist to remove duplicates from. /// * `distance_threshold`: The distance threshold under which two songs are /// considered identical. If `None`, a default value of 0.05 will be used. -/// * `key_fn`: A function used to retrieve the bliss [Song] from `T`. -pub fn dedup_playlist_by_key(songs: &mut Vec, distance_threshold: Option, key_fn: F) -where - F: Fn(&T) -> Song, -{ - dedup_playlist_custom_distance_by_key(songs, distance_threshold, euclidean_distance, key_fn); +pub fn dedup_playlist>(songs: &mut Vec, distance_threshold: Option) { + dedup_playlist_custom_distance(songs, distance_threshold, &euclidean_distance); } /// Remove duplicate songs from a playlist, in place, using a custom distance @@ -178,52 +187,17 @@ where /// * `distance_threshold`: The distance threshold under which two songs are /// considered identical. If `None`, a default value of 0.05 will be used. /// * `distance`: A custom distance metric. -pub fn dedup_playlist_custom_distance( - songs: &mut Vec, - distance_threshold: Option, - distance: impl DistanceMetric, -) { - songs.dedup_by(|s1, s2| { - n32(s1.custom_distance(s2, &distance)) < distance_threshold.unwrap_or(0.05) - || (s1.title.is_some() - && s2.title.is_some() - && s1.artist.is_some() - && s2.artist.is_some() - && s1.title == s2.title - && s1.artist == s2.artist) - }); -} - -/// Remove duplicate songs from a playlist, in place, using a custom distance -/// metric. 
-/// -/// Two songs are considered duplicates if they either have the same, -/// non-empty title and artist name, or if they are close enough in terms -/// of distance. -/// -/// Dedup songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` -/// you want to deduplicate. -/// -/// # Arguments -/// -/// * `songs`: The playlist to remove duplicates from. -/// * `distance_threshold`: The distance threshold under which two songs are -/// considered identical. If `None`, a default value of 0.05 will be used. -/// * `distance`: A custom distance metric. -/// * `key_fn`: A function used to retrieve the bliss [Song] from `T`. -pub fn dedup_playlist_custom_distance_by_key( +pub fn dedup_playlist_custom_distance>( songs: &mut Vec, distance_threshold: Option, - distance: impl DistanceMetric, - key_fn: F, -) where - F: Fn(&T) -> Song, -{ + metric_builder: &dyn DistanceMetricBuilder, +) { songs.dedup_by(|s1, s2| { - let s1 = key_fn(s1); - let s2 = key_fn(s2); - n32(s1.custom_distance(&s2, &distance)) < distance_threshold.unwrap_or(0.05) + let s1 = s1.as_ref(); + let s2 = s2.as_ref(); + let vector = [s1.analysis.as_arr1()]; + let metric = metric_builder.build(&vector); + n32(metric.distance(&s2.analysis.as_arr1())) < distance_threshold.unwrap_or(0.05) || (s1.title.is_some() && s2.title.is_some() && s1.artist.is_some() @@ -253,136 +227,36 @@ pub fn dedup_playlist_custom_distance_by_key( /// A vector of songs, including `group` at the beginning, that you /// most likely want to plug in your audio player by using something like /// `ret.map(|song| song.path.to_owned()).collect::>()`. -pub fn closest_album_to_group(group: Vec, pool: Vec) -> BlissResult> { +pub fn closest_album_to_group + Clone>( + group: Vec, + pool: Vec, +) -> BlissResult> { let mut albums_analysis: HashMap<&str, Array2> = HashMap::new(); let mut albums = Vec::new(); // Remove songs from the group from the pool. 
let pool = pool .into_iter() - .filter(|s| !group.contains(s)) + .filter(|s| !group.iter().any(|gs| gs.as_ref() == s.as_ref())) .collect::>(); for song in &pool { - if let Some(album) = &song.album { + if let Some(album) = &song.as_ref().album { if let Some(analysis) = albums_analysis.get_mut(album as &str) { analysis - .push_row(song.analysis.as_arr1().view()) + .push_row(song.as_ref().analysis.as_arr1().view()) .map_err(|e| { BlissError::ProviderError(format!("while computing distances: {e}")) })?; } else { - let mut array = Array::zeros((1, song.analysis.as_arr1().len())); - array.assign(&song.analysis.as_arr1()); + let mut array = Array::zeros((1, song.as_ref().analysis.as_arr1().len())); + array.assign(&song.as_ref().analysis.as_arr1()); albums_analysis.insert(album, array); } } } let mut group_analysis = Array::zeros((group.len(), NUMBER_FEATURES)); for (song, mut column) in group.iter().zip(group_analysis.axis_iter_mut(Axis(0))) { - column.assign(&song.analysis.as_arr1()); - } - let first_analysis = group_analysis - .mean_axis(Axis(0)) - .ok_or_else(|| BlissError::ProviderError(String::from("Mean of empty slice")))?; - for (album, analysis) in albums_analysis.iter() { - let mean_analysis = analysis - .mean_axis(Axis(0)) - .ok_or_else(|| BlissError::ProviderError(String::from("Mean of empty slice")))?; - let album = album.to_owned(); - albums.push((album, mean_analysis.to_owned())); - } - - albums.sort_by_key(|(_, analysis)| n32(euclidean_distance(&first_analysis, analysis))); - let mut playlist = group; - for (album, _) in albums { - let mut al = pool - .iter() - .filter(|s| s.album.is_some() && s.album.as_ref().unwrap() == &album.to_string()) - .map(|s| s.to_owned()) - .collect::>(); - al.sort_by(|s1, s2| { - let track_number1 = s1 - .track_number - .to_owned() - .unwrap_or_else(|| String::from("")); - let track_number2 = s2 - .track_number - .to_owned() - .unwrap_or_else(|| String::from("")); - if let Ok(x) = track_number1.parse::() { - if let Ok(y) = 
track_number2.parse::() { - return x.cmp(&y); - } - } - s1.track_number.cmp(&s2.track_number) - }); - playlist.extend_from_slice(&al); - } - Ok(playlist) -} - -/// Return a list of albums in a `pool` of songs that are similar to -/// songs in `group`, discarding songs that don't belong to an album. -/// It basically makes an "album" playlist from the `pool` of songs. -/// -/// `group` should be ordered by track number. -/// -/// Songs from `group` would usually just be songs from an album, but not -/// necessarily - they are discarded from `pool` no matter what. -/// -/// Order songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` -/// you want to order. -/// -/// # Arguments -/// -/// * `group` - A small group of songs, e.g. an album. -/// * `pool` - A pool of songs to find similar songs in, e.g. a user's song -/// library. -/// * `key_fn`: A function used to retrieve the bliss [Song] from `T`. -/// -/// # Returns -/// -/// A vector of T, including `group` at the beginning, that you -/// most likely want to plug in your audio player by using something like -/// `ret.map(|song| song.path.to_owned()).collect::>()`. -// TODO: maybe Clone is not needed? -pub fn closest_album_to_group_by_key( - group: Vec, - pool: Vec, - key_fn: F, -) -> BlissResult> -where - F: Fn(&T) -> Song, -{ - let mut albums_analysis: HashMap> = HashMap::new(); - let mut albums = Vec::new(); - - // Remove songs from the group from the pool. 
- let pool = pool - .into_iter() - .filter(|s| !group.contains(s)) - .collect::>(); - for song in &pool { - let song = key_fn(song); - if let Some(album) = song.album { - if let Some(analysis) = albums_analysis.get_mut(&album as &str) { - analysis - .push_row(song.analysis.as_arr1().view()) - .map_err(|e| { - BlissError::ProviderError(format!("while computing distances: {e}")) - })?; - } else { - let mut array = Array::zeros((1, song.analysis.as_arr1().len())); - array.assign(&song.analysis.as_arr1()); - albums_analysis.insert(album.to_owned(), array); - } - } - } - let mut group_analysis = Array::zeros((group.len(), NUMBER_FEATURES)); - for (song, mut column) in group.iter().zip(group_analysis.axis_iter_mut(Axis(0))) { - let song = key_fn(song); - column.assign(&song.analysis.as_arr1()); + column.assign(&song.as_ref().analysis.as_arr1()); } let first_analysis = group_analysis .mean_axis(Axis(0)) @@ -400,20 +274,17 @@ where for (album, _) in albums { let mut al = pool .iter() - .filter(|s| { - let s = key_fn(s); - s.album.is_some() && s.album.as_ref().unwrap() == &album.to_string() - }) - .map(|s| s.to_owned()) + .filter(|s| s.as_ref().album.as_deref() == Some(album)) + .cloned() .collect::>(); al.sort_by(|s1, s2| { - let s1 = key_fn(s1); - let s2 = key_fn(s2); let track_number1 = s1 + .as_ref() .track_number .to_owned() .unwrap_or_else(|| String::from("")); let track_number2 = s2 + .as_ref() .track_number .to_owned() .unwrap_or_else(|| String::from("")); @@ -422,9 +293,9 @@ where return x.cmp(&y); } } - s1.track_number.cmp(&s2.track_number) + s1.as_ref().track_number.cmp(&s2.as_ref().track_number) }); - playlist.extend_from_slice(&al); + playlist.extend(al); } Ok(playlist) } @@ -442,6 +313,12 @@ mod test { bliss_song: Song, } + impl AsRef for CustomSong { + fn as_ref(&self) -> &Song { + &self.bliss_song + } + } + #[test] fn test_dedup_playlist_custom_distance() { let first_song = Song { @@ -502,7 +379,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), 
]; - dedup_playlist_custom_distance(&mut playlist, None, euclidean_distance); + dedup_playlist_custom_distance(&mut playlist, None, &euclidean_distance); assert_eq!( playlist, vec![ @@ -519,7 +396,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance(&mut playlist, Some(20.), cosine_distance); + dedup_playlist_custom_distance(&mut playlist, Some(20.), &euclidean_distance); assert_eq!(playlist, vec![first_song.to_owned()]); let mut playlist = vec![ first_song.to_owned(), @@ -583,9 +460,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance_by_key(&mut playlist, None, euclidean_distance, |s| { - s.bliss_song.to_owned() - }); + dedup_playlist_custom_distance(&mut playlist, None, &euclidean_distance); assert_eq!( playlist, vec![ @@ -602,9 +477,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance_by_key(&mut playlist, Some(20.), cosine_distance, |s| { - s.bliss_song.to_owned() - }); + dedup_playlist_custom_distance(&mut playlist, Some(20.), &cosine_distance); assert_eq!(playlist, vec![first_song.to_owned()]); let mut playlist = vec![ first_song.to_owned(), @@ -614,7 +487,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_by_key(&mut playlist, Some(20.), |s| s.bliss_song.to_owned()); + dedup_playlist(&mut playlist, Some(20.)); assert_eq!(playlist, vec![first_song.to_owned()]); let mut playlist = vec![ first_song.to_owned(), @@ -624,7 +497,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_by_key(&mut playlist, None, |s| s.bliss_song.to_owned()); + dedup_playlist(&mut playlist, None); assert_eq!( playlist, vec![ @@ -674,21 +547,21 @@ mod test { ..Default::default() }; let mut songs = vec![ - first_song.to_owned(), - third_song.to_owned(), - first_song_dupe.to_owned(), - second_song.to_owned(), - fourth_song.to_owned(), + &first_song, + &third_song, + &first_song_dupe, + 
&second_song, + &fourth_song, ]; - song_to_song(&first_song, &mut songs, euclidean_distance); + song_to_song(&[&first_song], &mut songs, &euclidean_distance); assert_eq!( songs, vec![ - first_song.to_owned(), - first_song_dupe.to_owned(), - second_song.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), + &first_song, + &first_song_dupe, + &second_song, + &third_song, + &fourth_song, ], ); @@ -713,32 +586,30 @@ mod test { something: true, }; - let mut songs: Vec = vec![ - first_song.to_owned(), - first_song_dupe.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), - second_song.to_owned(), + let mut songs: Vec<&CustomSong> = vec![ + &first_song, + &first_song_dupe, + &third_song, + &fourth_song, + &second_song, ]; - song_to_song_by_key(&first_song, &mut songs, euclidean_distance, |s| { - s.bliss_song.to_owned() - }); + song_to_song(&[&first_song], &mut songs, &euclidean_distance); assert_eq!( songs, vec![ - first_song, - first_song_dupe, - second_song, - third_song, - fourth_song, + &first_song, + &first_song_dupe, + &second_song, + &third_song, + &fourth_song, ], ); } #[test] - fn test_sort_closest_to_first_song() { + fn test_sort_closest_to_songs() { let first_song = Song { path: Path::new("path-to-first").to_path_buf(), analysis: Analysis::new([ @@ -783,15 +654,15 @@ mod test { ..Default::default() }; - let mut songs = vec![ - first_song.to_owned(), - first_song_dupe.to_owned(), - second_song.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), - fifth_song.to_owned(), + let mut songs = [ + &first_song, + &first_song_dupe, + &second_song, + &third_song, + &fourth_song, + &fifth_song, ]; - closest_to_first_song(&first_song, &mut songs, euclidean_distance); + closest_to_songs(&[&first_song], &mut songs, &euclidean_distance); let first_song = CustomSong { bliss_song: first_song, @@ -819,28 +690,26 @@ mod test { something: true, }; - let mut songs: Vec = vec![ - first_song.to_owned(), - first_song_dupe.to_owned(), - 
second_song.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), - fifth_song.to_owned(), + let mut songs = [ + &first_song, + &first_song_dupe, + &second_song, + &third_song, + &fourth_song, + &fifth_song, ]; - closest_to_first_song_by_key(&first_song, &mut songs, euclidean_distance, |s| { - s.bliss_song.to_owned() - }); + closest_to_songs(&[&first_song], &mut songs, &euclidean_distance); assert_eq!( songs, - vec![ - first_song, - first_song_dupe, - second_song, - fourth_song, - fifth_song, - third_song + [ + &first_song, + &first_song_dupe, + &second_song, + &fourth_song, + &fifth_song, + &third_song ], ); } @@ -977,8 +846,331 @@ mod test { fourth_song.to_owned(), second_song.to_owned() ], - closest_album_to_group_by_key(group, pool.to_owned(), |s| s.bliss_song.to_owned()) - .unwrap(), + closest_album_to_group(group, pool.to_owned()).unwrap(), ); } + + // This test case is non-deterministic and could fail in rare cases. + #[test] + fn test_forest_options() { + // These songs contains analysis of actual music. Recordings of Mozart's piano concerto no. + // 19, Mozart's piano concerto no. 23, and tracks Miles Davis' "Kind Of Blue". 
+ let mozart_piano_19 = [ + Song { + path: Path::new("path-to-first").to_path_buf(), + analysis: Analysis::new([ + 0.5522649, + -0.8664422, + -0.81236243, + -0.9475107, + -0.76129013, + -0.90520144, + -0.8474938, + -0.8924977, + 0.4956385, + 0.5076021, + -0.5037869, + -0.61038315, + -0.47157913, + -0.48194122, + -0.36397678, + -0.6443357, + -0.9713509, + -0.9781786, + -0.98285836, + -0.983834, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-second").to_path_buf(), + analysis: Analysis::new([ + 0.28091776, + -0.86352056, + -0.8175835, + -0.9497457, + -0.77833027, + -0.91656536, + -0.8477104, + -0.889485, + 0.41879785, + 0.45311546, + -0.6252063, + -0.6838323, + -0.5326821, + -0.63320035, + -0.5573063, + -0.7433087, + -0.9815542, + -0.98570454, + -0.98824924, + -0.9903612, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-third").to_path_buf(), + analysis: Analysis::new([ + 0.5978223, + -0.84076107, + -0.7841455, + -0.886415, + -0.72486377, + -0.8015111, + -0.79157853, + -0.7739525, + 0.517207, + 0.535398, + -0.30007458, + -0.3972137, + -0.41319674, + -0.40709, + -0.32283908, + -0.5261506, + -0.9656949, + -0.9715169, + -0.97524375, + -0.9756616, + ]), + ..Default::default() + }, + ]; + + let kind_of_blue = [ + Song { + path: Path::new("path-to-fourth").to_path_buf(), + analysis: Analysis::new([ + 0.35871255, + -0.8679545, + -0.6833263, + -0.87800264, + -0.7235142, + -0.73546195, + -0.48577756, + -0.7732977, + 0.51237035, + 0.5379869, + -0.00649637, + -0.534671, + -0.5743973, + -0.5706258, + -0.43162197, + -0.6356183, + -0.97918683, + -0.98091763, + -0.9845511, + -0.98359185, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-fifth").to_path_buf(), + analysis: Analysis::new([ + 0.2806753, + -0.85013694, + -0.66921043, + -0.8938313, + -0.6848732, + -0.75377, + -0.48747814, + -0.793482, + 0.44880342, + 0.461563, + -0.115760505, + -0.535959, + -0.5749081, + -0.55055845, + -0.37976396, + -0.538705, + 
-0.97972554, + -0.97890633, + -0.98290455, + -0.98231846, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-sixth").to_path_buf(), + analysis: Analysis::new([ + 0.1545173, + -0.8991263, + -0.79770947, + -0.87425447, + -0.77811325, + -0.71051484, + -0.7369138, + -0.8515074, + 0.387398, + 0.42035806, + -0.30229717, + -0.624056, + -0.6458885, + -0.66208386, + -0.5866134, + -0.7613628, + -0.98656195, + -0.98821944, + -0.99072844, + -0.98729765, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-seventh").to_path_buf(), + analysis: Analysis::new([ + 0.3853314, + -0.8475499, + -0.64330614, + -0.85917395, + -0.6624141, + -0.6356613, + -0.40988427, + -0.7480691, + 0.45981812, + 0.47096932, + -0.19245929, + -0.5228787, + -0.42246288, + -0.52656835, + -0.45702273, + -0.569838, + -0.97620565, + -0.97741324, + -0.9776932, + -0.98088175, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-eight").to_path_buf(), + analysis: Analysis::new([ + 0.18926656, + -0.86667925, + -0.7294189, + -0.856192, + -0.7180501, + -0.66697484, + -0.6093149, + -0.82118326, + 0.3888924, + 0.42430043, + -0.4414854, + -0.6957753, + -0.7092425, + -0.68237424, + -0.55543846, + -0.77678657, + -0.98610276, + -0.98707336, + -0.99165493, + -0.99011236, + ]), + ..Default::default() + }, + ]; + + let mozart_piano_23 = [ + Song { + path: Path::new("path-to-ninth").to_path_buf(), + analysis: Analysis::new([ + 0.38328362, + -0.8752751, + -0.8165319, + -0.948534, + -0.77668643, + -0.9051969, + -0.8473458, + -0.88643366, + 0.49641085, + 0.5132351, + -0.41367024, + -0.5279201, + -0.46787983, + -0.49218357, + -0.42164963, + -0.6597451, + -0.97317076, + -0.9800342, + -0.9832096, + -0.98385316, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-tenth").to_path_buf(), + analysis: Analysis::new([ + 0.4301988, + -0.89864063, + -0.84993315, + -0.9518692, + -0.8329567, + -0.9293889, + -0.8605237, + -0.8901016, + 0.35011983, + 0.3822446, + -0.6384951, 
+ -0.7537949, + -0.5867439, + -0.57371, + -0.5662942, + -0.76130676, + -0.9845436, + -0.9833387, + -0.9902381, + -0.9905396, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-eleventh").to_path_buf(), + analysis: Analysis::new([ + 0.42334664, + -0.8632808, + -0.80268145, + -0.91918564, + -0.7522441, + -0.8721291, + -0.81877685, + -0.8166921, + 0.53626525, + 0.540933, + -0.34771818, + -0.45362264, + -0.35523874, + -0.4072432, + -0.25506926, + -0.553644, + -0.9624399, + -0.9706371, + -0.9753268, + -0.9764576, + ]), + ..Default::default() + }, + ]; + + let mut songs: Vec<&Song> = mozart_piano_19 + .iter() + .chain(kind_of_blue.iter()) + .chain(mozart_piano_23.iter()) + .collect(); + + // We train the algorithm on one of the Mozart concertos, and the expectation is that the + // tracks from the Miles Davis record will end up last. + let opts = ForestOptions { + n_trees: 1000, + sample_size: 200, + max_tree_depth: None, + extension_level: 10, + }; + closest_to_songs( + &mozart_piano_19.iter().collect::>(), + &mut songs, + &opts, + ); + for e in &kind_of_blue { + assert!(songs[songs.len() - 5..].contains(&e)); + } + } } diff --git a/src/song.rs b/src/song.rs index 673c7bc..33f8363 100644 --- a/src/song.rs +++ b/src/song.rs @@ -13,9 +13,6 @@ extern crate ndarray; use crate::chroma::ChromaDesc; use crate::cue::CueInfo; use crate::misc::LoudnessDesc; -#[cfg(doc)] -use crate::playlist; -use crate::playlist::{closest_to_first_song, dedup_playlist, euclidean_distance, DistanceMetric}; use crate::temporal::BPMDesc; use crate::timbral::{SpectralDesc, ZeroCrossingRateDesc}; use crate::{BlissError, BlissResult, SAMPLE_RATE}; @@ -79,6 +76,12 @@ pub struct Song { pub cue_info: Option, } +impl AsRef for Song { + fn as_ref(&self) -> &Song { + self + } +} + #[derive(Debug, EnumIter, EnumCount)] /// Indexes different fields of an [Analysis](Song::analysis). 
/// @@ -188,96 +191,9 @@ impl Analysis { pub fn as_vec(&self) -> Vec { self.internal_analysis.to_vec() } - - /// Compute distance between two analysis using a user-provided distance - /// metric. You most likely want to use `song.custom_distance` directly - /// rather than this function. - /// - /// For this function to be integrated properly with the rest - /// of bliss' parts, it should be a valid distance metric, i.e.: - /// 1. For X, Y real vectors, d(X, Y) = 0 ⇔ X = Y - /// 2. For X, Y real vectors, d(X, Y) >= 0 - /// 3. For X, Y real vectors, d(X, Y) = d(Y, X) - /// 4. For X, Y, Z real vectors d(X, Y) ≤ d(X + Z) + d(Z, Y) - /// - /// Note that almost all distance metrics you will find obey these - /// properties, so don't sweat it too much. - pub fn custom_distance(&self, other: &Self, distance: impl DistanceMetric) -> f32 { - distance(&self.as_arr1(), &other.as_arr1()) - } } impl Song { - #[allow(dead_code)] - /// Compute the distance between the current song and any given - /// Song. - /// - /// The smaller the number, the closer the songs; usually more useful - /// if compared between several songs - /// (e.g. if song1.distance(song2) < song1.distance(song3), then song1 is - /// closer to song2 than it is to song3. - /// - /// Currently uses the euclidean distance, but this can change in an - /// upcoming release if another metric performs better. - pub fn distance(&self, other: &Self) -> f32 { - self.analysis - .custom_distance(&other.analysis, euclidean_distance) - } - - /// Compute distance between two songs using a user-provided distance - /// metric. - /// - /// For this function to be integrated properly with the rest - /// of bliss' parts, it should be a valid distance metric, i.e.: - /// 1. For X, Y real vectors, d(X, Y) = 0 ⇔ X = Y - /// 2. For X, Y real vectors, d(X, Y) >= 0 - /// 3. For X, Y real vectors, d(X, Y) = d(Y, X) - /// 4. 
For X, Y, Z real vectors d(X, Y) ≤ d(X + Z) + d(Z, Y) - /// - /// Note that almost all distance metrics you will find obey these - /// properties, so don't sweat it too much. - pub fn custom_distance(&self, other: &Self, distance: impl DistanceMetric) -> f32 { - self.analysis.custom_distance(&other.analysis, distance) - } - - /// Orders songs in `pool` by proximity to `self`, using the distance - /// metric `distance` to compute the order. - /// Basically return a playlist from songs in `pool`, starting - /// from `self`, using `distance` (some distance metrics can - /// be found in the [playlist] module). - /// - /// Note that contrary to [Song::closest_from_pool], `self` is NOT added - /// to the beginning of the returned vector. - /// - /// No deduplication is ran either; if you're looking for something easy - /// that works "out of the box", use [Song::closest_from_pool]. - pub fn closest_from_pool_custom( - &self, - pool: Vec, - distance: impl DistanceMetric, - ) -> Vec { - let mut pool = pool; - closest_to_first_song(self, &mut pool, distance); - pool - } - - /// Order songs in `pool` by proximity to `self`. - /// Convenience method to return a playlist from songs in `pool`, - /// starting from `self`. - /// - /// The distance is already chosen, deduplication is ran, and the first song - /// is added to the top of the playlist, to make everything easier. - /// - /// If you want more control over which distance metric is chosen, - /// run deduplication manually, etc, use [Song::closest_from_pool_custom]. - pub fn closest_from_pool(&self, pool: Vec) -> Vec { - let mut playlist = vec![self.to_owned()]; - playlist.extend_from_slice(&pool); - closest_to_first_song(self, &mut playlist, euclidean_distance); - dedup_playlist(&mut playlist, None); - playlist - } - /// Returns a decoded [Song] given a file path, or an error if the song /// could not be analyzed for some reason. 
/// @@ -897,34 +813,6 @@ mod tests { assert!(sample_array.len() as f32 / (sample_array.capacity() as f32) < 1.); } - #[test] - fn test_analysis_distance() { - let mut a = Song::default(); - a.analysis = Analysis::new([ - 0.16391512, 0.11326739, 0.96868552, 0.8353934, 0.49867523, 0.76532606, 0.63448005, - 0.82506196, 0.71457147, 0.62395476, 0.69680329, 0.9855766, 0.41369333, 0.13900452, - 0.68001012, 0.11029723, 0.97192943, 0.57727861, 0.07994821, 0.88993185, - ]); - - let mut b = Song::default(); - b.analysis = Analysis::new([ - 0.5075758, 0.36440256, 0.28888011, 0.43032829, 0.62387977, 0.61894916, 0.99676086, - 0.11913155, 0.00640396, 0.15943407, 0.33829514, 0.34947174, 0.82927523, 0.18987604, - 0.54437275, 0.22076826, 0.91232151, 0.29233168, 0.32846024, 0.04522147, - ]); - assert_eq!(a.distance(&b), 1.9469079) - } - - #[test] - fn test_analysis_distance_indiscernible() { - let mut a = Song::default(); - a.analysis = Analysis::new([ - 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., - 20., - ]); - assert_eq!(a.distance(&a), 0.) - } - #[test] fn test_decode_errors() { assert_eq!( @@ -973,106 +861,6 @@ mod tests { format!("{:?}", song.analysis), ); } - - fn dummy_distance(_: &Array1, _: &Array1) -> f32 { - 0. 
- } - - #[test] - fn test_custom_distance() { - let mut a = Song::default(); - a.analysis = Analysis::new([ - 0.16391512, 0.11326739, 0.96868552, 0.8353934, 0.49867523, 0.76532606, 0.63448005, - 0.82506196, 0.71457147, 0.62395476, 0.69680329, 0.9855766, 0.41369333, 0.13900452, - 0.68001012, 0.11029723, 0.97192943, 0.57727861, 0.07994821, 0.88993185, - ]); - - let mut b = Song::default(); - b.analysis = Analysis::new([ - 0.5075758, 0.36440256, 0.28888011, 0.43032829, 0.62387977, 0.61894916, 0.99676086, - 0.11913155, 0.00640396, 0.15943407, 0.33829514, 0.34947174, 0.82927523, 0.18987604, - 0.54437275, 0.22076826, 0.91232151, 0.29233168, 0.32846024, 0.04522147, - ]); - assert_eq!(a.custom_distance(&b, dummy_distance), 0.); - } - - #[test] - fn test_closest_from_pool() { - let song = Song { - path: Path::new("path-to-first").to_path_buf(), - analysis: Analysis::new([ - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - ]), - ..Default::default() - }; - let first_song_dupe = Song { - path: Path::new("path-to-dupe").to_path_buf(), - analysis: Analysis::new([ - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - ]), - ..Default::default() - }; - - let second_song = Song { - path: Path::new("path-to-second").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1.9, 1., 1., 1., - ]), - ..Default::default() - }; - let third_song = Song { - path: Path::new("path-to-third").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.5, 1., 1., 1., - ]), - ..Default::default() - }; - let fourth_song = Song { - path: Path::new("path-to-fourth").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 1., 1., 1., - ]), - ..Default::default() - }; - let fifth_song = Song { - path: Path::new("path-to-fifth").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 
2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 1., 1., 1., - ]), - ..Default::default() - }; - - let songs = vec![ - song.to_owned(), - first_song_dupe.to_owned(), - second_song.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), - fifth_song.to_owned(), - ]; - let playlist = song.closest_from_pool(songs.to_owned()); - assert_eq!( - playlist, - vec![ - song.to_owned(), - second_song.to_owned(), - fourth_song.to_owned(), - third_song.to_owned(), - ], - ); - let playlist = song.closest_from_pool_custom(songs, euclidean_distance); - assert_eq!( - playlist, - vec![ - song, - first_song_dupe, - second_song, - fourth_song, - fifth_song, - third_song - ], - ); - } } #[cfg(all(feature = "bench", test))]