From 41714640d950d96ed561e578109195eff5dd3843 Mon Sep 17 00:00:00 2001 From: Simon Teixidor Date: Mon, 1 Apr 2024 21:26:11 +0200 Subject: [PATCH] Playlists from multiple tracks (#72) * Enable playlists based on multiple songs This commit introduces functionality for generating playlists based on a set of songs. For good performance, I also introduce a new distance measure, extended isolation distance. While the previous distance metrics, euclidean distance and cosine distance, could be made to measure distance to a set of songs, the performance will not be as good. * Remove *_by_key family of functions Remove code duplication by making these functions generic over AsRef<Song> instead of having separate versions. * Get rid of circular dependency between playlist and song The playlist module depends on the song module, which in turn depends on the playlist module. This is confusing, and in this case also completely unnecessary. * Unify distance metrics Allow euclidean distance and cosine distance to handle a set of songs as input. In doing so, we can also remove the ability to generate playlists from a single song, as that is just a special case of generating a playlist from many songs anyway. * Review comment: Improve documentation for single song use case * Add test for ForestOptions as DistanceMetricBuilder. * Remove unnecessary double-reference * Add missing test assertion. * Remove comment that is no longer relevant. 
* Fix extended isolation forest test case * Document suggestions and limitations for distance metric * cargo fmt --- CHANGELOG.md | 7 + Cargo.lock | 30 ++ Cargo.toml | 3 +- examples/distance.rs | 4 +- examples/library.rs | 2 +- examples/library_extra_info.rs | 2 +- examples/playlist.rs | 4 +- src/lib.rs | 11 +- src/library.rs | 170 ++++--- src/playlist.rs | 838 ++++++++++++++++++++------------- src/song.rs | 224 +-------- 11 files changed, 653 insertions(+), 642 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4acbc0b..db5a8ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ #Changelog +## bliss 0.7.0 +* Add the possibility to make playlists based on multiple songs using extended + isolation forest (Thanks @SimonTeixidor!) +* Remove *_by_key family of functions (Thanks @SimonTeixidor!) +* Remove circular dependency between playlist and song by removing distances + from the `Song` struct (Thanks @SimonTeixidor!) + ## bliss 0.6.11 * Bump rust-ffmpeg to 6.1.1 to fix build for raspberry pis. 
diff --git a/Cargo.lock b/Cargo.lock index cac73c0..8974bf9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,6 +125,7 @@ dependencies = [ "bliss-audio-aubio-rs", "clap", "dirs", + "extended-isolation-forest", "ffmpeg-next", "ffmpeg-sys-next", "glob", @@ -390,6 +391,18 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "extended-isolation-forest" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db5193a74618ae2f7ea9c7feda2772192e0e3c04d9cbd2beb5ee9b0916d7eb3f" +dependencies = [ + "num-traits", + "rand 0.8.5", + "rand_distr", + "serde", +] + [[package]] name = "fallible-iterator" version = "0.2.0" @@ -666,6 +679,12 @@ dependencies = [ "windows-targets 0.52.4", ] +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + [[package]] name = "libredox" version = "0.1.3" @@ -881,6 +900,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -1088,6 +1108,16 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + [[package]] name = "rawpointer" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index 401e17a..a142a95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ library = [ "dep:anyhow", "dep:serde_ini", "dep:serde_json", "dep:indicatif", ] -serde = ["dep:serde"] +serde = ["dep:serde", "extended-isolation-forest/serde"] [dependencies] # Until https://github.com/aubio/aubio/issues/336 is somehow solved @@ -54,6 +54,7 @@ thiserror = "1.0.40" strum = "0.24.1" strum_macros = "0.24.3" rcue = 
"0.1.3" +extended-isolation-forest = { version = "0.2.3", default-features = false } # Deps for the library feature serde = { version = "1.0", optional = true, features = ["derive"] } diff --git a/examples/distance.rs b/examples/distance.rs index 0fa2ac8..870eacd 100644 --- a/examples/distance.rs +++ b/examples/distance.rs @@ -1,4 +1,4 @@ -use bliss_audio::Song; +use bliss_audio::{playlist::euclidean_distance, Song}; use std::env; /** @@ -20,7 +20,7 @@ fn main() -> Result<(), String> { "d({:?}, {:?}) = {}", song1.path, song2.path, - song1.distance(&song2) + euclidean_distance(&song1.analysis.as_arr1(), &song2.analysis.as_arr1()) ); Ok(()) } diff --git a/examples/library.rs b/examples/library.rs index f6e61d7..1cabb09 100644 --- a/examples/library.rs +++ b/examples/library.rs @@ -190,7 +190,7 @@ fn main() -> Result<()> { .unwrap_or("20") .parse::()?; let library: Library = Library::from_config_path(config_path)?; - let songs = library.playlist_from::<()>(song_path, playlist_length)?; + let songs = library.playlist_from::<()>(&[song_path], playlist_length)?; let song_paths = songs .into_iter() .map(|s| s.bliss_song.path.to_string_lossy().to_string()) diff --git a/examples/library_extra_info.rs b/examples/library_extra_info.rs index ec84cfd..a0d3167 100644 --- a/examples/library_extra_info.rs +++ b/examples/library_extra_info.rs @@ -208,7 +208,7 @@ fn main() -> Result<()> { .unwrap_or("20") .parse::()?; let library: Library = Library::from_config_path(config_path)?; - let songs = library.playlist_from::(song_path, playlist_length)?; + let songs = library.playlist_from::(&[song_path], playlist_length)?; let playlist = songs .into_iter() .map(|s| { diff --git a/examples/playlist.rs b/examples/playlist.rs index 86e118f..c106937 100644 --- a/examples/playlist.rs +++ b/examples/playlist.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use bliss_audio::playlist::{closest_to_first_song, dedup_playlist, euclidean_distance}; +use bliss_audio::playlist::{closest_to_songs, 
dedup_playlist, euclidean_distance}; use bliss_audio::{analyze_paths, Song}; use clap::{App, Arg}; use glob::glob; @@ -77,7 +77,7 @@ fn main() -> Result<()> { .into_iter() .filter(|x| x == &first_song || paths.contains(&x.path.to_string_lossy().to_string())) .collect(); - closest_to_first_song(&first_song, &mut songs_to_chose_from, euclidean_distance); + closest_to_songs(&[first_song], &mut songs_to_chose_from, &euclidean_distance); dedup_playlist(&mut songs_to_chose_from, None); fs::write(analysis_path, serialized)?; diff --git a/src/lib.rs b/src/lib.rs index abfc207..2e6a51b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,13 +27,16 @@ //! //! ### Analyze & compute the distance between two songs //! ```no_run -//! use bliss_audio::{BlissResult, Song}; +//! use bliss_audio::{BlissResult, Song, playlist::euclidean_distance}; //! //! fn main() -> BlissResult<()> { //! let song1 = Song::from_path("/path/to/song1")?; //! let song2 = Song::from_path("/path/to/song2")?; //! -//! println!("Distance between song1 and song2 is {}", song1.distance(&song2)); +//! println!( +//! "Distance between song1 and song2 is {}", +//! euclidean_distance(&song1.analysis.as_arr1(), &song2.analysis.as_arr1()) +//! ); //! Ok(()) //! } //! ``` @@ -42,7 +45,7 @@ //! ```no_run //! use bliss_audio::{ //! analyze_paths, -//! playlist::{closest_to_first_song, euclidean_distance}, +//! playlist::{closest_to_songs, euclidean_distance}, //! BlissResult, Song, //! }; //! @@ -53,7 +56,7 @@ //! // Assuming there is a first song //! let first_song = songs.first().unwrap().to_owned(); //! -//! closest_to_first_song(&first_song, &mut songs, euclidean_distance); +//! closest_to_songs(&[first_song], &mut songs, &euclidean_distance); //! //! println!("Playlist is:"); //! for song in songs { diff --git a/src/library.rs b/src/library.rs index 3fa8efa..79b1f1a 100644 --- a/src/library.rs +++ b/src/library.rs @@ -110,18 +110,16 @@ //! [Library] to implement bliss for a MPD player. 
use crate::analyze_paths_with_cores; use crate::cue::CueInfo; -use crate::playlist::closest_album_to_group_by_key; -use crate::playlist::closest_to_first_song_by_key; -use crate::playlist::dedup_playlist_by_key; -use crate::playlist::dedup_playlist_custom_distance_by_key; +use crate::playlist::closest_album_to_group; +use crate::playlist::closest_to_songs; +use crate::playlist::dedup_playlist_custom_distance; use crate::playlist::euclidean_distance; -use crate::playlist::DistanceMetric; +use crate::playlist::DistanceMetricBuilder; use anyhow::{bail, Context, Result}; #[cfg(not(test))] use dirs::data_local_dir; use indicatif::{ProgressBar, ProgressStyle}; use log::warn; -use noisy_float::prelude::*; use rusqlite::params; use rusqlite::params_from_iter; use rusqlite::Connection; @@ -340,6 +338,12 @@ pub struct LibrarySong { pub extra_info: T, } +impl AsRef for LibrarySong { + fn as_ref(&self) -> &Song { + &self.bliss_song + } +} + // TODO add logging statement // TODO concrete examples // TODO example LibrarySong without any extra_info @@ -480,75 +484,62 @@ impl Library { Self::new(config) } - /// Build a playlist of `playlist_length` items from an already analyzed - /// song in the library at `song_path`. + /// Build a playlist of `playlist_length` items from a set of already analyzed + /// songs in the library at `song_paths`. /// - /// It uses a simple euclidean distance between songs, and deduplicates songs - /// that are too close. + /// It uses a simple euclidean distance between songs, and deduplicates + /// songs that are too close. + /// + /// Generating a playlist from a single song is also possible, and is just the special case + /// where song_paths is a slice of length 1. 
pub fn playlist_from( &self, - song_path: &str, + song_paths: &[&str], playlist_length: usize, ) -> Result>> { - let first_song: LibrarySong = self.song_from_path(song_path)?; - let mut songs = self.songs_from_library()?; - closest_to_first_song_by_key( - &first_song, - &mut songs, - euclidean_distance, - |s: &LibrarySong| s.bliss_song.to_owned(), - ); - songs.sort_by_cached_key(|song| n32(first_song.bliss_song.distance(&song.bliss_song))); - dedup_playlist_by_key(&mut songs, None, |s: &LibrarySong| { - s.bliss_song.to_owned() - }); - songs.truncate(playlist_length); - Ok(songs) + self.playlist_from_custom( + song_paths, + playlist_length, + &euclidean_distance, + &mut closest_to_songs, + true, + ) } - /// Build a playlist of `playlist_length` items from an already analyzed - /// song in the library at `song_path`, using distance metric `distance`, + /// Build a playlist of `playlist_length` items from a set of already analyzed + /// songs in the library at `song_paths`, using distance metric `distance`, /// the sorting function `sort_by` and deduplicating if `dedup` is set to /// `true`. /// /// You can use ready to use distance metrics such as - /// [euclidean_distance], and ready to use sorting functions like - /// [closest_to_first_song_by_key]. - /// - /// In most cases, you just want to use [Library::playlist_from]. - /// Use `playlist_from_custom` if you want to experiment with different - /// distance metrics / sorting functions. + /// [ExtendedIsolationForest], and ready to use sorting functions like + /// [closest_to_songs]. /// - /// Example: - /// `library.playlist_from_song_custom(song_path, 20, euclidean_distance, - /// closest_to_first_song_by_key, true)`. - /// TODO path here too - pub fn playlist_from_custom( + /// Generating a playlist from a single song is also possible, and is just the special case + /// where song_paths is a slice of length 1. 
+ pub fn playlist_from_custom< + T: Serialize + DeserializeOwned, + F: FnMut(&[LibrarySong], &mut [LibrarySong], &dyn DistanceMetricBuilder), + >( &self, - song_path: &str, + song_paths: &[&str], playlist_length: usize, - distance: G, - mut sort_by: F, + distance: &dyn DistanceMetricBuilder, + sort_by: &mut F, dedup: bool, - ) -> Result>> - where - F: FnMut(&LibrarySong, &mut Vec>, G, fn(&LibrarySong) -> Song), - G: DistanceMetric + Copy, - { - let first_song: LibrarySong = self.song_from_path(song_path).map_err(|_| { - BlissError::ProviderError(format!("song '{song_path}' has not been analyzed")) - })?; + ) -> Result>> { + let initial_songs: Vec> = song_paths + .iter() + .map(|s| { + self.song_from_path(s).map_err(|_| { + BlissError::ProviderError(format!("song '{s}' has not been analyzed")) + }) + }) + .collect::, BlissError>>()?; let mut songs = self.songs_from_library()?; - sort_by(&first_song, &mut songs, distance, |s: &LibrarySong| { - s.bliss_song.to_owned() - }); + sort_by(&initial_songs, &mut songs, distance); if dedup { - dedup_playlist_custom_distance_by_key( - &mut songs, - None, - distance, - |s: &LibrarySong| s.bliss_song.to_owned(), - ); + dedup_playlist_custom_distance(&mut songs, None, distance); } songs.truncate(playlist_length); Ok(songs) @@ -568,7 +559,7 @@ impl Library { let album = self.songs_from_album(&album_title)?; // Every song should be from the same album. Hopefully... 
let songs = self.songs_from_library()?; - let playlist = closest_album_to_group_by_key(album, songs, |s| s.bliss_song.to_owned())?; + let playlist = closest_album_to_group(album, songs)?; let mut album_count = 0; let mut index = 0; @@ -1890,11 +1881,19 @@ mod test { } } + fn first_factor_divided_by_30_distance(a: &Array1, b: &Array1) -> f32 { + ((a[1] - b[1]).abs() / 30.).floor() + } + + fn first_factor_distance(a: &Array1, b: &Array1) -> f32 { + (a[1] - b[1]).abs() + } + #[test] fn test_library_playlist_song_not_existing() { let (library, _temp_dir, _) = setup_test_library(); assert!(library - .playlist_from::("not-existing", 2) + .playlist_from::(&["not-existing"], 2) .is_err()); } @@ -1902,7 +1901,7 @@ mod test { fn test_library_playlist_crop() { let (library, _temp_dir, _) = setup_test_library(); let songs: Vec> = - library.playlist_from("/path/to/song2001", 2).unwrap(); + library.playlist_from(&["/path/to/song2001"], 2).unwrap(); assert_eq!(2, songs.len()); } @@ -1910,7 +1909,7 @@ mod test { fn test_library_simple_playlist() { let (library, _temp_dir, _) = setup_test_library(); let songs: Vec> = - library.playlist_from("/path/to/song2001", 20).unwrap(); + library.playlist_from(&["/path/to/song2001"], 20).unwrap(); assert_eq!( vec![ "/path/to/song2001", @@ -1931,14 +1930,12 @@ mod test { #[test] fn test_library_custom_playlist_distance() { let (library, _temp_dir, _) = setup_test_library(); - let distance = - |a: &Array1, b: &Array1| (a.get(1).unwrap() - b.get(1).unwrap()).abs(); let songs: Vec> = library .playlist_from_custom( - "/path/to/song2001", + &["/path/to/song2001"], 20, - distance, - closest_to_first_song_by_key, + &first_factor_distance, + &mut closest_to_songs, true, ) .unwrap(); @@ -1959,15 +1956,12 @@ mod test { ) } - fn custom_sort( - _: &LibrarySong, - songs: &mut Vec>, - _distance: impl DistanceMetric, - key_fn: F, - ) where - F: Fn(&LibrarySong) -> Song, - { - songs.sort_by_key(|song| key_fn(song).path); + fn custom_sort( + _: &[LibrarySong], 
+ songs: &mut [LibrarySong], + _distance: &dyn DistanceMetricBuilder, + ) { + songs.sort_by(|s1, s2| s1.bliss_song.path.cmp(&s2.bliss_song.path)); } #[test] @@ -1975,10 +1969,10 @@ mod test { let (library, _temp_dir, _) = setup_test_library(); let songs: Vec> = library .playlist_from_custom( - "/path/to/song2001", + &["/path/to/song2001"], 20, - euclidean_distance, - custom_sort, + &euclidean_distance, + &mut custom_sort, true, ) .unwrap(); @@ -2002,15 +1996,13 @@ mod test { #[test] fn test_library_custom_playlist_dedup() { let (library, _temp_dir, _) = setup_test_library(); - let distance = |a: &Array1, b: &Array1| { - ((a.get(1).unwrap() - b.get(1).unwrap()).abs() / 30.).floor() - }; + let songs: Vec> = library .playlist_from_custom( - "/path/to/song2001", + &["/path/to/song2001"], 20, - distance, - closest_to_first_song_by_key, + &first_factor_divided_by_30_distance, + &mut closest_to_songs, true, ) .unwrap(); @@ -2026,14 +2018,12 @@ mod test { .collect::>(), ); - let distance = - |a: &Array1, b: &Array1| ((a.get(1).unwrap() - b.get(1).unwrap()).abs()); let songs: Vec> = library .playlist_from_custom( - "/path/to/song2001", + &["/path/to/song2001"], 20, - distance, - closest_to_first_song_by_key, + &first_factor_distance, + &mut closest_to_songs, false, ) .unwrap(); diff --git a/src/playlist.rs b/src/playlist.rs index 93477c1..b905862 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -7,17 +7,57 @@ //! They will yield different styles of playlists, so don't hesitate to //! experiment with them if the default (euclidean distance for now) doesn't //! suit you. -// TODO on the `by_key` functions: maybe Fn(&T) -> &Song is enough? Compared -// to -> Song use crate::{BlissError, BlissResult, Song, NUMBER_FEATURES}; +use extended_isolation_forest::{Forest, ForestOptions}; use ndarray::{Array, Array1, Array2, Axis}; use ndarray_stats::QuantileExt; use noisy_float::prelude::*; use std::collections::HashMap; -/// Convenience trait for user-defined distance metrics. 
-pub trait DistanceMetric: Fn(&Array1, &Array1) -> f32 {} -impl DistanceMetric for F where F: Fn(&Array1, &Array1) -> f32 {} +/// Trait for creating a distance metric, measuring the distance to a set of vectors. If this +/// metric requires any kind of training, this should be done in the build function so that the +/// returned DistanceMetric instance is already trained and ready to use. +/// +/// Currently, the best metric for measuring the distance to a set of songs is the extended +/// isolation forest (implemented on [ForestOptions]). For measuring the distance to a single song, +/// extended isolation forest doesn't work and [euclidean_distance] or [cosine_distance] are good +/// options. +pub trait DistanceMetricBuilder { + /// Build a distance metric that measures the distance to vectors. + fn build<'a>(&'a self, vectors: &[Array1]) -> Box; +} + +/// Measure the distance to a vector, from the vector(s) in the internal state of this metric. +pub trait DistanceMetric { + /// Return the distance from the set of vectors that this metric was built from. + fn distance(&self, vector: &Array1) -> f32; +} + +/// Convenience struct used for implementing DistanceMetric for plain functions. +pub struct FunctionDistanceMetric<'a, F: Fn(&Array1, &Array1) -> f32> { + func: &'a F, + state: Vec>, +} + +impl DistanceMetricBuilder for F +where + F: Fn(&Array1, &Array1) -> f32 + 'static, +{ + fn build<'a>(&'a self, vectors: &[Array1]) -> Box { + Box::new(FunctionDistanceMetric { + func: self, + state: vectors.iter().map(|s| s.to_owned()).collect(), + }) + } +} + +impl<'a, F: Fn(&Array1, &Array1) -> f32 + 'static> DistanceMetric + for FunctionDistanceMetric<'a, F> +{ + fn distance(&self, vector: &Array1) -> f32 { + self.state.iter().map(|v| (self.func)(v, vector)).sum() + } +} /// Return the [euclidean /// distance](https://en.wikipedia.org/wiki/Euclidean_distance#Higher_dimensions) @@ -39,55 +79,50 @@ pub fn cosine_distance(a: &Array1, b: &Array1) -> f32 { 1. 
- similarity } -/// Sort `songs` in place by putting songs close to `first_song` first -/// using the `distance` metric. -pub fn closest_to_first_song( - first_song: &Song, - #[allow(clippy::ptr_arg)] songs: &mut Vec, - distance: impl DistanceMetric, -) { - songs.sort_by_cached_key(|song| n32(first_song.custom_distance(song, &distance))); +fn feature_array1_to_array(f: &Array1) -> [f32; NUMBER_FEATURES] { + f.as_slice() + .expect("Couldn't convert feature vector to slice") + .try_into() + .expect("Couldn't convert slice to array") } -/// Sort `songs` in place by putting songs close to `first_song` first -/// using the `distance` metric. -/// -/// Sort songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` -pub fn closest_to_first_song_by_key( - first_song: &T, - #[allow(clippy::ptr_arg)] songs: &mut Vec, - distance: impl DistanceMetric, - key_fn: F, -) where - F: Fn(&T) -> Song, -{ - let first_song = key_fn(first_song); - songs.sort_by_cached_key(|song| n32(first_song.custom_distance(&key_fn(song), &distance))); +impl DistanceMetricBuilder for ForestOptions { + fn build(&self, vectors: &[Array1]) -> Box { + let a = &*vectors + .iter() + .map(feature_array1_to_array) + .collect::>(); + + if self.sample_size > vectors.len() { + let mut opts = self.clone(); + opts.sample_size = self.sample_size.min(vectors.len()); + Box::new(Forest::from_slice(a, &opts).unwrap()) + } else { + Box::new(Forest::from_slice(a, self).unwrap()) + } + } } -/// Sort `songs` in place using the `distance` metric and ordering by -/// the smallest distance between each song. -/// -/// If the generated playlist is `[song1, song2, song3, song4]`, it means -/// song2 is closest to song1, song3 is closest to song2, and song4 is closest -/// to song3. -/// -/// Note that this has a tendency to go from one style to the other very fast, -/// and it can be slow on big libraries. 
-pub fn song_to_song(first_song: &Song, songs: &mut Vec, distance: impl DistanceMetric) { - let mut new_songs = Vec::with_capacity(songs.len()); - let mut song = first_song.to_owned(); - - while !songs.is_empty() { - let distances: Array1 = - Array::from_shape_fn(songs.len(), |i| song.custom_distance(&songs[i], &distance)); - let idx = distances.argmin().unwrap(); - song = songs[idx].to_owned(); - new_songs.push(song.to_owned()); - songs.retain(|s| s != &song); +impl DistanceMetric for Forest { + fn distance(&self, vector: &Array1) -> f32 { + self.score(&feature_array1_to_array(vector)) as f32 } - *songs = new_songs; +} + +/// Sort `candidate_songs` in place by putting songs close to `selected_songs` first +/// using the `distance` metric. +pub fn closest_to_songs>( + selected_songs: &[T], + candidate_songs: &mut [T], + metric_builder: &dyn DistanceMetricBuilder, +) { + let selected_songs = selected_songs + .iter() + .map(|c| c.as_ref().analysis.as_arr1()) + .collect::>(); + let metric = metric_builder.build(&selected_songs); + candidate_songs + .sort_by_cached_key(|song| n32(metric.distance(&song.as_ref().analysis.as_arr1()))); } /// Sort `songs` in place using the `distance` metric and ordering by @@ -99,47 +134,29 @@ pub fn song_to_song(first_song: &Song, songs: &mut Vec, distance: impl Dis /// /// Note that this has a tendency to go from one style to the other very fast, /// and it can be slow on big libraries. -/// -/// Sort songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` -// TODO: maybe Clone is not needed? 
-pub fn song_to_song_by_key( - first_song: &T, - songs: &mut Vec, - distance: impl DistanceMetric, - key_fn: F, -) where - F: Fn(&T) -> Song, -{ - let mut new_songs: Vec = Vec::with_capacity(songs.len()); - let mut bliss_song = key_fn(&first_song.to_owned()); +pub fn song_to_song>( + from: &[T], + songs: &mut [T], + metric_builder: &dyn DistanceMetricBuilder, +) { + let mut vectors = from + .iter() + .map(|s| s.as_ref().analysis.as_arr1()) + .collect::>(); - while !songs.is_empty() { - let distances: Array1 = Array::from_shape_fn(songs.len(), |i| { - bliss_song.custom_distance(&key_fn(&songs[i]), &distance) - }); - let idx = distances.argmin().unwrap(); - let song = songs[idx].to_owned(); - bliss_song = key_fn(&songs[idx]).to_owned(); - new_songs.push(song.to_owned()); - songs.retain(|s| s != &song); + for i in 0..songs.len() { + { + let metric = metric_builder.build(&vectors); + let remaining_songs = &songs[i..]; + let distances: Array1 = Array::from_shape_fn(remaining_songs.len(), |j| { + metric.distance(&remaining_songs[j].as_ref().analysis.as_arr1()) + }); + let idx = distances.argmin().unwrap(); + songs.swap(idx + i, i); + } + vectors.clear(); + vectors.push(songs[i].as_ref().analysis.as_arr1()); } - *songs = new_songs; -} - -/// Remove duplicate songs from a playlist, in place. -/// -/// Two songs are considered duplicates if they either have the same, -/// non-empty title and artist name, or if they are close enough in terms -/// of distance. -/// -/// # Arguments -/// -/// * `songs`: The playlist to remove duplicates from. -/// * `distance_threshold`: The distance threshold under which two songs are -/// considered identical. If `None`, a default value of 0.05 will be used. -pub fn dedup_playlist(songs: &mut Vec, distance_threshold: Option) { - dedup_playlist_custom_distance(songs, distance_threshold, euclidean_distance); } /// Remove duplicate songs from a playlist, in place. 
@@ -148,21 +165,13 @@ pub fn dedup_playlist(songs: &mut Vec, distance_threshold: Option) { /// non-empty title and artist name, or if they are close enough in terms /// of distance. /// -/// Dedup songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` you -/// want to deduplicate. -/// /// # Arguments /// /// * `songs`: The playlist to remove duplicates from. /// * `distance_threshold`: The distance threshold under which two songs are /// considered identical. If `None`, a default value of 0.05 will be used. -/// * `key_fn`: A function used to retrieve the bliss [Song] from `T`. -pub fn dedup_playlist_by_key(songs: &mut Vec, distance_threshold: Option, key_fn: F) -where - F: Fn(&T) -> Song, -{ - dedup_playlist_custom_distance_by_key(songs, distance_threshold, euclidean_distance, key_fn); +pub fn dedup_playlist>(songs: &mut Vec, distance_threshold: Option) { + dedup_playlist_custom_distance(songs, distance_threshold, &euclidean_distance); } /// Remove duplicate songs from a playlist, in place, using a custom distance @@ -178,52 +187,17 @@ where /// * `distance_threshold`: The distance threshold under which two songs are /// considered identical. If `None`, a default value of 0.05 will be used. /// * `distance`: A custom distance metric. -pub fn dedup_playlist_custom_distance( - songs: &mut Vec, - distance_threshold: Option, - distance: impl DistanceMetric, -) { - songs.dedup_by(|s1, s2| { - n32(s1.custom_distance(s2, &distance)) < distance_threshold.unwrap_or(0.05) - || (s1.title.is_some() - && s2.title.is_some() - && s1.artist.is_some() - && s2.artist.is_some() - && s1.title == s2.title - && s1.artist == s2.artist) - }); -} - -/// Remove duplicate songs from a playlist, in place, using a custom distance -/// metric. 
-/// -/// Two songs are considered duplicates if they either have the same, -/// non-empty title and artist name, or if they are close enough in terms -/// of distance. -/// -/// Dedup songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` -/// you want to deduplicate. -/// -/// # Arguments -/// -/// * `songs`: The playlist to remove duplicates from. -/// * `distance_threshold`: The distance threshold under which two songs are -/// considered identical. If `None`, a default value of 0.05 will be used. -/// * `distance`: A custom distance metric. -/// * `key_fn`: A function used to retrieve the bliss [Song] from `T`. -pub fn dedup_playlist_custom_distance_by_key( +pub fn dedup_playlist_custom_distance>( songs: &mut Vec, distance_threshold: Option, - distance: impl DistanceMetric, - key_fn: F, -) where - F: Fn(&T) -> Song, -{ + metric_builder: &dyn DistanceMetricBuilder, +) { songs.dedup_by(|s1, s2| { - let s1 = key_fn(s1); - let s2 = key_fn(s2); - n32(s1.custom_distance(&s2, &distance)) < distance_threshold.unwrap_or(0.05) + let s1 = s1.as_ref(); + let s2 = s2.as_ref(); + let vector = [s1.analysis.as_arr1()]; + let metric = metric_builder.build(&vector); + n32(metric.distance(&s2.analysis.as_arr1())) < distance_threshold.unwrap_or(0.05) || (s1.title.is_some() && s2.title.is_some() && s1.artist.is_some() @@ -253,136 +227,36 @@ pub fn dedup_playlist_custom_distance_by_key( /// A vector of songs, including `group` at the beginning, that you /// most likely want to plug in your audio player by using something like /// `ret.map(|song| song.path.to_owned()).collect::>()`. -pub fn closest_album_to_group(group: Vec, pool: Vec) -> BlissResult> { +pub fn closest_album_to_group + Clone>( + group: Vec, + pool: Vec, +) -> BlissResult> { let mut albums_analysis: HashMap<&str, Array2> = HashMap::new(); let mut albums = Vec::new(); // Remove songs from the group from the pool. 
let pool = pool .into_iter() - .filter(|s| !group.contains(s)) + .filter(|s| !group.iter().any(|gs| gs.as_ref() == s.as_ref())) .collect::>(); for song in &pool { - if let Some(album) = &song.album { + if let Some(album) = &song.as_ref().album { if let Some(analysis) = albums_analysis.get_mut(album as &str) { analysis - .push_row(song.analysis.as_arr1().view()) + .push_row(song.as_ref().analysis.as_arr1().view()) .map_err(|e| { BlissError::ProviderError(format!("while computing distances: {e}")) })?; } else { - let mut array = Array::zeros((1, song.analysis.as_arr1().len())); - array.assign(&song.analysis.as_arr1()); + let mut array = Array::zeros((1, song.as_ref().analysis.as_arr1().len())); + array.assign(&song.as_ref().analysis.as_arr1()); albums_analysis.insert(album, array); } } } let mut group_analysis = Array::zeros((group.len(), NUMBER_FEATURES)); for (song, mut column) in group.iter().zip(group_analysis.axis_iter_mut(Axis(0))) { - column.assign(&song.analysis.as_arr1()); - } - let first_analysis = group_analysis - .mean_axis(Axis(0)) - .ok_or_else(|| BlissError::ProviderError(String::from("Mean of empty slice")))?; - for (album, analysis) in albums_analysis.iter() { - let mean_analysis = analysis - .mean_axis(Axis(0)) - .ok_or_else(|| BlissError::ProviderError(String::from("Mean of empty slice")))?; - let album = album.to_owned(); - albums.push((album, mean_analysis.to_owned())); - } - - albums.sort_by_key(|(_, analysis)| n32(euclidean_distance(&first_analysis, analysis))); - let mut playlist = group; - for (album, _) in albums { - let mut al = pool - .iter() - .filter(|s| s.album.is_some() && s.album.as_ref().unwrap() == &album.to_string()) - .map(|s| s.to_owned()) - .collect::>(); - al.sort_by(|s1, s2| { - let track_number1 = s1 - .track_number - .to_owned() - .unwrap_or_else(|| String::from("")); - let track_number2 = s2 - .track_number - .to_owned() - .unwrap_or_else(|| String::from("")); - if let Ok(x) = track_number1.parse::() { - if let Ok(y) = 
track_number2.parse::() { - return x.cmp(&y); - } - } - s1.track_number.cmp(&s2.track_number) - }); - playlist.extend_from_slice(&al); - } - Ok(playlist) -} - -/// Return a list of albums in a `pool` of songs that are similar to -/// songs in `group`, discarding songs that don't belong to an album. -/// It basically makes an "album" playlist from the `pool` of songs. -/// -/// `group` should be ordered by track number. -/// -/// Songs from `group` would usually just be songs from an album, but not -/// necessarily - they are discarded from `pool` no matter what. -/// -/// Order songs with a key extraction function, useful for when you have a -/// structure like `CustomSong { bliss_song: Song, something_else: bool }` -/// you want to order. -/// -/// # Arguments -/// -/// * `group` - A small group of songs, e.g. an album. -/// * `pool` - A pool of songs to find similar songs in, e.g. a user's song -/// library. -/// * `key_fn`: A function used to retrieve the bliss [Song] from `T`. -/// -/// # Returns -/// -/// A vector of T, including `group` at the beginning, that you -/// most likely want to plug in your audio player by using something like -/// `ret.map(|song| song.path.to_owned()).collect::>()`. -// TODO: maybe Clone is not needed? -pub fn closest_album_to_group_by_key( - group: Vec, - pool: Vec, - key_fn: F, -) -> BlissResult> -where - F: Fn(&T) -> Song, -{ - let mut albums_analysis: HashMap> = HashMap::new(); - let mut albums = Vec::new(); - - // Remove songs from the group from the pool. 
- let pool = pool - .into_iter() - .filter(|s| !group.contains(s)) - .collect::>(); - for song in &pool { - let song = key_fn(song); - if let Some(album) = song.album { - if let Some(analysis) = albums_analysis.get_mut(&album as &str) { - analysis - .push_row(song.analysis.as_arr1().view()) - .map_err(|e| { - BlissError::ProviderError(format!("while computing distances: {e}")) - })?; - } else { - let mut array = Array::zeros((1, song.analysis.as_arr1().len())); - array.assign(&song.analysis.as_arr1()); - albums_analysis.insert(album.to_owned(), array); - } - } - } - let mut group_analysis = Array::zeros((group.len(), NUMBER_FEATURES)); - for (song, mut column) in group.iter().zip(group_analysis.axis_iter_mut(Axis(0))) { - let song = key_fn(song); - column.assign(&song.analysis.as_arr1()); + column.assign(&song.as_ref().analysis.as_arr1()); } let first_analysis = group_analysis .mean_axis(Axis(0)) @@ -400,20 +274,17 @@ where for (album, _) in albums { let mut al = pool .iter() - .filter(|s| { - let s = key_fn(s); - s.album.is_some() && s.album.as_ref().unwrap() == &album.to_string() - }) - .map(|s| s.to_owned()) + .filter(|s| s.as_ref().album.as_deref() == Some(album)) + .cloned() .collect::>(); al.sort_by(|s1, s2| { - let s1 = key_fn(s1); - let s2 = key_fn(s2); let track_number1 = s1 + .as_ref() .track_number .to_owned() .unwrap_or_else(|| String::from("")); let track_number2 = s2 + .as_ref() .track_number .to_owned() .unwrap_or_else(|| String::from("")); @@ -422,9 +293,9 @@ where return x.cmp(&y); } } - s1.track_number.cmp(&s2.track_number) + s1.as_ref().track_number.cmp(&s2.as_ref().track_number) }); - playlist.extend_from_slice(&al); + playlist.extend(al); } Ok(playlist) } @@ -442,6 +313,12 @@ mod test { bliss_song: Song, } + impl AsRef for CustomSong { + fn as_ref(&self) -> &Song { + &self.bliss_song + } + } + #[test] fn test_dedup_playlist_custom_distance() { let first_song = Song { @@ -502,7 +379,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), 
]; - dedup_playlist_custom_distance(&mut playlist, None, euclidean_distance); + dedup_playlist_custom_distance(&mut playlist, None, &euclidean_distance); assert_eq!( playlist, vec![ @@ -519,7 +396,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance(&mut playlist, Some(20.), cosine_distance); + dedup_playlist_custom_distance(&mut playlist, Some(20.), &euclidean_distance); assert_eq!(playlist, vec![first_song.to_owned()]); let mut playlist = vec![ first_song.to_owned(), @@ -583,9 +460,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance_by_key(&mut playlist, None, euclidean_distance, |s| { - s.bliss_song.to_owned() - }); + dedup_playlist_custom_distance(&mut playlist, None, &euclidean_distance); assert_eq!( playlist, vec![ @@ -602,9 +477,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance_by_key(&mut playlist, Some(20.), cosine_distance, |s| { - s.bliss_song.to_owned() - }); + dedup_playlist_custom_distance(&mut playlist, Some(20.), &cosine_distance); assert_eq!(playlist, vec![first_song.to_owned()]); let mut playlist = vec![ first_song.to_owned(), @@ -614,7 +487,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_by_key(&mut playlist, Some(20.), |s| s.bliss_song.to_owned()); + dedup_playlist(&mut playlist, Some(20.)); assert_eq!(playlist, vec![first_song.to_owned()]); let mut playlist = vec![ first_song.to_owned(), @@ -624,7 +497,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_by_key(&mut playlist, None, |s| s.bliss_song.to_owned()); + dedup_playlist(&mut playlist, None); assert_eq!( playlist, vec![ @@ -674,21 +547,21 @@ mod test { ..Default::default() }; let mut songs = vec![ - first_song.to_owned(), - third_song.to_owned(), - first_song_dupe.to_owned(), - second_song.to_owned(), - fourth_song.to_owned(), + &first_song, + &third_song, + &first_song_dupe, + 
&second_song, + &fourth_song, ]; - song_to_song(&first_song, &mut songs, euclidean_distance); + song_to_song(&[&first_song], &mut songs, &euclidean_distance); assert_eq!( songs, vec![ - first_song.to_owned(), - first_song_dupe.to_owned(), - second_song.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), + &first_song, + &first_song_dupe, + &second_song, + &third_song, + &fourth_song, ], ); @@ -713,32 +586,30 @@ mod test { something: true, }; - let mut songs: Vec = vec![ - first_song.to_owned(), - first_song_dupe.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), - second_song.to_owned(), + let mut songs: Vec<&CustomSong> = vec![ + &first_song, + &first_song_dupe, + &third_song, + &fourth_song, + &second_song, ]; - song_to_song_by_key(&first_song, &mut songs, euclidean_distance, |s| { - s.bliss_song.to_owned() - }); + song_to_song(&[&first_song], &mut songs, &euclidean_distance); assert_eq!( songs, vec![ - first_song, - first_song_dupe, - second_song, - third_song, - fourth_song, + &first_song, + &first_song_dupe, + &second_song, + &third_song, + &fourth_song, ], ); } #[test] - fn test_sort_closest_to_first_song() { + fn test_sort_closest_to_songs() { let first_song = Song { path: Path::new("path-to-first").to_path_buf(), analysis: Analysis::new([ @@ -783,15 +654,15 @@ mod test { ..Default::default() }; - let mut songs = vec![ - first_song.to_owned(), - first_song_dupe.to_owned(), - second_song.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), - fifth_song.to_owned(), + let mut songs = [ + &first_song, + &first_song_dupe, + &second_song, + &third_song, + &fourth_song, + &fifth_song, ]; - closest_to_first_song(&first_song, &mut songs, euclidean_distance); + closest_to_songs(&[&first_song], &mut songs, &euclidean_distance); let first_song = CustomSong { bliss_song: first_song, @@ -819,28 +690,26 @@ mod test { something: true, }; - let mut songs: Vec = vec![ - first_song.to_owned(), - first_song_dupe.to_owned(), - 
second_song.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), - fifth_song.to_owned(), + let mut songs = [ + &first_song, + &first_song_dupe, + &second_song, + &third_song, + &fourth_song, + &fifth_song, ]; - closest_to_first_song_by_key(&first_song, &mut songs, euclidean_distance, |s| { - s.bliss_song.to_owned() - }); + closest_to_songs(&[&first_song], &mut songs, &euclidean_distance); assert_eq!( songs, - vec![ - first_song, - first_song_dupe, - second_song, - fourth_song, - fifth_song, - third_song + [ + &first_song, + &first_song_dupe, + &second_song, + &fourth_song, + &fifth_song, + &third_song ], ); } @@ -977,8 +846,331 @@ mod test { fourth_song.to_owned(), second_song.to_owned() ], - closest_album_to_group_by_key(group, pool.to_owned(), |s| s.bliss_song.to_owned()) - .unwrap(), + closest_album_to_group(group, pool.to_owned()).unwrap(), ); } + + // This test case is non-deterministic and could fail in rare cases. + #[test] + fn test_forest_options() { + // These songs contains analysis of actual music. Recordings of Mozart's piano concerto no. + // 19, Mozart's piano concerto no. 23, and tracks Miles Davis' "Kind Of Blue". 
+ let mozart_piano_19 = [ + Song { + path: Path::new("path-to-first").to_path_buf(), + analysis: Analysis::new([ + 0.5522649, + -0.8664422, + -0.81236243, + -0.9475107, + -0.76129013, + -0.90520144, + -0.8474938, + -0.8924977, + 0.4956385, + 0.5076021, + -0.5037869, + -0.61038315, + -0.47157913, + -0.48194122, + -0.36397678, + -0.6443357, + -0.9713509, + -0.9781786, + -0.98285836, + -0.983834, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-second").to_path_buf(), + analysis: Analysis::new([ + 0.28091776, + -0.86352056, + -0.8175835, + -0.9497457, + -0.77833027, + -0.91656536, + -0.8477104, + -0.889485, + 0.41879785, + 0.45311546, + -0.6252063, + -0.6838323, + -0.5326821, + -0.63320035, + -0.5573063, + -0.7433087, + -0.9815542, + -0.98570454, + -0.98824924, + -0.9903612, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-third").to_path_buf(), + analysis: Analysis::new([ + 0.5978223, + -0.84076107, + -0.7841455, + -0.886415, + -0.72486377, + -0.8015111, + -0.79157853, + -0.7739525, + 0.517207, + 0.535398, + -0.30007458, + -0.3972137, + -0.41319674, + -0.40709, + -0.32283908, + -0.5261506, + -0.9656949, + -0.9715169, + -0.97524375, + -0.9756616, + ]), + ..Default::default() + }, + ]; + + let kind_of_blue = [ + Song { + path: Path::new("path-to-fourth").to_path_buf(), + analysis: Analysis::new([ + 0.35871255, + -0.8679545, + -0.6833263, + -0.87800264, + -0.7235142, + -0.73546195, + -0.48577756, + -0.7732977, + 0.51237035, + 0.5379869, + -0.00649637, + -0.534671, + -0.5743973, + -0.5706258, + -0.43162197, + -0.6356183, + -0.97918683, + -0.98091763, + -0.9845511, + -0.98359185, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-fifth").to_path_buf(), + analysis: Analysis::new([ + 0.2806753, + -0.85013694, + -0.66921043, + -0.8938313, + -0.6848732, + -0.75377, + -0.48747814, + -0.793482, + 0.44880342, + 0.461563, + -0.115760505, + -0.535959, + -0.5749081, + -0.55055845, + -0.37976396, + -0.538705, + 
-0.97972554, + -0.97890633, + -0.98290455, + -0.98231846, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-sixth").to_path_buf(), + analysis: Analysis::new([ + 0.1545173, + -0.8991263, + -0.79770947, + -0.87425447, + -0.77811325, + -0.71051484, + -0.7369138, + -0.8515074, + 0.387398, + 0.42035806, + -0.30229717, + -0.624056, + -0.6458885, + -0.66208386, + -0.5866134, + -0.7613628, + -0.98656195, + -0.98821944, + -0.99072844, + -0.98729765, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-seventh").to_path_buf(), + analysis: Analysis::new([ + 0.3853314, + -0.8475499, + -0.64330614, + -0.85917395, + -0.6624141, + -0.6356613, + -0.40988427, + -0.7480691, + 0.45981812, + 0.47096932, + -0.19245929, + -0.5228787, + -0.42246288, + -0.52656835, + -0.45702273, + -0.569838, + -0.97620565, + -0.97741324, + -0.9776932, + -0.98088175, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-eight").to_path_buf(), + analysis: Analysis::new([ + 0.18926656, + -0.86667925, + -0.7294189, + -0.856192, + -0.7180501, + -0.66697484, + -0.6093149, + -0.82118326, + 0.3888924, + 0.42430043, + -0.4414854, + -0.6957753, + -0.7092425, + -0.68237424, + -0.55543846, + -0.77678657, + -0.98610276, + -0.98707336, + -0.99165493, + -0.99011236, + ]), + ..Default::default() + }, + ]; + + let mozart_piano_23 = [ + Song { + path: Path::new("path-to-ninth").to_path_buf(), + analysis: Analysis::new([ + 0.38328362, + -0.8752751, + -0.8165319, + -0.948534, + -0.77668643, + -0.9051969, + -0.8473458, + -0.88643366, + 0.49641085, + 0.5132351, + -0.41367024, + -0.5279201, + -0.46787983, + -0.49218357, + -0.42164963, + -0.6597451, + -0.97317076, + -0.9800342, + -0.9832096, + -0.98385316, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-tenth").to_path_buf(), + analysis: Analysis::new([ + 0.4301988, + -0.89864063, + -0.84993315, + -0.9518692, + -0.8329567, + -0.9293889, + -0.8605237, + -0.8901016, + 0.35011983, + 0.3822446, + -0.6384951, 
+ -0.7537949, + -0.5867439, + -0.57371, + -0.5662942, + -0.76130676, + -0.9845436, + -0.9833387, + -0.9902381, + -0.9905396, + ]), + ..Default::default() + }, + Song { + path: Path::new("path-to-eleventh").to_path_buf(), + analysis: Analysis::new([ + 0.42334664, + -0.8632808, + -0.80268145, + -0.91918564, + -0.7522441, + -0.8721291, + -0.81877685, + -0.8166921, + 0.53626525, + 0.540933, + -0.34771818, + -0.45362264, + -0.35523874, + -0.4072432, + -0.25506926, + -0.553644, + -0.9624399, + -0.9706371, + -0.9753268, + -0.9764576, + ]), + ..Default::default() + }, + ]; + + let mut songs: Vec<&Song> = mozart_piano_19 + .iter() + .chain(kind_of_blue.iter()) + .chain(mozart_piano_23.iter()) + .collect(); + + // We train the algorithm on one of the Mozart concertos, and the expectation is that the + // tracks from the Miles Davis record will end up last. + let opts = ForestOptions { + n_trees: 1000, + sample_size: 200, + max_tree_depth: None, + extension_level: 10, + }; + closest_to_songs( + &mozart_piano_19.iter().collect::>(), + &mut songs, + &opts, + ); + for e in &kind_of_blue { + assert!(songs[songs.len() - 5..].contains(&e)); + } + } } diff --git a/src/song.rs b/src/song.rs index 673c7bc..33f8363 100644 --- a/src/song.rs +++ b/src/song.rs @@ -13,9 +13,6 @@ extern crate ndarray; use crate::chroma::ChromaDesc; use crate::cue::CueInfo; use crate::misc::LoudnessDesc; -#[cfg(doc)] -use crate::playlist; -use crate::playlist::{closest_to_first_song, dedup_playlist, euclidean_distance, DistanceMetric}; use crate::temporal::BPMDesc; use crate::timbral::{SpectralDesc, ZeroCrossingRateDesc}; use crate::{BlissError, BlissResult, SAMPLE_RATE}; @@ -79,6 +76,12 @@ pub struct Song { pub cue_info: Option, } +impl AsRef for Song { + fn as_ref(&self) -> &Song { + self + } +} + #[derive(Debug, EnumIter, EnumCount)] /// Indexes different fields of an [Analysis](Song::analysis). 
/// @@ -188,96 +191,9 @@ impl Analysis { pub fn as_vec(&self) -> Vec { self.internal_analysis.to_vec() } - - /// Compute distance between two analysis using a user-provided distance - /// metric. You most likely want to use `song.custom_distance` directly - /// rather than this function. - /// - /// For this function to be integrated properly with the rest - /// of bliss' parts, it should be a valid distance metric, i.e.: - /// 1. For X, Y real vectors, d(X, Y) = 0 ⇔ X = Y - /// 2. For X, Y real vectors, d(X, Y) >= 0 - /// 3. For X, Y real vectors, d(X, Y) = d(Y, X) - /// 4. For X, Y, Z real vectors d(X, Y) ≤ d(X + Z) + d(Z, Y) - /// - /// Note that almost all distance metrics you will find obey these - /// properties, so don't sweat it too much. - pub fn custom_distance(&self, other: &Self, distance: impl DistanceMetric) -> f32 { - distance(&self.as_arr1(), &other.as_arr1()) - } } impl Song { - #[allow(dead_code)] - /// Compute the distance between the current song and any given - /// Song. - /// - /// The smaller the number, the closer the songs; usually more useful - /// if compared between several songs - /// (e.g. if song1.distance(song2) < song1.distance(song3), then song1 is - /// closer to song2 than it is to song3. - /// - /// Currently uses the euclidean distance, but this can change in an - /// upcoming release if another metric performs better. - pub fn distance(&self, other: &Self) -> f32 { - self.analysis - .custom_distance(&other.analysis, euclidean_distance) - } - - /// Compute distance between two songs using a user-provided distance - /// metric. - /// - /// For this function to be integrated properly with the rest - /// of bliss' parts, it should be a valid distance metric, i.e.: - /// 1. For X, Y real vectors, d(X, Y) = 0 ⇔ X = Y - /// 2. For X, Y real vectors, d(X, Y) >= 0 - /// 3. For X, Y real vectors, d(X, Y) = d(Y, X) - /// 4. 
For X, Y, Z real vectors d(X, Y) ≤ d(X + Z) + d(Z, Y) - /// - /// Note that almost all distance metrics you will find obey these - /// properties, so don't sweat it too much. - pub fn custom_distance(&self, other: &Self, distance: impl DistanceMetric) -> f32 { - self.analysis.custom_distance(&other.analysis, distance) - } - - /// Orders songs in `pool` by proximity to `self`, using the distance - /// metric `distance` to compute the order. - /// Basically return a playlist from songs in `pool`, starting - /// from `self`, using `distance` (some distance metrics can - /// be found in the [playlist] module). - /// - /// Note that contrary to [Song::closest_from_pool], `self` is NOT added - /// to the beginning of the returned vector. - /// - /// No deduplication is ran either; if you're looking for something easy - /// that works "out of the box", use [Song::closest_from_pool]. - pub fn closest_from_pool_custom( - &self, - pool: Vec, - distance: impl DistanceMetric, - ) -> Vec { - let mut pool = pool; - closest_to_first_song(self, &mut pool, distance); - pool - } - - /// Order songs in `pool` by proximity to `self`. - /// Convenience method to return a playlist from songs in `pool`, - /// starting from `self`. - /// - /// The distance is already chosen, deduplication is ran, and the first song - /// is added to the top of the playlist, to make everything easier. - /// - /// If you want more control over which distance metric is chosen, - /// run deduplication manually, etc, use [Song::closest_from_pool_custom]. - pub fn closest_from_pool(&self, pool: Vec) -> Vec { - let mut playlist = vec![self.to_owned()]; - playlist.extend_from_slice(&pool); - closest_to_first_song(self, &mut playlist, euclidean_distance); - dedup_playlist(&mut playlist, None); - playlist - } - /// Returns a decoded [Song] given a file path, or an error if the song /// could not be analyzed for some reason. 
/// @@ -897,34 +813,6 @@ mod tests { assert!(sample_array.len() as f32 / (sample_array.capacity() as f32) < 1.); } - #[test] - fn test_analysis_distance() { - let mut a = Song::default(); - a.analysis = Analysis::new([ - 0.16391512, 0.11326739, 0.96868552, 0.8353934, 0.49867523, 0.76532606, 0.63448005, - 0.82506196, 0.71457147, 0.62395476, 0.69680329, 0.9855766, 0.41369333, 0.13900452, - 0.68001012, 0.11029723, 0.97192943, 0.57727861, 0.07994821, 0.88993185, - ]); - - let mut b = Song::default(); - b.analysis = Analysis::new([ - 0.5075758, 0.36440256, 0.28888011, 0.43032829, 0.62387977, 0.61894916, 0.99676086, - 0.11913155, 0.00640396, 0.15943407, 0.33829514, 0.34947174, 0.82927523, 0.18987604, - 0.54437275, 0.22076826, 0.91232151, 0.29233168, 0.32846024, 0.04522147, - ]); - assert_eq!(a.distance(&b), 1.9469079) - } - - #[test] - fn test_analysis_distance_indiscernible() { - let mut a = Song::default(); - a.analysis = Analysis::new([ - 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., - 20., - ]); - assert_eq!(a.distance(&a), 0.) - } - #[test] fn test_decode_errors() { assert_eq!( @@ -973,106 +861,6 @@ mod tests { format!("{:?}", song.analysis), ); } - - fn dummy_distance(_: &Array1, _: &Array1) -> f32 { - 0. 
- } - - #[test] - fn test_custom_distance() { - let mut a = Song::default(); - a.analysis = Analysis::new([ - 0.16391512, 0.11326739, 0.96868552, 0.8353934, 0.49867523, 0.76532606, 0.63448005, - 0.82506196, 0.71457147, 0.62395476, 0.69680329, 0.9855766, 0.41369333, 0.13900452, - 0.68001012, 0.11029723, 0.97192943, 0.57727861, 0.07994821, 0.88993185, - ]); - - let mut b = Song::default(); - b.analysis = Analysis::new([ - 0.5075758, 0.36440256, 0.28888011, 0.43032829, 0.62387977, 0.61894916, 0.99676086, - 0.11913155, 0.00640396, 0.15943407, 0.33829514, 0.34947174, 0.82927523, 0.18987604, - 0.54437275, 0.22076826, 0.91232151, 0.29233168, 0.32846024, 0.04522147, - ]); - assert_eq!(a.custom_distance(&b, dummy_distance), 0.); - } - - #[test] - fn test_closest_from_pool() { - let song = Song { - path: Path::new("path-to-first").to_path_buf(), - analysis: Analysis::new([ - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - ]), - ..Default::default() - }; - let first_song_dupe = Song { - path: Path::new("path-to-dupe").to_path_buf(), - analysis: Analysis::new([ - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - ]), - ..Default::default() - }; - - let second_song = Song { - path: Path::new("path-to-second").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1.9, 1., 1., 1., - ]), - ..Default::default() - }; - let third_song = Song { - path: Path::new("path-to-third").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.5, 1., 1., 1., - ]), - ..Default::default() - }; - let fourth_song = Song { - path: Path::new("path-to-fourth").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 1., 1., 1., - ]), - ..Default::default() - }; - let fifth_song = Song { - path: Path::new("path-to-fifth").to_path_buf(), - analysis: Analysis::new([ - 2., 2., 
2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 1., 1., 1., - ]), - ..Default::default() - }; - - let songs = vec![ - song.to_owned(), - first_song_dupe.to_owned(), - second_song.to_owned(), - third_song.to_owned(), - fourth_song.to_owned(), - fifth_song.to_owned(), - ]; - let playlist = song.closest_from_pool(songs.to_owned()); - assert_eq!( - playlist, - vec![ - song.to_owned(), - second_song.to_owned(), - fourth_song.to_owned(), - third_song.to_owned(), - ], - ); - let playlist = song.closest_from_pool_custom(songs, euclidean_distance); - assert_eq!( - playlist, - vec![ - song, - first_song_dupe, - second_song, - fourth_song, - fifth_song, - third_song - ], - ); - } } #[cfg(all(feature = "bench", test))]