From 67b2bf24781f46af981c54a833a3addbfbfe79c3 Mon Sep 17 00:00:00 2001 From: Chris Cochrun Date: Tue, 24 Feb 2026 15:18:51 -0600 Subject: [PATCH] adding genius lyrics searcher --- Cargo.lock | 13 +++- Cargo.toml | 1 + src/core/song_search.rs | 166 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 171 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 976a902..514c9e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4488,6 +4488,7 @@ dependencies = [ "ron 0.8.1", "scraper", "serde", + "serde_json", "sqlx", "strum", "strum_macros", @@ -6988,16 +6989,16 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.145" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "indexmap 2.12.1", "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -10204,6 +10205,12 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + [[package]] name = "zstd" version = "0.13.3" diff --git a/Cargo.toml b/Cargo.toml index 4606ba5..09ada1f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ derive_more = { version = "2.1.1", features = ["debug"] } reqwest = "0.13.1" scraper = "0.25.0" itertools = "0.14.0" +serde_json = "1.0.149" # rfd = { version = "0.15.4", default-features = false, features = ["xdg-portal"] } diff --git a/src/core/song_search.rs b/src/core/song_search.rs index 47bb292..e43b441 100644 --- a/src/core/song_search.rs +++ b/src/core/song_search.rs @@ -1,7 +1,20 @@ use itertools::Itertools; use miette::{IntoDiagnostic, Result, miette}; +use reqwest::header; +use serde::{Deserialize, Serialize}; +use serde_json::Value; -#[derive(Clone, Debug, Default, PartialEq, PartialOrd, Ord, Eq)] +#[derive( + Clone, + Debug, + Default, + PartialEq, + PartialOrd, + Ord, + Eq, + Serialize, + Deserialize, +)] pub struct OnlineSong { pub lyrics: String, pub title: String, @@ -10,7 +23,114 @@ pub struct OnlineSong { pub link: String, } -pub async fn search_online_song_links( +pub async fn search_genius_links( + query: impl AsRef + std::fmt::Display, +) -> Result> { + let auth_token = env!("GENIUS_TOKEN"); + let mut headers = header::HeaderMap::new(); + headers.insert( + header::AUTHORIZATION, + header::HeaderValue::from_static(auth_token), + ); + let client = reqwest::Client::builder() + .default_headers(headers) + .build() + .into_diagnostic()?; + let response = client + .get(format!("https://api.genius.com/search?q={query}")) + .send() + .await + .into_diagnostic()? + .error_for_status() + .into_diagnostic()? + .text() + .await + .into_diagnostic()?; + let json: Value = + serde_json::from_str(&response).into_diagnostic()?; + let hits = json + .get("response") + .expect("respose") + .get("hits") + .expect("hits") + .as_array() + .expect("array"); + Ok(hits + .iter() + .map(|hit| { + let result = hit.get("result").expect("result"); + let title = result + .get("full_title") + .expect("title") + .as_str() + .expect("title") + .to_string(); + let title = title.replace("\u{a0}", " "); + let author = result + .get("artist_names") + .expect("artists") + .as_str() + .expect("artists") + .to_string(); + let link = result + .get("url") + .expect("url") + .as_str() + .expect("url") + .to_string(); + OnlineSong { + lyrics: String::new(), + title, + author, + site: String::from("https://genius.com"), + link, + } + }) + .collect()) +} + +pub async fn get_genius_lyrics( + mut song: OnlineSong, +) -> Result { + let html = reqwest::get(&song.link) + .await + .into_diagnostic()? + .error_for_status() + .into_diagnostic()? + .text() + .await + .into_diagnostic()?; + let document = scraper::Html::parse_document(&html); + let Ok(lyrics_root_selector) = scraper::Selector::parse( + r#"div[data-lyrics-container="true"]"#, + ) else { + return Err(miette!("error in finding lyrics_root")); + }; + + let lyrics = document + .select(&lyrics_root_selector) + .map(|root| { + // dbg!(&root); + root.inner_html() + }) + .collect::(); + let lyrics = lyrics.find("[").map_or_else( + || { + lyrics.find("").map_or( + lyrics.clone(), + |position| { + lyrics.split_at(position + 18).1.to_string() + }, + ) + }, + |position| lyrics.split_at(position).1.to_string(), + ); + let lyrics = lyrics.replace("
", "\n"); + song.lyrics = lyrics; + Ok(song) +} + +pub async fn search_lyrics_com_links( query: impl AsRef + std::fmt::Display, ) -> Result> { let html = @@ -53,7 +173,7 @@ pub async fn search_online_song_links( // id value or not in the future and I'd like to keep the code understanding // of what this variable might be. #[allow(clippy::no_effect_underscore_binding)] -pub async fn link_to_online_song( +pub async fn lyrics_com_link_to_song( links: Vec + std::fmt::Display>, ) -> Result> { let mut songs = vec![]; @@ -112,6 +232,40 @@ mod test { use super::*; use pretty_assertions::assert_eq; + #[tokio::test] + async fn test_genius() -> Result<(), String> { + let song = OnlineSong { + lyrics: String::new(), + title: "Death Was Arrested by North Point Worship (Ft. Seth Condrey)".to_string(), + author: "North Point Worship (Ft. Seth Condrey)".to_string(), + site: "https://genius.com".to_string(), + link: "https://genius.com/North-point-worship-death-was-arrested-lyrics".to_string(), + }; + let hits = search_genius_links("Death was arrested") + .await + .map_err(|e| e.to_string())?; + + let titles: Vec = + hits.iter().map(|song| song.title.clone()).collect(); + dbg!(titles); + for hit in hits { + let new_song = get_genius_lyrics(hit) + .await + .map_err(|e| e.to_string())?; + dbg!(&new_song); + if !new_song.lyrics.starts_with("[Verse 1]") { + assert!(new_song.lyrics.len() > 10); + } else { + assert!(new_song.lyrics.contains("[Verse 2]")); + if !new_song.lyrics.contains("[Chorus]") { + assert!(new_song.lyrics.contains("[Chorus 1]")) + } + } + } + + Ok(()) + } + #[tokio::test] async fn test_search_to_song() -> Result<(), String> { let song = OnlineSong { @@ -121,10 +275,10 @@ mod test { site: "https://www.lyrics.com".to_string(), link: "https://www.lyrics.com/lyric/35090938/North+Point+InsideOut/Death+Was+Arrested".to_string(), }; - let links = search_online_song_links("Death was arrested") + let links = search_lyrics_com_links("Death was arrested") .await .map_err(|e| format!("{e}"))?; - let songs = link_to_online_song(links) + let songs = lyrics_com_link_to_song(links) .await .map_err(|e| format!("{e}"))?; if let Some(first) = songs.iter().find_or_first(|song| { @@ -156,7 +310,7 @@ mod test { #[tokio::test] async fn test_online_search() { let search = - search_online_song_links("Death was arrested").await; + search_lyrics_com_links("Death was arrested").await; match search { Ok(songs) => { assert_eq!(