From cd65ff2e00b5b094da774feb306837b8ff710af2 Mon Sep 17 00:00:00 2001 From: Manuel Schmid <9307310+mashb1t@users.noreply.github.com> Date: Wed, 4 Dec 2024 15:14:46 +0000 Subject: [PATCH] fix: add handling for empty response for audio features API endpoint The audio features endpoint is now deprecated and used by AI companies to crawl data from Spotify, so it always returns a fixed 401. This commit adds a fallback to empty data if this situation is encountered, while allowing the feature to be used when Spotify allows data fetching again. --- php/src/app/Crawler/SpotifyCrawler.php | 38 ++++++++++++++++---------- php/src/app/Factory.php | 20 +++++++------- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/php/src/app/Crawler/SpotifyCrawler.php b/php/src/app/Crawler/SpotifyCrawler.php index 0301ac0..e191281 100644 --- a/php/src/app/Crawler/SpotifyCrawler.php +++ b/php/src/app/Crawler/SpotifyCrawler.php @@ -81,9 +81,9 @@ public function crawlAll(string $username): void $session = $spotifySession->getUnderlyingObject(); $spotifyWebApi = $this->factory->getSpotifyWebAPI($session); - logs('crawler')->debug("starting spotify crawler for username $username"); + logs('crawler')->info("starting spotify crawler for username $username"); $this->crawlTrackHistoryAndAudioFeatures($username, $spotifyWebApi); - logs('crawler')->debug("finished spotify crawler for username $username"); + logs('crawler')->info("finished spotify crawler for username $username"); $this->sessionHandler->saveSession($spotifySession, $username); } @@ -94,10 +94,10 @@ public function crawlAll(string $username): void protected function crawlTrackHistoryAndAudioFeatures(string $username, SpotifyWebAPI $spotifyWebApi): void { // TODO add "after" instead of limit if last crawl was last hour - $recentTracks = $spotifyWebApi->getMyRecentTracks([ + $recentTracks = $spotifyWebApi->getMyRecentTracks([ 'limit' => config('services.spotify.crawl_bulk_limit'), ])->items; - logs('crawler')->debug("retrieved recent 
logs for user $username"); + logs('crawler')->info("retrieved recent logs for user $username"); $recentTracksIds = []; foreach ($recentTracks as $recentTrack) { @@ -112,12 +112,13 @@ protected function crawlTrackHistoryAndAudioFeatures(string $username, SpotifyWe $artistIds[$artist->id] = $artist->id; } } - + logs('crawler')->debug("getting artists for user $username"); $artistsById = $this->getArtistsById($spotifyWebApi, $artistIds); + logs('crawler')->debug("getting audio features for user $username"); $audioFeatures = $this->getAudioFeatures($spotifyWebApi, $recentTracksIds); - + logs('crawler')->debug("writing history for user $username"); foreach ($recentTracks as $recentTrack) { - $this->writeTrackHistoryPoint($audioFeatures[$recentTrack->track->id], $recentTrack, $username); + $this->writeTrackHistoryPoint($audioFeatures[$recentTrack->track->id] ?? null, $recentTrack, $username); $this->writeGenreHistoryPoints($recentTrack, $artistsById, $username); } @@ -132,13 +133,17 @@ protected function crawlTrackHistoryAndAudioFeatures(string $username, SpotifyWe */ protected function getArtistsById(SpotifyWebAPI $spotifyWebApi, array $artistIds): array { + logs('crawler')->debug("getting artists for user $username"); $artistsFromAPI = $this->getCachedArtistsAndCleanupIds($artistIds); + $cache = Cache::tags([ ServiceEnum::Spotify->value, ServiceEnum::Spotify->value . CacheKeyEnum::CACHE_KEY_SEPARATOR . 
CacheKeyEnum::Artist->value ]); - if (count($artistIds) > 0) { + $newArtistIdCount = count($artistIds); + if (newArtistIdCount > 0) { + logs('crawler')->debug("retrieving uncached data for $newArtistIdCount artists"); // artistIds count could be more than crawl_bulk_limit $artistIdsChunks = array_chunk($artistIds, config('services.spotify.crawl_bulk_limit')); @@ -211,13 +216,16 @@ protected function getAudioFeatures(SpotifyWebAPI $spotifyWebApi, array $trackId logs('crawler')->debug("found audio feature for $trackId in cache"); } } - if (count($trackIds) > 0) { - $response = $spotifyWebApi->getMultipleAudioFeatures(array_values($trackIds)); - foreach ($response->audio_features as $audioFeature) { - $cache->put($audioFeature->id, $audioFeature, config('services.spotify.cache_ttl')); - $audioFeatures[$audioFeature->id] = $audioFeature; - logs('crawler')->debug("set audio feature for $audioFeature->id to cache"); + try { + $response = $spotifyWebApi->getMultipleAudioFeatures(array_values($trackIds)); + foreach ($response->audio_features as $audioFeature) { + $cache->put($audioFeature->id, $audioFeature, config('services.spotify.cache_ttl')); + $audioFeatures[$audioFeature->id] = $audioFeature; + logs('crawler')->debug("set audio feature for $audioFeature->id to cache"); + } + } catch (Exception $e) { + logs('crawler')->warning('error while fetching audio feature, skipping. Exception: ' . 
$e->getMessage()); } } @@ -227,7 +235,7 @@ protected function getAudioFeatures(SpotifyWebAPI $spotifyWebApi, array $trackId /** * @throws Exception */ - protected function writeTrackHistoryPoint(stdClass $audioFeature, stdClass $track, string $username): void + protected function writeTrackHistoryPoint(?stdClass $audioFeature, stdClass $track, string $username): void { $point = $this->factory->getTrackHistoryPoint( $username, ServiceEnum::Spotify->value, $audioFeature, $track diff --git a/php/src/app/Factory.php b/php/src/app/Factory.php index 4a5d6e8..0cb4967 100644 --- a/php/src/app/Factory.php +++ b/php/src/app/Factory.php @@ -60,7 +60,7 @@ public function getSpotifyWebAPI(Session $session): SpotifyWebAPI public function getTrackHistoryPoint( string $username, string $service, - stdClass $audioFeature, + ?stdClass $audioFeature, stdClass $track ): Point { $artists = []; @@ -81,15 +81,15 @@ public function getTrackHistoryPoint( ->addField('track', $track->track->name) ->addField('track_id', $track->track->id) ->addField('duration_ms', (int)$track->track->duration_ms) - ->addField('danceability', (float)$audioFeature->danceability) - ->addField('energy', (float)$audioFeature->energy) - ->addField('key', (int)$audioFeature->key) - ->addField('speechiness', (float)$audioFeature->speechiness) - ->addField('acousticness', (float)$audioFeature->acousticness) - ->addField('instrumentalness', (float)$audioFeature->instrumentalness) - ->addField('liveness', (float)$audioFeature->liveness) - ->addField('valence', (float)$audioFeature->valence) - ->addField('tempo', round((float)$audioFeature->tempo)) + #->addField('danceability', (float)$audioFeature->danceability) + #->addField('energy', (float)$audioFeature->energy) + #->addField('key', (int)$audioFeature->key) + #->addField('speechiness', (float)$audioFeature->speechiness) + #->addField('acousticness', (float)$audioFeature->acousticness) + #->addField('instrumentalness', (float)$audioFeature->instrumentalness) + 
#->addField('liveness', (float)$audioFeature->liveness) + #->addField('valence', (float)$audioFeature->valence) + #->addField('tempo', round((float)$audioFeature->tempo)) ->time($playedAtDateTime); }