Merge remote-tracking branch 'xavierbeynon/master'

This commit is contained in:
Philipp Hagemeister 2015-01-10 02:03:46 +01:00
commit 51897bb77c
2 changed files with 81 additions and 25 deletions

View File

@ -26,7 +26,7 @@ from .arte import (
ArteTVEmbedIE, ArteTVEmbedIE,
) )
from .atresplayer import AtresPlayerIE from .atresplayer import AtresPlayerIE
from .audiomack import AudiomackIE from .audiomack import AudiomackIE, AudiomackAlbumIE
from .auengine import AUEngineIE from .auengine import AUEngineIE
from .azubu import AzubuIE from .azubu import AzubuIE
from .bambuser import BambuserIE, BambuserChannelIE from .bambuser import BambuserIE, BambuserChannelIE

View File

@ -17,12 +17,13 @@ class AudiomackIE(InfoExtractor):
'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary', 'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
'info_dict': 'info_dict':
{ {
'id': 'roosh-williams/extraordinary', 'id': '310086',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Roosh Williams - Extraordinary' 'artist': 'Roosh Williams',
'title': 'Extraordinary'
} }
}, },
# hosted on soundcloud via audiomack # audiomack wrapper around soundcloud song
{ {
'add_ie': ['Soundcloud'], 'add_ie': ['Soundcloud'],
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare', 'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
@ -37,33 +38,88 @@ class AudiomackIE(InfoExtractor):
}, },
] ]
@staticmethod
def create_song_dictionary(api_response, album_url_tag, track_no=0):
    """Build a song info dictionary from one audiomack API response.

    The 'title', 'artist', 'id' and 'url' entries are copied from
    ``api_response`` whenever they are present, since both sides use the
    same key names.

    Args:
        api_response: mapping returned by the audiomack API for one song.
        album_url_tag: value used as the title when the response has none.
        track_no: value used as the id when the response has none.

    Returns:
        A new dict that always holds 'id' and 'title', plus 'artist' and
        'url' when the response supplied them.
    """
    song = {}
    for field in ('title', 'artist', 'id', 'url'):
        if field in api_response:
            song[field] = api_response[field]
    # Missing metadata: substitute the caller-provided placeholders
    song.setdefault('id', track_no)
    song.setdefault('title', album_url_tag)
    return song
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) # URLs end with [uploader name]/[uploader title]
# this title is whatever the user types in, and is rarely
# the proper song title. Real metadata is in the api response
album_url_tag = self._match_id(url)
# Request the extended version of the api for extra fields like artist and title
api_response = self._download_json( api_response = self._download_json(
"http://www.audiomack.com/api/music/url/song/%s?_=%d" % ( 'http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d' % (
video_id, time.time()), album_url_tag, time.time()),
video_id) album_url_tag)
if "url" not in api_response: # API is inconsistent with errors
raise ExtractorError("Unable to deduce api url of song") if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
realurl = api_response["url"] raise ExtractorError('Invalid url %s', url)
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# - if so, pass the work off to the soundcloud extractor # if so, pass the work off to the soundcloud extractor
if SoundcloudIE.suitable(realurl): if SoundcloudIE.suitable(api_response['url']):
return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'} return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
webpage = self._download_webpage(url, video_id) return self.create_song_dictionary(api_response, album_url_tag)
artist = self._html_search_regex(
r'<span class="artist">(.*?)</span>', webpage, "artist")
songtitle = self._html_search_regex(
r'<h1 class="profile-title song-title"><span class="artist">.*?</span>(.*?)</h1>',
webpage, "title")
title = artist + " - " + songtitle
return {
'id': video_id, class AudiomackAlbumIE(InfoExtractor):
'title': title, _VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
'url': realurl, IE_NAME = 'audiomack:album'
_TESTS = [
# Standard album playlist
{
'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
'playlist_count': 15,
'info_dict':
{
'id': '812251',
'title': 'Tha Tour: Part 2 (Official Mixtape)'
} }
},
# Album playlist ripped from fakeshoredrive with no metadata
{
'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
'playlist_count': 10
}
]
def _real_extract(self, url):
    """Collect every song of an audiomack album into a playlist result.

    The API is queried once per track index, starting at 0, because album
    metadata is only available repeated inside each song's response. The
    loop stops at the first response whose 'url' value is empty.

    Args:
        url: the album page URL; its matched id is the
            "[uploader name]/[uploader title]" tag used in API requests.

    Returns:
        A dict with '_type' set to 'playlist' and an 'entries' list, plus
        'id' / 'title' taken from the first response that carries
        'album_id' / 'album_title'.

    Raises:
        ExtractorError: if a response has no 'url' key or has an 'error'
            key.
    """
    # The tag is user-typed and rarely the proper title; the real
    # metadata comes from the api responses below
    album_url_tag = self._match_id(url)
    playlist = {'_type': 'playlist', 'entries': []}
    album_fields = [('id', 'album_id'), ('title', 'album_title')]

    # The track count is unknown up front, so keep requesting the next
    # index until the API signals there is nothing more
    index = 0
    while True:
        song = self._download_json(
            'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
            % (album_url_tag, index, time.time()), album_url_tag)

        # Total failure - per the original author this only occurs when
        # the url is totally wrong, not in the middle of a valid playlist
        if 'url' not in song or 'error' in song:
            raise ExtractorError('Invalid url for track %d of album url %s' % (index, url))

        # Good request but no such song - usually the end of the playlist
        if not song['url']:
            return playlist

        # First album metadata seen wins; later repeats are ignored
        for result_key, api_key in album_fields:
            if api_key in song and result_key not in playlist:
                playlist[result_key] = song[api_key]
        playlist['entries'].append(
            AudiomackIE.create_song_dictionary(song, album_url_tag, index))
        index += 1