From c4a91be726bd2892931a061ef6703b9bfce2a2d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 26 Jun 2013 00:02:15 +0200 Subject: [PATCH 01/33] Save subtitles using the same code for all the options --- youtube_dl/YoutubeDL.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c76f1118e..4a8cafdb4 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -483,25 +483,13 @@ class YoutubeDL(object): self.report_error(u'Cannot write description file ' + descfn) return - if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']: + subtitles_are_requested = any([self.params.get('writesubtitles', False), + self.params.get('writeautomaticsub'), + self.params.get('allsubtitles', False)]) + + if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE - subtitle = info_dict['subtitles'][0] - (sub_error, sub_lang, sub) = subtitle - sub_format = self.params.get('subtitlesformat') - if sub_error: - self.report_warning("Some error while getting the subtitles") - else: - try: - sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' 
+ sub_format - self.report_writesubtitles(sub_filename) - with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: - subfile.write(sub) - except (OSError, IOError): - self.report_error(u'Cannot write subtitles file ' + descfn) - return - - if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: subtitles = info_dict['subtitles'] sub_format = self.params.get('subtitlesformat') for subtitle in subtitles: From 5d51a883c2049e0186074ded9405b01f79470d57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 26 Jun 2013 11:03:44 +0200 Subject: [PATCH 02/33] Use a dictionary for storing the subtitles The errors while getting the subtitles are reported as warnings; if no subtitles are found, return an empty dict. --- test/test_youtube_subtitles.py | 26 ++++++------- youtube_dl/YoutubeDL.py | 23 +++++------ youtube_dl/extractor/common.py | 3 +- youtube_dl/extractor/youtube.py | 67 +++++++++++++++++---------------- 4 files changed, 59 insertions(+), 60 deletions(-) diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 86e09c9b1..fe0eac680 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -35,47 +35,47 @@ class TestYoutubeSubtitles(unittest.TestCase): DL.params['writesubtitles'] = True IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') - sub = info_dict[0]['subtitles'][0] - self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260') + sub = info_dict[0]['subtitles']['en'] + self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260') def test_youtube_subtitles_it(self): DL = FakeYDL() DL.params['writesubtitles'] = True DL.params['subtitleslang'] = 'it' IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') - sub = info_dict[0]['subtitles'][0] - self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d') + sub = info_dict[0]['subtitles']['it'] + self.assertEqual(md5(sub),
'164a51f16f260476a05b50fe4c2f161d') def test_youtube_onlysubtitles(self): DL = FakeYDL() DL.params['writesubtitles'] = True DL.params['onlysubtitles'] = True IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') - sub = info_dict[0]['subtitles'][0] - self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260') + sub = info_dict[0]['subtitles']['en'] + self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260') def test_youtube_allsubtitles(self): DL = FakeYDL() DL.params['allsubtitles'] = True IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') subtitles = info_dict[0]['subtitles'] - self.assertEqual(len(subtitles), 13) + self.assertEqual(len(subtitles.keys()), 13) def test_youtube_subtitles_sbv_format(self): DL = FakeYDL() DL.params['writesubtitles'] = True DL.params['subtitlesformat'] = 'sbv' IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') - sub = info_dict[0]['subtitles'][0] - self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b') + sub = info_dict[0]['subtitles']['en'] + self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b') def test_youtube_subtitles_vtt_format(self): DL = FakeYDL() DL.params['writesubtitles'] = True DL.params['subtitlesformat'] = 'vtt' IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') - sub = info_dict[0]['subtitles'][0] - self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7') + sub = info_dict[0]['subtitles']['en'] + self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7') def test_youtube_list_subtitles(self): DL = FakeYDL() DL.params['listsubtitles'] = True @@ -88,8 +88,8 @@ class TestYoutubeSubtitles(unittest.TestCase): DL.params['subtitleslang'] = 'it' IE = YoutubeIE(DL) info_dict = IE.extract('8YoUxe5ncPo') - sub = info_dict[0]['subtitles'][0] - self.assertTrue(sub[2] is not None) + sub = info_dict[0]['subtitles']['it'] + self.assertTrue(sub is not None) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 
4a8cafdb4..be6ceafcc 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -492,19 +492,16 @@ class YoutubeDL(object): # that way it will silently go on when used with unsupporting IE subtitles = info_dict['subtitles'] sub_format = self.params.get('subtitlesformat') - for subtitle in subtitles: - (sub_error, sub_lang, sub) = subtitle - if sub_error: - self.report_warning("Some error while getting the subtitles") - else: - try: - sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format - self.report_writesubtitles(sub_filename) - with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: - subfile.write(sub) - except (OSError, IOError): - self.report_error(u'Cannot write subtitles file ' + descfn) - return + for sub_lang in subtitles.keys(): + sub = subtitles[sub_lang] + try: + sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format + self.report_writesubtitles(sub_filename) + with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: + subfile.write(sub) + except (OSError, IOError): + self.report_error(u'Cannot write subtitles file ' + descfn) + return if self.params.get('writeinfojson', False): infofn = filename + u'.info.json' diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da50abfc1..e2e192bef 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -47,7 +47,8 @@ class InfoExtractor(object): uploader_id: Nickname or id of the video uploader. location: Physical location of the video. player_url: SWF Player URL (used for rtmpdump). - subtitles: The subtitle file contents. + subtitles: The subtitle file contents as a dictionary in the format + {language: subtitles}. view_count: How many users have watched the video on the platform. 
urlhandle: [internal] The urlHandle to be used to download the file, like returned by urllib.request.urlopen diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 12e8fc25d..78500b0f7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -209,11 +209,13 @@ class YoutubeIE(InfoExtractor): try: sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - return (u'unable to download video subtitles: %s' % compat_str(err), None) + self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) + return {} sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list) if not sub_lang_list: - return (u'video doesn\'t have subtitles', None) + self._downloader.report_warning(u'video doesn\'t have subtitles') + return {} return sub_lang_list def _list_available_subtitles(self, video_id): @@ -222,8 +224,7 @@ class YoutubeIE(InfoExtractor): def _request_subtitle(self, sub_lang, sub_name, video_id, format): """ - Return tuple: - (error_message, sub_lang, sub) + Return the subtitle as a string or None if they are not found """ self.report_video_subtitles_request(video_id, sub_lang, format) params = compat_urllib_parse.urlencode({ @@ -236,10 +237,12 @@ class YoutubeIE(InfoExtractor): try: sub = compat_urllib_request.urlopen(url).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - return (u'unable to download video subtitles: %s' % compat_str(err), None, None) + self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err))) + return if not sub: - return (u'Did not fetch video subtitles', None, None) - return (None, sub_lang, sub) + self._downloader.report_warning(u'Did not fetch video 
subtitles') + return + return sub def _request_automatic_caption(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an @@ -250,7 +253,8 @@ class YoutubeIE(InfoExtractor): mobj = re.search(r';ytplayer.config = ({.*?});', webpage) err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang if mobj is None: - return [(err_msg, None, None)] + self._downloader.report_warning(err_msg) + return {} player_config = json.loads(mobj.group(1)) try: args = player_config[u'args'] @@ -265,19 +269,20 @@ class YoutubeIE(InfoExtractor): }) subtitles_url = caption_url + '&' + params sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') - return [(None, sub_lang, sub)] + return {sub_lang: sub} except KeyError: - return [(err_msg, None, None)] + self._downloader.report_warning(err_msg) + return {} def _extract_subtitle(self, video_id): """ - Return a list with a tuple: - [(error_message, sub_lang, sub)] + Return a dictionary: {language: subtitles} or {} if the subtitles + couldn't be found """ sub_lang_list = self._get_available_subtitles(video_id) sub_format = self._downloader.params.get('subtitlesformat') - if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles - return [(sub_lang_list[0], None, None)] + if not sub_lang_list: #There was some error, it didn't get the available subtitles + return {} if self._downloader.params.get('subtitleslang', False): sub_lang = self._downloader.params.get('subtitleslang') elif 'en' in sub_lang_list: @@ -285,20 +290,28 @@ class YoutubeIE(InfoExtractor): else: sub_lang = list(sub_lang_list.keys())[0] if not sub_lang in sub_lang_list: - return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)] + self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) + return {} subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), 
video_id, sub_format) - return [subtitle] + if subtitle: + self.to_screen('sub %s' % subtitle[:20]) + return {sub_lang: subtitle} + else: + return {} def _extract_all_subtitles(self, video_id): + """ + Return a dicitonary: {language: subtitles} or {} if the subtitles + couldn't be found + """ sub_lang_list = self._get_available_subtitles(video_id) sub_format = self._downloader.params.get('subtitlesformat') - if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles - return [(sub_lang_list[0], None, None)] - subtitles = [] + subtitles = {} for sub_lang in sub_lang_list: subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) - subtitles.append(subtitle) + if subtitle: + subtitles[sub_lang] = subtitle return subtitles def _print_formats(self, formats): @@ -523,23 +536,11 @@ class YoutubeIE(InfoExtractor): if self._downloader.params.get('writesubtitles', False): video_subtitles = self._extract_subtitle(video_id) - if video_subtitles: - (sub_error, sub_lang, sub) = video_subtitles[0] - if sub_error: - self._downloader.report_warning(sub_error) - - if self._downloader.params.get('writeautomaticsub', False): + elif self._downloader.params.get('writeautomaticsub', False): video_subtitles = self._request_automatic_caption(video_id, video_webpage) - (sub_error, sub_lang, sub) = video_subtitles[0] - if sub_error: - self._downloader.report_warning(sub_error) if self._downloader.params.get('allsubtitles', False): video_subtitles = self._extract_all_subtitles(video_id) - for video_subtitle in video_subtitles: - (sub_error, sub_lang, sub) = video_subtitle - if sub_error: - self._downloader.report_warning(sub_error) if self._downloader.params.get('listsubtitles', False): self._list_available_subtitles(video_id) From 88ae5991cd777f05b437dbe7b4399f1ff25d6b85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 26 Jun 2013 11:39:34 +0200 Subject: 
[PATCH 03/33] YoutubeIE: use the same function for getting the subtitles for the "--write-sub" and "--all-sub" options --- youtube_dl/extractor/youtube.py | 46 +++++++++++---------------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 78500b0f7..30036524f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -273,8 +273,8 @@ class YoutubeIE(InfoExtractor): except KeyError: self._downloader.report_warning(err_msg) return {} - - def _extract_subtitle(self, video_id): + + def _extract_subtitles(self, video_id): """ Return a dictionary: {language: subtitles} or {} if the subtitles couldn't be found @@ -283,30 +283,17 @@ class YoutubeIE(InfoExtractor): sub_format = self._downloader.params.get('subtitlesformat') if not sub_lang_list: #There was some error, it didn't get the available subtitles return {} - if self._downloader.params.get('subtitleslang', False): - sub_lang = self._downloader.params.get('subtitleslang') - elif 'en' in sub_lang_list: - sub_lang = 'en' - else: - sub_lang = list(sub_lang_list.keys())[0] - if not sub_lang in sub_lang_list: - self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) - return {} - - subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) - if subtitle: - self.to_screen('sub %s' % subtitle[:20]) - return {sub_lang: subtitle} - else: - return {} - - def _extract_all_subtitles(self, video_id): - """ - Return a dicitonary: {language: subtitles} or {} if the subtitles - couldn't be found - """ - sub_lang_list = self._get_available_subtitles(video_id) - sub_format = self._downloader.params.get('subtitlesformat') + if self._downloader.params.get('writesubtitles', False): + if self._downloader.params.get('subtitleslang', False): + sub_lang = self._downloader.params.get('subtitleslang') + elif 'en' in sub_lang_list: + 
sub_lang = 'en' + else: + sub_lang = list(sub_lang_list.keys())[0] + if not sub_lang in sub_lang_list: + self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) + return {} + sub_lang_list = {sub_lang: sub_lang_list[sub_lang]} subtitles = {} for sub_lang in sub_lang_list: subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) @@ -534,14 +521,11 @@ class YoutubeIE(InfoExtractor): # subtitles video_subtitles = None - if self._downloader.params.get('writesubtitles', False): - video_subtitles = self._extract_subtitle(video_id) + if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False): + video_subtitles = self._extract_subtitles(video_id) elif self._downloader.params.get('writeautomaticsub', False): video_subtitles = self._request_automatic_caption(video_id, video_webpage) - if self._downloader.params.get('allsubtitles', False): - video_subtitles = self._extract_all_subtitles(video_id) - if self._downloader.params.get('listsubtitles', False): self._list_available_subtitles(video_id) return From 2f799533ae680dc788c8b4f6ce41272cf89689cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 29 Jun 2013 22:11:18 +0200 Subject: [PATCH 04/33] YoutubeIE: don't crash when trying to get automatic captions if the video has standard subtitles.
--- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 30036524f..2b03226f6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -270,7 +270,9 @@ class YoutubeIE(InfoExtractor): subtitles_url = caption_url + '&' + params sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') return {sub_lang: sub} - except KeyError: + # An extractor error can be raise by the download process if there are + # no automatic captions but there are subtitles + except (KeyError, ExtractorError): self._downloader.report_warning(err_msg) return {} From 6804038d065e0eeffd9fca2dc55b3262a9191c10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 20 Jul 2013 12:59:47 +0200 Subject: [PATCH 05/33] Don't try to write the subtitles if it's None --- youtube_dl/YoutubeDL.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index be6ceafcc..e69d844b8 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -494,6 +494,8 @@ class YoutubeDL(object): sub_format = self.params.get('subtitlesformat') for sub_lang in subtitles.keys(): sub = subtitles[sub_lang] + if sub is None: + continue try: sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' 
+ sub_format self.report_writesubtitles(sub_filename) From 2b9213cdc1fd02da75a905e8838e09738b2bc6b8 Mon Sep 17 00:00:00 2001 From: Emilien Kenler Date: Mon, 12 Aug 2013 10:48:40 +0200 Subject: [PATCH 06/33] Update generator Signed-off-by: Emilien Kenler --- devscripts/youtube_genalgo.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index 31d6ec952..edad4c7ba 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -11,21 +11,24 @@ tests = [ # 90 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`", "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"), + # 89 + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'", + "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"), # 88 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<", "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"), - # 87 - vflART1Nf 2013/07/24 + # 87 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", - "tyuioplkjhgfdsazxcv"), - # 86 - vflm_D8eE 2013/07/31 + "/?;:|}][{=+-_)(*&^$#@!MNBVCXZArDFGHJKLPOIUY.<", - ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK.<"), # 85 - vflSAFCP9 2013/07/19 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"), # 84 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", - "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), + "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"), # 83 - vflTWC9KW 2013/08/01 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", 
"qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"), From bda2c49d75e9cd34e8ece2fa3e5375365a84f290 Mon Sep 17 00:00:00 2001 From: Emilien Kenler Date: Sun, 18 Aug 2013 11:10:39 +0200 Subject: [PATCH 07/33] Update algo - see #1254 Signed-off-by: Emilien Kenler --- devscripts/youtube_genalgo.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index edad4c7ba..dca963e8f 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -19,19 +19,19 @@ tests = [ "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"), # 87 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", - "/?;:|}][{=+-_)(*&^$#@!MNBVCXZArDFGHJKLPOIUY.<"), # 86 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", "yuioplkjhgfdsazecvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"), - # 85 - vflSAFCP9 2013/07/19 + # 85 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", - "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"), + ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), # 84 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"), - # 83 - vflTWC9KW 2013/08/01 + # 83 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", - "qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"), + ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), # 82 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), From 
836a086ce9d48338444f010f690119a9a3998517 Mon Sep 17 00:00:00 2001 From: Allan Zhou Date: Mon, 19 Aug 2013 18:22:25 -0700 Subject: [PATCH 08/33] Add YouTube DASH formats to YouTubeIE --- youtube_dl/extractor/youtube.py | 96 ++++++++++++++++++++++++++++++--- 1 file changed, 90 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 843a973ca..248105d7f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -155,11 +155,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Listed in order of quality _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13', '95', '94', '93', '92', '132', '151', - '85', '84', '102', '83', '101', '82', '100', + '85', '84', '102', '83', '101', '82', '100', # 3D + '138', '137', '136', '135', '134', '133', '160', # Dash video mp4 + '141', '140', '139', # Dash auido mp4 + '248', '247', '246', '245', '244', '243', '242', # Dash video webm + '172', '171', # Dash audio webm ] _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13', '95', '94', '93', '92', '132', '151', '85', '102', '84', '101', '83', '100', '82', + '248', '247', '246', '245', '244', '243', '242', # Dash video webm + '172', '171', # Dash audio webm + '138', '137', '136', '135', '134', '133', '160', # Dash video mp4 + '141', '140', '139', # Dash auido mp4 ] _video_extensions = { '13': '3gp', @@ -181,7 +189,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '100': 'webm', '101': 'webm', '102': 'webm', - + # videos that use m3u8 '92': 'mp4', '93': 'mp4', @@ -190,6 +198,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '96': 'mp4', '132': 'mp4', '151': 'mp4', + + # Dash mp4 + '133': 'mp4', + '134': 'mp4', + '135': 'mp4', + '136': 'mp4', + '137': 'mp4', + '138': 'mp4', + '139': 'mp4', + '140': 'mp4', + '141': 'mp4', + '160': 'mp4', + + # Dash webm + '171': 'webm', + '172': 'webm', + '242': 'webm', + 
'243': 'webm', + '244': 'webm', + '245': 'webm', + '246': 'webm', + '247': 'webm', + '248': 'webm', } _video_dimensions = { '5': '240x400', @@ -217,11 +248,58 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '96': '1080p', '100': '360p', '101': '480p', - '102': '720p', + '102': '720p', '132': '240p', '151': '72p', + '133': '240p', + '134': '360p', + '135': '480p', + '136': '720p', + '137': '1080p', + '138': '>1080p', + '139': '48k', + '140': '128k', + '141': '256k', + '160': '192p', + '171': '128k', + '172': '256k', + '242': '240p', + '243': '360p', + '244': '480p', + '245': '480p', + '246': '480p', + '247': '720p', + '248': '1080p', } - _3d_itags = ['85', '84', '102', '83', '101', '82', '100'] + _special_itags = { + '82': '3D', + '83': '3D', + '84': '3D', + '85': '3D', + '100': '3D', + '101': '3D', + '102': '3D', + '133': 'DASH Video', + '134': 'DASH Video', + '135': 'DASH Video', + '136': 'DASH Video', + '137': 'DASH Video', + '138': 'DASH Video', + '139': 'DASH Audio', + '140': 'DASH Audio', + '141': 'DASH Audio', + '160': 'DASH Video', + '171': 'DASH Audio', + '172': 'DASH Audio', + '242': 'DASH Video', + '243': 'DASH Video', + '244': 'DASH Video', + '245': 'DASH Video', + '246': 'DASH Video', + '247': 'DASH Video', + '248': 'DASH Video', + } + IE_NAME = u'youtube' _TESTS = [ { @@ -472,7 +550,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for x in formats: print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'), - ' (3D)' if x in self._3d_itags else '')) + ' ('+self._special_itags[x]+')' if x in self._special_itags else '')) def _extract_id(self, url): mobj = re.match(self._VALID_URL, url, re.VERBOSE) @@ -699,6 +777,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if m_s is not None: self.to_screen(u'%s: Encrypted signatures detected.' 
% video_id) video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] + m_s = re.search(r'[&,]s=', args['adaptive_fmts']) + if m_s is not None: + video_info['url_encoded_fmt_stream_map'][0] += ','+args['adaptive_fmts'] + else: + video_info['url_encoded_fmt_stream_map'][0] += ','+video_info['adaptive_fmts'][0] + except ValueError: pass @@ -758,7 +842,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension, self._video_dimensions.get(format_param, '???'), - ' (3D)' if format_param in self._3d_itags else '') + ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '') results.append({ 'id': video_id, From 211fbc1328edda1752fce9dc5ed604b98f9dc865 Mon Sep 17 00:00:00 2001 From: Allan Zhou Date: Mon, 19 Aug 2013 18:57:55 -0700 Subject: [PATCH 09/33] fix failed tests --- youtube_dl/extractor/youtube.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 248105d7f..bdd399d3e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -777,10 +777,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if m_s is not None: self.to_screen(u'%s: Encrypted signatures detected.' 
% video_id) video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] - m_s = re.search(r'[&,]s=', args['adaptive_fmts']) - if m_s is not None: + m_s = re.search(r'[&,]s=', args['adaptive_fmts'] if 'adaptive_fmts' in args else '') + if m_s is not None and 'adaptive_fmts' in args: video_info['url_encoded_fmt_stream_map'][0] += ','+args['adaptive_fmts'] - else: + elif 'adaptive_fmts' in video_info: video_info['url_encoded_fmt_stream_map'][0] += ','+video_info['adaptive_fmts'][0] except ValueError: From cde846b3d3f59029fc07ecd97e49cfae050af3c9 Mon Sep 17 00:00:00 2001 From: Allan Zhou Date: Tue, 20 Aug 2013 21:42:49 -0700 Subject: [PATCH 10/33] fix code style --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index bdd399d3e..1cd2d40f1 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -777,7 +777,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if m_s is not None: self.to_screen(u'%s: Encrypted signatures detected.' 
% video_id) video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] - m_s = re.search(r'[&,]s=', args['adaptive_fmts'] if 'adaptive_fmts' in args else '') + m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u'')) if m_s is not None and 'adaptive_fmts' in args: video_info['url_encoded_fmt_stream_map'][0] += ','+args['adaptive_fmts'] elif 'adaptive_fmts' in video_info: From b7a68384078ec0d97fb3c8e4a3100e9c60f340d0 Mon Sep 17 00:00:00 2001 From: Allan Zhou Date: Tue, 20 Aug 2013 21:57:32 -0700 Subject: [PATCH 11/33] address review comment --- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1cd2d40f1..e573b021d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -778,7 +778,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.to_screen(u'%s: Encrypted signatures detected.' % video_id) video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u'')) - if m_s is not None and 'adaptive_fmts' in args: + if 'url_encoded_fmt_stream_map' not in video_info or not video_info['url_encoded_fmt_stream_map']: + video_info['url_encoded_fmt_stream_map'] = [''] + if m_s is not None: video_info['url_encoded_fmt_stream_map'][0] += ','+args['adaptive_fmts'] elif 'adaptive_fmts' in video_info: video_info['url_encoded_fmt_stream_map'][0] += ','+video_info['adaptive_fmts'][0] From 37b6d5f684d409365bbac6d3f2b8074b57e643a8 Mon Sep 17 00:00:00 2001 From: Allan Zhou Date: Tue, 20 Aug 2013 23:51:05 -0700 Subject: [PATCH 12/33] fix hls test --- youtube_dl/extractor/youtube.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e573b021d..1599dd484 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -778,13 
+778,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.to_screen(u'%s: Encrypted signatures detected.' % video_id) video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u'')) - if 'url_encoded_fmt_stream_map' not in video_info or not video_info['url_encoded_fmt_stream_map']: - video_info['url_encoded_fmt_stream_map'] = [''] if m_s is not None: - video_info['url_encoded_fmt_stream_map'][0] += ','+args['adaptive_fmts'] + if 'url_encoded_fmt_stream_map' in video_info: + video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts'] + else: + video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']] elif 'adaptive_fmts' in video_info: - video_info['url_encoded_fmt_stream_map'][0] += ','+video_info['adaptive_fmts'][0] - + if 'url_encoded_fmt_stream_map' in video_info: + video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0] + else: + video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts'] except ValueError: pass From 939fbd26ac90ce22d872d111360b63163dda5dc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 22 Aug 2013 19:45:24 +0200 Subject: [PATCH 13/33] [youtube] fix the order of DASH formats --- youtube_dl/extractor/youtube.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7d6d07b17..c539b6891 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -155,19 +155,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Listed in order of quality _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13', '95', '94', '93', '92', '132', '151', - '85', '84', '102', '83', '101', '82', '100', # 3D - '138', '137', '136', '135', '134', '133', '160', # Dash video mp4 - '141', '140', '139', # Dash auido mp4 - 
'248', '247', '246', '245', '244', '243', '242', # Dash video webm - '172', '171', # Dash audio webm + # 3D + '85', '84', '102', '83', '101', '82', '100', + # Dash video + '138', '137', '248', '136', '247', '135', '246', + '245', '244', '134', '243', '133', '242', '160', + # Dash audio + '141', '172', '140', '171', '139', ] _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13', '95', '94', '93', '92', '132', '151', '85', '102', '84', '101', '83', '100', '82', - '248', '247', '246', '245', '244', '243', '242', # Dash video webm - '172', '171', # Dash audio webm - '138', '137', '136', '135', '134', '133', '160', # Dash video mp4 - '141', '140', '139', # Dash auido mp4 + # Dash video + '138', '248', '137', '247', '136', '246', '245', + '244', '135', '243', '134', '242', '133', '160', + # Dash audio + '172', '141', '171', '140', '139', ] _video_extensions = { '13': '3gp', From 3669cdba103a293eb96dd8e7791ef184a7bf71d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 22 Aug 2013 22:35:15 +0200 Subject: [PATCH 14/33] [youtube] update algo for length 82 (fixes #1296) --- devscripts/youtube_genalgo.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index dca963e8f..504ca1b2c 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -34,7 +34,7 @@ tests = [ ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), # 82 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", - "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), + "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"), # 81 - vflLC8JvQ 2013/07/25 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.", 
"C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c539b6891..e402ef17f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -431,7 +431,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): elif len(s) == 83: return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] elif len(s) == 82: - return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] + return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82] elif len(s) == 81: return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] elif len(s) == 79: From 1865ed31b955795f9859df5c1c400d172ae9a28a Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Thu, 8 Aug 2013 09:53:25 +0200 Subject: [PATCH 15/33] [subtitles] separated subtitle options in their own group --- youtube_dl/__init__.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 3f77dba69..f4890f1a6 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -120,6 +120,7 @@ def parseOpts(overrideArguments=None): selection = optparse.OptionGroup(parser, 'Video Selection') authentication = optparse.OptionGroup(parser, 'Authentication Options') video_format = optparse.OptionGroup(parser, 'Video Format Options') + subtitles = optparse.OptionGroup(parser, 'Subtitle Options') downloader = optparse.OptionGroup(parser, 'Download Options') postproc = optparse.OptionGroup(parser, 'Post-processing Options') filesystem = optparse.OptionGroup(parser, 'Filesystem Options') @@ -186,25 +187,26 @@ def parseOpts(overrideArguments=None): action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') video_format.add_option('-F', 
'--list-formats', action='store_true', dest='listformats', help='list all available formats (currently youtube only)') - video_format.add_option('--write-sub', '--write-srt', + + subtitles.add_option('--write-sub', '--write-srt', action='store_true', dest='writesubtitles', help='write subtitle file (currently youtube only)', default=False) - video_format.add_option('--write-auto-sub', '--write-automatic-sub', + subtitles.add_option('--write-auto-sub', '--write-automatic-sub', action='store_true', dest='writeautomaticsub', help='write automatic subtitle file (currently youtube only)', default=False) - video_format.add_option('--only-sub', + subtitles.add_option('--only-sub', action='store_true', dest='skip_download', help='[deprecated] alias of --skip-download', default=False) - video_format.add_option('--all-subs', + subtitles.add_option('--all-subs', action='store_true', dest='allsubtitles', - help='downloads all the available subtitles of the video (currently youtube only)', default=False) - video_format.add_option('--list-subs', + help='downloads all the available subtitles of the video', default=False) + subtitles.add_option('--list-subs', action='store_true', dest='listsubtitles', - help='lists all available subtitles for the video (currently youtube only)', default=False) - video_format.add_option('--sub-format', + help='lists all available subtitles for the video', default=False) + subtitles.add_option('--sub-format', action='store', dest='subtitlesformat', metavar='FORMAT', - help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt') - video_format.add_option('--sub-lang', '--srt-lang', + help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt') + subtitles.add_option('--sub-lang', '--srt-lang', action='store', dest='subtitleslang', metavar='LANG', help='language of the subtitles to download (optional) use IETF language tags like \'en\'') @@ -329,6 +331,7 @@ def parseOpts(overrideArguments=None): 
parser.add_option_group(filesystem) parser.add_option_group(verbosity) parser.add_option_group(video_format) + parser.add_option_group(subtitles) parser.add_option_group(authentication) parser.add_option_group(postproc) @@ -344,7 +347,7 @@ def parseOpts(overrideArguments=None): userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') systemConf = _readOptions('/etc/youtube-dl.conf') userConf = _readOptions(userConfFile) - commandLineConf = sys.argv[1:] + commandLineConf = sys.argv[1:] argv = systemConf + userConf + commandLineConf opts, args = parser.parse_args(argv) if opts.verbose: @@ -378,7 +381,7 @@ def _real_main(argv=None): # Set user agent if opts.user_agent is not None: std_headers['User-Agent'] = opts.user_agent - + # Set referer if opts.referer is not None: std_headers['Referer'] = opts.referer From 10204dc89893e49ab9d0247eb17bc79c58e2d485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 22 Aug 2013 23:23:52 +0200 Subject: [PATCH 16/33] [videofyme] Add an additional quality (they change between downloads of the info) and update md5 sum of the test video --- youtube_dl/extractor/videofyme.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/videofyme.py b/youtube_dl/extractor/videofyme.py index 04106672b..94f64ffa5 100644 --- a/youtube_dl/extractor/videofyme.py +++ b/youtube_dl/extractor/videofyme.py @@ -14,7 +14,7 @@ class VideofyMeIE(InfoExtractor): _TEST = { u'url': u'http://www.videofy.me/thisisvideofyme/1100701', u'file': u'1100701.mp4', - u'md5': u'2046dd5758541d630bfa93e741e2fd79', + u'md5': u'c77d700bdc16ae2e9f3c26019bd96143', u'info_dict': { u'title': u'This is VideofyMe', u'description': None, @@ -32,9 +32,8 @@ class VideofyMeIE(InfoExtractor): config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) video = config.find('video') sources = video.find('sources') - url_node = find_xpath_attr(sources, 'source', 'id', 'HQ 
on') - if url_node is None: - url_node = find_xpath_attr(sources, 'source', 'id', 'HQ off') + url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key) + for key in ['on', 'av', 'off']] if node is not None) video_url = url_node.find('url').text return {'id': video_id, From 02bcf0d3894d5e188515213db6c0234173fad4b9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 22 Aug 2013 23:29:42 +0200 Subject: [PATCH 17/33] release 2013.08.22 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 58e26bc49..3536e923f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.21' +__version__ = '2013.08.22' From 066090dd3f15886be40310f5a0702fe485c1512e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 23 Aug 2013 11:33:56 +0200 Subject: [PATCH 18/33] [youtube] add algo for length 80 and update player info --- devscripts/youtube_genalgo.py | 5 ++++- youtube_dl/extractor/youtube.py | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index 504ca1b2c..663ccc422 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -32,12 +32,15 @@ tests = [ # 83 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), - # 82 + # 82 - vflZK4ZYR 2013/08/23 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"), # 81 - vflLC8JvQ 2013/07/25 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.", "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), + # 80 - vflZK4ZYR 2013/08/23 (sporadic) + 
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>", + "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"), # 79 - vflLC8JvQ 2013/07/25 (sporadic) ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/", "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e402ef17f..2a3e5031c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -434,6 +434,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82] elif len(s) == 81: return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] + elif len(s) == 80: + return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80] elif len(s) == 79: return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] From df3df7fb6458132a05a4cb64ab37701e7b6a39f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 23 Aug 2013 13:14:22 +0200 Subject: [PATCH 19/33] [youtube] Fix download of subtitles with '--all-subs' If _extract_subtitles is called the option 'write subtitles' is always true. 
--- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c4d763ee1..446d53f64 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -534,7 +534,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): sub_format = self._downloader.params.get('subtitlesformat') if not sub_lang_list: #There was some error, it didn't get the available subtitles return {} - if self._downloader.params.get('writesubtitles', False): + if self._downloader.params.get('allsubtitles', False): + pass + else: if self._downloader.params.get('subtitleslang', False): sub_lang = self._downloader.params.get('subtitleslang') elif 'en' in sub_lang_list: From d4051a8e051a06ddeab905a4b5fcc7ddb70952bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 20 Jul 2013 12:48:57 +0200 Subject: [PATCH 20/33] Add a post processor for embedding subtitles in mp4 videos (closes #1052) --- youtube_dl/PostProcessor.py | 235 +++++++++++++++++++++++++++++++++++- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 4 + youtube_dl/utils.py | 3 + 4 files changed, 241 insertions(+), 3 deletions(-) diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index fddf58606..336a42559 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -71,12 +71,17 @@ class FFmpegPostProcessor(PostProcessor): programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] return dict((program, executable(program)) for program in programs) - def run_ffmpeg(self, path, out_path, opts): + def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): if not self._exes['ffmpeg'] and not self._exes['avconv']: raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. 
Please install one.') - cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)] + + files_cmd = [] + for path in input_paths: + files_cmd.extend(['-i', encodeFilename(path)]) + cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd + opts + [encodeFilename(self._ffmpeg_filename_argument(out_path))]) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout,stderr = p.communicate() if p.returncode != 0: @@ -84,6 +89,9 @@ class FFmpegPostProcessor(PostProcessor): msg = stderr.strip().split('\n')[-1] raise FFmpegPostProcessorError(msg) + def run_ffmpeg(self, path, out_path, opts): + self.run_ffmpeg_multiple_files([path], out_path, opts) + def _ffmpeg_filename_argument(self, fn): # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details if fn.startswith(u'-'): @@ -232,3 +240,226 @@ class FFmpegVideoConvertor(FFmpegPostProcessor): information['format'] = self._preferedformat information['ext'] = self._preferedformat return False,information + + +class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): + # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt + _lang_map = { + 'aa': 'aar', + 'ab': 'abk', + 'ae': 'ave', + 'af': 'afr', + 'ak': 'aka', + 'am': 'amh', + 'an': 'arg', + 'ar': 'ara', + 'as': 'asm', + 'av': 'ava', + 'ay': 'aym', + 'az': 'aze', + 'ba': 'bak', + 'be': 'bel', + 'bg': 'bul', + 'bh': 'bih', + 'bi': 'bis', + 'bm': 'bam', + 'bn': 'ben', + 'bo': 'bod', + 'br': 'bre', + 'bs': 'bos', + 'ca': 'cat', + 'ce': 'che', + 'ch': 'cha', + 'co': 'cos', + 'cr': 'cre', + 'cs': 'ces', + 'cu': 'chu', + 'cv': 'chv', + 'cy': 'cym', + 'da': 'dan', + 'de': 'deu', + 'dv': 'div', + 'dz': 'dzo', + 'ee': 'ewe', + 'el': 'ell', + 'en': 'eng', + 'eo': 'epo', + 'es': 'spa', + 'et': 'est', + 'eu': 'eus', + 'fa': 'fas', + 'ff': 'ful', + 'fi': 'fin', + 'fj': 'fij', + 'fo': 'fao', + 'fr': 'fra', + 'fy': 'fry', + 'ga': 'gle', + 'gd': 'gla', + 'gl': 'glg', + 'gn': 'grn', + 'gu': 'guj', + 'gv': 
'glv', + 'ha': 'hau', + 'he': 'heb', + 'hi': 'hin', + 'ho': 'hmo', + 'hr': 'hrv', + 'ht': 'hat', + 'hu': 'hun', + 'hy': 'hye', + 'hz': 'her', + 'ia': 'ina', + 'id': 'ind', + 'ie': 'ile', + 'ig': 'ibo', + 'ii': 'iii', + 'ik': 'ipk', + 'io': 'ido', + 'is': 'isl', + 'it': 'ita', + 'iu': 'iku', + 'ja': 'jpn', + 'jv': 'jav', + 'ka': 'kat', + 'kg': 'kon', + 'ki': 'kik', + 'kj': 'kua', + 'kk': 'kaz', + 'kl': 'kal', + 'km': 'khm', + 'kn': 'kan', + 'ko': 'kor', + 'kr': 'kau', + 'ks': 'kas', + 'ku': 'kur', + 'kv': 'kom', + 'kw': 'cor', + 'ky': 'kir', + 'la': 'lat', + 'lb': 'ltz', + 'lg': 'lug', + 'li': 'lim', + 'ln': 'lin', + 'lo': 'lao', + 'lt': 'lit', + 'lu': 'lub', + 'lv': 'lav', + 'mg': 'mlg', + 'mh': 'mah', + 'mi': 'mri', + 'mk': 'mkd', + 'ml': 'mal', + 'mn': 'mon', + 'mr': 'mar', + 'ms': 'msa', + 'mt': 'mlt', + 'my': 'mya', + 'na': 'nau', + 'nb': 'nob', + 'nd': 'nde', + 'ne': 'nep', + 'ng': 'ndo', + 'nl': 'nld', + 'nn': 'nno', + 'no': 'nor', + 'nr': 'nbl', + 'nv': 'nav', + 'ny': 'nya', + 'oc': 'oci', + 'oj': 'oji', + 'om': 'orm', + 'or': 'ori', + 'os': 'oss', + 'pa': 'pan', + 'pi': 'pli', + 'pl': 'pol', + 'ps': 'pus', + 'pt': 'por', + 'qu': 'que', + 'rm': 'roh', + 'rn': 'run', + 'ro': 'ron', + 'ru': 'rus', + 'rw': 'kin', + 'sa': 'san', + 'sc': 'srd', + 'sd': 'snd', + 'se': 'sme', + 'sg': 'sag', + 'si': 'sin', + 'sk': 'slk', + 'sl': 'slv', + 'sm': 'smo', + 'sn': 'sna', + 'so': 'som', + 'sq': 'sqi', + 'sr': 'srp', + 'ss': 'ssw', + 'st': 'sot', + 'su': 'sun', + 'sv': 'swe', + 'sw': 'swa', + 'ta': 'tam', + 'te': 'tel', + 'tg': 'tgk', + 'th': 'tha', + 'ti': 'tir', + 'tk': 'tuk', + 'tl': 'tgl', + 'tn': 'tsn', + 'to': 'ton', + 'tr': 'tur', + 'ts': 'tso', + 'tt': 'tat', + 'tw': 'twi', + 'ty': 'tah', + 'ug': 'uig', + 'uk': 'ukr', + 'ur': 'urd', + 'uz': 'uzb', + 've': 'ven', + 'vi': 'vie', + 'vo': 'vol', + 'wa': 'wln', + 'wo': 'wol', + 'xh': 'xho', + 'yi': 'yid', + 'yo': 'yor', + 'za': 'zha', + 'zh': 'zho', + 'zu': 'zul', + } + + def __init__(self, downloader=None, 
subtitlesformat='srt'): + super(FFmpegEmbedSubtitlePP, self).__init__(downloader) + self._subformat = subtitlesformat + + @classmethod + def _conver_lang_code(cls, code): + """Convert language code from ISO 639-1 to ISO 639-2/T""" + return cls._lang_map.get(code[:2]) + + def run(self, information): + if information['ext'] != u'mp4': + self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files') + return True, information + sub_langs = [key for key in information['subtitles']] + + filename = information['filepath'] + input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs] + + opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy'] + for (i, lang) in enumerate(sub_langs): + opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text']) + lang_code = self._conver_lang_code(lang) + if lang_code is not None: + opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) + opts.extend(['-f', 'mp4']) + + temp_filename = filename + u'.temp' + self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + + return True, information diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 74e7b8de5..1fd610a6e 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -497,7 +497,7 @@ class YoutubeDL(object): if sub is None: continue try: - sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' 
+ sub_format + sub_filename = subtitles_filename(filename, sub_lang, sub_format) self.report_writesubtitles(sub_filename) with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: subfile.write(sub) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index f4890f1a6..441ca6b6a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -323,6 +323,8 @@ def parseOpts(overrideArguments=None): help='keeps the video file on disk after the post-processing; the video is erased by default') postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, help='do not overwrite post-processed files; the post-processed files are overwritten by default') + postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False, + help='embed subtitles in the video (only for mp4 videos)') parser.add_option_group(general) @@ -611,6 +613,8 @@ def _real_main(argv=None): ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites)) if opts.recodevideo: ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) + if opts.embedsubtitles: + ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat)) # Update version if opts.update_self: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5dd5b2923..52cfb8a6d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -657,6 +657,9 @@ def determine_ext(url, default_ext=u'unknown_video'): else: return default_ext +def subtitles_filename(filename, sub_lang, sub_format): + return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' 
+ sub_format + def date_from_str(date_str): """ Return a datetime object from a string in the format YYYYMMDD or From ce34e9ce5ec5cc403cf407ce8046b6271835cd6c Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 23 Aug 2013 16:33:41 +0200 Subject: [PATCH 21/33] XHamsterIE: Fix video extension Cut off GET parameter --- youtube_dl/extractor/xhamster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 0f1feeffd..5bedc25ce 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -36,7 +36,7 @@ class XHamsterIE(InfoExtractor): video_url = compat_urllib_parse.unquote(mobj.group('file')) else: video_url = mobj.group('server')+'/key='+mobj.group('file') - video_extension = video_url.split('.')[-1] + video_extension = video_url.split('.')[-1].split('?')[0] video_title = self._html_search_regex(r'(?P<title>.+?) - xHamster\.com', webpage, u'title') From 4353cf51a0d5a812ab8a6bda59be7c58149fb40e Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 23 Aug 2013 16:40:20 +0200 Subject: [PATCH 22/33] XHamsterIE: Add video description --- youtube_dl/extractor/xhamster.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 5bedc25ce..0fd077583 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -3,7 +3,7 @@ import re from .common import InfoExtractor from ..utils import ( compat_urllib_parse, - + unescapeHTML, ExtractorError, ) @@ -41,10 +41,12 @@ class XHamsterIE(InfoExtractor): video_title = self._html_search_regex(r'(?P<title>.+?) 
- xHamster\.com', webpage, u'title') - # Can't see the description anywhere in the UI - # video_description = self._html_search_regex(r'Description: (?P[^<]+)', - # webpage, u'description', fatal=False) - # if video_description: video_description = unescapeHTML(video_description) + # Only a few videos have an description + mobj = re.search('Description: (?P[^<]+)', webpage) + if mobj: + video_description = unescapeHTML(mobj.group('description')) + else: + video_description = None mobj = re.search(r'hint=\'(?P[0-9]{4})-(?P[0-9]{2})-(?P[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage) if mobj: @@ -64,7 +66,7 @@ class XHamsterIE(InfoExtractor): 'url': video_url, 'ext': video_extension, 'title': video_title, - # 'description': video_description, + 'description': video_description, 'upload_date': video_upload_date, 'uploader_id': video_uploader_id, 'thumbnail': video_thumbnail From fc483bb6af4bce923c65a67618019a72071f30ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 23 Aug 2013 17:23:34 +0200 Subject: [PATCH 23/33] [xhamster] use determine_ext --- youtube_dl/extractor/xhamster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 0fd077583..88b8b6be0 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( compat_urllib_parse, unescapeHTML, + determine_ext, ExtractorError, ) @@ -36,7 +37,6 @@ class XHamsterIE(InfoExtractor): video_url = compat_urllib_parse.unquote(mobj.group('file')) else: video_url = mobj.group('server')+'/key='+mobj.group('file') - video_extension = video_url.split('.')[-1].split('?')[0] video_title = self._html_search_regex(r'(?P<title>.+?) 
- xHamster\.com', webpage, u'title') @@ -64,7 +64,7 @@ class XHamsterIE(InfoExtractor): return [{ 'id': video_id, 'url': video_url, - 'ext': video_extension, + 'ext': determine_ext(video_url), 'title': video_title, 'description': video_description, 'upload_date': video_upload_date, From 9af73dc4fc7dc54def861f94f9db4399b8bf928e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 23 Aug 2013 18:17:43 +0200 Subject: [PATCH 24/33] Print a message before embedding the subtitles --- youtube_dl/PostProcessor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index 336a42559..c02ed7148 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -458,6 +458,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): opts.extend(['-f', 'mp4']) temp_filename = filename + u'.temp' + self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename) self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) From aa6a10c44a8e2e86f709c5301f9ea6ac3f01f002 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 23 Aug 2013 18:34:57 +0200 Subject: [PATCH 25/33] Allow to specify multiple subtitles languages separated by commas (closes #518) --- test/test_youtube_subtitles.py | 13 +++++++++++-- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 12 ++++++++---- youtube_dl/extractor/youtube.py | 29 ++++++++++++++++------------- 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index fe0eac680..641206277 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -40,7 +40,7 @@ class TestYoutubeSubtitles(unittest.TestCase): def test_youtube_subtitles_it(self): DL = FakeYDL() DL.params['writesubtitles'] = 
True - DL.params['subtitleslang'] = 'it' + DL.params['subtitleslangs'] = ['it'] IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') sub = info_dict[0]['subtitles']['it'] @@ -85,11 +85,20 @@ class TestYoutubeSubtitles(unittest.TestCase): def test_youtube_automatic_captions(self): DL = FakeYDL() DL.params['writeautomaticsub'] = True - DL.params['subtitleslang'] = 'it' + DL.params['subtitleslangs'] = ['it'] IE = YoutubeIE(DL) info_dict = IE.extract('8YoUxe5ncPo') sub = info_dict[0]['subtitles']['it'] self.assertTrue(sub is not None) + def test_youtube_multiple_langs(self): + DL = FakeYDL() + DL.params['writesubtitles'] = True + langs = ['it', 'fr', 'de'] + DL.params['subtitleslangs'] = langs + IE = YoutubeIE(DL) + subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles'] + for lang in langs: + self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1fd610a6e..3fc4ec378 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -76,7 +76,7 @@ class YoutubeDL(object): allsubtitles: Downloads all the subtitles of the video listsubtitles: Lists all available subtitles for the video subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt) - subtitleslang: Language of the subtitles to download + subtitleslangs: List of languages of the subtitles to download keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. 
skip_download: Skip the actual download of the video file diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 441ca6b6a..614429073 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -83,6 +83,9 @@ def parseOpts(overrideArguments=None): return "".join(opts) + def _comma_separated_values_options_callback(option, opt_str, value, parser): + setattr(parser.values, option.dest, value.split(',')) + def _find_term_columns(): columns = os.environ.get('COLUMNS', None) if columns: @@ -206,9 +209,10 @@ def parseOpts(overrideArguments=None): subtitles.add_option('--sub-format', action='store', dest='subtitlesformat', metavar='FORMAT', help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt') - subtitles.add_option('--sub-lang', '--srt-lang', - action='store', dest='subtitleslang', metavar='LANG', - help='language of the subtitles to download (optional) use IETF language tags like \'en\'') + subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang', + action='callback', dest='subtitleslang', metavar='LANGS', type='str', + default=[], callback=_comma_separated_values_options_callback, + help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') downloader.add_option('-r', '--rate-limit', dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 
50k or 44.6m)') @@ -573,7 +577,7 @@ def _real_main(argv=None): 'allsubtitles': opts.allsubtitles, 'listsubtitles': opts.listsubtitles, 'subtitlesformat': opts.subtitlesformat, - 'subtitleslang': opts.subtitleslang, + 'subtitleslangs': opts.subtitleslang, 'matchtitle': decodeOption(opts.matchtitle), 'rejecttitle': decodeOption(opts.rejecttitle), 'max_downloads': opts.max_downloads, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 446d53f64..5f843a871 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -496,7 +496,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _request_automatic_caption(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an argument to speed up the process.""" - sub_lang = self._downloader.params.get('subtitleslang') or 'en' + sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0] sub_format = self._downloader.params.get('subtitlesformat') self.to_screen(u'%s: Looking for automatic captions' % video_id) mobj = re.search(r';ytplayer.config = ({.*?});', webpage) @@ -530,23 +530,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor): Return a dictionary: {language: subtitles} or {} if the subtitles couldn't be found """ - sub_lang_list = self._get_available_subtitles(video_id) + available_subs_list = self._get_available_subtitles(video_id) sub_format = self._downloader.params.get('subtitlesformat') - if not sub_lang_list: #There was some error, it didn't get the available subtitles + if not available_subs_list: #There was some error, it didn't get the available subtitles return {} if self._downloader.params.get('allsubtitles', False): - pass + sub_lang_list = available_subs_list else: - if self._downloader.params.get('subtitleslang', False): - sub_lang = self._downloader.params.get('subtitleslang') - elif 'en' in sub_lang_list: - sub_lang = 'en' + if self._downloader.params.get('subtitleslangs', False): + reqested_langs = 
self._downloader.params.get('subtitleslangs') + elif 'en' in available_subs_list: + reqested_langs = ['en'] else: - sub_lang = list(sub_lang_list.keys())[0] - if not sub_lang in sub_lang_list: - self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) - return {} - sub_lang_list = {sub_lang: sub_lang_list[sub_lang]} + reqested_langs = [list(available_subs_list.keys())[0]] + + sub_lang_list = {} + for sub_lang in reqested_langs: + if not sub_lang in available_subs_list: + self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) + continue + sub_lang_list[sub_lang] = available_subs_list[sub_lang] subtitles = {} for sub_lang in sub_lang_list: subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) From 306170518f016ac7ebf8311d7d2a23937aeaf1b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 23 Aug 2013 22:36:59 +0200 Subject: [PATCH 26/33] [youtube] update algo for length 86 (fixes #1302) --- devscripts/youtube_genalgo.py | 4 ++-- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index 663ccc422..014324439 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -20,9 +20,9 @@ tests = [ # 87 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"), - # 86 + # 86 - vflh9ybst 2013/08/23 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", - "yuioplkjhgfdsazecvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"), + "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"), # 85 
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5f843a871..e4987b2b3 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -423,7 +423,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): elif len(s) == 87: return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] elif len(s) == 86: - return s[5:20] + s[2] + s[21:] + return s[5:40] + s[3] + s[41:48] + s[0] + s[49:86] elif len(s) == 85: return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] elif len(s) == 84: From 8ae7be3ef4873509241d98fd1adc2b82956f8def Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 23 Aug 2013 23:09:53 +0200 Subject: [PATCH 27/33] release 2013.08.23 --- README.md | 16 ++++++++++------ youtube_dl/version.py | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 560bcdca1..75068fe56 100644 --- a/README.md +++ b/README.md @@ -120,18 +120,20 @@ which means you can modify it, redistribute it or use it however you like. 
--max-quality FORMAT highest quality format to download -F, --list-formats list all available formats (currently youtube only) + +## Subtitle Options: --write-sub write subtitle file (currently youtube only) --write-auto-sub write automatic subtitle file (currently youtube only) --only-sub [deprecated] alias of --skip-download --all-subs downloads all the available subtitles of the - video (currently youtube only) + video --list-subs lists all available subtitles for the video - (currently youtube only) - --sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt) - (currently youtube only) - --sub-lang LANG language of the subtitles to download (optional) - use IETF language tags like 'en' + --sub-format FORMAT subtitle format (default=srt) ([sbv/vtt] youtube + only) + --sub-lang LANGS languages of the subtitles to download (optional) + separated by commas, use IETF language tags like + 'en,pt' ## Authentication Options: -u, --username USERNAME account username @@ -153,6 +155,8 @@ which means you can modify it, redistribute it or use it however you like. 
processing; the video is erased by default --no-post-overwrites do not overwrite post-processed files; the post- processed files are overwritten by default + --embed-subs embed subtitles in the video (only for mp4 + videos) # CONFIGURATION diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3536e923f..c10ebd4e8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.22' +__version__ = '2013.08.23' From d2d1eb5b0a3ad2aa9d4867ba09b4b922b170f36a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 23 Aug 2013 23:57:23 +0200 Subject: [PATCH 28/33] Switch to domain yt-dl.org --- devscripts/gh-pages/add-version.py | 26 +++++++++++++++----------- devscripts/gh-pages/update-feed.py | 3 +-- devscripts/release.sh | 2 +- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/devscripts/gh-pages/add-version.py b/devscripts/gh-pages/add-version.py index 6af8bb9d8..116420ef2 100755 --- a/devscripts/gh-pages/add-version.py +++ b/devscripts/gh-pages/add-version.py @@ -6,28 +6,32 @@ import hashlib import urllib.request if len(sys.argv) <= 1: - print('Specify the version number as parameter') - sys.exit() + print('Specify the version number as parameter') + sys.exit() version = sys.argv[1] with open('update/LATEST_VERSION', 'w') as f: - f.write(version) + f.write(version) versions_info = json.load(open('update/versions.json')) if 'signature' in versions_info: - del versions_info['signature'] + del versions_info['signature'] new_version = {} -filenames = {'bin': 'youtube-dl', 'exe': 'youtube-dl.exe', 'tar': 'youtube-dl-%s.tar.gz' % version} +filenames = { + 'bin': 'youtube-dl', + 'exe': 'youtube-dl.exe', + 'tar': 'youtube-dl-%s.tar.gz' % version} for key, filename in filenames.items(): - print('Downloading and checksumming %s...' 
%filename) - url = 'http://youtube-dl.org/downloads/%s/%s' % (version, filename) - data = urllib.request.urlopen(url).read() - sha256sum = hashlib.sha256(data).hexdigest() - new_version[key] = (url, sha256sum) + print('Downloading and checksumming %s...' % filename) + url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename) + data = urllib.request.urlopen(url).read() + sha256sum = hashlib.sha256(data).hexdigest() + new_version[key] = (url, sha256sum) versions_info['versions'][version] = new_version versions_info['latest'] = version -json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True) \ No newline at end of file +with open('update/versions.json', 'w') as jsonf: + json.dump(versions_info, jsonf, indent=4, sort_keys=True) diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py index cfff05fc8..16571a924 100755 --- a/devscripts/gh-pages/update-feed.py +++ b/devscripts/gh-pages/update-feed.py @@ -22,7 +22,7 @@ entry_template=textwrap.dedent(""" @@ -54,4 +54,3 @@ atom_template = atom_template.replace('@ENTRIES@', entries_str) with open('update/releases.atom','w',encoding='utf-8') as atom_file: atom_file.write(atom_template) - diff --git a/devscripts/release.sh b/devscripts/release.sh index 46c31e437..24c9ad8d8 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -67,7 +67,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS) git checkout HEAD -- youtube-dl youtube-dl.exe -/bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..." +/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." 
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" From f2aeefe29c72e7c6165a35ba8153b52f96ee42af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 24 Aug 2013 10:48:12 +0200 Subject: [PATCH 29/33] [youtube] update algo for length 84 --- devscripts/youtube_genalgo.py | 4 ++-- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index 014324439..6f1d6ef99 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -26,9 +26,9 @@ tests = [ # 85 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), - # 84 + # 84 - vflh9ybst 2013/08/23 (sporadic) ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", - "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"), + "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"), # 83 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e4987b2b3..af01c9da0 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -427,7 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): elif len(s) == 85: return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] elif len(s) == 84: - return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27] + return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84] elif len(s) == 83: return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] 
+ s[44:1:-1] + s[0] elif len(s) == 82: From 9460db832ce2dc61456b2ad82e3b6190465a2133 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 24 Aug 2013 21:10:03 +0200 Subject: [PATCH 30/33] [ro220] Add support for 220.ro --- youtube_dl/extractor/__init__.py | 3 +++ youtube_dl/extractor/ro220.py | 42 ++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/ro220.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b4db8f0bf..39c530ba3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -57,6 +57,7 @@ from .pornotube import PornotubeIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE from .ringtv import RingTVIE +from .ro220 import Ro220IE from .roxwel import RoxwelIE from .rtlnow import RTLnowIE from .sina import SinaIE @@ -116,12 +117,14 @@ _ALL_CLASSES = [ ] _ALL_CLASSES.append(GenericIE) + def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. 
""" return [klass() for klass in _ALL_CLASSES] + def get_info_extractor(ie_name): """Returns the info extractor class with the given ie_name""" return globals()[ie_name+'IE'] diff --git a/youtube_dl/extractor/ro220.py b/youtube_dl/extractor/ro220.py new file mode 100644 index 000000000..c32f64d99 --- /dev/null +++ b/youtube_dl/extractor/ro220.py @@ -0,0 +1,42 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + compat_parse_qs, +) + + +class Ro220IE(InfoExtractor): + IE_NAME = '220.ro' + _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)' + _TEST = { + u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/", + u'file': u'LYV6doKo7f.mp4', + u'md5': u'03af18b73a07b4088753930db7a34add', + u'info_dict': { + u"title": u"Luati-le Banii sez 4 ep 1", + u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.", + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + + webpage = self._download_webpage(url, video_id) + flashVars_str = self._search_regex( + r' Date: Sat, 24 Aug 2013 22:49:22 +0200 Subject: [PATCH 31/33] Install our own HTTPS handler as well (#1309) --- youtube_dl/utils.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 52cfb8a6d..ab1049cc0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -476,7 +476,7 @@ def formatSeconds(secs): def make_HTTPS_handler(opts): if sys.version_info < (3,2): # Python's 2.x handler is very simplistic - return compat_urllib_request.HTTPSHandler() + return YoutubeDLHandlerHTTPS() else: import ssl context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) @@ -485,7 +485,7 @@ def make_HTTPS_handler(opts): context.verify_mode = (ssl.CERT_NONE if opts.no_check_certificate else ssl.CERT_REQUIRED) - return 
compat_urllib_request.HTTPSHandler(context=context) + return YoutubeDLHandlerHTTPS(context=context) class ExtractorError(Exception): """Error during info extraction.""" @@ -569,7 +569,8 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected -class YoutubeDLHandler(compat_urllib_request.HTTPHandler): + +class YoutubeDLHandler_Template: # Old-style class, like HTTPHandler """Handler for HTTP requests and responses. This class, when installed with an OpenerDirector, automatically adds @@ -602,8 +603,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): ret.code = code return ret - def http_request(self, req): - for h,v in std_headers.items(): + def _http_request(self, req): + for h, v in std_headers.items(): if h in req.headers: del req.headers[h] req.add_header(h, v) @@ -618,7 +619,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): del req.headers['Youtubedl-user-agent'] return req - def http_response(self, req, resp): + def _http_response(self, req, resp): old_resp = resp # gzip if resp.headers.get('Content-encoding', '') == 'gzip': @@ -632,8 +633,16 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): resp.msg = old_resp.msg return resp - https_request = http_request - https_response = http_response + +class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler): + http_request = YoutubeDLHandler_Template._http_request + http_response = YoutubeDLHandler_Template._http_response + + +class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler): + https_request = YoutubeDLHandler_Template._http_request + https_response = YoutubeDLHandler_Template._http_response + def unified_strdate(date_str): """Return a string with the date in the format YYYYMMDD""" From 0838239e8e454c55903f3d69560cf53e25ce69f4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 24 Aug 2013 22:49:52 +0200 Subject: [PATCH 32/33] [generic] Support double slash URLs 
(Fixes #1309) --- youtube_dl/extractor/generic.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index da016f7ee..ccbbdd255 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -7,12 +7,14 @@ from .common import InfoExtractor from ..utils import ( compat_urllib_error, compat_urllib_parse, + compat_urllib_parse_urlparse, compat_urllib_request, ExtractorError, ) from .brightcove import BrightcoveIE + class GenericIE(InfoExtractor): IE_DESC = u'Generic downloader that works on some sites' _VALID_URL = r'.*' @@ -23,7 +25,7 @@ class GenericIE(InfoExtractor): u'file': u'13601338388002.mp4', u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89', u'info_dict': { - u"uploader": u"www.hodiho.fr", + u"uploader": u"www.hodiho.fr", u"title": u"R\u00e9gis plante sa Jeep" } }, @@ -161,6 +163,8 @@ class GenericIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) video_url = compat_urllib_parse.unquote(mobj.group(1)) + if video_url.startswith('//'): + video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url video_id = os.path.basename(video_url) # here's a fun little line of code for you: From 9585f890f8c0eff70eb874c7962dc30baea1049c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 24 Aug 2013 22:56:37 +0200 Subject: [PATCH 33/33] [generic] add support for relative URLs (Fixes #1308) --- youtube_dl/extractor/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ccbbdd255..8488dca05 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -165,6 +165,8 @@ class GenericIE(InfoExtractor): video_url = compat_urllib_parse.unquote(mobj.group(1)) if video_url.startswith('//'): video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url + if '://' not in video_url: + video_url = url + ('' if url.endswith('/') else '/') + 
video_url video_id = os.path.basename(video_url) # here's a fun little line of code for you: