From: Philipp Hagemeister Date: Mon, 15 Sep 2014 21:26:54 +0000 (+0200) Subject: Merge commit '98703c7fbfcf06348220aa63f9422cdd792cfe1a' X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=2eebf060af9fe284cbcb839886b27030553fb48d;hp=98703c7fbfcf06348220aa63f9422cdd792cfe1a;p=youtube-dl.git Merge commit '98703c7fbfcf06348220aa63f9422cdd792cfe1a' --- diff --git a/.gitignore b/.gitignore index b8128fab1..e44977ca3 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ MANIFEST README.txt youtube-dl.1 youtube-dl.bash-completion +youtube-dl.fish youtube-dl youtube-dl.exe youtube-dl.tar.gz diff --git a/LATEST_VERSION b/LATEST_VERSION deleted file mode 100644 index a334573b6..000000000 --- a/LATEST_VERSION +++ /dev/null @@ -1 +0,0 @@ -2012.12.99 diff --git a/MANIFEST.in b/MANIFEST.in index d43cc1f3b..5743f605a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,5 +2,6 @@ include README.md include test/*.py include test/*.json include youtube-dl.bash-completion +include youtube-dl.fish include youtube-dl.1 recursive-include docs Makefile conf.py *.rst diff --git a/Makefile b/Makefile index 088a9320b..6272b826c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ -all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion +all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.fish cleanall: clean rm -f youtube-dl youtube-dl.exe @@ -29,6 +29,8 @@ install: youtube-dl youtube-dl.1 youtube-dl.bash-completion install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1 install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl + install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions + install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish test: #nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test @@ -36,9 +38,9 @@ test: tar: youtube-dl.tar.gz -.PHONY: all clean install test tar bash-completion pypi-files +.PHONY: all clean install test tar bash-completion pypi-files fish-completion -pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 +pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish youtube-dl: youtube_dl/*.py youtube_dl/*/*.py zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py @@ -64,7 +66,12 @@ youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-co bash-completion: youtube-dl.bash-completion -youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion +youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in + python devscripts/fish-completion.py + +fish-completion: youtube-dl.fish + +youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \ --exclude '*.DS_Store' \ --exclude '*.kate-swp' \ @@ -78,5 +85,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- -- \ bin devscripts test youtube_dl docs \ LICENSE README.md README.txt \ - Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \ + Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \ + youtube-dl.fish setup.py \ youtube-dl diff --git a/README.md b/README.md index ca366039e..5cc959ac5 100644 --- a/README.md +++ b/README.md @@ -345,6 +345,25 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231 # FAQ +### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists + +YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos. + +If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to report bugs to the Ubuntu packaging guys - all they have to do is update the package to a somewhat recent version. + +Alternatively, uninstall the youtube-dl package and follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html). In a pinch, this should do if you used `apt-get` before to install youtube-dl: + +``` +sudo apt-get remove -y youtube-dl +sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl +sudo chmod a+x /usr/local/bin/youtube-dl +hash -r +``` + +### Do I always have to pass in `--max-quality FORMAT`, or `-citw`? + +By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, `--max-quality` *limits* the video quality (so if you want the best quality, do NOT pass it in), and the only option out of `-citw` that is regularly useful is `-i`. + ### Can you please put the -b option back? Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it. diff --git a/devscripts/fish-completion.in b/devscripts/fish-completion.in new file mode 100644 index 000000000..eb79765da --- /dev/null +++ b/devscripts/fish-completion.in @@ -0,0 +1,5 @@ + +{{commands}} + + +complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory" diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py new file mode 100755 index 000000000..f4aaf0201 --- /dev/null +++ b/devscripts/fish-completion.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import optparse +import os +from os.path import dirname as dirn +import sys + +sys.path.append(dirn(dirn((os.path.abspath(__file__))))) +import youtube_dl +from youtube_dl.utils import shell_quote + +FISH_COMPLETION_FILE = 'youtube-dl.fish' +FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in' + +EXTRA_ARGS = { + 'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'], + + # Options that need a file parameter + 'download-archive': ['--require-parameter'], + 'cookies': ['--require-parameter'], + 'load-info': ['--require-parameter'], + 'batch-file': ['--require-parameter'], +} + +def build_completion(opt_parser): + commands = [] + + for group in opt_parser.option_groups: + for option in group.option_list: + long_option = option.get_opt_string().strip('-') + help_msg = shell_quote([option.help]) + complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option] + if option._short_opts: + complete_cmd += ['--short-option', option._short_opts[0].strip('-')] + if option.help != optparse.SUPPRESS_HELP: + complete_cmd += ['--description', option.help] + complete_cmd.extend(EXTRA_ARGS.get(long_option, [])) + commands.append(shell_quote(complete_cmd)) + + with open(FISH_COMPLETION_TEMPLATE) as f: + template = f.read() + filled_template = template.replace('{{commands}}', '\n'.join(commands)) + with open(FISH_COMPLETION_FILE, 'w') as f: + f.write(filled_template) + +parser = youtube_dl.parseOpts()[0] +build_completion(parser) diff --git a/devscripts/release.sh b/devscripts/release.sh index 453087e5f..691517ceb 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -73,7 +73,6 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" (cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS) (cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS) (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS) -git checkout HEAD -- youtube-dl youtube-dl.exe /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done diff --git a/setup.py b/setup.py index 03e7b358e..cf6b92b0f 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': else: files_spec = [ ('etc/bash_completion.d', ['youtube-dl.bash-completion']), + ('etc/fish/completions', ['youtube-dl.fish']), ('share/doc/youtube_dl', ['README.txt']), ('share/man/man1', ['youtube-dl.1']) ] diff --git a/test/helper.py b/test/helper.py index 01b11f661..7f3ab8438 100644 --- a/test/helper.py +++ b/test/helper.py @@ -103,7 +103,8 @@ def expect_info_dict(self, expected_dict, got_dict): self.assertTrue( isinstance(got, compat_str), - 'Expected a %r object, but got %r' % (compat_str, type(got))) + u'Expected a %s object, but got %s for field %s' % ( + compat_str.__name__, type(got).__name__, info_field)) self.assertTrue( match_rex.match(got), u'field %s (value: %r) should match %r' % (info_field, got, match_str)) diff --git a/test/parameters.json b/test/parameters.json index 487a46d56..098cd0cd0 100644 --- a/test/parameters.json +++ b/test/parameters.json @@ -27,7 +27,6 @@ "rejecttitle": null, "retries": 10, "simulate": false, - "skip_download": false, "subtitleslang": null, "subtitlesformat": "srt", "test": true, diff --git a/test/test_all_urls.py b/test/test_all_urls.py index b1ad30bf1..84b05da39 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -109,7 +109,9 @@ class TestAllURLsMatching(unittest.TestCase): if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) else: - self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url)) + self.assertFalse( + ie.suitable(url), + '%s should not match URL %r . That URL belongs to %s.' % (type(ie).__name__, url, tc['name'])) def test_keywords(self): self.assertMatch(':ytsubs', ['youtube:subscriptions']) @@ -141,32 +143,6 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS']) self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS']) - def test_ComedyCentralShows(self): - self.assertMatch( - 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', - ['ComedyCentralShows']) - self.assertMatch( - 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news', - ['ComedyCentralShows']) - self.assertMatch( - 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', - ['ComedyCentralShows']) - self.assertMatch( - 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3', - ['ComedyCentralShows']) - self.assertMatch( - 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary', - ['ComedyCentralShows']) - self.assertMatch( - 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall', - ['ComedyCentralShows']) - self.assertMatch( - 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights', - ['ComedyCentralShows']) - self.assertMatch( - 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food', - ['ComedyCentralShows']) - def test_yahoo_https(self): # https://github.com/rg3/youtube-dl/issues/2701 self.assertMatch( diff --git a/test/test_cache.py b/test/test_cache.py new file mode 100644 index 000000000..a16160142 --- /dev/null +++ b/test/test_cache.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import unicode_literals + +import shutil + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import FakeYDL +from youtube_dl.cache import Cache + + +def _is_empty(d): + return not bool(os.listdir(d)) + + +def _mkdir(d): + if not os.path.exists(d): + os.mkdir(d) + + +class TestCache(unittest.TestCase): + def setUp(self): + TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata') + _mkdir(TESTDATA_DIR) + self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test') + self.tearDown() + + def tearDown(self): + if os.path.exists(self.test_dir): + shutil.rmtree(self.test_dir) + + def test_cache(self): + ydl = FakeYDL({ + 'cachedir': self.test_dir, + }) + c = Cache(ydl) + obj = {'x': 1, 'y': ['ä', '\\a', True]} + self.assertEqual(c.load('test_cache', 'k.'), None) + c.store('test_cache', 'k.', obj) + self.assertEqual(c.load('test_cache', 'k2'), None) + self.assertFalse(_is_empty(self.test_dir)) + self.assertEqual(c.load('test_cache', 'k.'), obj) + self.assertEqual(c.load('test_cache', 'y'), None) + self.assertEqual(c.load('test_cache2', 'k.'), None) + c.remove() + self.assertFalse(os.path.exists(self.test_dir)) + self.assertEqual(c.load('test_cache', 'k.'), None) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_download.py b/test/test_download.py index 6422ef119..2b8ac6975 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -28,6 +28,7 @@ from youtube_dl.utils import ( compat_HTTPError, DownloadError, ExtractorError, + format_bytes, UnavailableVideoError, ) from youtube_dl.extractor import get_info_extractor @@ -103,8 +104,11 @@ def generator(test_case): def get_tc_filename(tc): return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {})) - def try_rm_tcs_files(): - for tc in test_cases: + res_dict = None + def try_rm_tcs_files(tcs=None): + if tcs is None: + tcs = test_cases + for tc in tcs: tc_filename = get_tc_filename(tc) try_rm(tc_filename) try_rm(tc_filename + '.part') @@ -148,24 +152,47 @@ def generator(test_case): self.assertEqual( len(res_dict['entries']), test_case['playlist_count'], - 'Expected at %d in playlist %s, but got %d.') + 'Expected %d entries in playlist %s, but got %d.' % ( + test_case['playlist_count'], + test_case['url'], + len(res_dict['entries']), + )) + if 'playlist_duration_sum' in test_case: + got_duration = sum(e['duration'] for e in res_dict['entries']) + self.assertEqual( + test_case['playlist_duration_sum'], got_duration) for tc in test_cases: tc_filename = get_tc_filename(tc) if not test_case.get('params', {}).get('skip_download', False): self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) self.assertTrue(tc_filename in finished_hook_called) + expected_minsize = tc.get('file_minsize', 10000) + if expected_minsize is not None: + if params.get('test'): + expected_minsize = max(expected_minsize, 10000) + got_fsize = os.path.getsize(tc_filename) + assertGreaterEqual( + self, got_fsize, expected_minsize, + 'Expected %s to be at least %s, but it\'s only %s ' % + (tc_filename, format_bytes(expected_minsize), + format_bytes(got_fsize))) + if 'md5' in tc: + md5_for_file = _file_md5(tc_filename) + self.assertEqual(md5_for_file, tc['md5']) info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json' self.assertTrue(os.path.exists(info_json_fn)) - if 'md5' in tc: - md5_for_file = _file_md5(tc_filename) - self.assertEqual(md5_for_file, tc['md5']) with io.open(info_json_fn, encoding='utf-8') as infof: info_dict = json.load(infof) expect_info_dict(self, tc.get('info_dict', {}), info_dict) finally: try_rm_tcs_files() + if is_playlist and res_dict is not None: + # Remove all other files that may have been extracted if the + # extractor returns full results even with extract_flat + res_tcs = [{'info_dict': e} for e in res_dict['entries']] + try_rm_tcs_files(res_tcs) return test_template diff --git a/test/test_playlists.py b/test/test_playlists.py deleted file mode 100644 index 0137b8399..000000000 --- a/test/test_playlists.py +++ /dev/null @@ -1,395 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - -## DEPRECATED FILE! -# Add new tests to the extractors themselves, like this: -# _TEST = { -# 'url': 'http://example.com/playlist/42', -# 'playlist_mincount': 99, -# 'info_dict': { -# 'id': '42', -# 'title': 'Playlist number forty-two', -# } -# } - -from __future__ import unicode_literals - -# Allow direct execution -import os -import sys -import unittest -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from test.helper import ( - assertRegexpMatches, - assertGreaterEqual, - expect_info_dict, - FakeYDL, -) - -from youtube_dl.extractor import ( - AcademicEarthCourseIE, - DailymotionPlaylistIE, - DailymotionUserIE, - VimeoChannelIE, - VimeoUserIE, - VimeoAlbumIE, - VimeoGroupsIE, - VineUserIE, - UstreamChannelIE, - SoundcloudSetIE, - SoundcloudUserIE, - SoundcloudPlaylistIE, - TeacherTubeUserIE, - LivestreamIE, - LivestreamOriginalIE, - NHLVideocenterIE, - BambuserChannelIE, - BandcampAlbumIE, - SmotriCommunityIE, - SmotriUserIE, - IviCompilationIE, - ImdbListIE, - KhanAcademyIE, - EveryonesMixtapeIE, - RutubeChannelIE, - RutubePersonIE, - GoogleSearchIE, - GenericIE, - TEDIE, - ToypicsUserIE, - XTubeUserIE, - InstagramUserIE, - CSpanIE, - AolIE, - GameOnePlaylistIE, -) - - -class TestPlaylists(unittest.TestCase): - def assertIsPlaylist(self, info): - """Make sure the info has '_type' set to 'playlist'""" - self.assertEqual(info['_type'], 'playlist') - - def test_dailymotion_playlist(self): - dl = FakeYDL() - ie = DailymotionPlaylistIE(dl) - result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q') - self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'SPORT') - self.assertTrue(len(result['entries']) > 20) - - def test_dailymotion_user(self): - dl = FakeYDL() - ie = DailymotionUserIE(dl) - result = ie.extract('https://www.dailymotion.com/user/nqtv') - self.assertIsPlaylist(result) - assertGreaterEqual(self, len(result['entries']), 100) - self.assertEqual(result['title'], 'Rémi Gaillard') - - def test_vimeo_channel(self): - dl = FakeYDL() - ie = VimeoChannelIE(dl) - result = ie.extract('http://vimeo.com/channels/tributes') - self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'Vimeo Tributes') - self.assertTrue(len(result['entries']) > 24) - - def test_vimeo_user(self): - dl = FakeYDL() - ie = VimeoUserIE(dl) - result = ie.extract('http://vimeo.com/nkistudio/videos') - self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'Nki') - self.assertTrue(len(result['entries']) > 65) - - def test_vimeo_album(self): - dl = FakeYDL() - ie = VimeoAlbumIE(dl) - result = ie.extract('http://vimeo.com/album/2632481') - self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'Staff Favorites: November 2013') - self.assertTrue(len(result['entries']) > 12) - - def test_vimeo_groups(self): - dl = FakeYDL() - ie = VimeoGroupsIE(dl) - result = ie.extract('http://vimeo.com/groups/rolexawards') - self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'Rolex Awards for Enterprise') - self.assertTrue(len(result['entries']) > 72) - - def test_vine_user(self): - dl = FakeYDL() - ie = VineUserIE(dl) - result = ie.extract('https://vine.co/Visa') - self.assertIsPlaylist(result) - assertGreaterEqual(self, len(result['entries']), 47) - - def test_ustream_channel(self): - dl = FakeYDL() - ie = UstreamChannelIE(dl) - result = ie.extract('http://www.ustream.tv/channel/channeljapan') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], '10874166') - assertGreaterEqual(self, len(result['entries']), 54) - - def test_soundcloud_set(self): - dl = FakeYDL() - ie = SoundcloudSetIE(dl) - result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep') - self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'The Royal Concept EP') - assertGreaterEqual(self, len(result['entries']), 6) - - def test_soundcloud_user(self): - dl = FakeYDL() - ie = SoundcloudUserIE(dl) - result = ie.extract('https://soundcloud.com/the-concept-band') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], '9615865') - assertGreaterEqual(self, len(result['entries']), 12) - - def test_soundcloud_likes(self): - dl = FakeYDL() - ie = SoundcloudUserIE(dl) - result = ie.extract('https://soundcloud.com/the-concept-band/likes') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], '9615865') - assertGreaterEqual(self, len(result['entries']), 1) - - def test_soundcloud_playlist(self): - dl = FakeYDL() - ie = SoundcloudPlaylistIE(dl) - result = ie.extract('http://api.soundcloud.com/playlists/4110309') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], '4110309') - self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]') - assertRegexpMatches( - self, result['description'], r'.*?TILT Brass - Bowery Poetry Club') - self.assertEqual(len(result['entries']), 6) - - def test_livestream_event(self): - dl = FakeYDL() - ie = LivestreamIE(dl) - result = ie.extract('http://new.livestream.com/tedx/cityenglish') - self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'TEDCity2.0 (English)') - assertGreaterEqual(self, len(result['entries']), 4) - - def test_livestreamoriginal_folder(self): - dl = FakeYDL() - ie = LivestreamOriginalIE(dl) - result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3') - assertGreaterEqual(self, len(result['entries']), 28) - - def test_nhl_videocenter(self): - dl = FakeYDL() - ie = NHLVideocenterIE(dl) - result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], '999') - self.assertEqual(result['title'], 'Highlights') - self.assertEqual(len(result['entries']), 12) - - def test_bambuser_channel(self): - dl = FakeYDL() - ie = BambuserChannelIE(dl) - result = ie.extract('http://bambuser.com/channel/pixelversity') - self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'pixelversity') - assertGreaterEqual(self, len(result['entries']), 60) - - def test_bandcamp_album(self): - dl = FakeYDL() - ie = BandcampAlbumIE(dl) - result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave') - self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'Hierophany of the Open Grave') - assertGreaterEqual(self, len(result['entries']), 9) - - def test_smotri_community(self): - dl = FakeYDL() - ie = SmotriCommunityIE(dl) - result = ie.extract('http://smotri.com/community/video/kommuna') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'kommuna') - self.assertEqual(result['title'], 'КПРФ') - assertGreaterEqual(self, len(result['entries']), 4) - - def test_smotri_user(self): - dl = FakeYDL() - ie = SmotriUserIE(dl) - result = ie.extract('http://smotri.com/user/inspector') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'inspector') - self.assertEqual(result['title'], 'Inspector') - assertGreaterEqual(self, len(result['entries']), 9) - - def test_AcademicEarthCourse(self): - dl = FakeYDL() - ie = AcademicEarthCourseIE(dl) - result = ie.extract('http://academicearth.org/playlists/laws-of-nature/') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'laws-of-nature') - self.assertEqual(result['title'], 'Laws of Nature') - self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')# u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.") - self.assertEqual(len(result['entries']), 4) - - def test_ivi_compilation(self): - dl = FakeYDL() - ie = IviCompilationIE(dl) - result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'dvoe_iz_lartsa') - self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)') - assertGreaterEqual(self, len(result['entries']), 24) - - def test_ivi_compilation_season(self): - dl = FakeYDL() - ie = IviCompilationIE(dl) - result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa/season1') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1') - self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон') - assertGreaterEqual(self, len(result['entries']), 12) - - def test_imdb_list(self): - dl = FakeYDL() - ie = ImdbListIE(dl) - result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'JFs9NWw6XI0') - self.assertEqual(result['title'], 'March 23, 2012 Releases') - self.assertEqual(len(result['entries']), 7) - - def test_khanacademy_topic(self): - dl = FakeYDL() - ie = KhanAcademyIE(dl) - result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'cryptography') - self.assertEqual(result['title'], 'Journey into cryptography') - self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?') - assertGreaterEqual(self, len(result['entries']), 3) - - def test_EveryonesMixtape(self): - dl = FakeYDL() - ie = EveryonesMixtapeIE(dl) - result = ie.extract('http://everyonesmixtape.com/#/mix/m7m0jJAbMQi') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'm7m0jJAbMQi') - self.assertEqual(result['title'], 'Driving') - self.assertEqual(len(result['entries']), 24) - - def test_rutube_channel(self): - dl = FakeYDL() - ie = RutubeChannelIE(dl) - result = ie.extract('http://rutube.ru/tags/video/1800/') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], '1800') - assertGreaterEqual(self, len(result['entries']), 68) - - def test_rutube_person(self): - dl = FakeYDL() - ie = RutubePersonIE(dl) - result = ie.extract('http://rutube.ru/video/person/313878/') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], '313878') - assertGreaterEqual(self, len(result['entries']), 37) - - def test_multiple_brightcove_videos(self): - # https://github.com/rg3/youtube-dl/issues/2283 - dl = FakeYDL() - ie = GenericIE(dl) - result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'always-never-nuclear-command-and-control') - self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker') - self.assertEqual(len(result['entries']), 3) - - def test_ted_playlist(self): - dl = FakeYDL() - ie = TEDIE(dl) - result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], '10') - self.assertEqual(result['title'], 'Who are the hackers?') - assertGreaterEqual(self, len(result['entries']), 6) - - def test_toypics_user(self): - dl = FakeYDL() - ie = ToypicsUserIE(dl) - result = ie.extract('http://videos.toypics.net/Mikey') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'Mikey') - assertGreaterEqual(self, len(result['entries']), 17) - - def test_xtube_user(self): - dl = FakeYDL() - ie = XTubeUserIE(dl) - result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'greenshowers') - assertGreaterEqual(self, len(result['entries']), 155) - - def test_InstagramUser(self): - dl = FakeYDL() - ie = InstagramUserIE(dl) - result = ie.extract('http://instagram.com/porsche') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'porsche') - assertGreaterEqual(self, len(result['entries']), 2) - test_video = next( - e for e in result['entries'] - if e['id'] == '614605558512799803_462752227') - dl.add_default_extra_info(test_video, ie, '(irrelevant URL)') - dl.process_video_result(test_video, download=False) - EXPECTED = { - 'id': '614605558512799803_462752227', - 'ext': 'mp4', - 'title': '#Porsche Intelligent Performance.', - 'thumbnail': 're:^https?://.*\.jpg', - 'uploader': 'Porsche', - 'uploader_id': 'porsche', - 'timestamp': 1387486713, - 'upload_date': '20131219', - } - expect_info_dict(self, EXPECTED, test_video) - - def test_CSpan_playlist(self): - dl = FakeYDL() - ie = CSpanIE(dl) - result = ie.extract( - 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], '342759') - self.assertEqual( - result['title'], 'General Motors Ignition Switch Recall') - whole_duration = sum(e['duration'] for e in result['entries']) - self.assertEqual(whole_duration, 14855) - - def test_aol_playlist(self): - dl = FakeYDL() - ie = AolIE(dl) - result = ie.extract( - 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], '152147') - self.assertEqual( - result['title'], 'Brace Yourself - Today\'s Weirdest News') - assertGreaterEqual(self, len(result['entries']), 10) - - def test_TeacherTubeUser(self): - dl = FakeYDL() - ie = TeacherTubeUserIE(dl) - result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'rbhagwati2') - assertGreaterEqual(self, len(result['entries']), 179) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 0953db371..3efbed29d 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,6 +1,8 @@ #!/usr/bin/env python # coding: utf-8 +from __future__ import unicode_literals + # Allow direct execution import os import sys @@ -13,7 +15,6 @@ import io import json import xml.etree.ElementTree -#from youtube_dl.utils import htmlentity_transform from youtube_dl.utils import ( DateRange, encodeFilename, @@ -39,13 +40,11 @@ from youtube_dl.utils import ( parse_iso8601, strip_jsonp, uppercase_escape, + limit_length, + escape_rfc3986, + escape_url, ) -if sys.version_info < (3, 0): - _compat_str = lambda b: b.decode('unicode-escape') -else: - _compat_str = lambda s: s - class TestUtil(unittest.TestCase): def test_timeconvert(self): @@ -67,9 +66,9 @@ class TestUtil(unittest.TestCase): self.assertEqual('this - that', sanitize_filename('this: that')) self.assertEqual(sanitize_filename('AT&T'), 'AT&T') - aumlaut = _compat_str('\xe4') + aumlaut = 'ä' self.assertEqual(sanitize_filename(aumlaut), aumlaut) - tests = _compat_str('\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430') + tests = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430' self.assertEqual(sanitize_filename(tests), tests) forbidden = '"\0\\/' @@ -91,9 +90,9 @@ class TestUtil(unittest.TestCase): self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True)) self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True)) - tests = _compat_str('a\xe4b\u4e2d\u56fd\u7684c') + tests = 'a\xe4b\u4e2d\u56fd\u7684c' self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c') - self.assertTrue(sanitize_filename(_compat_str('\xf6'), restricted=True) != '') # No empty filename + self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#' for fc in forbidden: @@ -101,8 +100,8 @@ class TestUtil(unittest.TestCase): self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) # Handle a common case more neatly - self.assertEqual(sanitize_filename(_compat_str('\u5927\u58f0\u5e26 - Song'), restricted=True), 'Song') - self.assertEqual(sanitize_filename(_compat_str('\u603b\u7edf: Speech'), restricted=True), 'Speech') + self.assertEqual(sanitize_filename('\u5927\u58f0\u5e26 - Song', restricted=True), 'Song') + self.assertEqual(sanitize_filename('\u603b\u7edf: Speech', restricted=True), 'Speech') # .. but make sure the file name is never empty self.assertTrue(sanitize_filename('-', restricted=True) != '') self.assertTrue(sanitize_filename(':', restricted=True) != '') @@ -120,7 +119,9 @@ class TestUtil(unittest.TestCase): self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1]) def test_unescape_html(self): - self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;')) + self.assertEqual(unescapeHTML('%20;'), '%20;') + self.assertEqual( + unescapeHTML('é'), 'é') def test_daterange(self): _20century = DateRange("19000101","20000101") @@ -138,7 +139,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_strdate('1968-12-10'), '19681210') def test_find_xpath_attr(self): - testxml = u''' + testxml = ''' @@ -151,18 +152,18 @@ class TestUtil(unittest.TestCase): self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) def test_meta_parser(self): - testhtml = u''' + testhtml = ''' ''' get_meta = lambda name: get_meta_content(name, testhtml) - self.assertEqual(get_meta('description'), u'foo & bar') + self.assertEqual(get_meta('description'), 'foo & bar') self.assertEqual(get_meta('author'), 'Plato') def test_xpath_with_ns(self): - testxml = u''' + testxml = ''' The Author http://server.com/download.mp3 @@ -171,8 +172,8 @@ class TestUtil(unittest.TestCase): doc = xml.etree.ElementTree.fromstring(testxml) find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'})) self.assertTrue(find('media:song') is not None) - self.assertEqual(find('media:song/media:author').text, u'The Author') - self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3') + self.assertEqual(find('media:song/media:author').text, 'The Author') + self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3') def test_smuggle_url(self): data = {u"ö": u"ö", u"abc": [3]} @@ -187,22 +188,22 @@ class TestUtil(unittest.TestCase): self.assertEqual(res_data, None) def test_shell_quote(self): - args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')] - self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""") + args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] + self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""") def test_str_to_int(self): self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123.456'), 123456) def test_url_basename(self): - self.assertEqual(url_basename(u'http://foo.de/'), u'') - self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz') - self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz') - self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz') - self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz') + self.assertEqual(url_basename('http://foo.de/'), '') + self.assertEqual(url_basename('http://foo.de/bar/baz'), 'baz') + self.assertEqual(url_basename('http://foo.de/bar/baz?x=y'), 'baz') + self.assertEqual(url_basename('http://foo.de/bar/baz#x=y'), 'baz') + self.assertEqual(url_basename('http://foo.de/bar/baz/'), 'baz') self.assertEqual( - url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'), - u'trailer.mp4') + url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), + 'trailer.mp4') def test_parse_duration(self): self.assertEqual(parse_duration(None), None) @@ -213,6 +214,9 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('00:01:01'), 61) self.assertEqual(parse_duration('x:y'), None) self.assertEqual(parse_duration('3h11m53s'), 11513) + self.assertEqual(parse_duration('3h 11m 53s'), 11513) + self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513) + self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513) self.assertEqual(parse_duration('62m45s'), 3765) self.assertEqual(parse_duration('6m59s'), 419) self.assertEqual(parse_duration('49s'), 49) @@ -256,16 +260,16 @@ class TestUtil(unittest.TestCase): testPL(5, 2, (20, 99), []) def test_struct_unpack(self): - self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) + self.assertEqual(struct_unpack('!B', b'\x00'), (0,)) def test_read_batch_urls(self): - f = io.StringIO(u'''\xef\xbb\xbf foo + f = io.StringIO('''\xef\xbb\xbf foo bar\r baz # More after this line\r ; or after this bam''') - self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam']) + self.assertEqual(read_batch_urls(f), ['foo', 'bar', 'baz', 'bam']) def test_urlencode_postdata(self): data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) @@ -282,8 +286,44 @@ class TestUtil(unittest.TestCase): self.assertEqual(d, [{"id": "532cb", "x": 3}]) def test_uppercase_escape(self): - self.assertEqual(uppercase_escape(u'aä'), u'aä') - self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐') + self.assertEqual(uppercase_escape('aä'), 'aä') + self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') + + def test_limit_length(self): + self.assertEqual(limit_length(None, 12), None) + self.assertEqual(limit_length('foo', 12), 'foo') + self.assertTrue( + limit_length('foo bar baz asd', 12).startswith('foo bar')) + self.assertTrue('...' in limit_length('foo bar baz asd', 12)) + + def test_escape_rfc3986(self): + reserved = "!*'();:@&=+$,/?#[]" + unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~' + self.assertEqual(escape_rfc3986(reserved), reserved) + self.assertEqual(escape_rfc3986(unreserved), unreserved) + self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82') + self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82') + self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar') + self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar') + + def test_escape_url(self): + self.assertEqual( + escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'), + 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4' + ) + self.assertEqual( + escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'), + 'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290' + ) + self.assertEqual( + escape_url('http://тест.рф/фрагмент'), + 'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82' + ) + self.assertEqual( + escape_url('http://тест.рф/абв?абв=абв#абв'), + 'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2' + ) + self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 3aadedd64..1fa99f88b 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -25,15 +25,6 @@ class TestYoutubeLists(unittest.TestCase): """Make sure the info has '_type' set to 'playlist'""" self.assertEqual(info['_type'], 'playlist') - def test_youtube_playlist(self): - dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') - self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'ytdl test PL') - ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']] - self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) - def test_youtube_playlist_noplaylist(self): dl = FakeYDL() dl.params['noplaylist'] = True @@ -41,36 +32,7 @@ class TestYoutubeLists(unittest.TestCase): result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') self.assertEqual(result['_type'], 'url') self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg') - - def test_issue_673(self): - dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('PLBB231211A4F62143') - self.assertTrue(len(result['entries']) > 25) - - def test_youtube_playlist_long(self): - dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') - self.assertIsPlaylist(result) - self.assertTrue(len(result['entries']) >= 799) - - def test_youtube_playlist_with_deleted(self): - #651 - dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') - ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']] - self.assertFalse('pElCt5oNDuI' in ytie_results) - self.assertFalse('KdPEApIVdWM' in ytie_results) - - def test_youtube_playlist_empty(self): - dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx') - self.assertIsPlaylist(result) - self.assertEqual(len(result['entries']), 0) - + def test_youtube_course(self): dl = FakeYDL() ie = YoutubePlaylistIE(dl) @@ -97,12 +59,6 @@ class TestYoutubeLists(unittest.TestCase): result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation') self.assertTrue(len(result['entries']) >= 320) - def test_youtube_safe_search(self): - dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl') - self.assertEqual(len(result['entries']), 2) - def test_youtube_show(self): dl = FakeYDL() ie = YoutubeShowIE(dl) diff --git a/youtube-dl b/youtube-dl deleted file mode 100755 index e3eb8774c..000000000 --- a/youtube-dl +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env python - -import sys, os -import json, hashlib - -try: - import urllib.request as compat_urllib_request -except ImportError: # Python 2 - import urllib2 as compat_urllib_request - -def rsa_verify(message, signature, key): - from struct import pack - from hashlib import sha256 - from sys import version_info - def b(x): - if version_info[0] == 2: return x - else: return x.encode('latin1') - assert(type(message) == type(b(''))) - block_size = 0 - n = key[0] - while n: - block_size += 1 - n >>= 8 - signature = pow(int(signature, 16), key[1], key[0]) - raw_bytes = [] - while signature: - raw_bytes.insert(0, pack("B", signature & 0xFF)) - signature >>= 8 - signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes) - if signature[0:2] != b('\x00\x01'): return False - signature = signature[2:] - if not b('\x00') in signature: return False - signature = signature[signature.index(b('\x00'))+1:] - if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False - signature = signature[19:] - if signature != sha256(message).digest(): return False - return True - -sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n') -sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n') -sys.stderr.write(u'From now on, get the binaries from http://rg3.github.io/youtube-dl/download.html, not from the git repository.\n\n') - -try: - raw_input() -except NameError: # Python 3 - input() - -filename = sys.argv[0] - -UPDATE_URL = "http://rg3.github.io/youtube-dl/update/" -VERSION_URL = UPDATE_URL + 'LATEST_VERSION' -JSON_URL = UPDATE_URL + 'versions.json' -UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) - -if not os.access(filename, os.W_OK): - sys.exit('ERROR: no write permissions on %s' % filename) - -try: - versions_info = compat_urllib_request.urlopen(JSON_URL).read().decode('utf-8') - versions_info = json.loads(versions_info) -except: - sys.exit(u'ERROR: can\'t obtain versions info. Please try again later.') -if not 'signature' in versions_info: - sys.exit(u'ERROR: the versions file is not signed or corrupted. Aborting.') -signature = versions_info['signature'] -del versions_info['signature'] -if not rsa_verify(json.dumps(versions_info, sort_keys=True).encode('utf-8'), signature, UPDATES_RSA_KEY): - sys.exit(u'ERROR: the versions file signature is invalid. Aborting.') - -version = versions_info['versions'][versions_info['latest']] - -try: - urlh = compat_urllib_request.urlopen(version['bin'][0]) - newcontent = urlh.read() - urlh.close() -except (IOError, OSError) as err: - sys.exit('ERROR: unable to download latest version') - -newcontent_hash = hashlib.sha256(newcontent).hexdigest() -if newcontent_hash != version['bin'][1]: - sys.exit(u'ERROR: the downloaded file hash does not match. Aborting.') - -try: - with open(filename, 'wb') as outf: - outf.write(newcontent) -except (IOError, OSError) as err: - sys.exit('ERROR: unable to overwrite current version') - -sys.stderr.write(u'Done! Now you can run youtube-dl.\n') diff --git a/youtube-dl.exe b/youtube-dl.exe deleted file mode 100644 index 45eee04bb..000000000 Binary files a/youtube-dl.exe and /dev/null differ diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 98639e004..9519594c9 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -28,6 +28,7 @@ from .utils import ( compat_str, compat_urllib_error, compat_urllib_request, + escape_url, ContentTooShortError, date_from_str, DateRange, @@ -57,6 +58,7 @@ from .utils import ( YoutubeDLHandler, prepend_extension, ) +from .cache import Cache from .extractor import get_info_extractor, gen_extractors from .downloader import get_suitable_downloader from .postprocessor import FFmpegMergerPP @@ -133,7 +135,7 @@ class YoutubeDL(object): daterange: A DateRange object, download only if the upload_date is in the range. skip_download: Skip the actual download of the video file cachedir: Location of the cache files in the filesystem. - None to disable filesystem cache. + False to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. age_limit: An integer representing the user's age in years. Unsuitable videos for the given age are skipped. @@ -195,6 +197,7 @@ class YoutubeDL(object): self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._err_file = sys.stderr self.params = params + self.cache = Cache(self) if params.get('bidi_workaround', False): try: @@ -1239,6 +1242,25 @@ class YoutubeDL(object): def urlopen(self, req): """ Start an HTTP download """ + + # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not + # always respected by websites, some tend to give out URLs with non percent-encoded + # non-ASCII characters (see telemb.py, ard.py [#3412]) + # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) + # To work around aforementioned issue we will replace request's original URL with + # percent-encoded one + url = req if isinstance(req, compat_str) else req.get_full_url() + url_escaped = escape_url(url) + + # Substitute URL if any change after escaping + if url != url_escaped: + if isinstance(req, compat_str): + req = url_escaped + else: + req = compat_urllib_request.Request( + url_escaped, data=req.data, headers=req.headers, + origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) + return self._opener.open(req, timeout=self._socket_timeout) def print_debug_header(self): diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index b15695053..42d0a0180 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -74,29 +74,28 @@ __authors__ = ( 'Keith Beckman', 'Ole Ernst', 'Aaron McDaniel (mcd1992)', + 'Magnus Kolstad', ) __license__ = 'Public Domain' import codecs import io -import optparse import os import random -import shlex -import shutil import sys +from .options import ( + parseOpts, +) from .utils import ( compat_getpass, compat_print, DateRange, DEFAULT_OUTTMPL, decodeOption, - get_term_width, DownloadError, - get_cachedir, MaxDownloadsReached, preferredencoding, read_batch_urls, @@ -110,7 +109,6 @@ from .downloader import ( FileDownloader, ) from .extractor import gen_extractors -from .version import __version__ from .YoutubeDL import YoutubeDL from .postprocessor import ( AtomicParsleyPP, @@ -124,475 +122,6 @@ from .postprocessor import ( ) -def parseOpts(overrideArguments=None): - def _readOptions(filename_bytes, default=[]): - try: - optionf = open(filename_bytes) - except IOError: - return default # silently skip if file is not present - try: - res = [] - for l in optionf: - res += shlex.split(l, comments=True) - finally: - optionf.close() - return res - - def _readUserConf(): - xdg_config_home = os.environ.get('XDG_CONFIG_HOME') - if xdg_config_home: - userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config') - if not os.path.isfile(userConfFile): - userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf') - else: - userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config') - if not os.path.isfile(userConfFile): - userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') - userConf = _readOptions(userConfFile, None) - - if userConf is None: - appdata_dir = os.environ.get('appdata') - if appdata_dir: - userConf = _readOptions( - os.path.join(appdata_dir, 'youtube-dl', 'config'), - default=None) - if userConf is None: - userConf = _readOptions( - os.path.join(appdata_dir, 'youtube-dl', 'config.txt'), - default=None) - - if userConf is None: - userConf = _readOptions( - os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'), - default=None) - if userConf is None: - userConf = _readOptions( - os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'), - default=None) - - if userConf is None: - userConf = [] - - return userConf - - def _format_option_string(option): - ''' ('-o', '--option') -> -o, --format METAVAR''' - - opts = [] - - if option._short_opts: - opts.append(option._short_opts[0]) - if option._long_opts: - opts.append(option._long_opts[0]) - if len(opts) > 1: - opts.insert(1, ', ') - - if option.takes_value(): opts.append(' %s' % option.metavar) - - return "".join(opts) - - def _comma_separated_values_options_callback(option, opt_str, value, parser): - setattr(parser.values, option.dest, value.split(',')) - - def _hide_login_info(opts): - opts = list(opts) - for private_opt in ['-p', '--password', '-u', '--username', '--video-password']: - try: - i = opts.index(private_opt) - opts[i+1] = '' - except ValueError: - pass - return opts - - max_width = 80 - max_help_position = 80 - - # No need to wrap help messages if we're on a wide console - columns = get_term_width() - if columns: max_width = columns - - fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) - fmt.format_option_strings = _format_option_string - - kw = { - 'version' : __version__, - 'formatter' : fmt, - 'usage' : '%prog [options] url [url...]', - 'conflict_handler' : 'resolve', - } - - parser = optparse.OptionParser(**kw) - - # option groups - general = optparse.OptionGroup(parser, 'General Options') - selection = optparse.OptionGroup(parser, 'Video Selection') - authentication = optparse.OptionGroup(parser, 'Authentication Options') - video_format = optparse.OptionGroup(parser, 'Video Format Options') - subtitles = optparse.OptionGroup(parser, 'Subtitle Options') - downloader = optparse.OptionGroup(parser, 'Download Options') - postproc = optparse.OptionGroup(parser, 'Post-processing Options') - filesystem = optparse.OptionGroup(parser, 'Filesystem Options') - workarounds = optparse.OptionGroup(parser, 'Workarounds') - verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') - - general.add_option('-h', '--help', - action='help', help='print this help text and exit') - general.add_option('-v', '--version', - action='version', help='print program version and exit') - general.add_option('-U', '--update', - action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') - general.add_option('-i', '--ignore-errors', - action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False) - general.add_option('--abort-on-error', - action='store_false', dest='ignoreerrors', - help='Abort downloading of further videos (in the playlist or the command line) if an error occurs') - general.add_option('--dump-user-agent', - action='store_true', dest='dump_user_agent', - help='display the current browser identification', default=False) - general.add_option('--list-extractors', - action='store_true', dest='list_extractors', - help='List all supported extractors and the URLs they would handle', default=False) - general.add_option('--extractor-descriptions', - action='store_true', dest='list_extractor_descriptions', - help='Output descriptions of all supported extractors', default=False) - general.add_option( - '--proxy', dest='proxy', default=None, metavar='URL', - help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') - general.add_option( - '--socket-timeout', dest='socket_timeout', - type=float, default=None, help=u'Time to wait before giving up, in seconds') - general.add_option( - '--default-search', - dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.') - general.add_option( - '--ignore-config', - action='store_true', - help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') - - selection.add_option( - '--playlist-start', - dest='playliststart', metavar='NUMBER', default=1, type=int, - help='playlist video to start at (default is %default)') - selection.add_option( - '--playlist-end', - dest='playlistend', metavar='NUMBER', default=None, type=int, - help='playlist video to end at (default is last)') - selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') - selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') - selection.add_option('--max-downloads', metavar='NUMBER', - dest='max_downloads', type=int, default=None, - help='Abort after downloading NUMBER files') - selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None) - selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None) - selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None) - selection.add_option( - '--datebefore', metavar='DATE', dest='datebefore', default=None, - help='download only videos uploaded on or before this date (i.e. inclusive)') - selection.add_option( - '--dateafter', metavar='DATE', dest='dateafter', default=None, - help='download only videos uploaded on or after this date (i.e. inclusive)') - selection.add_option( - '--min-views', metavar='COUNT', dest='min_views', - default=None, type=int, - help="Do not download any videos with less than COUNT views",) - selection.add_option( - '--max-views', metavar='COUNT', dest='max_views', - default=None, type=int, - help="Do not download any videos with more than COUNT views",) - selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) - selection.add_option('--age-limit', metavar='YEARS', dest='age_limit', - help='download only videos suitable for the given age', - default=None, type=int) - selection.add_option('--download-archive', metavar='FILE', - dest='download_archive', - help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.') - selection.add_option( - '--include-ads', dest='include_ads', - action='store_true', - help='Download advertisements as well (experimental)') - selection.add_option( - '--youtube-include-dash-manifest', action='store_true', - dest='youtube_include_dash_manifest', default=False, - help='Try to download the DASH manifest on YouTube videos (experimental)') - - authentication.add_option('-u', '--username', - dest='username', metavar='USERNAME', help='account username') - authentication.add_option('-p', '--password', - dest='password', metavar='PASSWORD', help='account password') - authentication.add_option('-2', '--twofactor', - dest='twofactor', metavar='TWOFACTOR', help='two-factor auth code') - authentication.add_option('-n', '--netrc', - action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) - authentication.add_option('--video-password', - dest='videopassword', metavar='PASSWORD', help='video password (vimeo, smotri)') - - - video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FORMAT', default=None, - help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.') - video_format.add_option('--all-formats', - action='store_const', dest='format', help='download all available video formats', const='all') - video_format.add_option('--prefer-free-formats', - action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested') - video_format.add_option('--max-quality', - action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') - video_format.add_option('-F', '--list-formats', - action='store_true', dest='listformats', help='list all available formats') - - subtitles.add_option('--write-sub', '--write-srt', - action='store_true', dest='writesubtitles', - help='write subtitle file', default=False) - subtitles.add_option('--write-auto-sub', '--write-automatic-sub', - action='store_true', dest='writeautomaticsub', - help='write automatic subtitle file (youtube only)', default=False) - subtitles.add_option('--all-subs', - action='store_true', dest='allsubtitles', - help='downloads all the available subtitles of the video', default=False) - subtitles.add_option('--list-subs', - action='store_true', dest='listsubtitles', - help='lists all available subtitles for the video', default=False) - subtitles.add_option('--sub-format', - action='store', dest='subtitlesformat', metavar='FORMAT', - help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt') - subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang', - action='callback', dest='subtitleslangs', metavar='LANGS', type='str', - default=[], callback=_comma_separated_values_options_callback, - help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') - - downloader.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)') - downloader.add_option('-R', '--retries', - dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) - downloader.add_option('--buffer-size', - dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024") - downloader.add_option('--no-resize-buffer', - action='store_true', dest='noresizebuffer', - help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) - downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) - - workarounds.add_option( - '--encoding', dest='encoding', metavar='ENCODING', - help='Force the specified encoding (experimental)') - workarounds.add_option( - '--no-check-certificate', action='store_true', - dest='no_check_certificate', default=False, - help='Suppress HTTPS certificate validation.') - workarounds.add_option( - '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', - help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)') - workarounds.add_option( - '--user-agent', metavar='UA', - dest='user_agent', help='specify a custom user agent') - workarounds.add_option( - '--referer', metavar='REF', - dest='referer', default=None, - help='specify a custom referer, use if the video access is restricted to one domain', - ) - workarounds.add_option( - '--add-header', metavar='FIELD:VALUE', - dest='headers', action='append', - help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', - ) - workarounds.add_option( - '--bidi-workaround', dest='bidi_workaround', action='store_true', - help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH') - - verbosity.add_option('-q', '--quiet', - action='store_true', dest='quiet', help='activates quiet mode', default=False) - verbosity.add_option( - '--no-warnings', - dest='no_warnings', action='store_true', default=False, - help='Ignore warnings') - verbosity.add_option('-s', '--simulate', - action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False) - verbosity.add_option('--skip-download', - action='store_true', dest='skip_download', help='do not download the video', default=False) - verbosity.add_option('-g', '--get-url', - action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) - verbosity.add_option('-e', '--get-title', - action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) - verbosity.add_option('--get-id', - action='store_true', dest='getid', help='simulate, quiet but print id', default=False) - verbosity.add_option('--get-thumbnail', - action='store_true', dest='getthumbnail', - help='simulate, quiet but print thumbnail URL', default=False) - verbosity.add_option('--get-description', - action='store_true', dest='getdescription', - help='simulate, quiet but print video description', default=False) - verbosity.add_option('--get-duration', - action='store_true', dest='getduration', - help='simulate, quiet but print video length', default=False) - verbosity.add_option('--get-filename', - action='store_true', dest='getfilename', - help='simulate, quiet but print output filename', default=False) - verbosity.add_option('--get-format', - action='store_true', dest='getformat', - help='simulate, quiet but print output format', default=False) - verbosity.add_option('-j', '--dump-json', - action='store_true', dest='dumpjson', - help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False) - verbosity.add_option('--newline', - action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False) - verbosity.add_option('--no-progress', - action='store_true', dest='noprogress', help='do not print progress bar', default=False) - verbosity.add_option('--console-title', - action='store_true', dest='consoletitle', - help='display progress in console titlebar', default=False) - verbosity.add_option('-v', '--verbose', - action='store_true', dest='verbose', help='print various debugging information', default=False) - verbosity.add_option('--dump-intermediate-pages', - action='store_true', dest='dump_intermediate_pages', default=False, - help='print downloaded pages to debug problems (very verbose)') - verbosity.add_option('--write-pages', - action='store_true', dest='write_pages', default=False, - help='Write downloaded intermediary pages to files in the current directory to debug problems') - verbosity.add_option('--youtube-print-sig-code', - action='store_true', dest='youtube_print_sig_code', default=False, - help=optparse.SUPPRESS_HELP) - verbosity.add_option('--print-traffic', - dest='debug_printtraffic', action='store_true', default=False, - help='Display sent and read HTTP traffic') - - - filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') - filesystem.add_option('--id', - action='store_true', dest='useid', help='use only video ID in file name', default=False) - filesystem.add_option('-A', '--auto-number', - action='store_true', dest='autonumber', - help='number downloaded files starting from 00000', default=False) - filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TEMPLATE', - help=('output filename template. Use %(title)s to get the title, ' - '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' - '%(autonumber)s to get an automatically incremented number, ' - '%(ext)s for the filename extension, ' - '%(format)s for the format description (like "22 - 1280x720" or "HD"), ' - '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), ' - '%(upload_date)s for the upload date (YYYYMMDD), ' - '%(extractor)s for the provider (youtube, metacafe, etc), ' - '%(id)s for the video id, %(playlist)s for the playlist the video is in, ' - '%(playlist_index)s for the position in the playlist and %% for a literal percent. ' - '%(height)s and %(width)s for the width and height of the video format. ' - '%(resolution)s for a textual description of the resolution of the video format. ' - 'Use - to output to stdout. Can also be used to download to a different directory, ' - 'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) - filesystem.add_option('--autonumber-size', - dest='autonumber_size', metavar='NUMBER', - help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') - filesystem.add_option('--restrict-filenames', - action='store_true', dest='restrictfilenames', - help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False) - filesystem.add_option('-t', '--title', - action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False) - filesystem.add_option('-l', '--literal', - action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False) - filesystem.add_option('-w', '--no-overwrites', - action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) - filesystem.add_option('-c', '--continue', - action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True) - filesystem.add_option('--no-continue', - action='store_false', dest='continue_dl', - help='do not resume partially downloaded files (restart from beginning)') - filesystem.add_option('--no-part', - action='store_true', dest='nopart', help='do not use .part files', default=False) - filesystem.add_option('--no-mtime', - action='store_false', dest='updatetime', - help='do not use the Last-modified header to set the file modification time', default=True) - filesystem.add_option('--write-description', - action='store_true', dest='writedescription', - help='write video description to a .description file', default=False) - filesystem.add_option('--write-info-json', - action='store_true', dest='writeinfojson', - help='write video metadata to a .info.json file', default=False) - filesystem.add_option('--write-annotations', - action='store_true', dest='writeannotations', - help='write video annotations to a .annotation file', default=False) - filesystem.add_option('--write-thumbnail', - action='store_true', dest='writethumbnail', - help='write thumbnail image to disk', default=False) - filesystem.add_option('--load-info', - dest='load_info_filename', metavar='FILE', - help='json file containing the video information (created with the "--write-json" option)') - filesystem.add_option('--cookies', - dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') - filesystem.add_option( - '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', - help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') - filesystem.add_option( - '--no-cache-dir', action='store_const', const=None, dest='cachedir', - help='Disable filesystem caching') - filesystem.add_option( - '--rm-cache-dir', action='store_true', dest='rm_cachedir', - help='Delete all filesystem cache files') - - - postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, - help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') - postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default') - postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', - help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') - postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None, - help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)') - postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, - help='keeps the video file on disk after the post-processing; the video is erased by default') - postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, - help='do not overwrite post-processed files; the post-processed files are overwritten by default') - postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False, - help='embed subtitles in the video (only for mp4 videos)') - postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False, - help='embed thumbnail in the audio as cover art') - postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False, - help='write metadata to the video file') - postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False, - help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)') - postproc.add_option('--prefer-avconv', action='store_false', dest='prefer_ffmpeg', - help='Prefer avconv over ffmpeg for running the postprocessors (default)') - postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg', - help='Prefer ffmpeg over avconv for running the postprocessors') - postproc.add_option( - '--exec', metavar='CMD', dest='exec_cmd', - help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' ) - - parser.add_option_group(general) - parser.add_option_group(selection) - parser.add_option_group(downloader) - parser.add_option_group(filesystem) - parser.add_option_group(verbosity) - parser.add_option_group(workarounds) - parser.add_option_group(video_format) - parser.add_option_group(subtitles) - parser.add_option_group(authentication) - parser.add_option_group(postproc) - - if overrideArguments is not None: - opts, args = parser.parse_args(overrideArguments) - if opts.verbose: - write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n') - else: - commandLineConf = sys.argv[1:] - if '--ignore-config' in commandLineConf: - systemConf = [] - userConf = [] - else: - systemConf = _readOptions('/etc/youtube-dl.conf') - if '--ignore-config' in systemConf: - userConf = [] - else: - userConf = _readUserConf() - argv = systemConf + userConf + commandLineConf - - opts, args = parser.parse_args(argv) - if opts.verbose: - write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') - write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') - write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') - - return parser, opts, args - - def _real_main(argv=None): # Compatibility fixes for Windows if sys.platform == 'win32': @@ -872,20 +401,7 @@ def _real_main(argv=None): # Remove cache dir if opts.rm_cachedir: - if opts.cachedir is None: - ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)') - else: - if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir): - ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir') - retcode = 141 - else: - ydl.to_screen( - u'Removing cache dir %s .' % opts.cachedir, - skip_eol=True) - if os.path.exists(opts.cachedir): - ydl.to_screen(u'.', skip_eol=True) - shutil.rmtree(opts.cachedir) - ydl.to_screen(u'.') + ydl.cache.remove() # Maybe do nothing if (len(all_urls) < 1) and (opts.load_info_filename is None): diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py new file mode 100644 index 000000000..79ff09f78 --- /dev/null +++ b/youtube_dl/cache.py @@ -0,0 +1,94 @@ +from __future__ import unicode_literals + +import errno +import io +import json +import os +import re +import shutil +import traceback + +from .utils import ( + write_json_file, +) + + +class Cache(object): + def __init__(self, ydl): + self._ydl = ydl + + def _get_root_dir(self): + res = self._ydl.params.get('cachedir') + if res is None: + cache_root = os.environ.get('XDG_CACHE_HOME', '~/.cache') + res = os.path.join(cache_root, 'youtube-dl') + return os.path.expanduser(res) + + def _get_cache_fn(self, section, key, dtype): + assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ + 'invalid section %r' % section + assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key + return os.path.join( + self._get_root_dir(), section, '%s.%s' % (key, dtype)) + + @property + def enabled(self): + return self._ydl.params.get('cachedir') is not False + + def store(self, section, key, data, dtype='json'): + assert dtype in ('json',) + + if not self.enabled: + return + + fn = self._get_cache_fn(section, key, dtype) + try: + try: + os.makedirs(os.path.dirname(fn)) + except OSError as ose: + if ose.errno != errno.EEXIST: + raise + write_json_file(data, fn) + except Exception: + tb = traceback.format_exc() + self._ydl.report_warning( + 'Writing cache to %r failed: %s' % (fn, tb)) + + def load(self, section, key, dtype='json', default=None): + assert dtype in ('json',) + + if not self.enabled: + return default + + cache_fn = self._get_cache_fn(section, key, dtype) + try: + try: + with io.open(cache_fn, 'r', encoding='utf-8') as cachef: + return json.load(cachef) + except ValueError: + try: + file_size = os.path.getsize(cache_fn) + except (OSError, IOError) as oe: + file_size = str(oe) + self._ydl.report_warning( + 'Cache retrieval from %s failed (%s)' % (cache_fn, file_size)) + except IOError: + pass # No cache available + + return default + + def remove(self): + if not self.enabled: + self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)') + return + + cachedir = self._get_root_dir() + if not any((term in cachedir) for term in ('cache', 'tmp')): + raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir) + + self._ydl.to_screen( + 'Removing cache dir %s .' % cachedir, skip_eol=True) + if os.path.exists(cachedir): + self._ydl.to_screen('.', skip_eol=True) + shutil.rmtree(cachedir) + self._ydl.to_screen('.') diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 9f29e2f81..32852f333 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -3,6 +3,7 @@ import subprocess from .common import FileDownloader from ..utils import ( + check_executable, encodeFilename, ) @@ -19,13 +20,11 @@ class HlsFD(FileDownloader): encodeFilename(tmpfilename, for_subprocess=True)] for program in ['avconv', 'ffmpeg']: - try: - subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) + if check_executable(program, ['-version']): break - except (OSError, IOError): - pass else: self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.') + return False cmd = [program] + args retval = subprocess.call(cmd) @@ -42,5 +41,5 @@ class HlsFD(FileDownloader): return True else: self.to_stderr(u"\n") - self.report_error(u'ffmpeg exited with code %d' % retval) + self.report_error(u'%s exited with code %d' % (program, retval)) return False diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index d01d1897e..6caf7451e 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -193,7 +193,8 @@ class HttpFD(FileDownloader): self.to_stderr(u"\n") self.report_error(u'Did not get any data blocks') return False - stream.close() + if tmpfilename != u'-': + stream.close() self.report_finish(data_len_str, (time.time() - start)) if data_len is not None and byte_counter != data_len: raise ContentTooShortError(byte_counter, int(data_len)) diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 68646709a..5eb108302 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -8,9 +8,10 @@ import time from .common import FileDownloader from ..utils import ( + check_executable, + compat_str, encodeFilename, format_bytes, - compat_str, ) @@ -103,9 +104,7 @@ class RtmpFD(FileDownloader): test = self.params.get('test', False) # Check for rtmpdump first - try: - subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) - except (OSError, IOError): + if not check_executable('rtmpdump', ['-h']): self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.') return False diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7f0736ee8..4b83d8d99 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -4,12 +4,13 @@ from .addanime import AddAnimeIE from .adultswim import AdultSwimIE from .aftonbladet import AftonbladetIE from .anitube import AnitubeIE +from .anysex import AnySexIE from .aol import AolIE from .allocine import AllocineIE from .aparat import AparatIE from .appletrailers import AppleTrailersIE from .archiveorg import ArchiveOrgIE -from .ard import ARDIE +from .ard import ARDIE, ARDMediathekIE from .arte import ( ArteTvIE, ArteTVPlus7IE, @@ -23,6 +24,7 @@ from .auengine import AUEngineIE from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE, BandcampAlbumIE from .bbccouk import BBCCoUkIE +from .beeg import BeegIE from .bilibili import BiliBiliIE from .blinkx import BlinkxIE from .bliptv import BlipTVIE, BlipTVUserIE @@ -44,6 +46,7 @@ from .cinemassacre import CinemassacreIE from .clipfish import ClipfishIE from .cliphunter import CliphunterIE from .clipsyndicate import ClipsyndicateIE +from .cloudy import CloudyIE from .clubic import ClubicIE from .cmt import CMTIE from .cnet import CNETIE @@ -65,9 +68,12 @@ from .dailymotion import ( DailymotionUserIE, ) from .daum import DaumIE +from .dbtv import DBTVIE +from .deezer import DeezerPlaylistIE from .dfb import DFBIE from .dotsub import DotsubIE from .dreisat import DreiSatIE +from .drtuber import DrTuberIE from .drtv import DRTVIE from .dump import DumpIE from .defense import DefenseGouvFrIE @@ -84,8 +90,9 @@ from .ellentv import ( EllenTVClipsIE, ) from .elpais import ElPaisIE -from .empflix import EmpflixIE +from .empflix import EMPFlixIE from .engadget import EngadgetIE +from .eporner import EpornerIE from .escapist import EscapistIE from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE @@ -135,6 +142,8 @@ from .grooveshark import GroovesharkIE from .hark import HarkIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE +from .hornbunny import HornBunnyIE +from .hostingbulk import HostingBulkIE from .hotnewhiphop import HotNewHipHopIE from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE @@ -195,6 +204,7 @@ from .mitele import MiTeleIE from .mixcloud import MixcloudIE from .mlb import MLBIE from .mpora import MporaIE +from .moevideo import MoeVideoIE from .mofosex import MofosexIE from .mojvideo import MojvideoIE from .mooshare import MooshareIE @@ -210,6 +220,7 @@ from .mtv import ( MTVIggyIE, ) from .musicplayon import MusicPlayOnIE +from .musicvault import MusicVaultIE from .muzu import MuzuTVIE from .myspace import MySpaceIE from .myspass import MySpassIE @@ -230,6 +241,7 @@ from .niconico import NiconicoIE from .ninegag import NineGagIE from .noco import NocoIE from .normalboots import NormalbootsIE +from .nosvideo import NosVideoIE from .novamov import NovaMovIE from .nowness import NownessIE from .nowvideo import NowVideoIE @@ -257,6 +269,8 @@ from .podomatic import PodomaticIE from .pornhd import PornHdIE from .pornhub import PornHubIE from .pornotube import PornotubeIE +from .pornoxo import PornoXOIE +from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .pyvideo import PyvideoIE from .radiofrance import RadioFranceIE @@ -288,6 +302,7 @@ from .scivee import SciVeeIE from .screencast import ScreencastIE from .servingsys import ServingSysIE from .shared import SharedIE +from .sharesix import ShareSixIE from .sina import SinaIE from .slideshare import SlideshareIE from .slutload import SlutloadIE @@ -313,13 +328,15 @@ from .southpark import ( ) from .space import SpaceIE from .spankwire import SpankwireIE -from .spiegel import SpiegelIE +from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE from .spike import SpikeIE +from .sportdeutschland import SportDeutschlandIE from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE +from .sunporno import SunPornoIE from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE from .sztvhu import SztvHuIE @@ -332,6 +349,7 @@ from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE +from .telemb import TeleMBIE from .tenplay import TenPlayIE from .testurl import TestURLIE from .tf1 import TF1IE @@ -339,6 +357,7 @@ from .theplatform import ThePlatformIE from .thisav import ThisAVIE from .tinypic import TinyPicIE from .tlc import TlcIE, TlcDeIE +from .tnaflix import TNAFlixIE from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE @@ -347,6 +366,7 @@ from .trutube import TruTubeIE from .tube8 import Tube8IE from .tudou import TudouIE from .tumblr import TumblrIE +from .turbo import TurboIE from .tutv import TutvIE from .tvigle import TvigleIE from .tvp import TvpIE @@ -364,6 +384,7 @@ from .veehd import VeeHDIE from .veoh import VeohIE from .vesti import VestiIE from .vevo import VevoIE +from .vgtv import VGTVIE from .vh1 import VH1IE from .viddler import ViddlerIE from .videobam import VideoBamIE @@ -391,6 +412,7 @@ from .vine import ( from .viki import VikiIE from .vk import VKIE from .vodlocker import VodlockerIE +from .vporn import VpornIE from .vube import VubeIE from .vuclip import VuClipIE from .vulture import VultureIE diff --git a/youtube_dl/extractor/academicearth.py b/youtube_dl/extractor/academicearth.py index 59d3bbba4..c983ef0f5 100644 --- a/youtube_dl/extractor/academicearth.py +++ b/youtube_dl/extractor/academicearth.py @@ -7,6 +7,15 @@ from .common import InfoExtractor class AcademicEarthCourseIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P[^?#/]+)' IE_NAME = 'AcademicEarth:Course' + _TEST = { + 'url': 'http://academicearth.org/playlists/laws-of-nature/', + 'info_dict': { + 'id': 'laws-of-nature', + 'title': 'Laws of Nature', + 'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.', + }, + 'playlist_count': 4, + } def _real_extract(self, url): m = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index a00bfcb35..b4b40f2d4 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -75,7 +75,9 @@ class AdultSwimIE(InfoExtractor): video_path = mobj.group('path') webpage = self._download_webpage(url, video_path) - episode_id = self._html_search_regex(r'', webpage, 'episode_id') + episode_id = self._html_search_regex( + r'', + webpage, 'episode_id') title = self._og_search_title(webpage) index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id @@ -97,7 +99,9 @@ class AdultSwimIE(InfoExtractor): duration = segment_el.attrib.get('duration') segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id - idoc = self._download_xml(segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information') + idoc = self._download_xml( + segment_url, segment_title, + 'Downloading segment information', 'Unable to download segment information') formats = [] file_els = idoc.findall('.//files/file') diff --git a/youtube_dl/extractor/anysex.py b/youtube_dl/extractor/anysex.py new file mode 100644 index 000000000..bc64423a3 --- /dev/null +++ b/youtube_dl/extractor/anysex.py @@ -0,0 +1,61 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + int_or_none, +) + + +class AnySexIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?anysex\.com/(?P\d+)' + _TEST = { + 'url': 'http://anysex.com/156592/', + 'md5': '023e9fbb7f7987f5529a394c34ad3d3d', + 'info_dict': { + 'id': '156592', + 'ext': 'mp4', + 'title': 'Busty and sexy blondie in her bikini strips for you', + 'description': 'md5:de9e418178e2931c10b62966474e1383', + 'categories': ['Erotic'], + 'duration': 270, + 'age_limit': 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL') + + title = self._html_search_regex(r'(.*?)', webpage, 'title') + description = self._html_search_regex( + r'
([^<]+)
', webpage, 'description', fatal=False) + thumbnail = self._html_search_regex( + r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False) + + categories = re.findall( + r'([^<]+)', webpage) + + duration = parse_duration(self._search_regex( + r'Duration: (\d+:\d+)', webpage, 'duration', fatal=False)) + view_count = int_or_none(self._html_search_regex( + r'Views: (\d+)', webpage, 'view count', fatal=False)) + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'categories': categories, + 'duration': duration, + 'view_count': view_count, + 'age_limit': 18, + } diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index a7bfe5a5c..47f8e4157 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -21,7 +21,7 @@ class AolIE(InfoExtractor): (?:$|\?) ''' - _TEST = { + _TESTS = [{ 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', 'md5': '18ef68f48740e86ae94b98da815eec42', 'info_dict': { @@ -30,7 +30,14 @@ class AolIE(InfoExtractor): 'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam', }, 'add_ie': ['FiveMin'], - } + }, { + 'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316', + 'info_dict': { + 'id': '152147', + 'title': 'Brace Yourself - Today\'s Weirdest News', + }, + 'playlist_mincount': 10, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 7f0da8ab6..12457f0f9 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -10,10 +10,15 @@ from ..utils import ( qualities, compat_urllib_parse_urlparse, compat_urllib_parse, + int_or_none, + parse_duration, + unified_strdate, + xpath_text, ) -class ARDIE(InfoExtractor): +class ARDMediathekIE(InfoExtractor): + IE_NAME = 'ARD:mediathek' _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' _TESTS = [{ @@ -128,3 +133,61 @@ class ARDIE(InfoExtractor): 'formats': formats, 'thumbnail': thumbnail, } + + +class ARDIE(InfoExtractor): + _VALID_URL = '(?Phttps?://(www\.)?daserste\.de/[^?#]+/videos/(?P[^/?#]+)-(?P[0-9]+))\.html' + _TEST = { + 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html', + 'md5': 'd216c3a86493f9322545e045ddc3eb35', + 'info_dict': { + 'display_id': 'die-story-im-ersten-mission-unter-falscher-flagge', + 'id': '100', + 'ext': 'mp4', + 'duration': 2600, + 'title': 'Die Story im Ersten: Mission unter falscher Flagge', + 'upload_date': '20140804', + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + + player_url = mobj.group('mainurl') + '~playerXml.xml' + doc = self._download_xml(player_url, display_id) + video_node = doc.find('./video') + upload_date = unified_strdate(xpath_text( + video_node, './broadcastDate')) + thumbnail = xpath_text(video_node, './/teaserImage//variant/url') + + formats = [] + for a in video_node.findall('.//asset'): + f = { + 'format_id': a.attrib['type'], + 'width': int_or_none(a.find('./frameWidth').text), + 'height': int_or_none(a.find('./frameHeight').text), + 'vbr': int_or_none(a.find('./bitrateVideo').text), + 'abr': int_or_none(a.find('./bitrateAudio').text), + 'vcodec': a.find('./codecVideo').text, + 'tbr': int_or_none(a.find('./totalBitrate').text), + } + if a.find('./serverPrefix').text: + f['url'] = a.find('./serverPrefix').text + f['playpath'] = a.find('./fileName').text + else: + f['url'] = a.find('./fileName').text + formats.append(f) + self._sort_formats(formats) + + return { + 'id': mobj.group('id'), + 'formats': formats, + 'display_id': display_id, + 'title': video_node.find('./title').text, + 'duration': parse_duration(video_node.find('./duration').text), + 'upload_date': upload_date, + 'thumbnail': thumbnail, + } + diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 1c72b2ff6..957d35979 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -78,7 +78,8 @@ class ArteTVPlus7IE(InfoExtractor): def _extract_from_webpage(self, webpage, video_id, lang): json_url = self._html_search_regex( - r'arte_vp_url="(.*?)"', webpage, 'json vp url') + [r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'], + webpage, 'json vp url') return self._extract_from_json_url(json_url, video_id, lang) def _extract_from_json_url(self, json_url, video_id, lang): diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index ccd31c4c7..de5d4faf3 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -59,6 +59,13 @@ class BambuserChannelIE(InfoExtractor): _VALID_URL = r'https?://bambuser\.com/channel/(?P.*?)(?:/|#|\?|$)' # The maximum number we can get with each request _STEP = 50 + _TEST = { + 'url': 'http://bambuser.com/channel/pixelversity', + 'info_dict': { + 'title': 'pixelversity', + }, + 'playlist_mincount': 60, + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -73,10 +80,10 @@ class BambuserChannelIE(InfoExtractor): req = compat_urllib_request.Request(req_url) # Without setting this header, we wouldn't get any result req.add_header('Referer', 'http://bambuser.com/channel/%s' % user) - info_json = self._download_webpage(req, user, - 'Downloading page %d' % i) - results = json.loads(info_json)['result'] - if len(results) == 0: + data = self._download_json( + req, user, 'Downloading page %d' % i) + results = data['result'] + if not results: break last_id = results[-1]['vid'] urls.extend(self.url_result(v['page'], 'Bambuser') for v in results) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index dcbbdef43..c569aa4d2 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -96,7 +96,7 @@ class BandcampAlbumIE(InfoExtractor): IE_NAME = 'Bandcamp:album' _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com(?:/album/(?P[^?#]+))' - _TEST = { + _TESTS = [{ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', 'playlist': [ { @@ -118,7 +118,13 @@ class BandcampAlbumIE(InfoExtractor): 'playlistend': 2 }, 'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' - } + }, { + 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', + 'info_dict': { + 'title': 'Hierophany of the Open Grave', + }, + 'playlist_mincount': 9, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py new file mode 100644 index 000000000..314e37f8b --- /dev/null +++ b/youtube_dl/extractor/beeg.py @@ -0,0 +1,65 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class BeegIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)' + _TEST = { + 'url': 'http://beeg.com/5416503', + 'md5': '634526ae978711f6b748fe0dd6c11f57', + 'info_dict': { + 'id': '5416503', + 'ext': 'mp4', + 'title': 'Sultry Striptease', + 'description': 'md5:6db3c6177972822aaba18652ff59c773', + 'categories': list, # NSFW + 'thumbnail': 're:https?://.*\.jpg$', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + quality_arr = self._search_regex( + r'(?s)var\s+qualityArr\s*=\s*{\s*(.+?)\s*}', webpage, 'quality formats') + + formats = [{ + 'url': fmt[1], + 'format_id': fmt[0], + 'height': int(fmt[0][:-1]), + } for fmt in re.findall(r"'([^']+)'\s*:\s*'([^']+)'", quality_arr)] + + self._sort_formats(formats) + + title = self._html_search_regex( + r'<title>([^<]+)\s*-\s*beeg\.?', webpage, 'title') + + description = self._html_search_regex( + r'(?P[0-9]+)x(?P[0-9]+)<\\/b> + \s*\(\s*(?P[0-9]+)\s*kb\\/s''', qd) + if m: + f['width'] = int(m.group('width')) + f['height'] = int(m.group('height')) + f['tbr'] = int(m.group('tbr')) + formats.append(f) + self._sort_formats(formats) + + thumbnail = self._search_regex( + r"var\s+mov_thumb\s*=\s*'([^']+)';", + webpage, 'thumbnail', fatal=False) + duration = int_or_none(self._search_regex( + r'pl_dur\s*=\s*([0-9]+)', webpage, 'duration', fatal=False)) + return { 'id': video_id, 'title': video_title, 'formats': formats, + 'duration': duration, + 'age_limit': self._rta_search(webpage), + 'thumbnail': thumbnail, } diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py new file mode 100644 index 000000000..386f080d2 --- /dev/null +++ b/youtube_dl/extractor/cloudy.py @@ -0,0 +1,108 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_parse_qs, + compat_urllib_parse, + remove_end, + HEADRequest, + compat_HTTPError, +) + + +class CloudyIE(InfoExtractor): + _IE_DESC = 'cloudy.ec and videoraj.ch' + _VALID_URL = r'''(?x) + https?://(?:www\.)?(?Pcloudy\.ec|videoraj\.ch)/ + (?:v/|embed\.php\?id=) + (?P[A-Za-z0-9]+) + ''' + _EMBED_URL = 'http://www.%s/embed.php?id=%s' + _API_URL = 'http://www.%s/api/player.api.php?%s' + _MAX_TRIES = 2 + _TESTS = [ + { + 'url': 'https://www.cloudy.ec/v/af511e2527aac', + 'md5': '5cb253ace826a42f35b4740539bedf07', + 'info_dict': { + 'id': 'af511e2527aac', + 'ext': 'flv', + 'title': 'Funny Cats and Animals Compilation june 2013', + } + }, + { + 'url': 'http://www.videoraj.ch/v/47f399fd8bb60', + 'md5': '7d0f8799d91efd4eda26587421c3c3b0', + 'info_dict': { + 'id': '47f399fd8bb60', + 'ext': 'flv', + 'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?', + } + } + ] + + def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0): + + if try_num > self._MAX_TRIES - 1: + raise ExtractorError('Unable to extract video URL', expected=True) + + form = { + 'file': video_id, + 'key': file_key, + } + + if error_url: + form.update({ + 'numOfErrors': try_num, + 'errorCode': '404', + 'errorUrl': error_url, + }) + + data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form)) + player_data = self._download_webpage( + data_url, video_id, 'Downloading player data') + data = compat_parse_qs(player_data) + + try_num += 1 + + if 'error' in data: + raise ExtractorError( + '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])), + expected=True) + + title = data.get('title', [None])[0] + if title: + title = remove_end(title, '&asdasdas').strip() + + video_url = data.get('url', [None])[0] + + if video_url: + try: + self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL') + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]: + self.report_warning('Invalid video URL, requesting another', video_id) + return self._extract_video(video_host, video_id, file_key, video_url, try_num) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_host = mobj.group('host') + video_id = mobj.group('id') + + url = self._EMBED_URL % (video_host, video_id) + webpage = self._download_webpage(url, video_id) + + file_key = self._search_regex( + r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key') + + return self._extract_video(video_host, video_id, file_key) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index c81ce5a96..035046120 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -43,14 +43,14 @@ class ComedyCentralShowsIE(InfoExtractor): (?Pthedailyshow|thecolbertreport)\.(?:cc\.)?com/ ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P.*)| (?P - (?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P[^/?#]+)) + (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P[^/?#]+)) |(the-colbert-report-(videos|collections)/(?P[0-9]+)/[^/]*/(?P.*?)) |(watch/(?P[^/]*)/(?P.*)) )| (?P extended-interviews/(?P[0-9a-z]+)/(?:playlist_tds_extended_)?(?P.*?)(/.*?)?))) (?:[?#].*|$)''' - _TEST = { + _TESTS = [{ 'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart', 'md5': '4e2f5cb088a83cd8cdb7756132f9739d', 'info_dict': { @@ -61,7 +61,34 @@ class ComedyCentralShowsIE(InfoExtractor): 'uploader': 'thedailyshow', 'title': 'thedailyshow kristen-stewart part 1', } - } + }, { + 'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', + 'only_matching': True, + }, { + 'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news', + 'only_matching': True, + }, { + 'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', + 'only_matching': True, + }, { + 'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3', + 'only_matching': True, + }, { + 'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary', + 'only_matching': True, + }, { + 'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall', + 'only_matching': True, + }, { + 'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights', + 'only_matching': True, + }, { + 'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food', + 'only_matching': True, + }, { + 'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel', + 'only_matching': True, + }] _available_formats = ['3500', '2200', '1700', '1200', '750', '400'] @@ -185,6 +212,9 @@ class ComedyCentralShowsIE(InfoExtractor): 'ext': self._video_extensions.get(format, 'mp4'), 'height': h, 'width': w, + + 'format_note': 'HTTP 400 at the moment (patches welcome!)', + 'preference': -100, }) formats.append({ 'format_id': 'rtmp-%s' % format, diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 69d5f687c..929dd1e97 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import base64 import hashlib import json @@ -114,7 +116,7 @@ class InfoExtractor(object): upload_date: Video upload date (YYYYMMDD). If not explicitly set, calculated from timestamp. uploader_id: Nickname or id of the video uploader. - location: Physical location of the video. + location: Physical location where the video was filmed. subtitles: The subtitle file contents as a dictionary in the format {language: subtitles}. duration: Length of the video in seconds, as an integer. @@ -202,17 +204,17 @@ class InfoExtractor(object): self.report_download_webpage(video_id) elif note is not False: if video_id is None: - self.to_screen(u'%s' % (note,)) + self.to_screen('%s' % (note,)) else: - self.to_screen(u'%s: %s' % (video_id, note)) + self.to_screen('%s: %s' % (video_id, note)) try: return self._downloader.urlopen(url_or_request) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: if errnote is False: return False if errnote is None: - errnote = u'Unable to download webpage' - errmsg = u'%s: %s' % (errnote, compat_str(err)) + errnote = 'Unable to download webpage' + errmsg = '%s: %s' % (errnote, compat_str(err)) if fatal: raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) else: @@ -249,7 +251,7 @@ class InfoExtractor(object): url = url_or_request.get_full_url() except AttributeError: url = url_or_request - self.to_screen(u'Dumping request to ' + url) + self.to_screen('Dumping request to ' + url) dump = base64.b64encode(webpage_bytes).decode('ascii') self._downloader.to_screen(dump) if self._downloader.params.get('write_pages', False): @@ -259,11 +261,11 @@ class InfoExtractor(object): url = url_or_request basen = '%s_%s' % (video_id, url) if len(basen) > 240: - h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest() + h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest() basen = basen[:240 - len(h)] + h raw_filename = basen + '.dump' filename = sanitize_filename(raw_filename, restricted=True) - self.to_screen(u'Saving request to ' + filename) + self.to_screen('Saving request to ' + filename) with open(filename, 'wb') as outf: outf.write(webpage_bytes) @@ -272,14 +274,14 @@ class InfoExtractor(object): except LookupError: content = webpage_bytes.decode('utf-8', 'replace') - if (u'Access to this site is blocked' in content and - u'Websense' in content[:512]): - msg = u'Access to this webpage has been blocked by Websense filtering software in your network.' + if ('Access to this site is blocked' in content and + 'Websense' in content[:512]): + msg = 'Access to this webpage has been blocked by Websense filtering software in your network.' blocked_iframe = self._html_search_regex( r'