From: Sergey M․ Date: Sun, 22 Mar 2015 01:44:28 +0000 (+0600) Subject: Merge branch 'the-daily-show-podcast' of https://github.com/fstirlitz/youtube-dl... X-Git-Url: https://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=ef249a2cd7a7cfbd92a030cb72e238ba4ad52604;hp=9ef4f12b534578ae3d3e47815492c90826c03c36;p=youtube-dl.git Merge branch 'the-daily-show-podcast' of https://github.com/fstirlitz/youtube-dl into fstirlitz-the-daily-show-podcast --- diff --git a/AUTHORS b/AUTHORS index 872da6071..512469f4c 100644 --- a/AUTHORS +++ b/AUTHORS @@ -116,3 +116,4 @@ Duncan Keall Alexander Mamay Devin J. Pohly Eduardo Ferro Aldama +Jeff Buchbinder diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py new file mode 100644 index 000000000..ff66449eb --- /dev/null +++ b/devscripts/generate_aes_testdata.py @@ -0,0 +1,36 @@ +from __future__ import unicode_literals + +import codecs +import subprocess + +import os +import sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.utils import intlist_to_bytes +from youtube_dl.aes import aes_encrypt, key_expansion + +secret_msg = b'Secret message goes here' + + +def hex_str(int_list): + return codecs.encode(intlist_to_bytes(int_list), 'hex') + + +def openssl_encode(algo, key, iv): + cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)] + prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + out, _ = prog.communicate(secret_msg) + return out + +iv = key = [0x20, 0x15] + 14 * [0] + +r = openssl_encode('aes-128-cbc', key, iv) +print('aes_cbc_decrypt') +print(repr(r)) + +password = key +new_key = aes_encrypt(password, key_expansion(password)) +r = openssl_encode('aes-128-ctr', new_key, iv) +print('aes_decrypt_text') +print(repr(r)) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d6a1e67c6..72b365305 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -112,6 +112,7 @@ - **Discovery** - **divxstage**: DivxStage - **Dotsub** + - **DouyuTV** - **DRBonanza** - **Dropbox** - **DrTuber** @@ -342,6 +343,7 @@ - **PornHubPlaylist** - **Pornotube** - **PornoXO** + - **PrimeShareTV** - **PromptFile** - **prosiebensat1**: ProSiebenSat.1 Digital - **Puls4** @@ -367,6 +369,7 @@ - **RTP** - **RTS**: RTS.ch - **rtve.es:alacarta**: RTVE a la carta + - **rtve.es:infantil**: RTVE infantil - **rtve.es:live**: RTVE.es live streams - **RUHD** - **rutube**: Rutube videos @@ -487,6 +490,7 @@ - **Ubu** - **udemy** - **udemy:course** + - **Ultimedia** - **Unistra** - **Urort**: NRK P3 Urørt - **ustream** diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index db8a47d2d..652519831 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -14,6 +14,7 @@ from test.helper import FakeYDL, assertRegexpMatches from youtube_dl import YoutubeDL from youtube_dl.extractor import YoutubeIE from youtube_dl.postprocessor.common import PostProcessor +from youtube_dl.utils import match_filter_func TEST_URL = 'http://localhost/sample.mp4' @@ -339,6 +340,8 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'G') + +class TestYoutubeDL(unittest.TestCase): def test_subtitles(self): def s_formats(lang, autocaption=False): return [{ @@ -461,6 +464,73 @@ class TestFormatSelection(unittest.TestCase): self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) os.unlink(audiofile) + def test_match_filter(self): + class FilterYDL(YDL): + def __init__(self, *args, **kwargs): + super(FilterYDL, self).__init__(*args, **kwargs) + self.params['simulate'] = True + + def process_info(self, info_dict): + super(YDL, self).process_info(info_dict) + + def _match_entry(self, info_dict, incomplete): + res = super(FilterYDL, self)._match_entry(info_dict, incomplete) + if res is None: + self.downloaded_info_dicts.append(info_dict) + return res + + first = { + 'id': '1', + 'url': TEST_URL, + 'title': 'one', + 'extractor': 'TEST', + 'duration': 30, + 'filesize': 10 * 1024, + } + second = { + 'id': '2', + 'url': TEST_URL, + 'title': 'two', + 'extractor': 'TEST', + 'duration': 10, + 'description': 'foo', + 'filesize': 5 * 1024, + } + videos = [first, second] + + def get_videos(filter_=None): + ydl = FilterYDL({'match_filter': filter_}) + for v in videos: + ydl.process_ie_result(v, download=True) + return [v['id'] for v in ydl.downloaded_info_dicts] + + res = get_videos() + self.assertEqual(res, ['1', '2']) + + def f(v): + if v['id'] == '1': + return None + else: + return 'Video id is not 1' + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func('duration < 30') + res = get_videos(f) + self.assertEqual(res, ['2']) + + f = match_filter_func('description = foo') + res = get_videos(f) + self.assertEqual(res, ['2']) + + f = match_filter_func('description =? foo') + res = get_videos(f) + self.assertEqual(res, ['1', '2']) + + f = match_filter_func('filesize > 5KiB') + res = get_videos(f) + self.assertEqual(res, ['1']) + if __name__ == '__main__': unittest.main() diff --git a/test/test_aes.py b/test/test_aes.py new file mode 100644 index 000000000..111b902e1 --- /dev/null +++ b/test/test_aes.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text +from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes +import base64 + +# the encrypted data can be generate with 'devscripts/generate_aes_testdata.py' + + +class TestAES(unittest.TestCase): + def setUp(self): + self.key = self.iv = [0x20, 0x15] + 14 * [0] + self.secret_msg = b'Secret message goes here' + + def test_encrypt(self): + msg = b'message' + key = list(range(16)) + encrypted = aes_encrypt(bytes_to_intlist(msg), key) + decrypted = intlist_to_bytes(aes_decrypt(encrypted, key)) + self.assertEqual(decrypted, msg) + + def test_cbc_decrypt(self): + data = bytes_to_intlist( + b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd" + ) + decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv)) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + + def test_decrypt_text(self): + password = intlist_to_bytes(self.key).decode('utf-8') + encrypted = base64.b64encode( + intlist_to_bytes(self.iv[:8]) + + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' + ) + decrypted = (aes_decrypt_text(encrypted, password, 16)) + self.assertEqual(decrypted, self.secret_msg) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_http.py b/test/test_http.py index bd4d46fef..f2e305b6f 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -8,7 +8,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl import YoutubeDL -from youtube_dl.compat import compat_http_server +from youtube_dl.compat import compat_http_server, compat_urllib_request import ssl import threading @@ -68,5 +68,52 @@ class TestHTTP(unittest.TestCase): r = ydl.extract_info('https://localhost:%d/video.html' % self.port) self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port) + +def _build_proxy_handler(name): + class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + proxy_name = name + + def log_message(self, format, *args): + pass + + def do_GET(self): + self.send_response(200) + self.send_header('Content-Type', 'text/plain; charset=utf-8') + self.end_headers() + self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8')) + return HTTPTestRequestHandler + + +class TestProxy(unittest.TestCase): + def setUp(self): + self.proxy = compat_http_server.HTTPServer( + ('localhost', 0), _build_proxy_handler('normal')) + self.port = self.proxy.socket.getsockname()[1] + self.proxy_thread = threading.Thread(target=self.proxy.serve_forever) + self.proxy_thread.daemon = True + self.proxy_thread.start() + + self.cn_proxy = compat_http_server.HTTPServer( + ('localhost', 0), _build_proxy_handler('cn')) + self.cn_port = self.cn_proxy.socket.getsockname()[1] + self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever) + self.cn_proxy_thread.daemon = True + self.cn_proxy_thread.start() + + def test_proxy(self): + cn_proxy = 'localhost:{0}'.format(self.cn_port) + ydl = YoutubeDL({ + 'proxy': 'localhost:{0}'.format(self.port), + 'cn_verification_proxy': cn_proxy, + }) + url = 'http://foo.com/bar' + response = ydl.urlopen(url).read().decode('utf-8') + self.assertEqual(response, 'normal: {0}'.format(url)) + + req = compat_urllib_request.Request(url) + req.add_header('Ytdl-request-proxy', cn_proxy) + response = ydl.urlopen(req).read().decode('utf-8') + self.assertEqual(response, 'cn: {0}'.format(url)) + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 8f790bf0a..a8ab87685 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -24,6 +24,7 @@ from youtube_dl.utils import ( encodeFilename, escape_rfc3986, escape_url, + ExtractorError, find_xpath_attr, fix_xml_ampersands, InAdvancePagedList, @@ -39,6 +40,7 @@ from youtube_dl.utils import ( read_batch_urls, sanitize_filename, sanitize_path, + sanitize_url_path_consecutive_slashes, shell_quote, smuggle_url, str_to_int, @@ -53,6 +55,7 @@ from youtube_dl.utils import ( urlencode_postdata, version_tuple, xpath_with_ns, + xpath_text, render_table, match_str, ) @@ -168,6 +171,26 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc') + def test_sanitize_url_path_consecutive_slashes(self): + self.assertEqual( + sanitize_url_path_consecutive_slashes('http://hostname/foo//bar/filename.html'), + 'http://hostname/foo/bar/filename.html') + self.assertEqual( + sanitize_url_path_consecutive_slashes('http://hostname//foo/bar/filename.html'), + 'http://hostname/foo/bar/filename.html') + self.assertEqual( + sanitize_url_path_consecutive_slashes('http://hostname//'), + 'http://hostname/') + self.assertEqual( + sanitize_url_path_consecutive_slashes('http://hostname/foo/bar/filename.html'), + 'http://hostname/foo/bar/filename.html') + self.assertEqual( + sanitize_url_path_consecutive_slashes('http://hostname/'), + 'http://hostname/') + self.assertEqual( + sanitize_url_path_consecutive_slashes('http://hostname/abc//'), + 'http://hostname/abc/') + def test_ordered_set(self): self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([]), []) @@ -229,6 +252,17 @@ class TestUtil(unittest.TestCase): self.assertEqual(find('media:song/media:author').text, 'The Author') self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3') + def test_xpath_text(self): + testxml = ''' +
+

Foo

+
+
''' + doc = xml.etree.ElementTree.fromstring(testxml) + self.assertEqual(xpath_text(doc, 'div/p'), 'Foo') + self.assertTrue(xpath_text(doc, 'div/bar') is None) + self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True) + def test_smuggle_url(self): data = {"ö": "ö", "abc": [3]} url = 'https://foo.bar/baz?x=y#a' diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5a83bc956..b5ef5e009 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -328,9 +328,6 @@ class YoutubeDL(object): 'Parameter outtmpl is bytes, but should be a unicode string. ' 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.') - if '%(stitle)s' in self.params.get('outtmpl', ''): - self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') - self._setup_opener() if auto_init: @@ -1218,9 +1215,6 @@ class YoutubeDL(object): if len(info_dict['title']) > 200: info_dict['title'] = info_dict['title'][:197] + '...' - # Keep for backwards compatibility - info_dict['stitle'] = info_dict['title'] - if 'format' not in info_dict: info_dict['format'] = info_dict['ext'] diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e94779d40..a20492fc3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -107,6 +107,7 @@ from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .dfb import DFBIE from .dotsub import DotsubIE +from .douyutv import DouyuTVIE from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE from .drtuber import DrTuberIE @@ -346,6 +347,7 @@ from .npo import ( ) from .nrk import ( NRKIE, + NRKPlaylistIE, NRKTVIE, ) from .ntvde import NTVDeIE @@ -381,6 +383,7 @@ from .pornhub import ( ) from .pornotube import PornotubeIE from .pornoxo import PornoXOIE +from .primesharetv import PrimeShareTVIE from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .puls4 import Puls4IE @@ -537,6 +540,7 @@ from .udemy import ( UdemyIE, UdemyCourseIE ) +from .ultimedia import UltimediaIE from .unistra import UnistraIE from .urort import UrortIE from .ustream import UstreamIE, UstreamChannelIE diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 929dd3cc5..8273bd6c9 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor): formats.append(format) + self._check_formats(formats, video_id) self._sort_formats(formats) info_dict['formats'] = formats diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index e64b88fbc..6ded723c9 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -23,7 +23,6 @@ from ..utils import ( ) from ..aes import ( aes_cbc_decrypt, - inc, ) @@ -102,13 +101,6 @@ class CrunchyrollIE(InfoExtractor): key = obfuscate_key(id) - class Counter: - __value = iv - - def next_value(self): - temp = self.__value - self.__value = inc(self.__value) - return temp decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv)) return zlib.decompress(decrypted_data) diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py new file mode 100644 index 000000000..d7956e6e4 --- /dev/null +++ b/youtube_dl/extractor/douyutv.py @@ -0,0 +1,77 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class DouyuTVIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P[A-Za-z0-9]+)' + _TEST = { + 'url': 'http://www.douyutv.com/iseven', + 'info_dict': { + 'id': 'iseven', + 'ext': 'flv', + 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:9e525642c25a0a24302869937cf69d17', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': '7师傅', + 'uploader_id': '431925', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + config = self._download_json( + 'http://www.douyutv.com/api/client/room/%s' % video_id, video_id) + + data = config['data'] + + error_code = config.get('error', 0) + show_status = data.get('show_status') + if error_code is not 0: + raise ExtractorError( + 'Server reported error %i' % error_code, expected=True) + + # 1 = live, 2 = offline + if show_status == '2': + raise ExtractorError( + 'Live stream is offline', expected=True) + + base_url = data['rtmp_url'] + live_path = data['rtmp_live'] + + title = self._live_title(data['room_name']) + description = data.get('show_details') + thumbnail = data.get('room_src') + + uploader = data.get('nickname') + uploader_id = data.get('owner_uid') + + multi_formats = data.get('rtmp_multi_bitrate') + if not isinstance(multi_formats, dict): + multi_formats = {} + multi_formats['live'] = live_path + + formats = [{ + 'url': '%s/%s' % (base_url, format_path), + 'format_id': format_id, + 'preference': 1 if format_id == 'live' else 0, + } for format_id, format_path in multi_formats.items()] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'formats': formats, + 'is_live': True, + } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4e6927b08..8716e4503 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1006,6 +1006,13 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url')) + # Look for NYTimes player + mobj = re.search( + r']+src=(["\'])(?P(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + # Look for Ooyala videos mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage) or @@ -1268,10 +1275,16 @@ class GenericIE(InfoExtractor): # HTML5 video found = re.findall(r'(?s).*?]*)?\s+src=["\'](.*?)["\']', webpage) if not found: + REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' found = re.search( r'(?i)]+' r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') title = self._html_search_regex( PREFIX + r'm-title="([^"]+)"', webpage, 'title') @@ -99,16 +97,12 @@ class MixcloudIE(InfoExtractor): r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) description = self._og_search_description(webpage) like_count = str_to_int(self._search_regex( - [r'([0-9]+)<'], + r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"', webpage, 'like count', fatal=False)) view_count = str_to_int(self._search_regex( [r'([0-9,.]+)'], webpage, 'play count', fatal=False)) - timestamp = parse_iso8601(self._search_regex( - r'