From: Sergey M․ Date: Wed, 27 May 2015 17:22:19 +0000 (+0600) Subject: Merge branch 'akirk-nowtv' X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=d9446c73199d917712e1555fbf662c7542510d0e;hp=b25b645d5106e5b2bf33c640813fe744b63f4730;p=youtube-dl.git Merge branch 'akirk-nowtv' --- diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 891ee620b..c4e3adb67 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -266,7 +266,7 @@ class TestNRKSubtitles(BaseTestSubtitles): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), set(['no'])) - self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a') + self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2') class TestRaiSubtitles(BaseTestSubtitles): diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 17248ccea..3c1e8d526 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -149,7 +149,6 @@ from .extremetube import ExtremeTubeIE from .facebook import FacebookIE from .faz import FazIE from .fc2 import FC2IE -from .firedrive import FiredriveIE from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE @@ -480,7 +479,6 @@ from .smotri import ( SmotriBroadcastIE, ) from .snotr import SnotrIE -from .sockshare import SockshareIE from .sohu import SohuIE from .soundcloud import ( SoundcloudIE, diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 7ca835e31..2103ed73a 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals import re import itertools +import json +import xml.etree.ElementTree as ET from .common import InfoExtractor from ..utils import ( @@ -67,11 +69,19 @@ class BiliBiliIE(InfoExtractor): entries = [] - lq_doc = self._download_xml( + lq_page = self._download_webpage( 'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid, video_id, note='Downloading LQ video info' ) + try: + err_info = json.loads(lq_page) + raise ExtractorError( + 'BiliBili said: ' + err_info['error_text'], expected=True) + except ValueError: + pass + + lq_doc = ET.fromstring(lq_page) lq_durls = lq_doc.findall('./durl') hq_doc = self._download_xml( @@ -80,9 +90,11 @@ class BiliBiliIE(InfoExtractor): note='Downloading HQ video info', fatal=False, ) - hq_durls = hq_doc.findall('./durl') if hq_doc is not False else itertools.repeat(None) - - assert len(lq_durls) == len(hq_durls) + if hq_doc is not False: + hq_durls = hq_doc.findall('./durl') + assert len(lq_durls) == len(hq_durls) + else: + hq_durls = itertools.repeat(None) i = 1 for lq_durl, hq_durl in zip(lq_durls, hq_durls): diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index cf0a7551b..c949a4814 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -60,6 +60,17 @@ class CinemassacreIE(InfoExtractor): 'uploader_id': 'Cinemassacre', 'title': 'AVGN: McKids', } + }, + { + 'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/', + 'md5': '1376908e49572389e7b06251a53cdd08', + 'info_dict': { + 'id': 'Cinemassacre-555779690c440', + 'ext': 'mp4', + 'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!', + 'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays', + 'upload_date': '20150525', + } } ] @@ -72,7 +83,7 @@ class CinemassacreIE(InfoExtractor): playerdata_url = self._search_regex( [ - r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', + r'src="(http://(?:player2\.screenwavemedia\.com|player\.screenwavemedia\.com/play)/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', r']+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', ], webpage, 'player data URL', default=None) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index db10b8d00..70aa4333c 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -225,7 +225,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): class DailymotionUserIE(DailymotionPlaylistIE): IE_NAME = 'dailymotion:user' - _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:(?:old/)?user/)?(?P[^/]+)$' _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' _TESTS = [{ 'url': 'https://www.dailymotion.com/user/nqtv', @@ -239,7 +239,8 @@ class DailymotionUserIE(DailymotionPlaylistIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user = mobj.group('user') - webpage = self._download_webpage(url, user) + webpage = self._download_webpage( + 'https://www.dailymotion.com/user/%s' % user, user) full_user = unescapeHTML(self._html_search_regex( r'' % re.escape(user), webpage, 'user')) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 937b28fcc..82dc27bc6 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -50,7 +50,10 @@ class FacebookIE(InfoExtractor): 'id': '274175099429670', 'ext': 'mp4', 'title': 'Facebook video #274175099429670', - } + }, + 'expected_warnings': [ + 'title' + ] }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, @@ -149,12 +152,12 @@ class FacebookIE(InfoExtractor): raise ExtractorError('Cannot find video formats') video_title = self._html_search_regex( - r'

([^<]*)

', webpage, 'title', - fatal=False) + r']*class="uiHeaderTitle"[^>]*>([^<]*)', webpage, 'title', + default=None) if not video_title: video_title = self._html_search_regex( r'(?s)(.*?)', - webpage, 'alternative title', default=None) + webpage, 'alternative title', fatal=False) video_title = limit_length(video_title, 80) if not video_title: video_title = 'Facebook video #%s' % video_id diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py deleted file mode 100644 index 3191116d9..000000000 --- a/youtube_dl/extractor/firedrive.py +++ /dev/null @@ -1,80 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, - compat_urllib_request, -) -from ..utils import ( - ExtractorError, -) - - -class FiredriveIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \ - '(?:file|embed)/(?P[0-9a-zA-Z]+)' - _FILE_DELETED_REGEX = r'
' - - _TESTS = [{ - 'url': 'https://www.firedrive.com/file/FEB892FA160EBD01', - 'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970', - 'info_dict': { - 'id': 'FEB892FA160EBD01', - 'ext': 'flv', - 'title': 'bbb_theora_486kbit.flv', - 'thumbnail': 're:^http://.*\.jpg$', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - url = 'http://firedrive.com/file/%s' % video_id - webpage = self._download_webpage(url, video_id) - - if re.search(self._FILE_DELETED_REGEX, webpage) is not None: - raise ExtractorError('Video %s does not exist' % video_id, - expected=True) - - fields = dict(re.findall(r'''(?x)(.+)
', - webpage, 'title') - thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage, - 'thumbnail', fatal=False) - if thumbnail is not None: - thumbnail = 'http:' + thumbnail - - ext = self._search_regex(r'type:\s?\'([^\']+)\',', - webpage, 'extension', fatal=False) - video_url = self._search_regex( - r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url') - - formats = [{ - 'format_id': 'sd', - 'url': video_url, - 'ext': ext, - }] - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index c10405f04..925967753 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_urllib_parse, + compat_urlparse, ) from ..utils import ( ExtractorError, @@ -16,7 +17,7 @@ from ..utils import ( class NaverIE(InfoExtractor): _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://tvcast.naver.com/v/81652', 'info_dict': { 'id': '81652', @@ -25,7 +26,18 @@ class NaverIE(InfoExtractor): 'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', 'upload_date': '20130903', }, - } + }, { + 'url': 'http://tvcast.naver.com/v/395837', + 'md5': '638ed4c12012c458fefcddfd01f173cd', + 'info_dict': { + 'id': '395837', + 'ext': 'mp4', + 'title': '9년이 지나도 아픈 기억, 전효성의 아버지', + 'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7', + 'upload_date': '20150519', + }, + 'skip': 'Georestricted', + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -35,7 +47,7 @@ class NaverIE(InfoExtractor): webpage) if m_id is None: m_error = re.search( - r'(?s)
\s*(?:)?\s*

(?P.+?)

\s*
', + r'(?s)
\s*(?:)?\s*

(?P.+?)

\s*
', webpage) if m_error: raise ExtractorError(clean_html(m_error.group('msg')), expected=True) @@ -58,14 +70,18 @@ class NaverIE(InfoExtractor): formats = [] for format_el in urls.findall('EncodingOptions/EncodingOption'): domain = format_el.find('Domain').text + uri = format_el.find('uri').text f = { - 'url': domain + format_el.find('uri').text, + 'url': compat_urlparse.urljoin(domain, uri), 'ext': 'mp4', 'width': int(format_el.find('width').text), 'height': int(format_el.find('height').text), } if domain.startswith('rtmp'): + # urlparse does not support custom schemes + # https://bugs.python.org/issue18828 f.update({ + 'url': domain + uri, 'ext': 'flv', 'rtmp_protocol': '1', # rtmpt }) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 0c8b731cf..daa284ea2 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -71,7 +71,8 @@ class PornHubIE(InfoExtractor): video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) if webpage.find('"encrypted":true') != -1: - password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password')) + password = compat_urllib_parse.unquote_plus( + self._search_regex(r'"video_title":"([^"]+)', webpage, 'password')) video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) formats = [] diff --git a/youtube_dl/extractor/sockshare.py b/youtube_dl/extractor/sockshare.py deleted file mode 100644 index b5fa6f1da..000000000 --- a/youtube_dl/extractor/sockshare.py +++ /dev/null @@ -1,83 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from ..compat import ( - compat_urllib_parse, - compat_urllib_request, -) -from ..utils import ( - determine_ext, - ExtractorError, -) - -from .common import InfoExtractor - - -class SockshareIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?sockshare\.com/file/(?P[0-9A-Za-z]+)' - _FILE_DELETED_REGEX = r'This file doesn\'t exist, or has been removed\.' - _TEST = { - 'url': 'http://www.sockshare.com/file/437BE28B89D799D7', - 'md5': '9d0bf1cfb6dbeaa8d562f6c97506c5bd', - 'info_dict': { - 'id': '437BE28B89D799D7', - 'title': 'big_buck_bunny_720p_surround.avi', - 'ext': 'avi', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - url = 'http://sockshare.com/file/%s' % video_id - webpage = self._download_webpage(url, video_id) - - if re.search(self._FILE_DELETED_REGEX, webpage) is not None: - raise ExtractorError('Video %s does not exist' % video_id, - expected=True) - - confirm_hash = self._html_search_regex(r'''(?x)(.+)', - r'var name = "([^"]+)";'), - webpage, 'title', default=None) - thumbnail = self._html_search_regex( - r'\d+)(?:-\d+)?\.html' - _TESTS = { + _VALID_URL = r'http://(?:videos\.tf1|www\.tfou|www\.tf1)\.fr/.*?-(?P\d+)(?:-\d+)?\.html' + _TESTS = [{ 'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', 'info_dict': { 'id': '10635995', @@ -32,7 +32,10 @@ class TF1IE(InfoExtractor): # Sometimes wat serves the whole file with the --test option 'skip_download': True, }, - } + }, { + 'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url)