X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fdailymotion.py;h=1a41c0db181c43c9c35d24988b9fd118c248f399;hb=3f724339dbe61fe84dd8e66e9c3b74ba6a9c6ddf;hp=dbcf5d6a72a5a8f44e988b506e2361e6279229ec;hpb=c59c3c84ede823e5c97f695ae904545c615e4ded;p=youtube-dl.git diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index dbcf5d6a7..1a41c0db1 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -1,4 +1,4 @@ -#coding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re @@ -6,28 +6,30 @@ import json import itertools from .common import InfoExtractor -from .subtitles import SubtitlesInfoExtractor -from ..utils import ( - compat_urllib_request, +from ..compat import ( compat_str, + compat_urllib_request, +) +from ..utils import ( + ExtractorError, + int_or_none, orderedSet, str_to_int, - int_or_none, - ExtractorError, unescapeHTML, ) + class DailymotionBaseInfoExtractor(InfoExtractor): @staticmethod def _build_request(url): """Build a request with the family filter disabled""" request = compat_urllib_request.Request(url) - request.add_header('Cookie', 'family_filter=off') - request.add_header('Cookie', 'ff=off') + request.add_header('Cookie', 'family_filter=off; ff=off') return request -class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): + +class DailymotionIE(DailymotionBaseInfoExtractor): """Information Extractor for Dailymotion""" _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P[^/?_]+)' @@ -43,13 +45,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): _TESTS = [ { - 'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', - 'md5': '392c4b85a60a90dc4792da41ce3144eb', + 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', + 'md5': '2137c41a8e78554bb09225b8eb322406', 'info_dict': { - 'id': 'x33vw9', + 'id': 'x2iuewm', 'ext': 'mp4', - 'uploader': 'Amphora Alex and Van .', - 'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"', + 'uploader': 'IGN', + 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News', + 'upload_date': '20150306', + 'duration': 74, } }, # Vevo video @@ -83,7 +87,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - url = 'http://www.dailymotion.com/video/%s' % video_id + url = 'https://www.dailymotion.com/video/%s' % video_id # Retrieve video webpage to extract further information request = self._build_request(url) @@ -94,7 +98,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): # It may just embed a vevo video: m_vevo = re.search( - r'[\w]*)', + r'[\w]*)', webpage) if m_vevo is not None: vevo_id = m_vevo.group('id') @@ -104,15 +108,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): age_limit = self._rta_search(webpage) video_upload_date = None - mobj = re.search(r'
([0-9]{2})-([0-9]{2})-([0-9]{4})
', webpage) + mobj = re.search(r'', webpage) if mobj is not None: - video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) + video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3) - embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id - embed_page = self._download_webpage(embed_url, video_id, - 'Downloading embed page') + embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id + embed_request = self._build_request(embed_url) + embed_page = self._download_webpage( + embed_request, video_id, 'Downloading embed page') info = self._search_regex(r'var info = ({.*?}),$', embed_page, - 'video info', flags=re.MULTILINE) + 'video info', flags=re.MULTILINE) info = json.loads(info) if info.get('error') is not None: msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title'] @@ -139,9 +144,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): # subtitles video_subtitles = self.extract_subtitles(video_id, webpage) - if self._downloader.params.get('listsubtitles', False): - self._list_available_subtitles(video_id, webpage) - return view_count = str_to_int(self._search_regex( r'video_views_count[^>]+>\s+([\d\.,]+)', @@ -163,9 +165,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): 'thumbnail': info['thumbnail_url'], 'age_limit': age_limit, 'view_count': view_count, + 'duration': info['duration'] } - def _get_available_subtitles(self, video_id, webpage): + def _get_subtitles(self, video_id, webpage): try: sub_list = self._download_webpage( 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, @@ -175,7 +178,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): return {} info = json.loads(sub_list) if (info['total'] > 0): - sub_lang_list = dict((l['language'], l['url']) for l in info['list']) + sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list']) return sub_lang_list self._downloader.report_warning('video doesn\'t have subtitles') return {} @@ -190,6 +193,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', 'info_dict': { 'title': 'SPORT', + 'id': 'xv4bw_nqtv_sport', }, 'playlist_mincount': 20, }] @@ -206,7 +210,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): if re.search(self._MORE_PAGES_INDICATOR, webpage) is None: break return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') - for video_id in orderedSet(video_ids)] + for video_id in orderedSet(video_ids)] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -223,7 +227,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): class DailymotionUserIE(DailymotionPlaylistIE): IE_NAME = 'dailymotion:user' - _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:(?:old/)?user/)?(?P[^/]+)$' _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' _TESTS = [{ 'url': 'https://www.dailymotion.com/user/nqtv', @@ -237,7 +241,8 @@ class DailymotionUserIE(DailymotionPlaylistIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user = mobj.group('user') - webpage = self._download_webpage(url, user) + webpage = self._download_webpage( + 'https://www.dailymotion.com/user/%s' % user, user) full_user = unescapeHTML(self._html_search_regex( r'' % re.escape(user), webpage, 'user')) @@ -248,3 +253,53 @@ class DailymotionUserIE(DailymotionPlaylistIE): 'title': full_user, 'entries': self._extract_entries(user), } + + +class DailymotionCloudIE(DailymotionBaseInfoExtractor): + _VALID_URL_PREFIX = r'http://api\.dmcloud\.net/(?:player/)?embed/' + _VALID_URL = r'%s[^/]+/(?P[^/?]+)' % _VALID_URL_PREFIX + _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX + + _TESTS = [{ + # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html + # Tested at FranceTvInfo_2 + 'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1', + 'only_matching': True, + }, { + # http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html + 'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1', + 'only_matching': True, + }] + + @classmethod + def _extract_dmcloud_url(self, webpage): + mobj = re.search(r']+src=[\'"](%s)[\'"]' % self._VALID_EMBED_URL, webpage) + if mobj: + return mobj.group(1) + + mobj = re.search( + r']+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % self._VALID_EMBED_URL, + webpage) + if mobj: + return mobj.group(1) + + def _real_extract(self, url): + video_id = self._match_id(url) + + request = self._build_request(url) + webpage = self._download_webpage(request, video_id) + + title = self._html_search_regex(r'([^>]+)', webpage, 'title') + + video_info = self._parse_json(self._search_regex( + r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id) + + # TODO: parse ios_url, which is in fact a manifest + video_url = video_info['mp4_url'] + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': video_info.get('thumbnail_url'), + }