youtube_dl/extractor/dailymotion.py

   1 import re
   2 import json
   3 import socket
   4
   5 from .common import InfoExtractor
   6 from .subtitles import SubtitlesIE
   7
   8 from ..utils import (
   9     compat_http_client,
  10     compat_urllib_error,
  11     compat_urllib_request,
  12     compat_str,
  13     get_element_by_attribute,
  14     get_element_by_id,
  15
  16     ExtractorError,
  17 )
  18
  19
  20 class DailyMotionSubtitlesIE(SubtitlesIE):
  21
  22     def _get_available_subtitles(self, video_id):
  23         request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
  24         try:
  25             sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
  26         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  27             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
  28             return {}
  29         info = json.loads(sub_list)
  30         if (info['total'] > 0):
  31             sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
  32             return sub_lang_list
  33         self._downloader.report_warning(u'video doesn\'t have subtitles')
  34         return {}
  35
  36     def _request_automatic_caption(self, video_id, webpage):
  37         self._downloader.report_warning(u'Automatic Captions not supported by this server')
  38         return {}
  39
  40
  41 class DailymotionIE(DailyMotionSubtitlesIE):
  42     """Information Extractor for Dailymotion"""
  43
  44     _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
  45     IE_NAME = u'dailymotion'
  46     _TEST = {
  47         u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
  48         u'file': u'x33vw9.mp4',
  49         u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
  50         u'info_dict': {
  51             u"uploader": u"Alex and Van .",
  52             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
  53         }
  54     }
  55
  56     def _real_extract(self, url):
  57         # Extract id and simplified title from URL
  58         mobj = re.match(self._VALID_URL, url)
  59
  60         video_id = mobj.group(1).split('_')[0].split('?')[0]
  61
  62         video_extension = 'mp4'
  63
  64         # Retrieve video webpage to extract further information
  65         request = compat_urllib_request.Request(url)
  66         request.add_header('Cookie', 'family_filter=off')
  67         webpage = self._download_webpage(request, video_id)
  68
  69         # Extract URL, uploader and title from webpage
  70         self.report_extraction(video_id)
  71
  72         video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
  73                                              # Looking for official user
  74                                              r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
  75                                             webpage, 'video uploader')
  76
  77         video_upload_date = None
  78         mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
  79         if mobj is not None:
  80             video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
  81
  82         embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
  83         embed_page = self._download_webpage(embed_url, video_id,
  84                                             u'Downloading embed page')
  85         info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
  86         info = json.loads(info)
  87
  88         # TODO: support choosing qualities
  89
  90         for key in ['stream_h264_hd1080_url', 'stream_h264_hd_url',
  91                     'stream_h264_hq_url', 'stream_h264_url',
  92                     'stream_h264_ld_url']:
  93             if info.get(key):  # key in info and info[key]:
  94                 max_quality = key
  95                 self.to_screen(u'%s: Using %s' % (video_id, key))
  96                 break
  97         else:
  98             raise ExtractorError(u'Unable to extract video URL')
  99         video_url = info[max_quality]
 100
 101         # subtitles
 102         video_subtitles = None
 103         video_webpage = None
 104
 105         if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
 106             video_subtitles = self._extract_subtitles(video_id)
 107         elif self._downloader.params.get('writeautomaticsub', False):
 108             video_subtitles = self._request_automatic_caption(video_id, video_webpage)
 109
 110         if self._downloader.params.get('listsubtitles', False):
 111             self._list_available_subtitles(video_id)
 112             return
 113
 114         return [{
 115             'id':       video_id,
 116             'url':      video_url,
 117             'uploader': video_uploader,
 118             'upload_date':  video_upload_date,
 119             'title':    self._og_search_title(webpage),
 120             'ext':      video_extension,
 121             'subtitles':    video_subtitles,
 122             'thumbnail': info['thumbnail_url']
 123         }]