youtube_dl/extractor/cloudy.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..compat import (
   8     compat_parse_qs,
   9     compat_HTTPError,
  10 )
  11 from ..utils import (
  12     ExtractorError,
  13     HEADRequest,
  14     remove_end,
  15 )
  16
  17
  18 class CloudyIE(InfoExtractor):
  19     _IE_DESC = 'cloudy.ec'
  20     _VALID_URL = r'''(?x)
  21         https?://(?:www\.)?cloudy\.ec/
  22         (?:v/|embed\.php\?id=)
  23         (?P<id>[A-Za-z0-9]+)
  24         '''
  25     _EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'
  26     _API_URL = 'http://www.cloudy.ec/api/player.api.php'
  27     _MAX_TRIES = 2
  28     _TEST = {
  29         'url': 'https://www.cloudy.ec/v/af511e2527aac',
  30         'md5': '5cb253ace826a42f35b4740539bedf07',
  31         'info_dict': {
  32             'id': 'af511e2527aac',
  33             'ext': 'flv',
  34             'title': 'Funny Cats and Animals Compilation june 2013',
  35         }
  36     }
  37
  38     def _extract_video(self, video_id, file_key, error_url=None, try_num=0):
  39
  40         if try_num > self._MAX_TRIES - 1:
  41             raise ExtractorError('Unable to extract video URL', expected=True)
  42
  43         form = {
  44             'file': video_id,
  45             'key': file_key,
  46         }
  47
  48         if error_url:
  49             form.update({
  50                 'numOfErrors': try_num,
  51                 'errorCode': '404',
  52                 'errorUrl': error_url,
  53             })
  54
  55         player_data = self._download_webpage(
  56             self._API_URL, video_id, 'Downloading player data', query=form)
  57         data = compat_parse_qs(player_data)
  58
  59         try_num += 1
  60
  61         if 'error' in data:
  62             raise ExtractorError(
  63                 '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),
  64                 expected=True)
  65
  66         title = data.get('title', [None])[0]
  67         if title:
  68             title = remove_end(title, '&asdasdas').strip()
  69
  70         video_url = data.get('url', [None])[0]
  71
  72         if video_url:
  73             try:
  74                 self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
  75             except ExtractorError as e:
  76                 if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
  77                     self.report_warning('Invalid video URL, requesting another', video_id)
  78                     return self._extract_video(video_id, file_key, video_url, try_num)
  79
  80         return {
  81             'id': video_id,
  82             'url': video_url,
  83             'title': title,
  84         }
  85
  86     def _real_extract(self, url):
  87         mobj = re.match(self._VALID_URL, url)
  88         video_id = mobj.group('id')
  89
  90         url = self._EMBED_URL % video_id
  91         webpage = self._download_webpage(url, video_id)
  92
  93         file_key = self._search_regex(
  94             [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
  95             webpage, 'file_key')
  96
  97         return self._extract_video(video_id, file_key)