youtube_dl/extractor/vidme.py

   1 from __future__ import unicode_literals
   2
   3 import itertools
   4
   5 from .common import InfoExtractor
   6 from ..compat import compat_HTTPError
   7 from ..utils import (
   8     ExtractorError,
   9     int_or_none,
  10     float_or_none,
  11     parse_iso8601,
  12     url_or_none,
  13 )
  14
  15
  16 class VidmeIE(InfoExtractor):
  17     IE_NAME = 'vidme'
  18     _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
  19     _TESTS = [{
  20         'url': 'https://vid.me/QNB',
  21         'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
  22         'info_dict': {
  23             'id': 'QNB',
  24             'ext': 'mp4',
  25             'title': 'Fishing for piranha - the easy way',
  26             'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
  27             'thumbnail': r're:^https?://.*\.jpg',
  28             'timestamp': 1406313244,
  29             'upload_date': '20140725',
  30             'age_limit': 0,
  31             'duration': 119.92,
  32             'view_count': int,
  33             'like_count': int,
  34             'comment_count': int,
  35         },
  36     }, {
  37         'url': 'https://vid.me/Gc6M',
  38         'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
  39         'info_dict': {
  40             'id': 'Gc6M',
  41             'ext': 'mp4',
  42             'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
  43             'thumbnail': r're:^https?://.*\.jpg',
  44             'timestamp': 1441211642,
  45             'upload_date': '20150902',
  46             'uploader': 'SunshineM',
  47             'uploader_id': '3552827',
  48             'age_limit': 0,
  49             'duration': 223.72,
  50             'view_count': int,
  51             'like_count': int,
  52             'comment_count': int,
  53         },
  54         'params': {
  55             'skip_download': True,
  56         },
  57     }, {
  58         # tests uploader field
  59         'url': 'https://vid.me/4Iib',
  60         'info_dict': {
  61             'id': '4Iib',
  62             'ext': 'mp4',
  63             'title': 'The Carver',
  64             'description': 'md5:e9c24870018ae8113be936645b93ba3c',
  65             'thumbnail': r're:^https?://.*\.jpg',
  66             'timestamp': 1433203629,
  67             'upload_date': '20150602',
  68             'uploader': 'Thomas',
  69             'uploader_id': '109747',
  70             'age_limit': 0,
  71             'duration': 97.859999999999999,
  72             'view_count': int,
  73             'like_count': int,
  74             'comment_count': int,
  75         },
  76         'params': {
  77             'skip_download': True,
  78         },
  79     }, {
  80         # nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
  81         'url': 'https://vid.me/e/Wmur',
  82         'info_dict': {
  83             'id': 'Wmur',
  84             'ext': 'mp4',
  85             'title': 'naked smoking & stretching',
  86             'thumbnail': r're:^https?://.*\.jpg',
  87             'timestamp': 1430931613,
  88             'upload_date': '20150506',
  89             'uploader': 'naked-yogi',
  90             'uploader_id': '1638622',
  91             'age_limit': 18,
  92             'duration': 653.26999999999998,
  93             'view_count': int,
  94             'like_count': int,
  95             'comment_count': int,
  96         },
  97         'params': {
  98             'skip_download': True,
  99         },
 100     }, {
 101         # nsfw, user-disabled
 102         'url': 'https://vid.me/dzGJ',
 103         'only_matching': True,
 104     }, {
 105         # suspended
 106         'url': 'https://vid.me/Ox3G',
 107         'only_matching': True,
 108     }, {
 109         # deleted
 110         'url': 'https://vid.me/KTPm',
 111         'only_matching': True,
 112     }, {
 113         # no formats in the API response
 114         'url': 'https://vid.me/e5g',
 115         'info_dict': {
 116             'id': 'e5g',
 117             'ext': 'mp4',
 118             'title': 'Video upload (e5g)',
 119             'thumbnail': r're:^https?://.*\.jpg',
 120             'timestamp': 1401480195,
 121             'upload_date': '20140530',
 122             'uploader': None,
 123             'uploader_id': None,
 124             'age_limit': 0,
 125             'duration': 483,
 126             'view_count': int,
 127             'like_count': int,
 128             'comment_count': int,
 129         },
 130         'params': {
 131             'skip_download': True,
 132         },
 133     }]
 134
 135     def _real_extract(self, url):
 136         video_id = self._match_id(url)
 137
 138         try:
 139             response = self._download_json(
 140                 'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
 141         except ExtractorError as e:
 142             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
 143                 response = self._parse_json(e.cause.read(), video_id)
 144             else:
 145                 raise
 146
 147         error = response.get('error')
 148         if error:
 149             raise ExtractorError(
 150                 '%s returned error: %s' % (self.IE_NAME, error), expected=True)
 151
 152         video = response['video']
 153
 154         if video.get('state') == 'deleted':
 155             raise ExtractorError(
 156                 'Vidme said: Sorry, this video has been deleted.',
 157                 expected=True)
 158
 159         if video.get('state') in ('user-disabled', 'suspended'):
 160             raise ExtractorError(
 161                 'Vidme said: This video has been suspended either due to a copyright claim, '
 162                 'or for violating the terms of use.',
 163                 expected=True)
 164
 165         formats = []
 166         for f in video.get('formats', []):
 167             format_url = url_or_none(f.get('uri'))
 168             if not format_url:
 169                 continue
 170             format_type = f.get('type')
 171             if format_type == 'dash':
 172                 formats.extend(self._extract_mpd_formats(
 173                     format_url, video_id, mpd_id='dash', fatal=False))
 174             elif format_type == 'hls':
 175                 formats.extend(self._extract_m3u8_formats(
 176                     format_url, video_id, 'mp4', entry_protocol='m3u8_native',
 177                     m3u8_id='hls', fatal=False))
 178             else:
 179                 formats.append({
 180                     'format_id': f.get('type'),
 181                     'url': format_url,
 182                     'width': int_or_none(f.get('width')),
 183                     'height': int_or_none(f.get('height')),
 184                     'preference': 0 if f.get('type', '').endswith(
 185                         'clip') else 1,
 186                 })
 187
 188         if not formats and video.get('complete_url'):
 189             formats.append({
 190                 'url': video.get('complete_url'),
 191                 'width': int_or_none(video.get('width')),
 192                 'height': int_or_none(video.get('height')),
 193             })
 194
 195         self._sort_formats(formats)
 196
 197         title = video['title']
 198         description = video.get('description')
 199         thumbnail = video.get('thumbnail_url')
 200         timestamp = parse_iso8601(video.get('date_created'), ' ')
 201         uploader = video.get('user', {}).get('username')
 202         uploader_id = video.get('user', {}).get('user_id')
 203         age_limit = 18 if video.get('nsfw') is True else 0
 204         duration = float_or_none(video.get('duration'))
 205         view_count = int_or_none(video.get('view_count'))
 206         like_count = int_or_none(video.get('likes_count'))
 207         comment_count = int_or_none(video.get('comment_count'))
 208
 209         return {
 210             'id': video_id,
 211             'title': title or 'Video upload (%s)' % video_id,
 212             'description': description,
 213             'thumbnail': thumbnail,
 214             'uploader': uploader,
 215             'uploader_id': uploader_id,
 216             'age_limit': age_limit,
 217             'timestamp': timestamp,
 218             'duration': duration,
 219             'view_count': view_count,
 220             'like_count': like_count,
 221             'comment_count': comment_count,
 222             'formats': formats,
 223         }
 224
 225
 226 class VidmeListBaseIE(InfoExtractor):
 227     # Max possible limit according to https://docs.vid.me/#api-Videos-List
 228     _LIMIT = 100
 229
 230     def _entries(self, user_id, user_name):
 231         for page_num in itertools.count(1):
 232             page = self._download_json(
 233                 'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
 234                 % (self._API_ITEM, user_id, self._LIMIT, (page_num - 1) * self._LIMIT),
 235                 user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))
 236
 237             videos = page.get('videos', [])
 238             if not videos:
 239                 break
 240
 241             for video in videos:
 242                 video_url = video.get('full_url') or video.get('embed_url')
 243                 if video_url:
 244                     yield self.url_result(video_url, VidmeIE.ie_key())
 245
 246             total = int_or_none(page.get('page', {}).get('total'))
 247             if total and self._LIMIT * page_num >= total:
 248                 break
 249
 250     def _real_extract(self, url):
 251         user_name = self._match_id(url)
 252
 253         user_id = self._download_json(
 254             'https://api.vid.me/userByUsername?username=%s' % user_name,
 255             user_name)['user']['user_id']
 256
 257         return self.playlist_result(
 258             self._entries(user_id, user_name), user_id,
 259             '%s - %s' % (user_name, self._TITLE))
 260
 261
 262 class VidmeUserIE(VidmeListBaseIE):
 263     IE_NAME = 'vidme:user'
 264     _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})(?!/likes)(?:[^\da-zA-Z_-]|$)'
 265     _API_ITEM = 'list'
 266     _TITLE = 'Videos'
 267     _TESTS = [{
 268         'url': 'https://vid.me/MasakoX',
 269         'info_dict': {
 270             'id': '16112341',
 271             'title': 'MasakoX - %s' % _TITLE,
 272         },
 273         'playlist_mincount': 191,
 274     }, {
 275         'url': 'https://vid.me/unsQuare_netWork',
 276         'only_matching': True,
 277     }]
 278
 279
 280 class VidmeUserLikesIE(VidmeListBaseIE):
 281     IE_NAME = 'vidme:user:likes'
 282     _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})/likes'
 283     _API_ITEM = 'likes'
 284     _TITLE = 'Likes'
 285     _TESTS = [{
 286         'url': 'https://vid.me/ErinAlexis/likes',
 287         'info_dict': {
 288             'id': '6483530',
 289             'title': 'ErinAlexis - %s' % _TITLE,
 290         },
 291         'playlist_mincount': 415,
 292     }, {
 293         'url': 'https://vid.me/Kaleidoscope-Ish/likes',
 294         'only_matching': True,
 295     }]