youtube_dl/extractor/flipagram.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..compat import compat_str
   6 from ..utils import (
   7     int_or_none,
   8     float_or_none,
   9     try_get,
  10     unified_timestamp,
  11 )
  12
  13
  14 class FlipagramIE(InfoExtractor):
  15     _VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
  16     _TEST = {
  17         'url': 'https://flipagram.com/f/nyvTSJMKId',
  18         'md5': '888dcf08b7ea671381f00fab74692755',
  19         'info_dict': {
  20             'id': 'nyvTSJMKId',
  21             'ext': 'mp4',
  22             'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
  23             'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
  24             'duration': 35.571,
  25             'timestamp': 1461244995,
  26             'upload_date': '20160421',
  27             'uploader': 'kitty juria',
  28             'uploader_id': 'sjuria101',
  29             'creator': 'kitty juria',
  30             'view_count': int,
  31             'like_count': int,
  32             'repost_count': int,
  33             'comment_count': int,
  34             'comments': list,
  35             'formats': 'mincount:2',
  36         },
  37     }
  38
  39     def _real_extract(self, url):
  40         video_id = self._match_id(url)
  41         webpage = self._download_webpage(url, video_id)
  42
  43         video_data = self._parse_json(
  44             self._search_regex(
  45                 r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
  46             video_id)
  47
  48         flipagram = video_data['flipagram']
  49         video = flipagram['video']
  50
  51         json_ld = self._search_json_ld(webpage, video_id, default={})
  52         title = json_ld.get('title') or flipagram['captionText']
  53         description = json_ld.get('description') or flipagram.get('captionText')
  54
  55         formats = [{
  56             'url': video['url'],
  57             'width': int_or_none(video.get('width')),
  58             'height': int_or_none(video.get('height')),
  59             'filesize': int_or_none(video_data.get('size')),
  60         }]
  61
  62         preview_url = try_get(
  63             flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
  64         if preview_url:
  65             formats.append({
  66                 'url': preview_url,
  67                 'ext': 'm4a',
  68                 'vcodec': 'none',
  69             })
  70
  71         self._sort_formats(formats)
  72
  73         counts = flipagram.get('counts', {})
  74         user = flipagram.get('user', {})
  75         video_data = flipagram.get('video', {})
  76
  77         thumbnails = [{
  78             'url': self._proto_relative_url(cover['url']),
  79             'width': int_or_none(cover.get('width')),
  80             'height': int_or_none(cover.get('height')),
  81             'filesize': int_or_none(cover.get('size')),
  82         } for cover in flipagram.get('covers', []) if cover.get('url')]
  83
  84         # Note that this only retrieves comments that are initially loaded.
  85         # For videos with large amounts of comments, most won't be retrieved.
  86         comments = []
  87         for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
  88             text = comment.get('comment')
  89             if not text or not isinstance(text, list):
  90                 continue
  91             comments.append({
  92                 'author': comment.get('user', {}).get('name'),
  93                 'author_id': comment.get('user', {}).get('username'),
  94                 'id': comment.get('id'),
  95                 'text': text[0],
  96                 'timestamp': unified_timestamp(comment.get('created')),
  97             })
  98
  99         return {
 100             'id': video_id,
 101             'title': title,
 102             'description': description,
 103             'duration': float_or_none(flipagram.get('duration'), 1000),
 104             'thumbnails': thumbnails,
 105             'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
 106             'uploader': user.get('name'),
 107             'uploader_id': user.get('username'),
 108             'creator': user.get('name'),
 109             'view_count': int_or_none(counts.get('plays')),
 110             'like_count': int_or_none(counts.get('likes')),
 111             'repost_count': int_or_none(counts.get('reflips')),
 112             'comment_count': int_or_none(counts.get('comments')),
 113             'comments': comments,
 114             'formats': formats,
 115         }