youtube_dl/extractor/vporn.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import int_or_none
   8
   9 class VpornIE(InfoExtractor):
  10     _VALID_URL = r'http?://(?:www\.)?vporn\.com/[a-z]+/(?P<title_dash>[a-z-]+)/(?P<id>\d+)/?'
  11     _TEST = {
  12         'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
  13         'md5': 'facf37c1b86546fa0208058546842c55',
  14         'info_dict': {
  15             'id': '497944',
  16             'ext': 'mp4',
  17             'title': 'Violet On Her 19th Birthday',
  18             'description': 'Violet dances in front of the camera which is sure to get you horny.',
  19             'duration': 393,
  20             'thumbnail': 're:^https?://.*\.jpg$',
  21         }
  22     }
  23
  24     def _real_extract(self, url):
  25         mobj = re.match(self._VALID_URL, url)
  26         video_id = mobj.group('id')
  27
  28         webpage = self._download_webpage(url, video_id)
  29         title = self._html_search_regex(r'<title>(.*?) - Vporn Video</title>', webpage, 'title')
  30         video_url = self._html_search_regex(r'flashvars.videoUrlMedium  = "(.*?)"', webpage, 'video_url')
  31         description = self._html_search_regex(r'<div class="description_txt">(.*?)</div>', webpage, 'description')
  32         thumbnail = 'http://www.vporn.com' + self._html_search_regex(r'flashvars.imageUrl = "(.*?)"', webpage, 'description')
  33
  34         mobj = re.search(r'<span class="f_right">duration (?P<minutes>\d+) min (?P<seconds>\d+) sec </span>', webpage)
  35         duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
  36
  37         mobj = re.search(r'<span>((?P<thousands>\d+),)?(?P<units>\d+) VIEWS</span>', webpage)
  38         try:
  39             view_count = int(mobj.group('units'))
  40             view_count += int(mobj.group('thousands')) * 1000
  41         except:
  42             pass
  43
  44         return {
  45             'id': video_id,
  46             'url': video_url,
  47             'thumbnail': thumbnail,
  48             'title': title,
  49             'description': description,
  50             'duration': int_or_none(duration),
  51             'view_count': int_or_none(view_count),
  52         }