1 from __future__ import unicode_literals
5 from .common import InfoExtractor
6 from ..compat import compat_urllib_request
13 from ..aes import aes_decrypt_text
16 class YouPornIE(InfoExtractor):
17 _VALID_URL = r'https?://(?:www\.)?youporn\.com/watch/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
19 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
22 'display_id': 'sex-ed-is-it-safe-to-masturbate-daily',
24 'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
25 'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
26 'thumbnail': 're:^https?://.*\.jpg$',
27 'uploader': 'Ask Dan And Jennifer',
28 'upload_date': '20101221',
29 'average_rating': int,
37 def _real_extract(self, url):
38 mobj = re.match(self._VALID_URL, url)
39 video_id = mobj.group('id')
40 display_id = mobj.group('display_id')
42 request = compat_urllib_request.Request(url)
43 request.add_header('Cookie', 'age_verified=1')
44 webpage = self._download_webpage(request, display_id)
46 title = self._search_regex(
47 [r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>.+?)\1',
48 r'<h1[^>]+class=["\']heading\d?["\'][^>]*>([^<])<'],
49 webpage, 'title', group='title')
53 sources = self._search_regex(
54 r'sources\s*:\s*({.+?})', webpage, 'sources', default=None)
56 for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
60 for _, link in re.findall(
61 r'(?:videoUrl|videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
64 # Fallback #2, this also contains extra low quality 180p format
65 for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
68 # Fallback #3, encrypted links
69 for _, encrypted_link in re.findall(
70 r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
71 links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
74 for video_url in set(unescapeHTML(link) for link in links):
78 # Video URL's path looks like this:
79 # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
80 # We will benefit from it by extracting some metadata
81 mobj = re.search(r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
83 height = int(mobj.group('height'))
84 bitrate = int(mobj.group('bitrate'))
86 'format_id': '%dp-%dk' % (height, bitrate),
91 self._sort_formats(formats)
93 description = self._html_search_regex(
94 r'(?s)<div[^>]+class=["\']video-description["\'][^>]*>(.+?)</div>',
95 webpage, 'description', fatal=False)
96 thumbnail = self._search_regex(
97 r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
98 webpage, 'thumbnail', fatal=False, group='thumbnail')
100 uploader = self._search_regex(
101 r'<div[^>]+class=["\']videoInfoBy["\'][^>]*>\s*By:\s*</div>\s*<a[^>]+href="[^"]*">([^<]+)</a>',
102 webpage, 'uploader', fatal=False)
103 upload_date = unified_strdate(self._html_search_regex(
104 r'(?s)<div[^>]+class=["\']videoInfoTime["\'][^>]*>(.+?)</div>',
105 webpage, 'upload date', fatal=False))
107 age_limit = self._rta_search(webpage)
109 average_rating = int_or_none(self._search_regex(
110 r'<div[^>]+class=["\']videoInfoRating["\'][^>]*>\s*<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>',
111 webpage, 'average rating', fatal=False))
113 view_count = str_to_int(self._search_regex(
114 r'(?s)<div[^>]+class=["\']videoInfoViews["\'][^>]*>.*?([\d,.]+)\s*</div>',
115 webpage, 'view count', fatal=False))
def extract_tag_box(title):
    """Return the link texts found in the tag box headed by *title*.

    Searches ``webpage`` (taken from the enclosing scope) for a
    ``tagBoxTitle`` div whose text starts with *title*, captures the
    ``tagBoxContent`` div that follows it, and collects the text of every
    ``<a href=...>`` element inside that content.

    Returns an empty list when no matching tag box is found.
    """
    tag_box = self._search_regex(
        (r'<div[^>]+class=["\']tagBoxTitle["\'][^>]*>\s*%s\b.*?</div>\s*'
         '<div[^>]+class=["\']tagBoxContent["\']>(.+?)</div>') % re.escape(title),
        webpage, '%s tag box' % title, default=None)
    # The search is given default=None, so a page without this box is
    # expected to yield None here; re.findall() raises TypeError on None,
    # so return early with no entries instead.
    if not tag_box:
        return []
    return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box)
126 categories = extract_tag_box('Category')
127 tags = extract_tag_box('Tags')
131 'display_id': display_id,
133 'description': description,
134 'thumbnail': thumbnail,
135 'uploader': uploader,
136 'upload_date': upload_date,
137 'average_rating': average_rating,
138 'view_count': view_count,
139 'categories': categories,
141 'age_limit': age_limit,