X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fpornhd.py;h=33faf5e583df034aa33c2b47ebcf1683b8ddef0f;hb=d0d93f76ea0dd1dae15bdba6059815d9cc467b05;hp=954dfccb75954d50a9a46bc14bdb1d0dcbd5588c;hpb=cf372f0778e82cdc181a6173909589e640ac29fb;p=youtube-dl.git diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index 954dfccb7..33faf5e58 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -12,7 +12,7 @@ from ..utils import ( class PornHdIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P\d+)(?:/(?P.+))?' + _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P\d+)(?:/(?P.+))?' _TEST = { 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', 'md5': '956b8ca569f7f4d8ec563e2c41598441', @@ -36,26 +36,29 @@ class PornHdIE(InfoExtractor): webpage = self._download_webpage(url, display_id or video_id) title = self._html_search_regex( - r'(.+) porn HD.+?', webpage, 'title') + [r']+class=["\']video-name["\'][^>]*>([^<]+)', + r'(.+?) - .*?[Pp]ornHD.*?'], webpage, 'title') description = self._html_search_regex( - r'
([^<]+)
', webpage, 'description', fatal=False) + r'<(div|p)[^>]+class="description"[^>]*>(?P[^<]+)', webpage, 'view count', fatal=False)) + r'(\d+) views\s*<', webpage, 'view count', fatal=False)) thumbnail = self._search_regex( r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) quality = qualities(['sd', 'hd']) sources = json.loads(js_to_json(self._search_regex( - r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources'))) + r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]", + webpage, 'sources'))) formats = [] - for container, s in sources.items(): - for qname, video_url in s.items(): - formats.append({ - 'url': video_url, - 'container': container, - 'format_id': '%s-%s' % (container, qname), - 'quality': quality(qname), - }) + for qname, video_url in sources.items(): + if not video_url: + continue + formats.append({ + 'url': video_url, + 'format_id': qname, + 'quality': quality(qname), + }) self._sort_formats(formats) return {