gitweb @ CieloNegro.org - youtube-dl.git/blob - youtube_dl/extractor/worldstarhiphop.py
[pornhub] Extract metadata from JSON-LD (closes #26614)
[youtube-dl.git] / youtube_dl / extractor / worldstarhiphop.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4
5
class WorldStarHipHopIE(InfoExtractor):
    """Extractor for worldstarhiphop.com / worldstarcandy.com video pages.

    Handles both the ``www`` and ``m`` hosts and the ``/videos/`` and
    ``/android/`` paths; the video id is taken from the ``v`` query parameter.
    """

    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?.*?\bv=(?P<id>[^&]+)'
    _TESTS = [{
        'url': 'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO',
        'md5': '9d04de741161603bf7071bbf4e883186',
        'info_dict': {
            'id': 'wshh6a7q1ny0G34ZwuIO',
            'ext': 'mp4',
            'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
        }
    }, {
        'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The page is downloaded and scanned with
        ``_parse_html5_media_entries``.  When that yields nothing, the URL is
        handed over to the ``Generic`` extractor.  Otherwise the first entry
        is returned, completed with the video id and the page title.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        entries = self._parse_html5_media_entries(url, webpage, video_id)

        if not entries:
            # No HTML5 media found on the page: delegate to the generic
            # extractor instead of failing here.
            return self.url_result(url, 'Generic')

        # Both patterns are non-greedy so the capture stops at the first
        # closing tag; a greedy `.*` would run on to the last `</span>` on
        # the same line and pull unrelated text into the title.
        title = self._html_search_regex(
            [r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
             r'<span[^>]+class="tc-sp-pinned-title">(.*?)</span>'],
            webpage, 'title')

        # Only the first parsed entry is used.
        info = entries[0]
        info.update({
            'id': video_id,
            'title': title,
        })
        return info