.*?
(.+?)
',
+ webpage, u'uploader', fatal=False, flags=re.DOTALL)
return [{
'id': video_id,
@@ -4330,18 +4220,13 @@ class FlickrIE(InfoExtractor):
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
webpage = self._download_webpage(webpage_url, video_id)
- mobj = re.search(r"photo_secret: '(\w+)'", webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video secret')
- secret = mobj.group(1)
+ secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')
first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
- mobj = re.search(r'
- (\d+-\d+)
', first_xml)
- if mobj is None:
- raise ExtractorError(u'Unable to extract node_id')
- node_id = mobj.group(1)
+ node_id = self._html_search_regex(r'
- (\d+-\d+)
',
+ first_xml, u'node_id')
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
@@ -4353,22 +4238,14 @@ class FlickrIE(InfoExtractor):
raise ExtractorError(u'Unable to extract video url')
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
- mobj = re.search(r'
(.*?)', data)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video url')
- video_url = mobj.group(1)
+
+ video_url = self._html_search_regex(r'
(.*?)',
+ data, u'video URL')
return [{
'id': video_id,
@@ -4423,9 +4293,112 @@ class TeamcocoIE(InfoExtractor):
'ext': 'mp4',
'title': video_title,
'thumbnail': thumbnail,
- 'description': description,
+ 'description': video_description,
}]
+class XHamsterIE(InfoExtractor):
+ """Information Extractor for xHamster"""
+ _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P
[0-9]+)/.*\.html'
+
+ def _real_extract(self,url):
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('id')
+ mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
+ webpage = self._download_webpage(mrss_url, video_id)
+
+ mobj = re.search(r'\'srv\': \'(?P[^\']*)\',\s*\'file\': \'(?P[^\']+)\',', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract media URL')
+ if len(mobj.group('server')) == 0:
+ video_url = compat_urllib_parse.unquote(mobj.group('file'))
+ else:
+ video_url = mobj.group('server')+'/key='+mobj.group('file')
+ video_extension = video_url.split('.')[-1]
+
+ video_title = self._html_search_regex(r'(?P.+?) - xHamster\.com',
+ webpage, u'title')
+
+ # Can't see the description anywhere in the UI
+ # video_description = self._html_search_regex(r'Description: (?P[^<]+)',
+ # webpage, u'description', fatal=False)
+ # if video_description: video_description = unescapeHTML(video_description)
+
+ mobj = re.search(r'hint=\'(?P[0-9]{4})-(?P[0-9]{2})-(?P[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
+ if mobj:
+ video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
+ else:
+ video_upload_date = None
+ self._downloader.report_warning(u'Unable to extract upload date')
+
+ video_uploader_id = self._html_search_regex(r']+>(?P[^<]+)',
+ webpage, u'uploader id', default=u'anonymous')
+
+ video_thumbnail = self._search_regex(r'\'image\':\'(?P[^\']+)\'',
+ webpage, u'thumbnail', fatal=False)
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': video_extension,
+ 'title': video_title,
+ # 'description': video_description,
+ 'upload_date': video_upload_date,
+ 'uploader_id': video_uploader_id,
+ 'thumbnail': video_thumbnail
+ }]
+
+class HypemIE(InfoExtractor):
+ """Information Extractor for hypem"""
+ _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError(u'Invalid URL: %s' % url)
+ track_id = mobj.group(1)
+
+ data = { 'ax': 1, 'ts': time.time() }
+ data_encoded = compat_urllib_parse.urlencode(data)
+ complete_url = url + "?" + data_encoded
+ request = compat_urllib_request.Request(complete_url)
+ response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
+ cookie = urlh.headers.get('Set-Cookie', '')
+
+ self.report_extraction(track_id)
+
+ html_tracks = self._html_search_regex(r'',
+ response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
+ try:
+ track_list = json.loads(html_tracks)
+ track = track_list[u'tracks'][0]
+ except ValueError:
+ raise ExtractorError(u'Hypemachine contained invalid JSON.')
+
+ key = track[u"key"]
+ track_id = track[u"id"]
+ artist = track[u"artist"]
+ title = track[u"song"]
+
+ serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
+ request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
+ request.add_header('cookie', cookie)
+ song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
+ try:
+ song_data = json.loads(song_data_json)
+ except ValueError:
+ raise ExtractorError(u'Hypemachine contained invalid JSON.')
+ final_url = song_data[u"url"]
+
+ return [{
+ 'id': track_id,
+ 'url': final_url,
+ 'ext': "mp3",
+ 'title': title,
+ 'artist': artist,
+ }]
+
+
def gen_extractors():
""" Return a list of an instance of every supported extractor.
The order does matter; the first extractor matched is the one handling the URL.
@@ -4487,6 +4460,8 @@ def gen_extractors():
VineIE(),
FlickrIE(),
TeamcocoIE(),
+ XHamsterIE(),
+ HypemIE(),
GenericIE()
]