(?P<title>.+?) - xHamster\.com

(.*?)', data, u'video URL') return [{ 'id': video_id, 'url': video_url, 'ext': 'mp4', 'title': video_title, 'thumbnail': thumbnail, 'description': video_description, }] class XHamsterIE(InfoExtractor): """Information Extractor for xHamster""" _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P[0-9]+)/.*\.html' def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id webpage = self._download_webpage(mrss_url, video_id) mobj = re.search(r'\'srv\': \'(?P[^\']*)\',\s*\'file\': \'(?P[^\']+)\',', webpage) if mobj is None: raise ExtractorError(u'Unable to extract media URL') if len(mobj.group('server')) == 0: video_url = compat_urllib_parse.unquote(mobj.group('file')) else: video_url = mobj.group('server')+'/key='+mobj.group('file') video_extension = video_url.split('.')[-1] video_title = self._html_search_regex(r'(?P<title>.+?) - xHamster\.com', webpage, u'title') # Can't see the description anywhere in the UI # video_description = self._html_search_regex(r'Description: (?P[^<]+)', # webpage, u'description', fatal=False) # if video_description: video_description = unescapeHTML(video_description) mobj = re.search(r'hint=\'(?P[0-9]{4})-(?P[0-9]{2})-(?P[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage) if mobj: video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d') else: video_upload_date = None self._downloader.report_warning(u'Unable to extract upload date') video_uploader_id = self._html_search_regex(r']+>(?P[^<]+)', webpage, u'uploader id', default=u'anonymous') video_thumbnail = self._search_regex(r'\'image\':\'(?P[^\']+)\'', webpage, u'thumbnail', fatal=False) return [{ 'id': video_id, 'url': video_url, 'ext': video_extension, 'title': video_title, # 'description': video_description, 'upload_date': video_upload_date, 'uploader_id': video_uploader_id, 'thumbnail': video_thumbnail }] class HypemIE(InfoExtractor): """Information Extractor for hypem""" _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: raise ExtractorError(u'Invalid URL: %s' % url) track_id = mobj.group(1) data = { 'ax': 1, 'ts': time.time() } data_encoded = compat_urllib_parse.urlencode(data) complete_url = url + "?" + data_encoded request = compat_urllib_request.Request(complete_url) response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url') cookie = urlh.headers.get('Set-Cookie', '') self.report_extraction(track_id) html_tracks = self._html_search_regex(r'', response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip() try: track_list = json.loads(html_tracks) track = track_list[u'tracks'][0] except ValueError: raise ExtractorError(u'Hypemachine contained invalid JSON.') key = track[u"key"] track_id = track[u"id"] artist = track[u"artist"] title = track[u"song"] serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key)) request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'}) request.add_header('cookie', cookie) song_data_json = self._download_webpage(request, track_id, u'Downloading metadata') try: song_data = json.loads(song_data_json) except ValueError: raise ExtractorError(u'Hypemachine contained invalid JSON.') final_url = song_data[u"url"] return [{ 'id': track_id, 'url': final_url, 'ext': "mp3", 'title': title, 'artist': artist, }] def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. """ return [ YoutubePlaylistIE(), YoutubeChannelIE(), YoutubeUserIE(), YoutubeSearchIE(), YoutubeIE(), MetacafeIE(), DailymotionIE(), GoogleSearchIE(), PhotobucketIE(), YahooIE(), YahooSearchIE(), DepositFilesIE(), FacebookIE(), BlipTVIE(), BlipTVUserIE(), VimeoIE(), MyVideoIE(), ComedyCentralIE(), EscapistIE(), CollegeHumorIE(), XVideosIE(), SoundcloudSetIE(), SoundcloudIE(), InfoQIE(), MixcloudIE(), StanfordOpenClassroomIE(), MTVIE(), YoukuIE(), XNXXIE(), YouJizzIE(), PornotubeIE(), YouPornIE(), GooglePlusIE(), ArteTvIE(), NBAIE(), WorldStarHipHopIE(), JustinTVIE(), FunnyOrDieIE(), SteamIE(), UstreamIE(), RBMARadioIE(), EightTracksIE(), KeekIE(), TEDIE(), MySpassIE(), SpiegelIE(), LiveLeakIE(), ARDIE(), ZDFIE(), TumblrIE(), BandcampIE(), RedTubeIE(), InaIE(), HowcastIE(), VineIE(), FlickrIE(), TeamcocoIE(), XHamsterIE(), HypemIE(), Vbox7IE(), GametrailersIE(), StatigramIE(), GenericIE() ] def get_info_extractor(ie_name): """Returns the info extractor class with the given ie_name""" return globals()[ie_name+'IE']

(.+?)