10 import xml.etree.ElementTree
19 from .extractor.common import InfoExtractor, SearchInfoExtractor
21 from .extractor.ard import ARDIE
22 from .extractor.arte import ArteTvIE
23 from .extractor.bandcamp import BandcampIE
24 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
25 from .extractor.comedycentral import ComedyCentralIE
26 from .extractor.collegehumor import CollegeHumorIE
27 from .extractor.dailymotion import DailymotionIE
28 from .extractor.depositfiles import DepositFilesIE
29 from .extractor.eighttracks import EightTracksIE
30 from .extractor.escapist import EscapistIE
31 from .extractor.facebook import FacebookIE
32 from .extractor.flickr import FlickrIE
33 from .extractor.funnyordie import FunnyOrDieIE
34 from .extractor.gametrailers import GametrailersIE
35 from .extractor.generic import GenericIE
36 from .extractor.googleplus import GooglePlusIE
37 from .extractor.googlesearch import GoogleSearchIE
38 from .extractor.howcast import HowcastIE
39 from .extractor.hypem import HypemIE
40 from .extractor.ina import InaIE
41 from .extractor.infoq import InfoQIE
42 from .extractor.justintv import JustinTVIE
43 from .extractor.keek import KeekIE
44 from .extractor.liveleak import LiveLeakIE
45 from .extractor.metacafe import MetacafeIE
46 from .extractor.mixcloud import MixcloudIE
47 from .extractor.mtv import MTVIE
48 from .extractor.myspass import MySpassIE
49 from .extractor.myvideo import MyVideoIE
50 from .extractor.nba import NBAIE
51 from .extractor.statigram import StatigramIE
52 from .extractor.photobucket import PhotobucketIE
53 from .extractor.pornotube import PornotubeIE
54 from .extractor.rbmaradio import RBMARadioIE
55 from .extractor.redtube import RedTubeIE
56 from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
57 from .extractor.spiegel import SpiegelIE
58 from .extractor.stanfordoc import StanfordOpenClassroomIE
59 from .extractor.steam import SteamIE
60 from .extractor.teamcoco import TeamcocoIE
61 from .extractor.ted import TEDIE
62 from .extractor.tumblr import TumblrIE
63 from .extractor.ustream import UstreamIE
64 from .extractor.vbox7 import Vbox7IE
65 from .extractor.vimeo import VimeoIE
66 from .extractor.vine import VineIE
67 from .extractor.worldstarhiphop import WorldStarHipHopIE
68 from .extractor.xnxx import XNXXIE
69 from .extractor.xvideos import XVideosIE
70 from .extractor.yahoo import YahooIE, YahooSearchIE
71 from .extractor.youjizz import YouJizzIE
72 from .extractor.youku import YoukuIE
73 from .extractor.youporn import YouPornIE
74 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
75 from .extractor.zdf import ZDFIE
# Extractor for xhamster.com movie pages: _real_extract() downloads the movie
# page and pulls the media URL, title, upload date, uploader id and thumbnail
# out of the HTML with regular expressions.
116 class XHamsterIE(InfoExtractor):
117 """Information Extractor for xHamster"""
# Matches [http://][www.]xhamster.com/movies/<digits>/<anything>.html and
# captures the digits as the named group 'id'.
# NOTE(review): the '.' after 'www' is not escaped, so it matches any single
# character rather than only a literal dot.
118 _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
# Extract the metadata of the movie that `url` points to.
120 def _real_extract(self,url):
121 mobj = re.match(self._VALID_URL, url)
123 video_id = mobj.group('id')
# The page is fetched from a URL rebuilt from the id alone (nothing between
# the id's trailing '/' and '.html'), not from the `url` that was passed in.
124 mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
125 webpage = self._download_webpage(mrss_url, video_id)
# Find the 'srv' (server, may be empty) and 'file' (non-empty) values that are
# embedded in the page as single-quoted key/value pairs.
127 mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
# NOTE(review): presumably raised only when the search above found nothing --
# confirm the guarding condition.
129 raise ExtractorError(u'Unable to extract media URL')
# Empty server: the 'file' value is itself the media URL, percent-decoded.
130 if len(mobj.group('server')) == 0:
131 video_url = compat_urllib_parse.unquote(mobj.group('file'))
# Otherwise the media URL is built as <server>/key=<file>.
133 video_url = mobj.group('server')+'/key='+mobj.group('file')
# The extension is whatever follows the last '.' in the media URL.
134 video_extension = video_url.split('.')[-1]
# The title is the text of the <title> element in front of ' - xHamster.com'.
136 video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
139 # Can't see the description anywhere in the UI
140 # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
141 # webpage, u'description', fatal=False)
142 # if video_description: video_description = unescapeHTML(video_description)
# The upload date is read from a hint='YYYY-MM-DD HH:MM:SS TZ' attribute and
# stored as the concatenated digits YYYYMMDD.
144 mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
146 video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
# NOTE(review): presumably the fallback when no date was found: the date is
# left as None and only a warning is reported -- confirm the branch condition.
148 video_upload_date = None
149 self._downloader.report_warning(u'Unable to extract upload date')
# Uploader id is the link text of the first '/user/...' anchor.
# NOTE(review): default=u'anonymous' presumably is the value used when the
# pattern does not match -- verify against _html_search_regex.
151 video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
152 webpage, u'uploader id', default=u'anonymous')
# Thumbnail URL comes from the page's 'image' value.
# NOTE(review): fatal=False presumably makes a missing thumbnail non-fatal --
# verify against _search_regex.
154 video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
155 webpage, u'thumbnail', fatal=False)
# Entries of the result built from the values extracted above.
160 'ext': video_extension,
161 'title': video_title,
162 # 'description': video_description,
163 'upload_date': video_upload_date,
164 'uploader_id': video_uploader_id,
165 'thumbnail': video_thumbnail
# Builds the list of extractor instances, one per supported extractor class.
# Per the docstring, the first extractor that matches a URL is the one that
# handles it, so the position of each entry in the list is significant.
172 def gen_extractors():
173 """ Return a list of an instance of every supported extractor.
174 The order does matter; the first extractor matched is the one handling the URL.
202 StanfordOpenClassroomIE(),
def get_info_extractor(ie_name):
    """Look up an info extractor class by its short name.

    The class is resolved from this module's global namespace under the key
    ``ie_name + 'IE'`` (e.g. ``'Youtube'`` resolves the global ``YoutubeIE``).
    A ``KeyError`` propagates when no global with that name exists.
    """
    class_name = ie_name + 'IE'
    module_namespace = globals()
    return module_namespace[class_name]