gitweb @ CieloNegro.org - youtube-dl.git/commitdiff
Merge pull request #866 from yasoob/master
author Filippo Valsorda <filippo.valsorda@gmail.com>
Tue, 4 Jun 2013 17:39:31 +0000 (10:39 -0700)
committer Filippo Valsorda <filippo.valsorda@gmail.com>
Tue, 4 Jun 2013 17:39:31 +0000 (10:39 -0700)
Added support for XHamster - closes #841

1  2 
youtube_dl/InfoExtractors.py

index 37f9c1449d5be600ff2b42748cfbef122cf0952c,5811ef0da634358ff41cd9b70492522796a34db4..a6294e1e1823bb3c47a8860f6986c7a09f57c92f
@@@ -1099,7 -1099,7 +1099,7 @@@ class VimeoIE(InfoExtractor)
  
          # Extract uploader and uploader_id
          video_uploader = config["video"]["owner"]["name"]
 -        video_uploader_id = config["video"]["owner"]["url"].split('/')[-1]
 +        video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
  
          # Extract video thumbnail
          video_thumbnail = config["video"]["thumbnail"]
@@@ -3980,7 -3980,7 +3980,7 @@@ class SpiegelIE(InfoExtractor)
          video_id = m.group('videoID')
  
          webpage = self._download_webpage(url, video_id)
 -        m = re.search(r'<div class="spVideoTitle">(.*?)</div>', webpage)
 +        m = re.search(r'<div class="module-title">(.*?)</div>', webpage)
          if not m:
              raise ExtractorError(u'Cannot find title')
          video_title = unescapeHTML(m.group(1))
@@@ -4425,6 -4425,63 +4425,63 @@@ class TeamcocoIE(InfoExtractor)
              'thumbnail':   thumbnail,
              'description': description,
          }]
+         
+ class XHamsterIE(InfoExtractor):
+     """Information Extractor for xHamster"""
+     _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
+     def _real_extract(self,url):
+         mobj = re.match(self._VALID_URL, url)
+         video_id = mobj.group('id')
+         mrss_url='http://xhamster.com/movies/%s/.html' % video_id
+         webpage = self._download_webpage(mrss_url, video_id)
+         mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
+         if mobj is None:
+             raise ExtractorError(u'Unable to extract media URL')
+         if len(mobj.group('server')) == 0:
+             video_url = compat_urllib_parse.unquote(mobj.group('file'))
+         else:
+             video_url = mobj.group('server')+'/key='+mobj.group('file')
+         video_extension = video_url.split('.')[-1]
+         mobj = re.search(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage)
+         if mobj is None:
+             raise ExtractorError(u'Unable to extract title')
+         video_title = unescapeHTML(mobj.group('title'))
+         mobj = re.search(r'<span>Description: </span>(?P<description>[^<]+)', webpage)
+         if mobj is None:
+             video_description = u''
+         else:
+             video_description = unescapeHTML(mobj.group('description'))
+         mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
+         if mobj is None:
+             raise ExtractorError(u'Unable to extract upload date')
+         video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
+         mobj = re.search(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^>]+)', webpage)
+         if mobj is None:
+             video_uploader_id = u'anonymous'
+         else:
+             video_uploader_id = mobj.group('uploader_id')
+         mobj = re.search(r'\'image\':\'(?P<thumbnail>[^\']+)\'', webpage)
+         if mobj is None:
+             raise ExtractorError(u'Unable to extract thumbnail URL')
+         video_thumbnail = mobj.group('thumbnail')
+         return [{
+             'id':       video_id,
+             'url':      video_url,
+             'ext':      video_extension,
+             'title':    video_title,
+             'description': video_description,
+             'upload_date': video_upload_date,
+             'uploader_id': video_uploader_id,
+             'thumbnail': video_thumbnail
+         }]
  
  def gen_extractors():
      """ Return a list of an instance of every supported extractor.
          VineIE(),
          FlickrIE(),
          TeamcocoIE(),
+         XHamsterIE(),
          GenericIE()
      ]