-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com"""

    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'

    def _real_extract(self, url):
        """Resolve a depositfiles.com file page to its direct download URL.

        The file page is requested in the English locale with the
        "free download" form field set, and the download form's action URL
        is taken as the media URL.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError when the page cannot be fetched, when the site
        shows an "Attention" restriction notice, or when no download URL is
        found.
        """
        file_id = url.split('/')[-1]
        # Rebuild url in english locale
        url = 'http://depositfiles.com/en/files/' + file_id

        # Retrieve file webpage with 'Free download' button pressed.
        # urlencode() output is percent-encoded ASCII; the request body has to
        # be bytes on Python 3.
        free_download_indication = {'gateway_result': '1'}
        post_data = compat_urllib_parse.urlencode(free_download_indication).encode('ascii')
        request = compat_urllib_request.Request(url, post_data)
        try:
            self.report_download_webpage(file_id)
            webpage = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))

        # read() yields bytes; the patterns below are text patterns.
        # NOTE(review): assumes the page is UTF-8 encoded - confirm.
        webpage = webpage.decode('utf-8', 'replace')

        # Search for the real file URL
        mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if mobj is None:
            # Try to figure out reason of the error.
            notice = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if notice is None:
                raise ExtractorError(u'Unable to extract download URL from: %s' % url)
            restriction_message = re.sub(r'\s+', ' ', notice.group(1)).strip()
            raise ExtractorError(u'%s' % restriction_message)

        file_url = mobj.group(1)
        file_extension = os.path.splitext(file_url)[1][1:]

        # Search for file title
        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')

        # The id comes from the caller-supplied URL, which may be a byte
        # string on Python 2; everything taken from the page is already text.
        if isinstance(file_id, bytes):
            file_id = file_id.decode('utf-8')

        return [{
            'id': file_id,
            'url': file_url,
            'uploader': None,
            'upload_date': None,
            'title': file_title,
            'ext': file_extension,
        }]
-
-
class FacebookIE(InfoExtractor):
    """Information Extractor for Facebook"""

    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
    _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
    _NETRC_MACHINE = 'facebook'
    IE_NAME = u'facebook'

    def report_login(self):
        """Report attempt to log in."""
        self.to_screen(u'Logging in')

    def _real_initialize(self):
        """Log in to Facebook when credentials are available.

        Credentials come from the downloader's 'username'/'password' params
        or, when 'usenetrc' is set, from the .netrc entry for
        _NETRC_MACHINE. Every failure is reported as a warning and the
        method returns without raising.
        """
        if self._downloader is None:
            return

        useremail = None
        password = None
        downloader_params = self._downloader.params

        # Attempt to use provided username and password or .netrc data
        if downloader_params.get('username', None) is not None:
            useremail = downloader_params['username']
            password = downloader_params['password']
        elif downloader_params.get('usenetrc', False):
            try:
                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
                if info is not None:
                    useremail = info[0]
                    password = info[2]
                else:
                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
            except (IOError, netrc.NetrcParseError) as err:
                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
                return

        if useremail is None:
            return

        # Log in
        login_form = {
            'email': useremail,
            'pass': password,
            'login': 'Log+In',
        }
        # urlencode() output is percent-encoded ASCII; the request body has to
        # be bytes on Python 3.
        post_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, post_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return

        # read() yields bytes, so match with a bytes pattern. A login form in
        # the response is treated as a failed login.
        if re.search(br'<form(.*)name="login"(.*)</form>', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')

    def _real_extract(self, url):
        """Extract the video described by a Facebook video/photo URL.

        The player parameters are read from the JSON embedded between two
        fixed JavaScript snippets on the video page. The 'hd_src' URL is
        preferred and 'sd_src' is the fallback.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError for an invalid URL, an unparsable page, or a
        missing video URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('ID')

        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
        webpage = self._download_webpage(url, video_id)

        # The JSON payload sits between these two literal script fragments.
        BEFORE = '{swf.addParam(param[0], param[1]);});\n'
        AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
        m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
        if not m:
            raise ExtractorError(u'Cannot parse data')
        data = dict(json.loads(m.group(1)))
        # 'params' is itself URL-quoted JSON.
        params_raw = compat_urllib_parse.unquote(data['params'])
        params = json.loads(params_raw)
        video_data = params['video_data'][0]

        video_url = video_data.get('hd_src') or video_data['sd_src']
        if not video_url:
            raise ExtractorError(u'Cannot find video URL')
        video_duration = int(video_data['video_duration'])
        thumbnail = video_data['thumbnail_src']

        video_title = self._html_search_regex(
            '<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage, u'title')

        info = {
            'id': video_id,
            'title': video_title,
            'url': video_url,
            'ext': 'mp4',
            'duration': video_duration,
            'thumbnail': thumbnail,
        }
        return [info]
-
-
-
-
-
-
-
class EscapistIE(InfoExtractor):
    """Information extractor for The Escapist """

    _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
    IE_NAME = u'escapist'

    def _real_extract(self, url):
        """Extract an Escapist video from its episode page.

        Metadata is read from the page's <meta> tags. The media URL comes
        from the player configuration whose address is the 'config='
        parameter of the og:video player URL.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError for an invalid URL or an unparsable
        configuration file; the search helpers are called as fatal for the
        player URL, title and config URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        showName = mobj.group('showname')
        videoId = mobj.group('episode')

        self.report_extraction(videoId)
        webpage = self._download_webpage(url, videoId)

        videoDesc = self._html_search_regex(
            '<meta name="description" content="([^"]*)"',
            webpage, u'description', fatal=False)

        imgUrl = self._html_search_regex(
            '<meta property="og:image" content="([^"]*)"',
            webpage, u'thumbnail', fatal=False)

        playerUrl = self._html_search_regex(
            '<meta property="og:video" content="([^"]*)"',
            webpage, u'player url')

        # Only the part after the last ' : ' separator is kept as the title.
        title = self._html_search_regex(
            '<meta name="title" content="([^"]*)"',
            webpage, u'title').split(' : ')[-1]

        configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')
        configUrl = compat_urllib_parse.unquote(configUrl)

        configJSON = self._download_webpage(
            configUrl, videoId,
            u'Downloading configuration',
            u'unable to download configuration')

        # Technically, it's JavaScript, not JSON
        configJSON = configJSON.replace("'", '"')

        try:
            config = json.loads(configJSON)
        except ValueError as err:
            raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))

        playlist = config['playlist']
        # NOTE(review): the second playlist entry is taken as the video;
        # presumably the first one is not the episode itself - confirm.
        videoUrl = playlist[1]['url']

        info = {
            'id': videoId,
            'url': videoUrl,
            'uploader': showName,
            'upload_date': None,
            'title': title,
            'ext': 'mp4',
            'thumbnail': imgUrl,
            'description': videoDesc,
            'player_url': playerUrl,
        }

        return [info]
-
class CollegeHumorIE(InfoExtractor):
    """Information extractor for collegehumor.com"""

    _WORKING = False
    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
    IE_NAME = u'collegehumor'

    def report_manifest(self, video_id):
        """Report that the XML manifest is being downloaded."""
        self.to_screen(u'%s: Downloading XML manifest' % video_id)

    def _real_extract(self, url):
        """Extract a CollegeHumor video via its metadata XML and f4m manifest.

        The metadata XML supplies description, title, thumbnail and the
        manifest URL. The manifest supplies the media node URL and an id,
        from which the first fragment URL is assembled.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError for an invalid URL, a failed download, or
        metadata/manifest XML lacking the expected nodes or attributes.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        info = {
            'id': video_id,
            'uploader': None,
            'upload_date': None,
        }

        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
        try:
            metaXml = compat_urllib_request.urlopen(xmlUrl).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))

        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
            info['description'] = videoNode.findall('./description')[0].text
            info['title'] = videoNode.findall('./caption')[0].text
            info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
            manifest_url = videoNode.findall('./file')[0].text
        except IndexError:
            raise ExtractorError(u'Invalid metadata XML file')

        manifest_url += '?hdcore=2.10.3'
        self.report_manifest(video_id)
        try:
            manifestXml = compat_urllib_request.urlopen(manifest_url).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))

        adoc = xml.etree.ElementTree.fromstring(manifestXml)
        try:
            media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
            node_id = media_node.attrib['url']
            # This id is only used to build the fragment URL; info['id'] keeps
            # the id taken from the page URL.
            manifest_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
        except (IndexError, KeyError):
            # KeyError: the media node has no 'url' attribute.
            raise ExtractorError(u'Invalid manifest file')

        url_pr = compat_urllib_parse_urlparse(manifest_url)
        url = url_pr.scheme + '://' + url_pr.netloc + '/z' + manifest_id[:-2] + '/' + node_id + 'Seg1-Frag1'

        info['url'] = url
        info['ext'] = 'f4f'
        return [info]
-
-
class XVideosIE(InfoExtractor):
    """Information extractor for xvideos.com"""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
    IE_NAME = u'xvideos'

    def _real_extract(self, url):
        """Extract the FLV URL, title and thumbnail from a video page.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError for an invalid URL; the video URL and title
        lookups are fatal, the thumbnail lookup is not.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)

        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        # Extract video URL (stored URL-quoted in the 'flv_url' parameter)
        video_url = compat_urllib_parse.unquote(
            self._search_regex(r'flv_url=(.+?)&', webpage, u'video URL'))

        # Extract title
        video_title = self._html_search_regex(
            r'<title>(.*?)\s+-\s+XVID', webpage, u'title')

        # Extract video thumbnail. The capturing group spans the whole URL so
        # that the result is the full address, not just the image file name.
        video_thumbnail = self._search_regex(
            r'(http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9.]+jpg)',
            webpage, u'thumbnail', fatal=False)

        info = {
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': 'flv',
            'thumbnail': video_thumbnail,
            'description': None,
        }

        return [info]
-
-
class SoundcloudIE(InfoExtractor):
    """Information extractor for soundcloud.com

    The track page URL is resolved to its metadata through the
    resolve.json API endpoint, then the track's stream definitions are
    fetched and the 128 kbit/s HTTP MP3 stream URL is used as the media URL.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
    IE_NAME = u'soundcloud'
    # Client id sent with every API request made by this extractor.
    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'

    def report_resolve(self, video_id):
        """Report that the track id is being resolved."""
        self.to_screen(u'%s: Resolving id' % video_id)

    def _real_extract(self, url):
        """Extract a single SoundCloud track.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError for an invalid URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # extract uploader (which is in the url)
        uploader = mobj.group(1)
        # slug of the song title (also taken from the url)
        slug_title = mobj.group(2)
        full_title = '%s/%s' % (uploader, slug_title)

        self.report_resolve(full_title)

        url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
        resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + self._CLIENT_ID
        info_json = self._download_webpage(resolv_url, full_title, u'Downloading info JSON')

        info = json.loads(info_json)
        video_id = info['id']
        self.report_extraction(full_title)

        streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=' + self._CLIENT_ID
        stream_json = self._download_webpage(
            streams_url, full_title,
            u'Downloading stream definitions',
            u'unable to download stream definitions')

        streams = json.loads(stream_json)
        mediaURL = streams['http_mp3_128_url']
        upload_date = unified_strdate(info['created_at'])

        return [{
            'id': info['id'],
            'url': mediaURL,
            'uploader': info['user']['username'],
            'upload_date': upload_date,
            'title': info['title'],
            'ext': u'mp3',
            'description': info['description'],
        }]
-
class SoundcloudSetIE(InfoExtractor):
    """Information extractor for soundcloud.com sets

    The set page URL is resolved to its metadata through the resolve.json
    API endpoint. For every track listed there, the stream definitions are
    fetched and the 128 kbit/s HTTP MP3 stream URL is used as the media URL.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
    IE_NAME = u'soundcloud:set'
    # Client id sent with every API request made by this extractor.
    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'

    def report_resolve(self, video_id):
        """Report that the set id is being resolved."""
        self.to_screen(u'%s: Resolving id' % video_id)

    def _real_extract(self, url):
        """Extract every track of a SoundCloud set.

        Returns a list with one info dictionary per track. When the resolve
        response carries an 'errors' entry, each error is reported through
        the downloader and None is returned.
        Raises ExtractorError for an invalid URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # extract uploader (which is in the url)
        uploader = mobj.group(1)
        # slug of the set title (also taken from the url)
        slug_title = mobj.group(2)
        full_title = '%s/sets/%s' % (uploader, slug_title)

        self.report_resolve(full_title)

        url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
        resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + self._CLIENT_ID
        info_json = self._download_webpage(resolv_url, full_title)

        info = json.loads(info_json)
        if 'errors' in info:
            for err in info['errors']:
                self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err['error_message']))
            # NOTE(review): returns None rather than a list here; kept as is
            # because the caller's handling is not visible from this class.
            return

        self.report_extraction(full_title)
        videos = []
        for track in info['tracks']:
            video_id = track['id']

            streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=' + self._CLIENT_ID
            stream_json = self._download_webpage(streams_url, video_id, u'Downloading track info JSON')

            self.report_extraction(video_id)
            streams = json.loads(stream_json)
            mediaURL = streams['http_mp3_128_url']

            videos.append({
                'id': video_id,
                'url': mediaURL,
                'uploader': track['user']['username'],
                'upload_date': unified_strdate(track['created_at']),
                'title': track['title'],
                'ext': u'mp3',
                'description': track['description'],
            })
        return videos
-
-
class InfoQIE(InfoExtractor):
    """Information extractor for infoq.com"""
    _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'

    def _real_extract(self, url):
        """Extract the RTMPE stream of an InfoQ presentation page.

        The stream path is stored on the page as a base64-encoded,
        URL-quoted 'jsclassref' value. The video id and extension are
        derived from the last path component of that stream path.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError for an invalid URL, a page without the
        'jsclassref' value, or a stream file name without an extension.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        webpage = self._download_webpage(url, video_id=url)
        self.report_extraction(url)

        # Extract video URL
        mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
        video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id

        # Extract title
        video_title = self._search_regex(
            r'contentTitle = "(.*?)";', webpage, u'title')

        # Extract description
        video_description = self._html_search_regex(
            r'<meta name="description" content="(.*)"(?:\s*/)?>',
            webpage, u'description', fatal=False)

        video_filename = video_url.split('/')[-1]
        # Split on the LAST dot only, so names containing extra dots keep
        # them in the id instead of breaking the two-way unpacking.
        video_id, dot, extension = video_filename.rpartition('.')
        if not dot:
            raise ExtractorError(u'Unable to extract video extension')

        info = {
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': extension,  # Extension is always(?) mp4, but seems to be flv
            'thumbnail': None,
            'description': video_description,
        }

        return [info]
-