X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;ds=inline;f=youtube_dl%2FInfoExtractors.py;h=672ef9eedb40b1f8aa7db86e0a20b591c88511f3;hb=95c29381eb8994370ee3924427ecc344ec891f63;hp=fb10c2ec4aa04c0b14848ca75a028f7fb2d5b7ed;hpb=934858ad86f5b628978d3bcdd7edd765d4590840;p=youtube-dl.git diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index fb10c2ec4..672ef9eed 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1,2781 +1,4 @@ -import base64 -import datetime -import itertools -import netrc -import os -import re -import socket -import time -import email.utils -import xml.etree.ElementTree -import random -import math -import operator -import hashlib -import binascii -import urllib +# Legacy file for backwards compatibility, use youtube_dl.extractor instead! -from .utils import * from .extractor.common import InfoExtractor, SearchInfoExtractor - -from .extractor.ard import ARDIE -from .extractor.arte import ArteTvIE -from .extractor.dailymotion import DailymotionIE -from .extractor.gametrailers import GametrailersIE -from .extractor.generic import GenericIE -from .extractor.metacafe import MetacafeIE -from .extractor.statigram import StatigramIE -from .extractor.photobucket import PhotobucketIE -from .extractor.vimeo import VimeoIE -from .extractor.yahoo import YahooIE, YahooSearchIE -from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE -from .extractor.zdf import ZDFIE - - - - - - - - - - - - - - - - - -class BlipTVUserIE(InfoExtractor): - """Information Extractor for blip.tv users.""" - - _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$' - _PAGE_SIZE = 12 - IE_NAME = u'blip.tv:user' - - def _real_extract(self, url): - # Extract username - mobj = re.match(self._VALID_URL, url) - if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) - - username = mobj.group(1) - - page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1' - - page = self._download_webpage(url, username, u'Downloading user page') - mobj = re.search(r'data-users-id="([^"]+)"', page) - page_base = page_base % mobj.group(1) - - - # Download video ids using BlipTV Ajax calls. Result size per - # query is limited (currently to 12 videos) so we need to query - # page by page until there are no video ids - it means we got - # all of them. - - video_ids = [] - pagenum = 1 - - while True: - url = page_base + "&page=" + str(pagenum) - page = self._download_webpage(url, username, - u'Downloading video ids from page %d' % pagenum) - - # Extract video identifiers - ids_in_page = [] - - for mobj in re.finditer(r'href="/([^"]+)"', page): - if mobj.group(1) not in ids_in_page: - ids_in_page.append(unescapeHTML(mobj.group(1))) - - video_ids.extend(ids_in_page) - - # A little optimization - if current page is not - # "full", ie. does not contain PAGE_SIZE video ids then - # we can assume that this page is the last one - there - # are no more ids on further pages - no need to query - # again. - - if len(ids_in_page) < self._PAGE_SIZE: - break - - pagenum += 1 - - urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids] - url_entries = [self.url_result(url, 'BlipTV') for url in urls] - return [self.playlist_result(url_entries, playlist_title = username)] - - -class DepositFilesIE(InfoExtractor): - """Information extractor for depositfiles.com""" - - _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' - - def _real_extract(self, url): - file_id = url.split('/')[-1] - # Rebuild url in english locale - url = 'http://depositfiles.com/en/files/' + file_id - - # Retrieve file webpage with 'Free download' button pressed - free_download_indication = { 'gateway_result' : '1' } - request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication)) - try: - self.report_download_webpage(file_id) - webpage = compat_urllib_request.urlopen(request).read() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err)) - - # Search for the real file URL - mobj = re.search(r'