simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
+def preferredencoding():
+    """Get preferred encoding.
+
+    Returns the best encoding scheme for the system, based on
+    locale.getpreferredencoding() and some further tweaks: when the
+    locale lookup fails, or the codec it names cannot encode a simple
+    test string, 'UTF-8' is returned instead.
+    """
+    try:
+        pref = locale.getpreferredencoding()
+        # Probe the codec: an unknown or unusable name raises here.
+        u'TEST'.encode(pref)
+    except Exception:
+        # Deliberately broad (locale.Error, LookupError, TypeError, ...),
+        # but unlike a bare "except:" it lets KeyboardInterrupt and
+        # SystemExit propagate.
+        pref = 'UTF-8'
+    return pref
+
class DownloadError(Exception):
"""Download Error exception.
@staticmethod
def verify_url(url):
- """Verify a URL is valid and data could be downloaded."""
+ """Verify a URL is valid and data could be downloaded. Return real data URL."""
request = urllib2.Request(url, None, std_headers)
data = urllib2.urlopen(request)
data.read(1)
+ url = data.geturl()
data.close()
+ return url
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
def to_stdout(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode."""
if not self.params.get('quiet', False):
- print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(locale.getpreferredencoding()),
+ print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
sys.stdout.flush()
def to_stderr(self, message):
"""Print message to stderr."""
- print >>sys.stderr, message.encode(locale.getpreferredencoding())
+ print >>sys.stderr, message.encode(preferredencoding())
def fixed_template(self):
"""Checks if the output template is fixed."""
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
try:
- self.verify_url(info_dict['url'])
+ info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
raise UnavailableFormatError
# Forced printings
if self.params.get('forcetitle', False):
- print info_dict['title'].encode(locale.getpreferredencoding())
+ print info_dict['title'].encode(preferredencoding())
if self.params.get('forceurl', False):
- print info_dict['url'].encode(locale.getpreferredencoding())
+ print info_dict['url'].encode(preferredencoding())
return
filename = self.params['outtmpl'] % template_dict
except (ValueError, KeyError), err:
self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
- if self.params['nooverwrites'] and os.path.exists(filename):
+ if self.params.get('nooverwrites', False) and os.path.exists(filename):
self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
return
return
try:
- success = self._do_download(filename, info_dict['url'])
+ success = self._do_download(filename, info_dict['url'].encode('utf-8'))
except (OSError, IOError), err:
raise UnavailableFormatError
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
resume_len = os.path.getsize(filename)
else:
resume_len = 0
- if self.params['continuedl'] and resume_len != 0:
+ if self.params.get('continuedl', False) and resume_len != 0:
self.report_resuming_byte(resume_len)
request.add_header('Range','bytes=%d-' % resume_len)
_LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
_NETRC_MACHINE = 'youtube'
- _available_formats = ['22', '35', '18', '5', '17', '13'] # listed in order of priority for -b flag
+ _available_formats = ['22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
_video_extensions = {
'13': '3gp',
'17': 'mp4',
"""Report attempt to extract video information."""
self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
- def report_video_url(self, video_id, video_real_url):
- """Report extracted video URL."""
- self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
-
def report_unavailable_format(self, video_id, format):
"""Report extracted video URL."""
self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
if format_param is not None:
video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
- self.report_video_url(video_id, video_real_url)
# uploader
mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage)
video_uploader = urllib.unquote(mobj.group(1))
# title
- mobj = re.search(r'(?m)&title=([^&]+)(?:&|$)', video_info_webpage)
+ mobj = re.search(r'(?m)&title=([^&]*)(?:&|$)', video_info_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
- video_title = urllib.unquote(mobj.group(1))
+ video_title = urllib.unquote_plus(mobj.group(1))
video_title = video_title.decode('utf-8')
video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
video_title = video_title.replace(os.sep, u'%')
_VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
- _MORE_PAGES_INDICATOR = r'>Next</a>'
+ _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
_youtube_ie = None
_max_youtube_results = 1000
self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
return
- if self._MORE_PAGES_INDICATOR not in page:
+ if re.search(self._MORE_PAGES_INDICATOR, page) is None:
for id in video_ids:
self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
return
class YoutubePlaylistIE(InfoExtractor):
"""Information Extractor for YouTube playlists."""
- _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)'
+ _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
_MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
import getpass
import optparse
+ # Function to update the program file with the latest version from bitbucket.org
+ def update_self(downloader, filename):
+     """Overwrite the program file with the latest stable version.
+
+     downloader -- only used to report progress through to_stdout().
+     filename -- path of the program file to overwrite.
+
+     Terminates through sys.exit() with an error message when the file
+     is not writable, the download fails or returns nothing, or the
+     file cannot be rewritten.
+     """
+     # NOTE(review): the new program is fetched over plain HTTP and is
+     # written over the running script without any integrity check.
+     latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
+     prog_url_template = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl'
+
+     def fetch(url):
+         # urllib2 raises on HTTP error statuses, so an error page can
+         # never end up being treated as the downloaded content.
+         response = urllib2.urlopen(url)
+         try:
+             return response.read()
+         finally:
+             response.close()
+
+     if not os.access(filename, os.W_OK):
+         sys.exit('ERROR: no write permissions on %s' % filename)
+
+     downloader.to_stdout('Updating to latest stable version...')
+     try:
+         latest_version = fetch(latest_url).strip()
+         if not latest_version:
+             raise IOError('empty version string')
+         newcontent = fetch(prog_url_template % latest_version)
+         if not newcontent:
+             raise IOError('empty program file')
+     except (IOError, OSError, httplib.HTTPException, socket.error):
+         sys.exit('ERROR: unable to download latest version')
+
+     try:
+         # Binary mode: store the downloaded bytes exactly as received.
+         stream = open(filename, 'wb')
+         try:
+             stream.write(newcontent)
+         finally:
+             stream.close()
+     except (IOError, OSError):
+         sys.exit('ERROR: unable to overwrite current version')
+     downloader.to_stdout('Updated to version %s' % latest_version)
+
# General configuration
urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
# Parse command line
parser = optparse.OptionParser(
usage='Usage: %prog [options] url...',
- version='2009.06.29',
+ version='INTERNAL',
conflict_handler='resolve',
)
action='help', help='print this help text and exit')
parser.add_option('-v', '--version',
action='version', help='print program version and exit')
+ parser.add_option('-U', '--update',
+ action='store_true', dest='update_self', help='update this program to latest stable version')
parser.add_option('-i', '--ignore-errors',
action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
parser.add_option('-r', '--rate-limit',
parser.add_option_group(filesystem)
(opts, args) = parser.parse_args()
-
+
# Batch file verification
batchurls = []
if opts.batchfile is not None:
all_urls = batchurls + args
# Conflicting, missing and erroneous options
- if len(all_urls) < 1:
- parser.error(u'you must provide at least one URL')
if opts.usenetrc and (opts.username is not None or opts.password is not None):
parser.error(u'using .netrc conflicts with giving username/password')
if opts.password is not None and opts.username is None:
'forcetitle': opts.gettitle,
'simulate': (opts.simulate or opts.geturl or opts.gettitle),
'format': opts.format,
- 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
+ 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
or u'%(id)s.%(ext)s'),
fd.add_info_extractor(youtube_pl_ie)
fd.add_info_extractor(metacafe_ie)
fd.add_info_extractor(youtube_ie)
+
+ # Update version
+ if opts.update_self:
+ update_self(fd, sys.argv[0])
+
+ # Maybe do nothing
+ if len(all_urls) < 1:
+ if not opts.update_self:
+ parser.error(u'you must provide at least one URL')
+ else:
+ sys.exit()
retcode = fd.download(all_urls)
sys.exit(retcode)