youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import collections
  11 import contextlib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import email.header
  16 import errno
  17 import functools
  18 import gzip
  19 import io
  20 import itertools
  21 import json
  22 import locale
  23 import math
  24 import operator
  25 import os
  26 import platform
  27 import random
  28 import re
  29 import socket
  30 import ssl
  31 import subprocess
  32 import sys
  33 import tempfile
  34 import time
  35 import traceback
  36 import xml.etree.ElementTree
  37 import zlib
  38
  39 from .compat import (
  40     compat_HTMLParseError,
  41     compat_HTMLParser,
  42     compat_basestring,
  43     compat_chr,
  44     compat_cookiejar,
  45     compat_ctypes_WINFUNCTYPE,
  46     compat_etree_fromstring,
  47     compat_expanduser,
  48     compat_html_entities,
  49     compat_html_entities_html5,
  50     compat_http_client,
  51     compat_integer_types,
  52     compat_kwargs,
  53     compat_os_name,
  54     compat_parse_qs,
  55     compat_shlex_quote,
  56     compat_str,
  57     compat_struct_pack,
  58     compat_struct_unpack,
  59     compat_urllib_error,
  60     compat_urllib_parse,
  61     compat_urllib_parse_urlencode,
  62     compat_urllib_parse_urlparse,
  63     compat_urllib_parse_unquote_plus,
  64     compat_urllib_request,
  65     compat_urlparse,
  66     compat_xpath,
  67 )
  68
  69 from .socks import (
  70     ProxyType,
  71     sockssocket,
  72 )
  73
  74
  75 def register_socks_protocols():
  76     # "Register" SOCKS protocols
  77     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  78     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  79     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  80         if scheme not in compat_urlparse.uses_netloc:
  81             compat_urlparse.uses_netloc.append(scheme)
  82
  83
  84 # This is not clearly defined otherwise
  85 compiled_regex_type = type(re.compile(''))
  86
  87
  88 def random_user_agent():
  89     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  90     _CHROME_VERSIONS = (
  91         '74.0.3729.129',
  92         '76.0.3780.3',
  93         '76.0.3780.2',
  94         '74.0.3729.128',
  95         '76.0.3780.1',
  96         '76.0.3780.0',
  97         '75.0.3770.15',
  98         '74.0.3729.127',
  99         '74.0.3729.126',
 100         '76.0.3779.1',
 101         '76.0.3779.0',
 102         '75.0.3770.14',
 103         '74.0.3729.125',
 104         '76.0.3778.1',
 105         '76.0.3778.0',
 106         '75.0.3770.13',
 107         '74.0.3729.124',
 108         '74.0.3729.123',
 109         '73.0.3683.121',
 110         '76.0.3777.1',
 111         '76.0.3777.0',
 112         '75.0.3770.12',
 113         '74.0.3729.122',
 114         '76.0.3776.4',
 115         '75.0.3770.11',
 116         '74.0.3729.121',
 117         '76.0.3776.3',
 118         '76.0.3776.2',
 119         '73.0.3683.120',
 120         '74.0.3729.120',
 121         '74.0.3729.119',
 122         '74.0.3729.118',
 123         '76.0.3776.1',
 124         '76.0.3776.0',
 125         '76.0.3775.5',
 126         '75.0.3770.10',
 127         '74.0.3729.117',
 128         '76.0.3775.4',
 129         '76.0.3775.3',
 130         '74.0.3729.116',
 131         '75.0.3770.9',
 132         '76.0.3775.2',
 133         '76.0.3775.1',
 134         '76.0.3775.0',
 135         '75.0.3770.8',
 136         '74.0.3729.115',
 137         '74.0.3729.114',
 138         '76.0.3774.1',
 139         '76.0.3774.0',
 140         '75.0.3770.7',
 141         '74.0.3729.113',
 142         '74.0.3729.112',
 143         '74.0.3729.111',
 144         '76.0.3773.1',
 145         '76.0.3773.0',
 146         '75.0.3770.6',
 147         '74.0.3729.110',
 148         '74.0.3729.109',
 149         '76.0.3772.1',
 150         '76.0.3772.0',
 151         '75.0.3770.5',
 152         '74.0.3729.108',
 153         '74.0.3729.107',
 154         '76.0.3771.1',
 155         '76.0.3771.0',
 156         '75.0.3770.4',
 157         '74.0.3729.106',
 158         '74.0.3729.105',
 159         '75.0.3770.3',
 160         '74.0.3729.104',
 161         '74.0.3729.103',
 162         '74.0.3729.102',
 163         '75.0.3770.2',
 164         '74.0.3729.101',
 165         '75.0.3770.1',
 166         '75.0.3770.0',
 167         '74.0.3729.100',
 168         '75.0.3769.5',
 169         '75.0.3769.4',
 170         '74.0.3729.99',
 171         '75.0.3769.3',
 172         '75.0.3769.2',
 173         '75.0.3768.6',
 174         '74.0.3729.98',
 175         '75.0.3769.1',
 176         '75.0.3769.0',
 177         '74.0.3729.97',
 178         '73.0.3683.119',
 179         '73.0.3683.118',
 180         '74.0.3729.96',
 181         '75.0.3768.5',
 182         '75.0.3768.4',
 183         '75.0.3768.3',
 184         '75.0.3768.2',
 185         '74.0.3729.95',
 186         '74.0.3729.94',
 187         '75.0.3768.1',
 188         '75.0.3768.0',
 189         '74.0.3729.93',
 190         '74.0.3729.92',
 191         '73.0.3683.117',
 192         '74.0.3729.91',
 193         '75.0.3766.3',
 194         '74.0.3729.90',
 195         '75.0.3767.2',
 196         '75.0.3767.1',
 197         '75.0.3767.0',
 198         '74.0.3729.89',
 199         '73.0.3683.116',
 200         '75.0.3766.2',
 201         '74.0.3729.88',
 202         '75.0.3766.1',
 203         '75.0.3766.0',
 204         '74.0.3729.87',
 205         '73.0.3683.115',
 206         '74.0.3729.86',
 207         '75.0.3765.1',
 208         '75.0.3765.0',
 209         '74.0.3729.85',
 210         '73.0.3683.114',
 211         '74.0.3729.84',
 212         '75.0.3764.1',
 213         '75.0.3764.0',
 214         '74.0.3729.83',
 215         '73.0.3683.113',
 216         '75.0.3763.2',
 217         '75.0.3761.4',
 218         '74.0.3729.82',
 219         '75.0.3763.1',
 220         '75.0.3763.0',
 221         '74.0.3729.81',
 222         '73.0.3683.112',
 223         '75.0.3762.1',
 224         '75.0.3762.0',
 225         '74.0.3729.80',
 226         '75.0.3761.3',
 227         '74.0.3729.79',
 228         '73.0.3683.111',
 229         '75.0.3761.2',
 230         '74.0.3729.78',
 231         '74.0.3729.77',
 232         '75.0.3761.1',
 233         '75.0.3761.0',
 234         '73.0.3683.110',
 235         '74.0.3729.76',
 236         '74.0.3729.75',
 237         '75.0.3760.0',
 238         '74.0.3729.74',
 239         '75.0.3759.8',
 240         '75.0.3759.7',
 241         '75.0.3759.6',
 242         '74.0.3729.73',
 243         '75.0.3759.5',
 244         '74.0.3729.72',
 245         '73.0.3683.109',
 246         '75.0.3759.4',
 247         '75.0.3759.3',
 248         '74.0.3729.71',
 249         '75.0.3759.2',
 250         '74.0.3729.70',
 251         '73.0.3683.108',
 252         '74.0.3729.69',
 253         '75.0.3759.1',
 254         '75.0.3759.0',
 255         '74.0.3729.68',
 256         '73.0.3683.107',
 257         '74.0.3729.67',
 258         '75.0.3758.1',
 259         '75.0.3758.0',
 260         '74.0.3729.66',
 261         '73.0.3683.106',
 262         '74.0.3729.65',
 263         '75.0.3757.1',
 264         '75.0.3757.0',
 265         '74.0.3729.64',
 266         '73.0.3683.105',
 267         '74.0.3729.63',
 268         '75.0.3756.1',
 269         '75.0.3756.0',
 270         '74.0.3729.62',
 271         '73.0.3683.104',
 272         '75.0.3755.3',
 273         '75.0.3755.2',
 274         '73.0.3683.103',
 275         '75.0.3755.1',
 276         '75.0.3755.0',
 277         '74.0.3729.61',
 278         '73.0.3683.102',
 279         '74.0.3729.60',
 280         '75.0.3754.2',
 281         '74.0.3729.59',
 282         '75.0.3753.4',
 283         '74.0.3729.58',
 284         '75.0.3754.1',
 285         '75.0.3754.0',
 286         '74.0.3729.57',
 287         '73.0.3683.101',
 288         '75.0.3753.3',
 289         '75.0.3752.2',
 290         '75.0.3753.2',
 291         '74.0.3729.56',
 292         '75.0.3753.1',
 293         '75.0.3753.0',
 294         '74.0.3729.55',
 295         '73.0.3683.100',
 296         '74.0.3729.54',
 297         '75.0.3752.1',
 298         '75.0.3752.0',
 299         '74.0.3729.53',
 300         '73.0.3683.99',
 301         '74.0.3729.52',
 302         '75.0.3751.1',
 303         '75.0.3751.0',
 304         '74.0.3729.51',
 305         '73.0.3683.98',
 306         '74.0.3729.50',
 307         '75.0.3750.0',
 308         '74.0.3729.49',
 309         '74.0.3729.48',
 310         '74.0.3729.47',
 311         '75.0.3749.3',
 312         '74.0.3729.46',
 313         '73.0.3683.97',
 314         '75.0.3749.2',
 315         '74.0.3729.45',
 316         '75.0.3749.1',
 317         '75.0.3749.0',
 318         '74.0.3729.44',
 319         '73.0.3683.96',
 320         '74.0.3729.43',
 321         '74.0.3729.42',
 322         '75.0.3748.1',
 323         '75.0.3748.0',
 324         '74.0.3729.41',
 325         '75.0.3747.1',
 326         '73.0.3683.95',
 327         '75.0.3746.4',
 328         '74.0.3729.40',
 329         '74.0.3729.39',
 330         '75.0.3747.0',
 331         '75.0.3746.3',
 332         '75.0.3746.2',
 333         '74.0.3729.38',
 334         '75.0.3746.1',
 335         '75.0.3746.0',
 336         '74.0.3729.37',
 337         '73.0.3683.94',
 338         '75.0.3745.5',
 339         '75.0.3745.4',
 340         '75.0.3745.3',
 341         '75.0.3745.2',
 342         '74.0.3729.36',
 343         '75.0.3745.1',
 344         '75.0.3745.0',
 345         '75.0.3744.2',
 346         '74.0.3729.35',
 347         '73.0.3683.93',
 348         '74.0.3729.34',
 349         '75.0.3744.1',
 350         '75.0.3744.0',
 351         '74.0.3729.33',
 352         '73.0.3683.92',
 353         '74.0.3729.32',
 354         '74.0.3729.31',
 355         '73.0.3683.91',
 356         '75.0.3741.2',
 357         '75.0.3740.5',
 358         '74.0.3729.30',
 359         '75.0.3741.1',
 360         '75.0.3741.0',
 361         '74.0.3729.29',
 362         '75.0.3740.4',
 363         '73.0.3683.90',
 364         '74.0.3729.28',
 365         '75.0.3740.3',
 366         '73.0.3683.89',
 367         '75.0.3740.2',
 368         '74.0.3729.27',
 369         '75.0.3740.1',
 370         '75.0.3740.0',
 371         '74.0.3729.26',
 372         '73.0.3683.88',
 373         '73.0.3683.87',
 374         '74.0.3729.25',
 375         '75.0.3739.1',
 376         '75.0.3739.0',
 377         '73.0.3683.86',
 378         '74.0.3729.24',
 379         '73.0.3683.85',
 380         '75.0.3738.4',
 381         '75.0.3738.3',
 382         '75.0.3738.2',
 383         '75.0.3738.1',
 384         '75.0.3738.0',
 385         '74.0.3729.23',
 386         '73.0.3683.84',
 387         '74.0.3729.22',
 388         '74.0.3729.21',
 389         '75.0.3737.1',
 390         '75.0.3737.0',
 391         '74.0.3729.20',
 392         '73.0.3683.83',
 393         '74.0.3729.19',
 394         '75.0.3736.1',
 395         '75.0.3736.0',
 396         '74.0.3729.18',
 397         '73.0.3683.82',
 398         '74.0.3729.17',
 399         '75.0.3735.1',
 400         '75.0.3735.0',
 401         '74.0.3729.16',
 402         '73.0.3683.81',
 403         '75.0.3734.1',
 404         '75.0.3734.0',
 405         '74.0.3729.15',
 406         '73.0.3683.80',
 407         '74.0.3729.14',
 408         '75.0.3733.1',
 409         '75.0.3733.0',
 410         '75.0.3732.1',
 411         '74.0.3729.13',
 412         '74.0.3729.12',
 413         '73.0.3683.79',
 414         '74.0.3729.11',
 415         '75.0.3732.0',
 416         '74.0.3729.10',
 417         '73.0.3683.78',
 418         '74.0.3729.9',
 419         '74.0.3729.8',
 420         '74.0.3729.7',
 421         '75.0.3731.3',
 422         '75.0.3731.2',
 423         '75.0.3731.0',
 424         '74.0.3729.6',
 425         '73.0.3683.77',
 426         '73.0.3683.76',
 427         '75.0.3730.5',
 428         '75.0.3730.4',
 429         '73.0.3683.75',
 430         '74.0.3729.5',
 431         '73.0.3683.74',
 432         '75.0.3730.3',
 433         '75.0.3730.2',
 434         '74.0.3729.4',
 435         '73.0.3683.73',
 436         '73.0.3683.72',
 437         '75.0.3730.1',
 438         '75.0.3730.0',
 439         '74.0.3729.3',
 440         '73.0.3683.71',
 441         '74.0.3729.2',
 442         '73.0.3683.70',
 443         '74.0.3729.1',
 444         '74.0.3729.0',
 445         '74.0.3726.4',
 446         '73.0.3683.69',
 447         '74.0.3726.3',
 448         '74.0.3728.0',
 449         '74.0.3726.2',
 450         '73.0.3683.68',
 451         '74.0.3726.1',
 452         '74.0.3726.0',
 453         '74.0.3725.4',
 454         '73.0.3683.67',
 455         '73.0.3683.66',
 456         '74.0.3725.3',
 457         '74.0.3725.2',
 458         '74.0.3725.1',
 459         '74.0.3724.8',
 460         '74.0.3725.0',
 461         '73.0.3683.65',
 462         '74.0.3724.7',
 463         '74.0.3724.6',
 464         '74.0.3724.5',
 465         '74.0.3724.4',
 466         '74.0.3724.3',
 467         '74.0.3724.2',
 468         '74.0.3724.1',
 469         '74.0.3724.0',
 470         '73.0.3683.64',
 471         '74.0.3723.1',
 472         '74.0.3723.0',
 473         '73.0.3683.63',
 474         '74.0.3722.1',
 475         '74.0.3722.0',
 476         '73.0.3683.62',
 477         '74.0.3718.9',
 478         '74.0.3702.3',
 479         '74.0.3721.3',
 480         '74.0.3721.2',
 481         '74.0.3721.1',
 482         '74.0.3721.0',
 483         '74.0.3720.6',
 484         '73.0.3683.61',
 485         '72.0.3626.122',
 486         '73.0.3683.60',
 487         '74.0.3720.5',
 488         '72.0.3626.121',
 489         '74.0.3718.8',
 490         '74.0.3720.4',
 491         '74.0.3720.3',
 492         '74.0.3718.7',
 493         '74.0.3720.2',
 494         '74.0.3720.1',
 495         '74.0.3720.0',
 496         '74.0.3718.6',
 497         '74.0.3719.5',
 498         '73.0.3683.59',
 499         '74.0.3718.5',
 500         '74.0.3718.4',
 501         '74.0.3719.4',
 502         '74.0.3719.3',
 503         '74.0.3719.2',
 504         '74.0.3719.1',
 505         '73.0.3683.58',
 506         '74.0.3719.0',
 507         '73.0.3683.57',
 508         '73.0.3683.56',
 509         '74.0.3718.3',
 510         '73.0.3683.55',
 511         '74.0.3718.2',
 512         '74.0.3718.1',
 513         '74.0.3718.0',
 514         '73.0.3683.54',
 515         '74.0.3717.2',
 516         '73.0.3683.53',
 517         '74.0.3717.1',
 518         '74.0.3717.0',
 519         '73.0.3683.52',
 520         '74.0.3716.1',
 521         '74.0.3716.0',
 522         '73.0.3683.51',
 523         '74.0.3715.1',
 524         '74.0.3715.0',
 525         '73.0.3683.50',
 526         '74.0.3711.2',
 527         '74.0.3714.2',
 528         '74.0.3713.3',
 529         '74.0.3714.1',
 530         '74.0.3714.0',
 531         '73.0.3683.49',
 532         '74.0.3713.1',
 533         '74.0.3713.0',
 534         '72.0.3626.120',
 535         '73.0.3683.48',
 536         '74.0.3712.2',
 537         '74.0.3712.1',
 538         '74.0.3712.0',
 539         '73.0.3683.47',
 540         '72.0.3626.119',
 541         '73.0.3683.46',
 542         '74.0.3710.2',
 543         '72.0.3626.118',
 544         '74.0.3711.1',
 545         '74.0.3711.0',
 546         '73.0.3683.45',
 547         '72.0.3626.117',
 548         '74.0.3710.1',
 549         '74.0.3710.0',
 550         '73.0.3683.44',
 551         '72.0.3626.116',
 552         '74.0.3709.1',
 553         '74.0.3709.0',
 554         '74.0.3704.9',
 555         '73.0.3683.43',
 556         '72.0.3626.115',
 557         '74.0.3704.8',
 558         '74.0.3704.7',
 559         '74.0.3708.0',
 560         '74.0.3706.7',
 561         '74.0.3704.6',
 562         '73.0.3683.42',
 563         '72.0.3626.114',
 564         '74.0.3706.6',
 565         '72.0.3626.113',
 566         '74.0.3704.5',
 567         '74.0.3706.5',
 568         '74.0.3706.4',
 569         '74.0.3706.3',
 570         '74.0.3706.2',
 571         '74.0.3706.1',
 572         '74.0.3706.0',
 573         '73.0.3683.41',
 574         '72.0.3626.112',
 575         '74.0.3705.1',
 576         '74.0.3705.0',
 577         '73.0.3683.40',
 578         '72.0.3626.111',
 579         '73.0.3683.39',
 580         '74.0.3704.4',
 581         '73.0.3683.38',
 582         '74.0.3704.3',
 583         '74.0.3704.2',
 584         '74.0.3704.1',
 585         '74.0.3704.0',
 586         '73.0.3683.37',
 587         '72.0.3626.110',
 588         '72.0.3626.109',
 589         '74.0.3703.3',
 590         '74.0.3703.2',
 591         '73.0.3683.36',
 592         '74.0.3703.1',
 593         '74.0.3703.0',
 594         '73.0.3683.35',
 595         '72.0.3626.108',
 596         '74.0.3702.2',
 597         '74.0.3699.3',
 598         '74.0.3702.1',
 599         '74.0.3702.0',
 600         '73.0.3683.34',
 601         '72.0.3626.107',
 602         '73.0.3683.33',
 603         '74.0.3701.1',
 604         '74.0.3701.0',
 605         '73.0.3683.32',
 606         '73.0.3683.31',
 607         '72.0.3626.105',
 608         '74.0.3700.1',
 609         '74.0.3700.0',
 610         '73.0.3683.29',
 611         '72.0.3626.103',
 612         '74.0.3699.2',
 613         '74.0.3699.1',
 614         '74.0.3699.0',
 615         '73.0.3683.28',
 616         '72.0.3626.102',
 617         '73.0.3683.27',
 618         '73.0.3683.26',
 619         '74.0.3698.0',
 620         '74.0.3696.2',
 621         '72.0.3626.101',
 622         '73.0.3683.25',
 623         '74.0.3696.1',
 624         '74.0.3696.0',
 625         '74.0.3694.8',
 626         '72.0.3626.100',
 627         '74.0.3694.7',
 628         '74.0.3694.6',
 629         '74.0.3694.5',
 630         '74.0.3694.4',
 631         '72.0.3626.99',
 632         '72.0.3626.98',
 633         '74.0.3694.3',
 634         '73.0.3683.24',
 635         '72.0.3626.97',
 636         '72.0.3626.96',
 637         '72.0.3626.95',
 638         '73.0.3683.23',
 639         '72.0.3626.94',
 640         '73.0.3683.22',
 641         '73.0.3683.21',
 642         '72.0.3626.93',
 643         '74.0.3694.2',
 644         '72.0.3626.92',
 645         '74.0.3694.1',
 646         '74.0.3694.0',
 647         '74.0.3693.6',
 648         '73.0.3683.20',
 649         '72.0.3626.91',
 650         '74.0.3693.5',
 651         '74.0.3693.4',
 652         '74.0.3693.3',
 653         '74.0.3693.2',
 654         '73.0.3683.19',
 655         '74.0.3693.1',
 656         '74.0.3693.0',
 657         '73.0.3683.18',
 658         '72.0.3626.90',
 659         '74.0.3692.1',
 660         '74.0.3692.0',
 661         '73.0.3683.17',
 662         '72.0.3626.89',
 663         '74.0.3687.3',
 664         '74.0.3691.1',
 665         '74.0.3691.0',
 666         '73.0.3683.16',
 667         '72.0.3626.88',
 668         '72.0.3626.87',
 669         '73.0.3683.15',
 670         '74.0.3690.1',
 671         '74.0.3690.0',
 672         '73.0.3683.14',
 673         '72.0.3626.86',
 674         '73.0.3683.13',
 675         '73.0.3683.12',
 676         '74.0.3689.1',
 677         '74.0.3689.0',
 678         '73.0.3683.11',
 679         '72.0.3626.85',
 680         '73.0.3683.10',
 681         '72.0.3626.84',
 682         '73.0.3683.9',
 683         '74.0.3688.1',
 684         '74.0.3688.0',
 685         '73.0.3683.8',
 686         '72.0.3626.83',
 687         '74.0.3687.2',
 688         '74.0.3687.1',
 689         '74.0.3687.0',
 690         '73.0.3683.7',
 691         '72.0.3626.82',
 692         '74.0.3686.4',
 693         '72.0.3626.81',
 694         '74.0.3686.3',
 695         '74.0.3686.2',
 696         '74.0.3686.1',
 697         '74.0.3686.0',
 698         '73.0.3683.6',
 699         '72.0.3626.80',
 700         '74.0.3685.1',
 701         '74.0.3685.0',
 702         '73.0.3683.5',
 703         '72.0.3626.79',
 704         '74.0.3684.1',
 705         '74.0.3684.0',
 706         '73.0.3683.4',
 707         '72.0.3626.78',
 708         '72.0.3626.77',
 709         '73.0.3683.3',
 710         '73.0.3683.2',
 711         '72.0.3626.76',
 712         '73.0.3683.1',
 713         '73.0.3683.0',
 714         '72.0.3626.75',
 715         '71.0.3578.141',
 716         '73.0.3682.1',
 717         '73.0.3682.0',
 718         '72.0.3626.74',
 719         '71.0.3578.140',
 720         '73.0.3681.4',
 721         '73.0.3681.3',
 722         '73.0.3681.2',
 723         '73.0.3681.1',
 724         '73.0.3681.0',
 725         '72.0.3626.73',
 726         '71.0.3578.139',
 727         '72.0.3626.72',
 728         '72.0.3626.71',
 729         '73.0.3680.1',
 730         '73.0.3680.0',
 731         '72.0.3626.70',
 732         '71.0.3578.138',
 733         '73.0.3678.2',
 734         '73.0.3679.1',
 735         '73.0.3679.0',
 736         '72.0.3626.69',
 737         '71.0.3578.137',
 738         '73.0.3678.1',
 739         '73.0.3678.0',
 740         '71.0.3578.136',
 741         '73.0.3677.1',
 742         '73.0.3677.0',
 743         '72.0.3626.68',
 744         '72.0.3626.67',
 745         '71.0.3578.135',
 746         '73.0.3676.1',
 747         '73.0.3676.0',
 748         '73.0.3674.2',
 749         '72.0.3626.66',
 750         '71.0.3578.134',
 751         '73.0.3674.1',
 752         '73.0.3674.0',
 753         '72.0.3626.65',
 754         '71.0.3578.133',
 755         '73.0.3673.2',
 756         '73.0.3673.1',
 757         '73.0.3673.0',
 758         '72.0.3626.64',
 759         '71.0.3578.132',
 760         '72.0.3626.63',
 761         '72.0.3626.62',
 762         '72.0.3626.61',
 763         '72.0.3626.60',
 764         '73.0.3672.1',
 765         '73.0.3672.0',
 766         '72.0.3626.59',
 767         '71.0.3578.131',
 768         '73.0.3671.3',
 769         '73.0.3671.2',
 770         '73.0.3671.1',
 771         '73.0.3671.0',
 772         '72.0.3626.58',
 773         '71.0.3578.130',
 774         '73.0.3670.1',
 775         '73.0.3670.0',
 776         '72.0.3626.57',
 777         '71.0.3578.129',
 778         '73.0.3669.1',
 779         '73.0.3669.0',
 780         '72.0.3626.56',
 781         '71.0.3578.128',
 782         '73.0.3668.2',
 783         '73.0.3668.1',
 784         '73.0.3668.0',
 785         '72.0.3626.55',
 786         '71.0.3578.127',
 787         '73.0.3667.2',
 788         '73.0.3667.1',
 789         '73.0.3667.0',
 790         '72.0.3626.54',
 791         '71.0.3578.126',
 792         '73.0.3666.1',
 793         '73.0.3666.0',
 794         '72.0.3626.53',
 795         '71.0.3578.125',
 796         '73.0.3665.4',
 797         '73.0.3665.3',
 798         '72.0.3626.52',
 799         '73.0.3665.2',
 800         '73.0.3664.4',
 801         '73.0.3665.1',
 802         '73.0.3665.0',
 803         '72.0.3626.51',
 804         '71.0.3578.124',
 805         '72.0.3626.50',
 806         '73.0.3664.3',
 807         '73.0.3664.2',
 808         '73.0.3664.1',
 809         '73.0.3664.0',
 810         '73.0.3663.2',
 811         '72.0.3626.49',
 812         '71.0.3578.123',
 813         '73.0.3663.1',
 814         '73.0.3663.0',
 815         '72.0.3626.48',
 816         '71.0.3578.122',
 817         '73.0.3662.1',
 818         '73.0.3662.0',
 819         '72.0.3626.47',
 820         '71.0.3578.121',
 821         '73.0.3661.1',
 822         '72.0.3626.46',
 823         '73.0.3661.0',
 824         '72.0.3626.45',
 825         '71.0.3578.120',
 826         '73.0.3660.2',
 827         '73.0.3660.1',
 828         '73.0.3660.0',
 829         '72.0.3626.44',
 830         '71.0.3578.119',
 831         '73.0.3659.1',
 832         '73.0.3659.0',
 833         '72.0.3626.43',
 834         '71.0.3578.118',
 835         '73.0.3658.1',
 836         '73.0.3658.0',
 837         '72.0.3626.42',
 838         '71.0.3578.117',
 839         '73.0.3657.1',
 840         '73.0.3657.0',
 841         '72.0.3626.41',
 842         '71.0.3578.116',
 843         '73.0.3656.1',
 844         '73.0.3656.0',
 845         '72.0.3626.40',
 846         '71.0.3578.115',
 847         '73.0.3655.1',
 848         '73.0.3655.0',
 849         '72.0.3626.39',
 850         '71.0.3578.114',
 851         '73.0.3654.1',
 852         '73.0.3654.0',
 853         '72.0.3626.38',
 854         '71.0.3578.113',
 855         '73.0.3653.1',
 856         '73.0.3653.0',
 857         '72.0.3626.37',
 858         '71.0.3578.112',
 859         '73.0.3652.1',
 860         '73.0.3652.0',
 861         '72.0.3626.36',
 862         '71.0.3578.111',
 863         '73.0.3651.1',
 864         '73.0.3651.0',
 865         '72.0.3626.35',
 866         '71.0.3578.110',
 867         '73.0.3650.1',
 868         '73.0.3650.0',
 869         '72.0.3626.34',
 870         '71.0.3578.109',
 871         '73.0.3649.1',
 872         '73.0.3649.0',
 873         '72.0.3626.33',
 874         '71.0.3578.108',
 875         '73.0.3648.2',
 876         '73.0.3648.1',
 877         '73.0.3648.0',
 878         '72.0.3626.32',
 879         '71.0.3578.107',
 880         '73.0.3647.2',
 881         '73.0.3647.1',
 882         '73.0.3647.0',
 883         '72.0.3626.31',
 884         '71.0.3578.106',
 885         '73.0.3635.3',
 886         '73.0.3646.2',
 887         '73.0.3646.1',
 888         '73.0.3646.0',
 889         '72.0.3626.30',
 890         '71.0.3578.105',
 891         '72.0.3626.29',
 892         '73.0.3645.2',
 893         '73.0.3645.1',
 894         '73.0.3645.0',
 895         '72.0.3626.28',
 896         '71.0.3578.104',
 897         '72.0.3626.27',
 898         '72.0.3626.26',
 899         '72.0.3626.25',
 900         '72.0.3626.24',
 901         '73.0.3644.0',
 902         '73.0.3643.2',
 903         '72.0.3626.23',
 904         '71.0.3578.103',
 905         '73.0.3643.1',
 906         '73.0.3643.0',
 907         '72.0.3626.22',
 908         '71.0.3578.102',
 909         '73.0.3642.1',
 910         '73.0.3642.0',
 911         '72.0.3626.21',
 912         '71.0.3578.101',
 913         '73.0.3641.1',
 914         '73.0.3641.0',
 915         '72.0.3626.20',
 916         '71.0.3578.100',
 917         '72.0.3626.19',
 918         '73.0.3640.1',
 919         '73.0.3640.0',
 920         '72.0.3626.18',
 921         '73.0.3639.1',
 922         '71.0.3578.99',
 923         '73.0.3639.0',
 924         '72.0.3626.17',
 925         '73.0.3638.2',
 926         '72.0.3626.16',
 927         '73.0.3638.1',
 928         '73.0.3638.0',
 929         '72.0.3626.15',
 930         '71.0.3578.98',
 931         '73.0.3635.2',
 932         '71.0.3578.97',
 933         '73.0.3637.1',
 934         '73.0.3637.0',
 935         '72.0.3626.14',
 936         '71.0.3578.96',
 937         '71.0.3578.95',
 938         '72.0.3626.13',
 939         '71.0.3578.94',
 940         '73.0.3636.2',
 941         '71.0.3578.93',
 942         '73.0.3636.1',
 943         '73.0.3636.0',
 944         '72.0.3626.12',
 945         '71.0.3578.92',
 946         '73.0.3635.1',
 947         '73.0.3635.0',
 948         '72.0.3626.11',
 949         '71.0.3578.91',
 950         '73.0.3634.2',
 951         '73.0.3634.1',
 952         '73.0.3634.0',
 953         '72.0.3626.10',
 954         '71.0.3578.90',
 955         '71.0.3578.89',
 956         '73.0.3633.2',
 957         '73.0.3633.1',
 958         '73.0.3633.0',
 959         '72.0.3610.4',
 960         '72.0.3626.9',
 961         '71.0.3578.88',
 962         '73.0.3632.5',
 963         '73.0.3632.4',
 964         '73.0.3632.3',
 965         '73.0.3632.2',
 966         '73.0.3632.1',
 967         '73.0.3632.0',
 968         '72.0.3626.8',
 969         '71.0.3578.87',
 970         '73.0.3631.2',
 971         '73.0.3631.1',
 972         '73.0.3631.0',
 973         '72.0.3626.7',
 974         '71.0.3578.86',
 975         '72.0.3626.6',
 976         '73.0.3630.1',
 977         '73.0.3630.0',
 978         '72.0.3626.5',
 979         '71.0.3578.85',
 980         '72.0.3626.4',
 981         '73.0.3628.3',
 982         '73.0.3628.2',
 983         '73.0.3629.1',
 984         '73.0.3629.0',
 985         '72.0.3626.3',
 986         '71.0.3578.84',
 987         '73.0.3628.1',
 988         '73.0.3628.0',
 989         '71.0.3578.83',
 990         '73.0.3627.1',
 991         '73.0.3627.0',
 992         '72.0.3626.2',
 993         '71.0.3578.82',
 994         '71.0.3578.81',
 995         '71.0.3578.80',
 996         '72.0.3626.1',
 997         '72.0.3626.0',
 998         '71.0.3578.79',
 999         '70.0.3538.124',
1000         '71.0.3578.78',
1001         '72.0.3623.4',
1002         '72.0.3625.2',
1003         '72.0.3625.1',
1004         '72.0.3625.0',
1005         '71.0.3578.77',
1006         '70.0.3538.123',
1007         '72.0.3624.4',
1008         '72.0.3624.3',
1009         '72.0.3624.2',
1010         '71.0.3578.76',
1011         '72.0.3624.1',
1012         '72.0.3624.0',
1013         '72.0.3623.3',
1014         '71.0.3578.75',
1015         '70.0.3538.122',
1016         '71.0.3578.74',
1017         '72.0.3623.2',
1018         '72.0.3610.3',
1019         '72.0.3623.1',
1020         '72.0.3623.0',
1021         '72.0.3622.3',
1022         '72.0.3622.2',
1023         '71.0.3578.73',
1024         '70.0.3538.121',
1025         '72.0.3622.1',
1026         '72.0.3622.0',
1027         '71.0.3578.72',
1028         '70.0.3538.120',
1029         '72.0.3621.1',
1030         '72.0.3621.0',
1031         '71.0.3578.71',
1032         '70.0.3538.119',
1033         '72.0.3620.1',
1034         '72.0.3620.0',
1035         '71.0.3578.70',
1036         '70.0.3538.118',
1037         '71.0.3578.69',
1038         '72.0.3619.1',
1039         '72.0.3619.0',
1040         '71.0.3578.68',
1041         '70.0.3538.117',
1042         '71.0.3578.67',
1043         '72.0.3618.1',
1044         '72.0.3618.0',
1045         '71.0.3578.66',
1046         '70.0.3538.116',
1047         '72.0.3617.1',
1048         '72.0.3617.0',
1049         '71.0.3578.65',
1050         '70.0.3538.115',
1051         '72.0.3602.3',
1052         '71.0.3578.64',
1053         '72.0.3616.1',
1054         '72.0.3616.0',
1055         '71.0.3578.63',
1056         '70.0.3538.114',
1057         '71.0.3578.62',
1058         '72.0.3615.1',
1059         '72.0.3615.0',
1060         '71.0.3578.61',
1061         '70.0.3538.113',
1062         '72.0.3614.1',
1063         '72.0.3614.0',
1064         '71.0.3578.60',
1065         '70.0.3538.112',
1066         '72.0.3613.1',
1067         '72.0.3613.0',
1068         '71.0.3578.59',
1069         '70.0.3538.111',
1070         '72.0.3612.2',
1071         '72.0.3612.1',
1072         '72.0.3612.0',
1073         '70.0.3538.110',
1074         '71.0.3578.58',
1075         '70.0.3538.109',
1076         '72.0.3611.2',
1077         '72.0.3611.1',
1078         '72.0.3611.0',
1079         '71.0.3578.57',
1080         '70.0.3538.108',
1081         '72.0.3610.2',
1082         '71.0.3578.56',
1083         '71.0.3578.55',
1084         '72.0.3610.1',
1085         '72.0.3610.0',
1086         '71.0.3578.54',
1087         '70.0.3538.107',
1088         '71.0.3578.53',
1089         '72.0.3609.3',
1090         '71.0.3578.52',
1091         '72.0.3609.2',
1092         '71.0.3578.51',
1093         '72.0.3608.5',
1094         '72.0.3609.1',
1095         '72.0.3609.0',
1096         '71.0.3578.50',
1097         '70.0.3538.106',
1098         '72.0.3608.4',
1099         '72.0.3608.3',
1100         '72.0.3608.2',
1101         '71.0.3578.49',
1102         '72.0.3608.1',
1103         '72.0.3608.0',
1104         '70.0.3538.105',
1105         '71.0.3578.48',
1106         '72.0.3607.1',
1107         '72.0.3607.0',
1108         '71.0.3578.47',
1109         '70.0.3538.104',
1110         '72.0.3606.2',
1111         '72.0.3606.1',
1112         '72.0.3606.0',
1113         '71.0.3578.46',
1114         '70.0.3538.103',
1115         '70.0.3538.102',
1116         '72.0.3605.3',
1117         '72.0.3605.2',
1118         '72.0.3605.1',
1119         '72.0.3605.0',
1120         '71.0.3578.45',
1121         '70.0.3538.101',
1122         '71.0.3578.44',
1123         '71.0.3578.43',
1124         '70.0.3538.100',
1125         '70.0.3538.99',
1126         '71.0.3578.42',
1127         '72.0.3604.1',
1128         '72.0.3604.0',
1129         '71.0.3578.41',
1130         '70.0.3538.98',
1131         '71.0.3578.40',
1132         '72.0.3603.2',
1133         '72.0.3603.1',
1134         '72.0.3603.0',
1135         '71.0.3578.39',
1136         '70.0.3538.97',
1137         '72.0.3602.2',
1138         '71.0.3578.38',
1139         '71.0.3578.37',
1140         '72.0.3602.1',
1141         '72.0.3602.0',
1142         '71.0.3578.36',
1143         '70.0.3538.96',
1144         '72.0.3601.1',
1145         '72.0.3601.0',
1146         '71.0.3578.35',
1147         '70.0.3538.95',
1148         '72.0.3600.1',
1149         '72.0.3600.0',
1150         '71.0.3578.34',
1151         '70.0.3538.94',
1152         '72.0.3599.3',
1153         '72.0.3599.2',
1154         '72.0.3599.1',
1155         '72.0.3599.0',
1156         '71.0.3578.33',
1157         '70.0.3538.93',
1158         '72.0.3598.1',
1159         '72.0.3598.0',
1160         '71.0.3578.32',
1161         '70.0.3538.87',
1162         '72.0.3597.1',
1163         '72.0.3597.0',
1164         '72.0.3596.2',
1165         '71.0.3578.31',
1166         '70.0.3538.86',
1167         '71.0.3578.30',
1168         '71.0.3578.29',
1169         '72.0.3596.1',
1170         '72.0.3596.0',
1171         '71.0.3578.28',
1172         '70.0.3538.85',
1173         '72.0.3595.2',
1174         '72.0.3591.3',
1175         '72.0.3595.1',
1176         '72.0.3595.0',
1177         '71.0.3578.27',
1178         '70.0.3538.84',
1179         '72.0.3594.1',
1180         '72.0.3594.0',
1181         '71.0.3578.26',
1182         '70.0.3538.83',
1183         '72.0.3593.2',
1184         '72.0.3593.1',
1185         '72.0.3593.0',
1186         '71.0.3578.25',
1187         '70.0.3538.82',
1188         '72.0.3589.3',
1189         '72.0.3592.2',
1190         '72.0.3592.1',
1191         '72.0.3592.0',
1192         '71.0.3578.24',
1193         '72.0.3589.2',
1194         '70.0.3538.81',
1195         '70.0.3538.80',
1196         '72.0.3591.2',
1197         '72.0.3591.1',
1198         '72.0.3591.0',
1199         '71.0.3578.23',
1200         '70.0.3538.79',
1201         '71.0.3578.22',
1202         '72.0.3590.1',
1203         '72.0.3590.0',
1204         '71.0.3578.21',
1205         '70.0.3538.78',
1206         '70.0.3538.77',
1207         '72.0.3589.1',
1208         '72.0.3589.0',
1209         '71.0.3578.20',
1210         '70.0.3538.76',
1211         '71.0.3578.19',
1212         '70.0.3538.75',
1213         '72.0.3588.1',
1214         '72.0.3588.0',
1215         '71.0.3578.18',
1216         '70.0.3538.74',
1217         '72.0.3586.2',
1218         '72.0.3587.0',
1219         '71.0.3578.17',
1220         '70.0.3538.73',
1221         '72.0.3586.1',
1222         '72.0.3586.0',
1223         '71.0.3578.16',
1224         '70.0.3538.72',
1225         '72.0.3585.1',
1226         '72.0.3585.0',
1227         '71.0.3578.15',
1228         '70.0.3538.71',
1229         '71.0.3578.14',
1230         '72.0.3584.1',
1231         '72.0.3584.0',
1232         '71.0.3578.13',
1233         '70.0.3538.70',
1234         '72.0.3583.2',
1235         '71.0.3578.12',
1236         '72.0.3583.1',
1237         '72.0.3583.0',
1238         '71.0.3578.11',
1239         '70.0.3538.69',
1240         '71.0.3578.10',
1241         '72.0.3582.0',
1242         '72.0.3581.4',
1243         '71.0.3578.9',
1244         '70.0.3538.67',
1245         '72.0.3581.3',
1246         '72.0.3581.2',
1247         '72.0.3581.1',
1248         '72.0.3581.0',
1249         '71.0.3578.8',
1250         '70.0.3538.66',
1251         '72.0.3580.1',
1252         '72.0.3580.0',
1253         '71.0.3578.7',
1254         '70.0.3538.65',
1255         '71.0.3578.6',
1256         '72.0.3579.1',
1257         '72.0.3579.0',
1258         '71.0.3578.5',
1259         '70.0.3538.64',
1260         '71.0.3578.4',
1261         '71.0.3578.3',
1262         '71.0.3578.2',
1263         '71.0.3578.1',
1264         '71.0.3578.0',
1265         '70.0.3538.63',
1266         '69.0.3497.128',
1267         '70.0.3538.62',
1268         '70.0.3538.61',
1269         '70.0.3538.60',
1270         '70.0.3538.59',
1271         '71.0.3577.1',
1272         '71.0.3577.0',
1273         '70.0.3538.58',
1274         '69.0.3497.127',
1275         '71.0.3576.2',
1276         '71.0.3576.1',
1277         '71.0.3576.0',
1278         '70.0.3538.57',
1279         '70.0.3538.56',
1280         '71.0.3575.2',
1281         '70.0.3538.55',
1282         '69.0.3497.126',
1283         '70.0.3538.54',
1284         '71.0.3575.1',
1285         '71.0.3575.0',
1286         '71.0.3574.1',
1287         '71.0.3574.0',
1288         '70.0.3538.53',
1289         '69.0.3497.125',
1290         '70.0.3538.52',
1291         '71.0.3573.1',
1292         '71.0.3573.0',
1293         '70.0.3538.51',
1294         '69.0.3497.124',
1295         '71.0.3572.1',
1296         '71.0.3572.0',
1297         '70.0.3538.50',
1298         '69.0.3497.123',
1299         '71.0.3571.2',
1300         '70.0.3538.49',
1301         '69.0.3497.122',
1302         '71.0.3571.1',
1303         '71.0.3571.0',
1304         '70.0.3538.48',
1305         '69.0.3497.121',
1306         '71.0.3570.1',
1307         '71.0.3570.0',
1308         '70.0.3538.47',
1309         '69.0.3497.120',
1310         '71.0.3568.2',
1311         '71.0.3569.1',
1312         '71.0.3569.0',
1313         '70.0.3538.46',
1314         '69.0.3497.119',
1315         '70.0.3538.45',
1316         '71.0.3568.1',
1317         '71.0.3568.0',
1318         '70.0.3538.44',
1319         '69.0.3497.118',
1320         '70.0.3538.43',
1321         '70.0.3538.42',
1322         '71.0.3567.1',
1323         '71.0.3567.0',
1324         '70.0.3538.41',
1325         '69.0.3497.117',
1326         '71.0.3566.1',
1327         '71.0.3566.0',
1328         '70.0.3538.40',
1329         '69.0.3497.116',
1330         '71.0.3565.1',
1331         '71.0.3565.0',
1332         '70.0.3538.39',
1333         '69.0.3497.115',
1334         '71.0.3564.1',
1335         '71.0.3564.0',
1336         '70.0.3538.38',
1337         '69.0.3497.114',
1338         '71.0.3563.0',
1339         '71.0.3562.2',
1340         '70.0.3538.37',
1341         '69.0.3497.113',
1342         '70.0.3538.36',
1343         '70.0.3538.35',
1344         '71.0.3562.1',
1345         '71.0.3562.0',
1346         '70.0.3538.34',
1347         '69.0.3497.112',
1348         '70.0.3538.33',
1349         '71.0.3561.1',
1350         '71.0.3561.0',
1351         '70.0.3538.32',
1352         '69.0.3497.111',
1353         '71.0.3559.6',
1354         '71.0.3560.1',
1355         '71.0.3560.0',
1356         '71.0.3559.5',
1357         '71.0.3559.4',
1358         '70.0.3538.31',
1359         '69.0.3497.110',
1360         '71.0.3559.3',
1361         '70.0.3538.30',
1362         '69.0.3497.109',
1363         '71.0.3559.2',
1364         '71.0.3559.1',
1365         '71.0.3559.0',
1366         '70.0.3538.29',
1367         '69.0.3497.108',
1368         '71.0.3558.2',
1369         '71.0.3558.1',
1370         '71.0.3558.0',
1371         '70.0.3538.28',
1372         '69.0.3497.107',
1373         '71.0.3557.2',
1374         '71.0.3557.1',
1375         '71.0.3557.0',
1376         '70.0.3538.27',
1377         '69.0.3497.106',
1378         '71.0.3554.4',
1379         '70.0.3538.26',
1380         '71.0.3556.1',
1381         '71.0.3556.0',
1382         '70.0.3538.25',
1383         '71.0.3554.3',
1384         '69.0.3497.105',
1385         '71.0.3554.2',
1386         '70.0.3538.24',
1387         '69.0.3497.104',
1388         '71.0.3555.2',
1389         '70.0.3538.23',
1390         '71.0.3555.1',
1391         '71.0.3555.0',
1392         '70.0.3538.22',
1393         '69.0.3497.103',
1394         '71.0.3554.1',
1395         '71.0.3554.0',
1396         '70.0.3538.21',
1397         '69.0.3497.102',
1398         '71.0.3553.3',
1399         '70.0.3538.20',
1400         '69.0.3497.101',
1401         '71.0.3553.2',
1402         '69.0.3497.100',
1403         '71.0.3553.1',
1404         '71.0.3553.0',
1405         '70.0.3538.19',
1406         '69.0.3497.99',
1407         '69.0.3497.98',
1408         '69.0.3497.97',
1409         '71.0.3552.6',
1410         '71.0.3552.5',
1411         '71.0.3552.4',
1412         '71.0.3552.3',
1413         '71.0.3552.2',
1414         '71.0.3552.1',
1415         '71.0.3552.0',
1416         '70.0.3538.18',
1417         '69.0.3497.96',
1418         '71.0.3551.3',
1419         '71.0.3551.2',
1420         '71.0.3551.1',
1421         '71.0.3551.0',
1422         '70.0.3538.17',
1423         '69.0.3497.95',
1424         '71.0.3550.3',
1425         '71.0.3550.2',
1426         '71.0.3550.1',
1427         '71.0.3550.0',
1428         '70.0.3538.16',
1429         '69.0.3497.94',
1430         '71.0.3549.1',
1431         '71.0.3549.0',
1432         '70.0.3538.15',
1433         '69.0.3497.93',
1434         '69.0.3497.92',
1435         '71.0.3548.1',
1436         '71.0.3548.0',
1437         '70.0.3538.14',
1438         '69.0.3497.91',
1439         '71.0.3547.1',
1440         '71.0.3547.0',
1441         '70.0.3538.13',
1442         '69.0.3497.90',
1443         '71.0.3546.2',
1444         '69.0.3497.89',
1445         '71.0.3546.1',
1446         '71.0.3546.0',
1447         '70.0.3538.12',
1448         '69.0.3497.88',
1449         '71.0.3545.4',
1450         '71.0.3545.3',
1451         '71.0.3545.2',
1452         '71.0.3545.1',
1453         '71.0.3545.0',
1454         '70.0.3538.11',
1455         '69.0.3497.87',
1456         '71.0.3544.5',
1457         '71.0.3544.4',
1458         '71.0.3544.3',
1459         '71.0.3544.2',
1460         '71.0.3544.1',
1461         '71.0.3544.0',
1462         '69.0.3497.86',
1463         '70.0.3538.10',
1464         '69.0.3497.85',
1465         '70.0.3538.9',
1466         '69.0.3497.84',
1467         '71.0.3543.4',
1468         '70.0.3538.8',
1469         '71.0.3543.3',
1470         '71.0.3543.2',
1471         '71.0.3543.1',
1472         '71.0.3543.0',
1473         '70.0.3538.7',
1474         '69.0.3497.83',
1475         '71.0.3542.2',
1476         '71.0.3542.1',
1477         '71.0.3542.0',
1478         '70.0.3538.6',
1479         '69.0.3497.82',
1480         '69.0.3497.81',
1481         '71.0.3541.1',
1482         '71.0.3541.0',
1483         '70.0.3538.5',
1484         '69.0.3497.80',
1485         '71.0.3540.1',
1486         '71.0.3540.0',
1487         '70.0.3538.4',
1488         '69.0.3497.79',
1489         '70.0.3538.3',
1490         '71.0.3539.1',
1491         '71.0.3539.0',
1492         '69.0.3497.78',
1493         '68.0.3440.134',
1494         '69.0.3497.77',
1495         '70.0.3538.2',
1496         '70.0.3538.1',
1497         '70.0.3538.0',
1498         '69.0.3497.76',
1499         '68.0.3440.133',
1500         '69.0.3497.75',
1501         '70.0.3537.2',
1502         '70.0.3537.1',
1503         '70.0.3537.0',
1504         '69.0.3497.74',
1505         '68.0.3440.132',
1506         '70.0.3536.0',
1507         '70.0.3535.5',
1508         '70.0.3535.4',
1509         '70.0.3535.3',
1510         '69.0.3497.73',
1511         '68.0.3440.131',
1512         '70.0.3532.8',
1513         '70.0.3532.7',
1514         '69.0.3497.72',
1515         '69.0.3497.71',
1516         '70.0.3535.2',
1517         '70.0.3535.1',
1518         '70.0.3535.0',
1519         '69.0.3497.70',
1520         '68.0.3440.130',
1521         '69.0.3497.69',
1522         '68.0.3440.129',
1523         '70.0.3534.4',
1524         '70.0.3534.3',
1525         '70.0.3534.2',
1526         '70.0.3534.1',
1527         '70.0.3534.0',
1528         '69.0.3497.68',
1529         '68.0.3440.128',
1530         '70.0.3533.2',
1531         '70.0.3533.1',
1532         '70.0.3533.0',
1533         '69.0.3497.67',
1534         '68.0.3440.127',
1535         '70.0.3532.6',
1536         '70.0.3532.5',
1537         '70.0.3532.4',
1538         '69.0.3497.66',
1539         '68.0.3440.126',
1540         '70.0.3532.3',
1541         '70.0.3532.2',
1542         '70.0.3532.1',
1543         '69.0.3497.60',
1544         '69.0.3497.65',
1545         '69.0.3497.64',
1546         '70.0.3532.0',
1547         '70.0.3531.0',
1548         '70.0.3530.4',
1549         '70.0.3530.3',
1550         '70.0.3530.2',
1551         '69.0.3497.58',
1552         '68.0.3440.125',
1553         '69.0.3497.57',
1554         '69.0.3497.56',
1555         '69.0.3497.55',
1556         '69.0.3497.54',
1557         '70.0.3530.1',
1558         '70.0.3530.0',
1559         '69.0.3497.53',
1560         '68.0.3440.124',
1561         '69.0.3497.52',
1562         '70.0.3529.3',
1563         '70.0.3529.2',
1564         '70.0.3529.1',
1565         '70.0.3529.0',
1566         '69.0.3497.51',
1567         '70.0.3528.4',
1568         '68.0.3440.123',
1569         '70.0.3528.3',
1570         '70.0.3528.2',
1571         '70.0.3528.1',
1572         '70.0.3528.0',
1573         '69.0.3497.50',
1574         '68.0.3440.122',
1575         '70.0.3527.1',
1576         '70.0.3527.0',
1577         '69.0.3497.49',
1578         '68.0.3440.121',
1579         '70.0.3526.1',
1580         '70.0.3526.0',
1581         '68.0.3440.120',
1582         '69.0.3497.48',
1583         '69.0.3497.47',
1584         '68.0.3440.119',
1585         '68.0.3440.118',
1586         '70.0.3525.5',
1587         '70.0.3525.4',
1588         '70.0.3525.3',
1589         '68.0.3440.117',
1590         '69.0.3497.46',
1591         '70.0.3525.2',
1592         '70.0.3525.1',
1593         '70.0.3525.0',
1594         '69.0.3497.45',
1595         '68.0.3440.116',
1596         '70.0.3524.4',
1597         '70.0.3524.3',
1598         '69.0.3497.44',
1599         '70.0.3524.2',
1600         '70.0.3524.1',
1601         '70.0.3524.0',
1602         '70.0.3523.2',
1603         '69.0.3497.43',
1604         '68.0.3440.115',
1605         '70.0.3505.9',
1606         '69.0.3497.42',
1607         '70.0.3505.8',
1608         '70.0.3523.1',
1609         '70.0.3523.0',
1610         '69.0.3497.41',
1611         '68.0.3440.114',
1612         '70.0.3505.7',
1613         '69.0.3497.40',
1614         '70.0.3522.1',
1615         '70.0.3522.0',
1616         '70.0.3521.2',
1617         '69.0.3497.39',
1618         '68.0.3440.113',
1619         '70.0.3505.6',
1620         '70.0.3521.1',
1621         '70.0.3521.0',
1622         '69.0.3497.38',
1623         '68.0.3440.112',
1624         '70.0.3520.1',
1625         '70.0.3520.0',
1626         '69.0.3497.37',
1627         '68.0.3440.111',
1628         '70.0.3519.3',
1629         '70.0.3519.2',
1630         '70.0.3519.1',
1631         '70.0.3519.0',
1632         '69.0.3497.36',
1633         '68.0.3440.110',
1634         '70.0.3518.1',
1635         '70.0.3518.0',
1636         '69.0.3497.35',
1637         '69.0.3497.34',
1638         '68.0.3440.109',
1639         '70.0.3517.1',
1640         '70.0.3517.0',
1641         '69.0.3497.33',
1642         '68.0.3440.108',
1643         '69.0.3497.32',
1644         '70.0.3516.3',
1645         '70.0.3516.2',
1646         '70.0.3516.1',
1647         '70.0.3516.0',
1648         '69.0.3497.31',
1649         '68.0.3440.107',
1650         '70.0.3515.4',
1651         '68.0.3440.106',
1652         '70.0.3515.3',
1653         '70.0.3515.2',
1654         '70.0.3515.1',
1655         '70.0.3515.0',
1656         '69.0.3497.30',
1657         '68.0.3440.105',
1658         '68.0.3440.104',
1659         '70.0.3514.2',
1660         '70.0.3514.1',
1661         '70.0.3514.0',
1662         '69.0.3497.29',
1663         '68.0.3440.103',
1664         '70.0.3513.1',
1665         '70.0.3513.0',
1666         '69.0.3497.28',
1667     )
1668     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1669
1670
# Default HTTP header name -> value mapping. The dict is built once when this
# module is imported, so random_user_agent() is called a single time and the
# chosen User-Agent stays the same until the module is reloaded.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Fixed User-Agent strings, keyed by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel meaning "no default was supplied". It is compared by identity
# (e.g. in the xpath_* helpers), which lets callers pass None as a real default.
NO_DEFAULT = object()
1686
# Full English month names in calendar order (index 0 is January)
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names in calendar order, keyed by two-letter language code.
# The 'en' entry is the very same list object as ENGLISH_MONTH_NAMES.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
1697
# Media and manifest file extensions, written without the leading dot.
# NOTE(review): presumably consulted when deciding whether a URL/filename
# suffix is a real media extension - confirm against the callers.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1712
# Needed for sanitizing filenames in restricted mode (see sanitize_filename).
# Maps every accented character of the first string to its ASCII replacement.
# The replacements are chained in the same order as the keys: plain strings
# contribute one single-character replacement per key, while the list items
# ('AE', 'OE', 'TH', 'ss', ...) are replacements longer than one character.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1717
# Date/time layouts expressed with strftime/strptime directives.
# This tuple is the common base that DATE_FORMATS_DAY_FIRST and
# DATE_FORMATS_MONTH_FIRST copy and extend with the ambiguous numeric layouts.
# NOTE(review): presumably tried one after another by the date parsing
# helpers - confirm against the callers.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)
1756
# The base formats followed by the numeric layouts that put the day before
# the month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]
1766
# The base formats followed by the numeric layouts that put the month before
# the day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1775
# Matches the trailing argument list of a packed JavaScript call of the form
#   }('<payload>',<int>,<int>,'<words>'.split('|')
# and captures, in order, the payload, the two integers and the word list.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script> element whose type is application/ld+json (optionally
# quoted), case-insensitively and across newlines; the element body is
# captured in the named group `json_ld`.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1778
1779
def preferredencoding():
    """Get preferred encoding.

    Asks locale.getpreferredencoding() for the system encoding and checks
    that it can really be used to encode text. Whenever the lookup or the
    trial encoding fails for any reason, 'UTF-8' is returned instead.
    """
    try:
        candidate = locale.getpreferredencoding()
        # Trial run: an unknown or broken codec raises here
        'TEST'.encode(candidate)
    except Exception:
        return 'UTF-8'
    return candidate
1793
1794
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first dumped into a temporary file created in the same
    directory as fn, which is then renamed to fn. If any step fails, the
    temporary file is removed (best effort) and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    tmp_prefix = os.path.basename(fn)
    tmp_dir = os.path.dirname(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile will fail if the filename contains non ascii
        # characters unless we use unicode objects
        fs_encoding = get_filesystem_encoding()
        tmp_prefix = tmp_prefix.decode(fs_encoding)
        tmp_dir = tmp_dir.decode(fs_encoding)

    tmp_kwargs = {
        'suffix': '.tmp',
        'prefix': tmp_prefix + '.',
        'dir': tmp_dir,
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        tmp_kwargs['mode'] = 'wb'
    else:
        tmp_kwargs['mode'] = 'w'
        tmp_kwargs['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_kwargs))

    try:
        with tf:
            json.dump(obj, tf)

        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass

        try:
            # os.umask() can only be read by setting it, so set a dummy value
            # and immediately restore the previous one
            current_umask = os.umask(0)
            os.umask(current_umask)
            os.chmod(tf.name, 0o666 & ~current_umask)
        except OSError:
            pass

        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind, then report the failure
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1853
1854
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val]

        Returns the first element matching xpath that carries the attribute
        key (with the value val, unless val is None), or None if there is
        no such element.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            return node.find('%s[@%s]' % (xpath, key))

        val_text = '%s' % (val,)
        # ElementTree predicates have no escaping mechanism, so pick a quote
        # character that does not occur in the value
        if "'" not in val_text:
            return node.find("%s[@%s='%s']" % (xpath, key, val_text))
        if '"' not in val_text:
            return node.find('%s[@%s="%s"]' % (xpath, key, val_text))

        # The value contains both kinds of quotes and cannot be written as a
        # predicate at all: compare the attribute values manually
        for candidate in node.findall('%s[@%s]' % (xpath, key)):
            if candidate.attrib.get(key) == val_text:
                return candidate
        return None
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] (without relying on predicates) """
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None
1869
# On Python 2.6 the xml.etree.ElementTree.Element methods don't support
# the namespaces parameter, so prefixed paths are expanded by xpath_with_ns
1872
1873
def xpath_with_ns(path, ns_map):
    """Expand the namespace prefixes of an XPath into {uri}tag notation.

    Every '/'-separated step written as 'prefix:tag' becomes '{uri}tag',
    where uri is ns_map[prefix]; steps without a colon are kept unchanged.
    Raises KeyError for a prefix missing from ns_map.
    """
    def _expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(_expand(step) for step in path.split('/'))
1884
1885
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element below node that matches xpath.

    xpath is either a single path string or an iterable of candidate paths
    that are tried in order until one of them matches.

    When nothing matches:
      * default is returned if one was given (even if it is None);
      * otherwise ExtractorError is raised if fatal is true, using name (or
        xpath itself when name is None) in the message;
      * otherwise None is returned.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from None so that an empty iterable of candidates is treated
        # as "not found" instead of leaving n unbound
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # name may be a tuple of candidate paths: wrap it so that the %
            # operator formats it as one value instead of unpacking it
            raise ExtractorError('Could not find XML element %s' % (name,))
        else:
            return None
    return n
1907
1908
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the first element below node that matches xpath.

    The element is looked up with xpath_element() using the same name, fatal
    and default arguments, so a missing element is handled there.

    When the element exists but has no text:
      * default is returned if one was given;
      * otherwise ExtractorError is raised if fatal is true;
      * otherwise None is returned.
    """
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # Either nothing was found or xpath_element already fell back to default
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # name may be a tuple of candidate paths: wrap it so that the %
            # operator formats it as one value instead of unpacking it
            raise ExtractorError('Could not find XML element\'s text %s' % (name,))
        else:
            return None
    return n.text
1922
1923
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return the value of attribute key of the first element matching xpath.

    Only elements that actually carry the attribute are considered. When
    there is none, default is returned if one was given; otherwise
    ExtractorError is raised if fatal is true, else None is returned.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]

    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None

    if name is None:
        name = '%s[@%s]' % (xpath, key)
    raise ExtractorError('Could not find XML attribute %s' % name)
1935
1936
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    content = get_element_by_attribute('id', id, html)
    return content
1940
1941
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document

    None is returned when no tag has that class.
    """
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1946
1947
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute in the passed HTML document

    All arguments are handed to get_elements_by_attribute(); None is returned
    when it finds nothing.
    """
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1951
1952
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute may list several names: accept class_name as a
    # whole word anywhere inside the (quote-free) attribute value
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute(
        'class', class_value_re, html, escape_value=False)
1958
1959
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is false, in which case it
    is inserted into the search pattern as a regular expression. The content
    of every matching tag is returned with its HTML entities unescaped.
    """
    if escape_value:
        value = re.escape(value)

    tag_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    def _strip_quotes(content):
        # Content starting with a quote loses its first and last character
        if content.startswith('"') or content.startswith("'"):
            return content[1:-1]
        return content

    return [
        unescapeHTML(_strip_quotes(mobj.group('content')))
        for mobj in re.finditer(tag_re, html)]
1983
1984
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element

    After markup has been fed to the parser, `attrs` holds the attributes
    passed to the most recent handle_starttag() call as a dict; it is an
    empty dict if no start tag was handled.
    """
    def __init__(self):
        # Make sure the result attribute exists even if no start tag is seen
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Each call replaces whatever was stored by a previous start tag;
        # dict() turns the (name, value) pairs into a mapping
        self.attrs = dict(attrs)
1993
1994
def extract_attributes(html_element):
    """Parse the attributes of a single HTML element into a dictionary.

    Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.

    A parse error is swallowed; whatever the parser gathered up to that
    point (possibly an empty dict) is returned.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs
2019
2020
def clean_html(html):
    """Clean an HTML snippet into a readable string

    Source newlines become spaces, <br> tags and paragraph boundaries become
    newlines, all remaining tags are dropped and HTML entities are decoded.
    None is passed through unchanged.
    """

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    text = html.replace('\n', ' ')
    substitutions = (
        # Newline vs <br />
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    # Replace html entities
    return unescapeHTML(text).strip()
2036
2037
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    The name '-' stands for standard output (its binary buffer when there is
    one). Any other name is opened with open_mode; if that fails for a reason
    other than missing permissions, the name is passed through
    sanitize_path() and opened again, provided sanitizing changed it.
    Otherwise the original error is re-raised, like the standard open()
    function would do.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename != '-':
            return (open(encodeFilename(filename), open_mode), filename)

        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        out_stream = sys.stdout
        if hasattr(out_stream, 'buffer'):
            out_stream = out_stream.buffer
        return (out_stream, filename)
    except (IOError, OSError) as err:
        # Renaming cannot help against a permission problem
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise

        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
2068
2069
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2077
2078
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.

    Characters are replaced one by one first. Unless is_id is set, the result
    is then tidied up: runs of underscores are collapsed, surrounding
    underscores and leading dots are removed, a leading '-' becomes '_' and an
    empty result becomes '_'.
    """
    def replace_insane(char):
        code = ord(char)
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        # Question marks and control characters are dropped altogether
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (
                char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        return result

    # Collapse every run of underscores into a single one
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[1:]
    result = result.lstrip('.')
    return result or '_'
2118
2119
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On any other platform the path is returned unchanged. On Windows the
    path is normalized and, in every component except '.' and '..', each
    character matched by the pattern below (reserved characters, plus a
    trailing whitespace character or dot) is replaced with '#'. A drive or
    UNC prefix is kept as is.
    """
    if sys.platform != 'win32':
        return s

    drive_or_unc = os.path.splitdrive(s)[0]
    if not drive_or_unc and sys.version_info < (2, 7):
        drive_or_unc = os.path.splitunc(s)[0]

    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the empty component in front of the leading separator
        del parts[0]

    def _sanitize_part(part):
        if part in ('.', '..'):
            return part
        return re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part)

    sanitized_parts = [_sanitize_part(part) for part in parts]
    if drive_or_unc:
        sanitized_parts.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_parts)
2136
2137
def sanitize_url(url):
    """Return url with a few well-known defects repaired.

    Protocol-relative URLs ('//host/...') get an 'http:' scheme and a small
    set of frequently seen scheme typos is corrected. Anything else is
    returned unchanged.
    """
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:' + url

    # Fix some common typos seen so far
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for pattern, replacement in typo_fixes:
        # Patterns are anchored at the start, so at most one substitution happens
        fixed, hits = re.subn(pattern, replacement, url)
        if hits:
            return fixed
    return url
2154
2155
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request for url after passing it through sanitize_url().

    Remaining positional and keyword arguments are forwarded to Request as is.
    """
    cleaned_url = sanitize_url(url)
    return compat_urllib_request.Request(cleaned_url, *args, **kwargs)
2158
2159
def expand_path(s):
    """Expand shell variables and ~"""
    # The user directory is expanded first, environment variables afterwards
    home_expanded = compat_expanduser(s)
    return os.path.expandvars(home_expanded)
2163
2164
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list holding the first occurrence of every element, in the
    order of appearance. Membership is checked against the result list, so
    elements only need to support equality (they may be unhashable).
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
2172
2173
2174 def _htmlentity_transform(entity_with_semicolon):
2175     """Transforms an HTML entity to a character."""
2176     entity = entity_with_semicolon[:-1]
2177
2178     # Known non-numeric HTML entity
2179     if entity in compat_html_entities.name2codepoint:
2180         return compat_chr(compat_html_entities.name2codepoint[entity])
2181
2182     # TODO: HTML5 allows entities without a semicolon. For example,
2183     # '&Eacuteric' should be decoded as 'Éric'.
2184     if entity_with_semicolon in compat_html_entities_html5:
2185         return compat_html_entities_html5[entity_with_semicolon]
2186
2187     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
2188     if mobj is not None:
2189         numstr = mobj.group(1)
2190         if numstr.startswith('x'):
2191             base = 16
2192             numstr = '0%s' % numstr
2193         else:
2194             base = 10
2195         # See https://github.com/ytdl-org/youtube-dl/issues/7518
2196         try:
2197             return compat_chr(int(numstr, base))
2198         except ValueError:
2199             pass
2200
2201     # Unknown entity in name, return its literal representation
2202     return '&%s;' % entity
2203
2204
def unescapeHTML(s):
    """Replace HTML entities ('&...;') in s with the characters they denote.

    None is passed through. Any other value must be exactly of type
    compat_str. Each entity is decoded by _htmlentity_transform().
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        # group(1) is the entity without '&' but with the closing ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
2212
2213
def get_subprocess_encoding():
    """Return the encoding name to use for subprocess arguments.

    On win32 with a Windows major version of 5 or higher this is the
    preferred locale encoding, elsewhere the filesystem encoding. 'utf-8' is
    used when no encoding could be determined.
    """
    on_modern_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if on_modern_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2224
2225
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file

    Returns s unchanged whenever the platform works with Unicode file names
    (Python 3, Windows 2000 and up unless for_subprocess is set, Jython);
    otherwise returns s encoded with get_subprocess_encoding(), ignoring
    characters that can not be encoded.
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if sys.platform == 'win32' and not for_subprocess:
        if sys.getwindowsversion()[0] >= 5:
            return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    target_encoding = get_subprocess_encoding()
    return s.encode(target_encoding, 'ignore')
2248
2249
def decodeFilename(b, for_subprocess=False):
    """Counterpart of encodeFilename().

    On Python 3, and for any value that is not a byte string, b is returned
    as is. Otherwise it is decoded with get_subprocess_encoding(), ignoring
    undecodable bytes. for_subprocess is accepted but not used here.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    source_encoding = get_subprocess_encoding()
    return b.decode(source_encoding, 'ignore')
2259
2260
def encodeArgument(s):
    """Encode a command line argument via encodeFilename(..., True).

    Values that are not compat_str are first decoded as ASCII.
    """
    if isinstance(s, compat_str):
        return encodeFilename(s, True)

    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2268
2269
def decodeArgument(b):
    """Decode a command line argument; shorthand for decodeFilename(b, True)."""
    return decodeFilename(b, for_subprocess=True)
2272
2273
def decodeOption(optval):
    """Return an option value as compat_str.

    None is passed through, byte strings are decoded with the preferred
    encoding. Any other non-text value trips the assertion.
    """
    if optval is None:
        return optval

    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval
    assert isinstance(decoded, compat_str)
    return decoded
2282
2283
def formatSeconds(secs):
    """Format a duration given in seconds as a short clock-like string.

    Returns 'H:MM:SS' from one hour upwards, 'M:SS' from one minute upwards
    and the plain number of seconds below that. Fractions of a second are
    truncated by the '%d' conversions.
    """
    # The comparisons are inclusive so that exactly one hour/minute is
    # rendered as '1:00:00' / '1:00' rather than '60:00' / '60'
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
2291
2292
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler suited to the running Python version.

    params is the options dict; its 'nocheckcertificate' entry switches
    certificate verification off. Extra keyword arguments are forwarded to
    YoutubeDLHTTPSHandler.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2316
2317
def bug_reports_message():
    """Return the text appended to error messages asking for a bug report.

    The update hint depends on ytdl_is_updateable().
    """
    update_cmd = (
        'type  youtube-dl -U  to update' if ytdl_is_updateable()
        else 'see  https://yt-dl.org/update  on how to update')
    return ''.join((
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ))
2327
2328
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    It adds nothing to Exception; it only serves as a common base class.
    """
2332
2333
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.

        The final message is msg, prefixed with video_id when given, followed
        by the repr of cause when truthy and, for unexpected errors, by the
        bug report instructions.
        """
        # Network errors and unavailable videos being handled right now are
        # never treated as bugs
        active_exc_type = sys.exc_info()[0]
        if active_exc_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True

        message = msg if video_id is None else video_id + ': ' + msg
        if cause:
            message = message + ' (caused by %r)' % cause
        if not expected:
            message = message + bug_reports_message()
        super(ExtractorError, self).__init__(message)

        self.video_id = video_id
        self.cause = cause
        self.exc_info = sys.exc_info()  # preserve original exception
        self.traceback = tb

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none."""
        tb = self.traceback
        return None if tb is None else ''.join(traceback.format_tb(tb))
2361
2362
class UnsupportedError(ExtractorError):
    """Expected ExtractorError reporting an unsupported URL (kept in .url)."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2368
2369
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match

    Behaves exactly like ExtractorError; only the type differs.
    """
2373
2374
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.

    It is always created as an expected error. The original message is
    kept in .msg and the countries argument in .countries.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.countries = countries
        self.msg = msg
2385
2386
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        YoutubeDLError.__init__(self, msg)
        # Kept so that callers can inspect or re-raise the original error
        self.exc_info = exc_info
2399
2400
class SameFileError(YoutubeDLError):
    """Same File exception.

    Thrown by FileDownloader objects when they detect that several
    downloads would end up in the same file on disk.
    """
2408
2409
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.

    The message is additionally available as the .msg attribute.
    """

    def __init__(self, msg):
        YoutubeDLError.__init__(self, msg)
        self.msg = msg
2420
2421
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached.

    Carries no data of its own; only the type matters.
    """
2425
2426
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available
    for that video.
    """
2434
2435
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        YoutubeDLError.__init__(self, message)
        # Both sizes are in bytes
        self.expected = expected
        self.downloaded = downloaded
2451
2452
class XAttrMetadataError(YoutubeDLError):
    """Error raised for failures while handling xattr metadata.

    Attributes:
        code:   the error number passed in (or None)
        msg:    the error message passed in
        reason: coarse classification derived from code and msg, one of
                'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'
    """

    # Message fragments that indicate lack of space. 'Disk quota exceeded' is
    # the text matching errno.EDQUOT; the misspelled variant was what this
    # check looked for historically and is kept for compatibility.
    _NO_SPACE_MESSAGES = ('No space left', 'Disk quota exceeded', 'Disk quota excedded')

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or any(text in self.msg for text in self._NO_SPACE_MESSAGES)):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2467
2468
class XAttrUnavailableError(YoutubeDLError):
    """Plain YoutubeDLError subclass; judging by its name it signals that xattr support is unavailable."""
2471
2472
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and, if configured, pin it to a source address.

    ydl_handler is the handler whose _params dict supplies the optional
    'source_address' value. http_class is the connection class to
    instantiate with *args/**kwargs. is_https is only consulted by the
    Python 2.6 fallback, to decide whether the socket is wrapped with SSL.

    Returns the (possibly patched) connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NOTE: the source_address parameter shadows the outer variable;
            # here it is the tuple that gets passed to sock.bind(), its
            # first item being the address string
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source address selects IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            # Keep only remote addresses of the same family as the source address
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try the candidates in order; the first successful connect wins
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and close the socket before
                    # moving on to the next candidate
                    err = _
                    if sock is not None:
                        sock.close()
            # No candidate succeeded: re-raise the last error, if any
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        # Only replace the hook on connection objects that already have one
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Bind tuple: the configured address with port 0
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address attribute: replace connect() on this instance
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2536
2537
def handle_youtubedl_headers(headers):
    """Process the internal 'Youtubedl-no-compression' marker header.

    Without the marker the very same headers object is returned. With it, a
    new dict is returned that lacks both the marker and any Accept-Encoding
    header (matched case-insensitively); the input is left unmodified.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    without_encoding = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    del without_encoding['Youtubedl-no-compression']
    return without_encoding
2546
2547
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Store params as self._params; other arguments go to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req, going through a SOCKS connection class if requested.

        The proxy is requested via the internal 'Ytdl-socks-proxy' header,
        which is removed from the request before it is opened.
        """
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        # do_open() calls the partial with the connection arguments;
        # False is the is_https flag of _create_http_connection()
        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data.

        First tries zlib.decompress() with negative wbits; if that fails
        with zlib.error, retries with the default settings.
        """
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Pre-process an outgoing request and return it (possibly replaced).

        Escapes the URL, adds missing standard headers and applies
        handle_youtubedl_headers() to the request headers.
        """
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Post-process a response and return it (possibly replaced).

        Responses with a gzip or deflate Content-encoding are decompressed
        and wrapped in a new addinfourl object; the Location header of 3xx
        responses is percent-encoded when necessary.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; if no attempt
                # succeeds, the original error is raised
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                # The header is only rewritten if escaping changed something
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic goes through the same request/response processing
    https_request = http_request
    https_response = http_response
2669
2670
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from base_class that connects via a SOCKS proxy.

    base_class must be a subclass of HTTPConnection or HTTPSConnection.
    socks_proxy is the proxy URL; its scheme selects the SOCKS version
    ('socks5', 'socks'/'socks4' or 'socks4a'), the port defaults to 1080
    and user name and password are URL-unquoted when present.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        # Empty values (None or '') are handed back untouched
        return compat_urllib_parse_unquote_plus(s) if s else s

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # Plain HTTP connections are done at this point
            if not isinstance(self, compat_http_client.HTTPSConnection):
                return

            if hasattr(self, '_context'):  # Python > 2.6
                self.sock = self._context.wrap_socket(
                    self.sock, server_hostname=self.host)
            else:
                self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2712
2713
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections through _create_http_connection().

    params is stored as self._params. https_conn_class optionally replaces
    the default HTTPSConnection class.
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        if not https_conn_class:
            https_conn_class = compat_http_client.HTTPSConnection
        self._https_conn_class = https_conn_class
        self._params = params

    def https_open(self, req):
        """Open req, going through a SOCKS connection class if requested.

        The proxy is requested via the internal 'Ytdl-socks-proxy' header,
        which is removed from the request before it is opened.
        """
        # Forward whichever SSL settings the base handler has stored
        extra_kwargs = {}
        if hasattr(self, '_context'):  # python > 2.6
            extra_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            extra_kwargs['check_hostname'] = self._check_hostname

        conn_class = self._https_conn_class
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **extra_kwargs)
2737
2738
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar variant that reads and writes cookie files as UTF-8,
    accepts '#HttpOnly_' prefixed entries and treats `expires` of 0 as a
    session cookie marker.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dl.  Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        filename defaults to self.filename; ValueError is raised when
        neither is set. Cookies marked as discard or already expired are
        skipped unless ignore_discard / ignore_expires are set.
        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if cookie.discard and not ignore_discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                expires = '' if cookie.expires is None else compat_str(cookie.expires)
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name, value = '', cookie.name
                else:
                    name, value = cookie.name, cookie.value
                fields = [cookie.domain, initial_dot, cookie.path,
                          secure, expires, name, value]
                f.write('\t'.join(fields) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename; ValueError is raised when
        neither is set. Malformed entries are skipped with a warning written
        to stderr.
        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly marker so that the entry is parsed normally
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            fields = line.split('\t')
            if len(fields) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(fields))
            entry = self._CookieFileEntry(*fields)
            if entry.expires_at and not entry.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % entry.expires_at)
            return line

        # Validated lines are collected in memory and then handed to the
        # parser of the base class
        buf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    prepared = prepare_line(line)
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
                buf.write(prepared)
        buf.seek(0)
        self._really_load(buf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2855
2856
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor subclass that currently only delegates to its base class.

    The Set-Cookie escaping workaround described in http_response() is
    commented out, so responses are passed on unmodified.
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Hand the response to HTTPCookieProcessor.http_response() as is."""
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is disabled (commented out)
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # HTTPS requests and responses are processed like their HTTP counterparts
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2879
2880
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that passes the redirect URL on as compat_str on Python 2.

    On Python 3 nothing is overridden.
    """
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2888
2889
def extract_timezone(date_str):
    """Split a trailing UTC designator off a date string.

    Recognizes a final ``Z`` or a final numeric offset such as ``+0100`` /
    ``-05:30`` (optionally preceded by one space), provided at least eight
    characters come before it.

    Returns a ``(timedelta, remaining_date_str)`` tuple. The timedelta is
    zero for ``Z`` and when nothing was recognized; in the latter case the
    string is returned untouched.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if tz_match is None:
        return datetime.timedelta(), date_str

    # Cut the matched designator off the end of the string
    remainder = date_str[:-len(tz_match.group('tz'))]
    sign_char = tz_match.group('sign')
    if not sign_char:
        # Plain 'Z': UTC
        return datetime.timedelta(), remainder

    factor = 1 if sign_char == '+' else -1
    offset = datetime.timedelta(
        hours=factor * int(tz_match.group('hours')),
        minutes=factor * int(tz_match.group('minutes')))
    return offset, remainder
2906
2907
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    date_str is expected as YYYY-MM-DD<delimiter>HH:MM:SS; fractional
    seconds are dropped. When timezone (a timedelta) is not given, it is
    taken from the string via extract_timezone().
    Returns None for a None input or an unparseable string.
    """
    if date_str is None:
        return None

    # Fractional seconds are not part of the strptime layout below
    trimmed = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, trimmed = extract_timezone(trimmed)

    try:
        layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_moment = datetime.datetime.strptime(trimmed, layout) - timezone
        return calendar.timegm(utc_moment.timetuple())
    except ValueError:
        return None
2925
2926
def date_formats(day_first=True):
    """Return the day-first strptime format collection when day_first is
    truthy, the month-first collection otherwise."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2929
2930
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Commas become spaces; AM/PM markers (with an optional alphabetic
    # timezone name after them) and a trailing numeric UTC offset are dropped
    cleaned = date_str.replace(',', ' ')
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    upload_date = None
    # Every format is tried; if several match, the last match is kept
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Fall back to the e-mail (RFC 2822 style) date parser
        fields = email.utils.parsedate_tz(cleaned)
        if fields:
            try:
                upload_date = datetime.datetime(*fields[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is None:
        return None
    return compat_str(upload_date)
2957
2958
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    The string is first normalized (commas and pipes removed, AM/PM markers
    and timezone names stripped, a numeric UTC offset extracted), then tried
    against each format from date_formats(day_first), and finally handed to
    email.utils.parsedate_tz as a fallback.

    Returns None when date_str is None or nothing could parse it.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # Any 'PM' in the string shifts the result by 12 hours
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        # The numeric UTC offset was already cut out of date_str by
        # extract_timezone(), and calendar.timegm() only looks at the first
        # six fields, so the offset has to be applied explicitly here just
        # like in the strptime branch above.
        # (days/seconds arithmetic instead of total_seconds() keeps this
        # usable on interpreters lacking timedelta.total_seconds.)
        tz_seconds = timezone.days * 86400 + timezone.seconds
        return calendar.timegm(timetuple) + pm_delta * 3600 - tz_seconds
    return None
2990
2991
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from a URL.

    Takes the text after the last '.' of the part before the first '?'.
    It is returned if purely alphanumeric, or (minus trailing slashes) if
    listed in KNOWN_EXTENSIONS; otherwise default_ext is returned.
    """
    if url is None or '.' not in url:
        return default_ext

    before_query = url.partition('?')[0]
    candidate = before_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    without_slash = candidate.rstrip('/')
    if without_slash in KNOWN_EXTENSIONS:
        return without_slash
    return default_ext
3003
3004
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle file name by replacing filename's extension with
    '<sub_lang>.<sub_format>' (see replace_extension)."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
3007
3008
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD,
    'yesterday' or (now|today)[+-][0-9](day|week|month|year)(s)?

    Months and years are approximated as 30 and 365 days.
    Anything else is passed to strptime('%Y%m%d'), which raises ValueError
    on malformed input."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if relative is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount

    unit = relative.group('unit')
    # A bad approximation?
    approximate_days = {'month': 30, 'year': 365}
    if unit in approximate_days:
        amount *= approximate_days[unit]
        unit = 'day'

    # timedelta takes plural keyword names (days=, weeks=)
    return today + datetime.timedelta(**{unit + 's': amount})
3036
3037
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that do not consist of exactly eight digits are returned as is."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    year, month, day = parts.groups()
    return '-'.join((year, month, day))
3046
3047
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by
        date_from_str; a missing bound defaults to the earliest / latest
        representable date. Raises ValueError if start is after end."""
        self.start = datetime.date.min if start is None else date_from_str(start)
        self.end = datetime.date.max if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range (both bounds inclusive);
        non-date values are converted with date_from_str first"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return ' - '.join((self.start.isoformat(), self.end.isoformat()))
3077
3078
def platform_name():
    """ Returns the platform name as a compat_str

    A bytes result from platform.platform() is decoded with the preferred
    encoding first. """
    name = platform.platform()
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3087
3088
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The "special method" is the WriteConsoleW function of kernel32, used
    only when `out` is file descriptor 1 or 2 and the matching standard
    handle refers to a console. Raises OSError if WriteConsoleW reports
    failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle counts as a console only if it is valid, its file type
        # (ignoring the REMOTE bit) is CHAR, and GetConsoleMode succeeds on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters, each chunk stopping just
    # before the next non-BMP character. A count of 0 means the string
    # starts with a non-BMP character: it is then written on its own with a
    # length of 2, and exactly one Python character is consumed.
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3162
3163
def write_string(s, out=None, encoding=None):
    """Write the text string s to out (sys.stderr by default) and flush it.

    On win32, when no encoding is forced and out has a fileno, the console
    API is tried first. Otherwise s is written encoded (undecodable
    characters ignored) to byte-oriented streams or to out.buffer, and
    written as is to any other stream.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    try_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if try_console_api and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    byte_stream = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if byte_stream:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        codec = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(codec, 'ignore'))
    else:
        out.write(s)
    out.flush()
3184
3185
def bytes_to_intlist(bs):
    """Return the byte values of bs as a list of ints ([] for empty input)."""
    if not bs:
        return []
    # Indexing yields ints on Python 3 and 1-character strings on Python 2
    if not isinstance(bs[0], int):
        return [ord(ch) for ch in bs]
    return list(bs)
3193
3194
def intlist_to_bytes(xs):
    """Pack a sequence of byte values (struct format 'B') into a byte string;
    an empty sequence gives b''."""
    if xs:
        return compat_struct_pack('%dB' % len(xs), *xs)
    return b''
3199
3200
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) with one of three
# implementations: LockFileEx/UnlockFileEx on win32, fcntl.flock where fcntl
# is importable, and stubs raising IOError everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low and high DWORD of the byte count passed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock f starting at offset 0; raises OSError if LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so that _unlock_file can pass the same
        # structure to UnlockFileEx
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 requests an exclusive lock, 0x0 a shared one
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release a lock taken by _lock_file; raises OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock on f."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock held on f."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            """Always raises IOError: no locking primitive is available."""
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            """Always raises IOError: no locking primitive is available."""
            raise IOError(UNSUPPORTED_MSG)
3274
3275
class locked_file(object):
    """Context manager around a file that is locked while the block runs.

    The file is opened in __init__ (mode must be 'r', 'a' or 'w'); the lock
    is taken in __enter__ (shared for 'r', exclusive otherwise) and released
    in __exit__, which also closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Only readers share the lock
        want_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, want_exclusive)
        except IOError:
            # Do not leak the open file when locking fails
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3305
3306
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' if that is None."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3310
3311
def shell_quote(args):
    """Return the arguments shell-quoted and joined with single spaces.

    Byte-string arguments are decoded with the filesystem encoding first.
    """
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join([compat_shlex_quote(as_text(a)) for a in args])
3321
3322
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded into a '#__youtubedl_smuggle=...' fragment appended
    to url. If url already carries smuggled data, both are merged and the
    already smuggled values take precedence.
    The data mapping passed in is not modified.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a fresh dict so the caller's mapping is left untouched
    merged = {}
    merged.update(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
3331
3332
def unsmuggle_url(smug_url, default=None):
    """Undo smuggle_url: return a (url, data) tuple.

    URLs without a '#__youtubedl_smuggle' marker are returned unchanged
    together with default.
    """
    if '#__youtubedl_smuggle' in smug_url:
        plain_url, _, fragment = smug_url.rpartition('#')
        payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return plain_url, json.loads(payload)
    return smug_url, default
3340
3341
def format_bytes(bytes):
    """Format a byte count with a binary suffix, e.g. 1536 -> '1.50KiB'.

    Returns 'N/A' for None. A str argument is converted with float() first.
    Values beyond the largest suffix are expressed in YiB and positive
    values below one byte in B.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Keep the exponent inside the suffix table: huge values would
        # otherwise raise IndexError, and tiny fractions would produce a
        # negative index that silently selects a suffix from the end.
        exponent = max(0, min(exponent, len(suffixes) - 1))
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
3354
3355
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' at the start of s and scale it.

    unit_table maps unit strings to multipliers. The number may use ',' or
    '.' as decimal separator. Returns int(number * multiplier), or None if
    s does not start with a number followed by a known unit.
    """
    alternation = '|'.join(map(re.escape, unit_table))
    found = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternation, s)
    if found is None:
        return None
    number = float(found.group('num').replace(',', '.'))
    multiplier = unit_table[found.group('unit')]
    return int(number * multiplier)
3365
3366
def parse_filesize(s):
    """Parse a human-readable file size such as '1.5 MiB' into bytes.

    Returns None for None or for strings lookup_unit_table cannot parse.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    # (symbol, decimal prefix name, binary prefix name), by increasing power
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )
    for power, (symbol, decimal_name, binary_name) in enumerate(prefixes, 1):
        decimal = 1000 ** power
        binary = 1024 ** power
        lower = symbol.lower()
        _UNIT_TABLE[symbol + 'iB'] = binary
        _UNIT_TABLE[symbol + 'B'] = decimal
        _UNIT_TABLE[lower + 'B'] = binary
        _UNIT_TABLE[symbol + 'b'] = decimal
        _UNIT_TABLE[lower + 'b'] = decimal
        _UNIT_TABLE[decimal_name + 'bytes'] = decimal
        _UNIT_TABLE[binary_name + 'bytes'] = binary

    return lookup_unit_table(_UNIT_TABLE, s)
3436
3437
def parse_count(s):
    """Parse a count such as '1,234' or '1.5M' into an int.

    Strings made only of digits, commas and dots go through str_to_int;
    everything else is looked up with a k/m/kk suffix table.
    Returns None for None.
    """
    if s is None:
        return None

    text = s.strip()
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    suffix_multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(suffix_multipliers, text)
3457
3458
def parse_resolution(s):
    """Extract resolution info from a string.

    Recognizes, in this order, 'WIDTHxHEIGHT' (x, X or the multiplication
    sign), 'HEIGHTp' / 'HEIGHTi', and '4k' / '8k' (height = number * 540).
    Returns a dict with 'width' and/or 'height', or {} if nothing matched
    or s is None.
    """
    if s is None:
        return {}

    recognizers = (
        (r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b',
         lambda m: {'width': int(m.group('w')), 'height': int(m.group('h'))}),
        (r'\b(\d+)[pPiI]\b',
         lambda m: {'height': int(m.group(1))}),
        (r'\b([48])[kK]\b',
         lambda m: {'height': int(m.group(1)) * 540}),
    )
    for pattern, build in recognizers:
        found = re.search(pattern, s)
        if found:
            return build(found)
    return {}
3479
3480
def parse_bitrate(s):
    """Return the integer in front of 'kbps' found in s, or None if s is
    not a text string or contains no such figure."""
    if not isinstance(s, compat_str):
        return None
    found = re.search(r'\b(\d+)\s*kbps', s)
    if not found:
        return None
    return int(found.group(1))
3487
3488
def month_by_name(name, lang='en'):
    """ Return the number (1-12) of a month given its full name in lang

    Languages missing from MONTH_NAMES fall back to the English names.
    Returns None if the name is not found. """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    try:
        position = names.index(name)
    except ValueError:
        return None
    return position + 1
3498
3499
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations, i.e. the first three letters of the English name;
        None if there is no such month """

    abbreviations = [full_name[:3] for full_name in ENGLISH_MONTH_NAMES]
    try:
        position = abbreviations.index(abbrev)
    except ValueError:
        return None
    return position + 1
3508
3509
def fix_xml_ampersands(xml_str):
    """Replace every '&' by '&amp;' in XML, except those that already start
    one of the predefined entities or a numeric character reference"""
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
3516
3517
def setproctitle(title):
    """Best-effort: set the process name through libc's prctl().

    Silently does nothing on Jython, when 'libc.so.6' cannot be loaded, or
    when the loaded libc has no prctl symbol.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Initializing the buffer from the bytes makes it one item larger than
    # the data, so the name handed to prctl() is NUL-terminated. Sizing it
    # with len(title_bytes) left no room for the terminator.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3542
3543
def remove_start(s, start):
    """Return s without the prefix start; s is returned unchanged if it is
    None or does not begin with start."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3546
3547
def remove_end(s, end):
    """Return s without the suffix end; s is returned unchanged if it is
    None, does not finish with end, or end is empty."""
    # An empty suffix must be special-cased: s[:-0] is s[:0], i.e. ''.
    if s is None or not s.endswith(end) or not end:
        return s
    return s[:-len(end)]
3550
3551
def remove_quotes(s):
    """Strip one pair of matching surrounding quotes (double or single).

    None, strings shorter than two characters and strings not wrapped in a
    matching pair are returned unchanged.
    """
    if s is None or len(s) < 2:
        return s
    opening, closing = s[0], s[-1]
    if opening == closing and opening in ('"', "'"):
        return s[1:-1]
    return s
3559
3560
def url_basename(url):
    """Return the last '/'-separated segment of the URL's path, ignoring
    leading and trailing slashes."""
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
3564
3565
def base_url(url):
    """Return the http(s) URL up to and including the last '/' that comes
    before any '?', '#' or '&'.

    Raises AttributeError when url does not match that shape."""
    prefix = re.match(r'https?://[^?#&]+/', url)
    return prefix.group()
3568
3569
def urljoin(base, path):
    """Join path onto base, tolerating bad input.

    Byte strings are decoded as UTF-8. Returns None if path is empty or not
    a string. A path that is already absolute or protocol-relative is
    returned as is. Otherwise base must be an http(s) or protocol-relative
    URL, else None is returned.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode('utf-8')
    base_usable = isinstance(base, compat_str) and re.match(
        r'^(?:https?:)?//', base)
    if not base_usable:
        return None
    return compat_urlparse.urljoin(base, path)
3583
3584
class HEADRequest(compat_urllib_request.Request):
    """Request subclass whose get_method() always reports 'HEAD'."""
    def get_method(self):
        return 'HEAD'
3588
3589
class PUTRequest(compat_urllib_request.Request):
    """Request subclass whose get_method() always reports 'PUT'."""
    def get_method(self):
        return 'PUT'
3593
3594
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to int, returning default when that is not possible.

    If get_attr is given, the attribute of that name is read from v first
    (a missing attribute counts as None). None and '' give default. The
    result is int(v) * invscale // scale.
    """
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int() of an infinite float
        return default
3607
3608
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default if v is None."""
    if v is None:
        return default
    return compat_str(v)
3611
3612
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Integers are returned as is; text strings have ',', '.' and '+'
    removed before conversion; any other type gives None. """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        digits_only = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(digits_only)
    return None
3620
3621
def float_or_none(v, scale=1, invscale=1, default=None):
    """Return float(v) * invscale / scale, or default if v is None or the
    computation raises ValueError or TypeError."""
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        result = default
    return result
3629
3630
def bool_or_none(v, default=None):
    """Return v if it is a bool, default otherwise."""
    if isinstance(v, bool):
        return v
    return default
3633
3634
def strip_or_none(v, default=None):
    """Return v.strip() for text strings, default for anything else."""
    if not isinstance(v, compat_str):
        return default
    return v.strip()
3637
3638
def url_or_none(url):
    """Return url stripped of surrounding whitespace if it is a text string
    starting with '//' or '<scheme>://'; None otherwise."""
    if not (url and isinstance(url, compat_str)):
        return None
    candidate = url.strip()
    if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', candidate):
        return candidate
    return None
3644
3645
def parse_duration(s):
    """Parse a duration string into a number of seconds.

    Three notations are tried in order: colon-separated
    '[[[DD:]HH:]MM:]SS[.fff]', unit-suffixed / ISO 8601-like text (years,
    months and weeks are accepted but do not contribute to the result),
    and a lone fractional 'N hours' / 'N mins' value.
    Returns None if s is not a string or matches none of them.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    notations = (
        r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$',
        r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''',
        r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$',
    )
    for notation in notations:
        found = re.match(notation, s)
        if found:
            break
    else:
        return None

    # Groups a notation does not define simply come back as None
    parts = found.groupdict()
    days, hours, mins = parts.get('days'), parts.get('hours'), parts.get('mins')
    secs, ms = parts.get('secs'), parts.get('ms')

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms holds the fractional part including its leading dot
        duration += float(ms)
    return duration
3702
3703
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of filename's real extension.

    If expected_real_ext is given and differs from the real extension, ext
    is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
3710
3711
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace filename's real extension with ext.

    If expected_real_ext is given and differs from the real extension, ext
    is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{0}.{1}'.format(stem, ext)
3717
3718
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version)
    Returns False if launching the binary raises OSError. """
    try:
        process = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
3727
3728
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present

    The combined stdout/stderr output of running exe with args is passed
    to detect_exe_version. """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        process = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process.communicate()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3746
3747
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Return the first group of version_re found in output, or
    unrecognized if there is no match.

    Without version_re, the token following the word 'version' is used."""
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    if not found:
        return unrecognized
    return found.group(1)
3757
3758
class PagedList(object):
    """Base for paged lists; __len__ relies on a getslice() method that
    this class itself does not define."""

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
3763
3764
class OnDemandPagedList(PagedList):
    """Paged list that fetches pages one by one as a slice requires them.

    pagefunc is called with a page number and its result is turned into a
    list; pagesize is the number of entries of a full page. With use_cache,
    fetched pages are kept in a dict keyed by page number.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return the entries from index start up to (not including) end as
        a list; end=None means up to the last available entry."""
        res = []
        # Start at the page that contains index `start`
        for pagenum in itertools.count(start // self._pagesize):
            # Global indices of the first entry of this page and of the next one
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offset of `start` inside this page (0 for any later page)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted entry if `end` falls inside
            # this page, None if the page is wanted up to its end
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
3815
3816
class InAdvancePagedList(PagedList):
    """Paged list for which the total number of pages is known in advance."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list.

        Only pages with numbers below pagecount are requested from pagefunc,
        even when end points past the last page.
        """
        res = []
        start_page = start // self._pagesize
        # Clamp to the known page count so that an oversized end does not
        # trigger requests for pages that do not exist
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        # Number of entries to drop from the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Number of entries still to be collected (None means no limit)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3844
3845
def uppercase_escape(s):
    """Decode every backslash-U escape with eight hex digits found in s."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(mobj):
        decoded, _ = decode(mobj.group(0))
        return decoded

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
3852
3853
def lowercase_escape(s):
    """Decode every backslash-u escape with four hex digits found in s."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(mobj):
        decoded, _ = decode(mobj.group(0))
        return decoded

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
3860
3861
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Characters that quote() has to leave untouched
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    running_python2 = sys.version_info < (3, 0)
    if running_python2 and isinstance(s, compat_str):
        # On Python 2 unicode input is handed to quote() as UTF-8 bytes
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, safe_chars)
3867
3868
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host part is IDNA-encoded; the other components are percent-escaped
    idna_netloc = parts.netloc.encode('idna').decode('ascii')
    escaped_parts = parts._replace(
        netloc=idna_netloc,
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped_parts.geturl()
3879
3880
def read_batch_urls(batch_fd):
    """Read the URLs contained in a batch file.

    batch_fd is an iterable of lines (text or UTF-8 encoded bytes) and is
    closed once it has been read. Returns a list with the stripped,
    non-empty lines that do not start with '#', ';' or ']'.
    """
    # A byte order mark shows up as U+FEFF when the data was decoded as
    # UTF-8, or as the three characters below when the raw BOM bytes were
    # decoded with a single-byte encoding
    BOMS = ('\xef\xbb\xbf', '\ufeff')

    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        # Lines starting with one of these characters are skipped
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
3895
3896
def urlencode_postdata(*args, **kargs):
    """URL-encode the given arguments and return the result as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3899
3900
def update_url_query(url, query):
    """Return url with its query string updated by the mapping query.

    A false query leaves the URL untouched.
    """
    if query:
        parts = compat_urlparse.urlparse(url)
        params = compat_parse_qs(parts.query)
        params.update(query)
        new_query = compat_urllib_parse_urlencode(params, True)
        url = compat_urlparse.urlunparse(parts._replace(query=new_query))
    return url
3909
3910
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request based on req with the given parts replaced.

    headers are merged over the headers of req, data and url fall back to
    the values of req, and query is merged into the URL's query string.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    payload = data or req.data
    target_url = update_url_query(url or req.get_full_url(), query)
    # HEAD and PUT requests keep their method through dedicated classes
    request_class = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), compat_urllib_request.Request)
    updated = request_class(
        target_url, data=payload, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        updated.timeout = req.timeout
    return updated
3929
3930
3931 def _multipart_encode_impl(data, boundary):
3932     content_type = 'multipart/form-data; boundary=%s' % boundary
3933
3934     out = b''
3935     for k, v in data.items():
3936         out += b'--' + boundary.encode('ascii') + b'\r\n'
3937         if isinstance(k, compat_str):
3938             k = k.encode('utf-8')
3939         if isinstance(v, compat_str):
3940             v = v.encode('utf-8')
3941         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3942         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3943         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3944         if boundary.encode('ascii') in content:
3945             raise ValueError('Boundary overlaps with data')
3946         out += content
3947
3948     out += b'--' + boundary.encode('ascii') + b'--\r\n'
3949
3950     return out, content_type
3951
3952
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple of the encoded body and the Content-Type header value.
    A ValueError is only raised for a caller-supplied boundary that
    overlaps with the data.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-supplied boundary is used as is; clashes are not retried
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # The random boundary clashed with the data, try another one
            continue
3981
3982
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key of a list/tuple of keys.

    For a list or tuple, keys that are missing or map to None are skipped,
    as are false values unless skip_false_values is disabled; default is
    returned when no key qualifies.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for candidate in key_or_keys:
        if candidate not in d:
            continue
        value = d[candidate]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
3991
3992
def try_get(src, getter, expected_type=None):
    """Apply one getter or a list/tuple of getters to src.

    Returns the first value obtained without an AttributeError, KeyError,
    TypeError or IndexError that also matches expected_type (if given),
    and None when there is no such value.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for fetch in getters:
        try:
            value = fetch(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
4004
4005
def merge_dicts(*dicts):
    """Merge dicts, with earlier dicts taking precedence.

    None values are ignored. An already merged value is only replaced when
    it is an empty string and the new value is a non-empty string.
    """
    result = {}
    for source in dicts:
        for key, value in source.items():
            if value is None:
                continue
            if key in result:
                existing = result[key]
                fills_empty_string = (
                    isinstance(value, compat_str) and value
                    and isinstance(existing, compat_str) and not existing)
                if not fills_empty_string:
                    continue
            result[key] = value
    return result
4018
4019
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as compat_str, decoding it with encoding if necessary."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4022
4023
# Maps US rating labels to the age limit that parse_age_limit() returns
# for them
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
4031
4032
# Maps 'TV-*' rating labels to the age limit that parse_age_limit() returns
# for them. The part of each key after the 'TV-' prefix is also used by
# parse_age_limit() to build its matching regular expression.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4041
4042
def parse_age_limit(s):
    """Turn an age limit given as int or string into an int, or None.

    Plain ints are accepted in the range 0..21. Strings may be one or two
    digits with an optional trailing '+', a key of US_RATINGS, or a TV
    parental guideline written as TV-XX, TV_XX or TVXX.
    """
    if type(s) == int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
4057
4058
def strip_jsonp(code):
    """Strip a JSONP callback wrapper from code, leaving the wrapped data."""
    jsonp_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)
4067
4068
def js_to_json(code):
    """Rewrite the JavaScript-style tokens found in code into JSON syntax.

    The tokens matched by the regular expression at the end of this function
    (quoted strings, comments, commas directly before a closing bracket,
    identifiers, hexadecimal/octal integers and decimal integers used as
    keys) are passed through fix_kv(); everything else is kept as is.
    Returns the converted string.
    """
    # A /* ... */ comment or a // comment that runs to the end of the line
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    # Optional whitespace around at most one comment
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, base) pairs for hexadecimal and octal integer literals,
    # optionally followed by skippable text and a ':' (use as a key)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Replacement callback: returns the JSON text for one matched token
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v == ',':
            # Comments and the matched commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Quoted string: drop the quotes and rewrite the escapes so that
            # the content fits into a double-quoted JSON string
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])

        # NOTE(review): this loop also sees the unquoted content of strings,
        # so a string such as "0x10" is turned into a number as well -
        # confirm whether that is intended
        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                # Keys are emitted as quoted decimal strings
                return '"%d":' % i if v.endswith(':') else '%d' % i

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4108
4109
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def rank(qid):
        # The position inside quality_ids is the quality; unknown ids get -1
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return rank
4118
4119
# %-style template with the named fields title, id and ext
# (presumably the default output filename template - confirm against users)
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
4121
4122
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None or len(s) <= length:
        return s
    suffix = '...'
    # Keep room for the ellipsis so that the result is length characters long
    return s[:length - len(suffix)] + suffix
4131
4132
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints."""
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
4135
4136
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether version is older than limit.

    A missing or unparsable version counts as outdated only when
    assume_new is false.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
    return outdated
4144
4145
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # Either this module was loaded through a zipimporter ...
    if isinstance(globals().get('__loader__'), zipimporter):
        return True
    # ... or sys has a "frozen" attribute
    return hasattr(sys, 'frozen')
4151
4152
def args_to_str(args):
    """Return a shell-quoted, space separated representation of args."""
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
4156
4157
def error_to_compat_str(err):
    """Return the message of err as a text string."""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4165
4166
def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    Parameters (everything from the first ';' on) and surrounding
    whitespace are ignored and the type is matched case-insensitively.
    Unknown subtypes are returned unchanged (lowercased); None is returned
    for None.
    """
    if mt is None:
        return None

    # Normalise first, so that e.g. 'audio/mp4; codecs="mp4a.40.2"' and
    # 'Audio/MPEG' also hit the full-type table below
    mt = mt.split(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }.get(mt)
    if ext is not None:
        return ext

    # Everything else is decided by the subtype alone
    _, _, res = mt.rpartition('/')

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }.get(res, res)
4202
4203
def parse_codecs(codecs_str):
    """Split a codecs string into a dict with 'vcodec' and 'acodec'.

    The first recognised video and audio codec are used, with 'none' for a
    missing one. If no codec is recognised and there are exactly two
    entries, they are taken as video and audio codec in that order;
    otherwise an empty dict is returned.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = [part.strip() for part in codecs_str.strip().strip(',').split(',')]
    splited_codecs = [part for part in stripped if part]

    vcodec = acodec = None
    for full_codec in splited_codecs:
        family = full_codec.split('.')[0]
        if family in VIDEO_CODECS:
            vcodec = vcodec or full_codec
        elif family in AUDIO_CODECS:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(splited_codecs) == 2:
        return {
            'vcodec': splited_codecs[0],
            'acodec': splited_codecs[1],
        }
    return {}
4233
4234
def urlhandle_detect_ext(url_handle):
    """Determine a file extension from the headers of url_handle.

    The filename of a Content-Disposition attachment is tried first, the
    Content-Type header is the fallback.
    """
    header = url_handle.headers.get

    disposition = header('Content-Disposition')
    if disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        ext = determine_ext(mobj.group('filename'), default_ext=None) if mobj else None
        if ext:
            return ext

    return mimetype2ext(header('Content-Type'))
4247
4248
def encode_data_uri(data, mime_type):
    """Return a base64 encoded data: URI for data with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4251
4252
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # Nothing is blocked when no limit is set or when the content is
    # available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4261
4262
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    # The UTF-32 marks come first since the UTF-16 little endian mark is a
    # prefix of the UTF-32 little endian one
    BOMS = (
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    )
    text = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            text = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if text is None:
        # No byte order mark, assume UTF-8
        text = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', text)
4281
4282
def determine_protocol(info_dict):
    """Return the protocol of info_dict.

    An explicit 'protocol' entry wins; otherwise the protocol is derived
    from the start of the URL, its extension or finally its scheme.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4303
4304
4305 def render_table(header_row, data):
4306     """ Render a list of rows, each as a list of values """
4307     table = [header_row] + data
4308     max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4309     format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
4310     return '\n'.join(format_str % tuple(row) for row in table)
4311
4312
def _match_one(filter_part, dct):
    """Evaluate a single filter expression against the dict dct.

    filter_part is either a comparison ("key OP value", with OP one of the
    COMPARISON_OPERATORS and an optional '?' directly after it) or a unary
    test ("key" or "!key").

    Returns the result of the comparison or unary test. For a comparison
    whose key is missing from dct (or None), the text matched by the
    optional '?' is returned instead, i.e. None when no '?' was given.

    Raises ValueError for a filter that matches neither form, for an
    ordering operator used with a string value and for a number that
    cannot be parsed.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # key, operator, optional '?' and then a number (optionally with a
    # size suffix), a quoted string or an unquoted string
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            # String comparison: only equality and inequality make sense
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character inside a quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            # Numeric comparison: plain integer first, then parse_filesize()
            # on the value as given and with a 'B' appended
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Not a bool: the text matched by '?' (truthy) or None
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # For bool values the value itself is tested, for everything else only
    # whether the value is None or not
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4381
4382
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # The filter parts are separated by '&' and all of them have to match
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4388
4389
def match_filter_func(filter_str):
    """Build a function that checks an info dict against filter_str.

    The returned function gives None for a dict that passes the filter and
    a message explaining the skip otherwise.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
4398
4399
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP time expression into seconds as a float.

    Understands plain offsets such as '12.5' or '12.5s' and clock values of
    the form 'H:MM:SS' with an optional fraction introduced by '.' or ':'.
    Returns None for empty or unsupported expressions.
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock:
        hours, minutes, seconds = clock.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))

    return None
4411
4412
def srt_subtitles_timecode(seconds):
    """Format a time in seconds as an SRT timecode (HH:MM:SS,mmm).

    The value is rounded to whole milliseconds first. Formatting the float
    fields directly truncates them, which turns e.g. 5.84 into
    00:00:05,839.
    """
    total_msec = int(round(seconds * 1000))
    secs, msec = divmod(total_msec, 1000)
    mins, secs = divmod(secs, 60)
    hours, mins = divmod(mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msec)
4415
4416
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT.

    Paragraphs without a usable begin time, or with neither an end time nor
    a duration, are left out of the output.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no paragraph elements
    '''
    # (current namespace, [legacy namespaces that are rewritten to it])
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # Styling attributes that are read from styles and elements
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> {styling property: value}
    styles = {}
    # Styling taken from the body and div elements, applied to every element
    default_style = {}

    class TTMLPElementParser(object):
        # XMLParser target that renders one paragraph as SRT text using
        # <b>, <i>, <u> and <font> tags
        _out = ''
        # NOTE(review): both lists are class attributes and therefore shared
        # by all parser instances; this relies on every start() being
        # balanced by an end() - confirm
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Precedence: default style < referenced style < own attributes
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect with the same value
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened for this element in reverse order
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialise the node again and feed it through the target above
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Collect the styles; the pass is repeated as long as a style refers to a
    # parent style that is not known yet.
    # NOTE(review): a parent id that never ends up in styles (undefined, or
    # defined without any supported property) keeps repeat set on every
    # pass, so this loop looks like it would not terminate - confirm
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # Styles referenced by the first body and div element become defaults
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # The index counts all paragraphs, so skipped ones leave gaps in the
    # numbering of the output
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4579
4580
def cli_option(params, command_option, param):
    """Return [command_option, value] for params[param], or [] if unset.

    True values are converted to text; a value of None counts as unset.
    """
    value = params.get(param)
    if value is None:
        return []
    if value:
        value = compat_str(value)
    return [command_option, value]
4586
4587
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Return the command line arguments for a boolean parameter.

    An unset (None) parameter gives []. With a separator, option and value
    are joined into a single argument.
    """
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    rendered = true_value if value else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4596
4597
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] if params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4601
4602
def cli_configuration_args(params, param, default=[]):
    """Return the list stored in params[param], or default if it is unset."""
    ex_args = params.get(param)
    if ex_args is not None:
        assert isinstance(ex_args, list)
        return ex_args
    return default
4609
4610
4611 class ISO639Utils(object):
4612     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4613     _lang_map = {
4614         'aa': 'aar',
4615         'ab': 'abk',
4616         'ae': 'ave',
4617         'af': 'afr',
4618         'ak': 'aka',
4619         'am': 'amh',
4620         'an': 'arg',
4621         'ar': 'ara',
4622         'as': 'asm',
4623         'av': 'ava',
4624         'ay': 'aym',
4625         'az': 'aze',
4626         'ba': 'bak',
4627         'be': 'bel',
4628         'bg': 'bul',
4629         'bh': 'bih',
4630         'bi': 'bis',
4631         'bm': 'bam',
4632         'bn': 'ben',
4633         'bo': 'bod',
4634         'br': 'bre',
4635         'bs': 'bos',
4636         'ca': 'cat',
4637         'ce': 'che',
4638         'ch': 'cha',
4639         'co': 'cos',
4640         'cr': 'cre',
4641         'cs': 'ces',
4642         'cu': 'chu',
4643         'cv': 'chv',
4644         'cy': 'cym',
4645         'da': 'dan',
4646         'de': 'deu',
4647         'dv': 'div',
4648         'dz': 'dzo',
4649         'ee': 'ewe',
4650         'el': 'ell',
4651         'en': 'eng',
4652         'eo': 'epo',
4653         'es': 'spa',
4654         'et': 'est',
4655         'eu': 'eus',
4656         'fa': 'fas',
4657         'ff': 'ful',
4658         'fi': 'fin',
4659         'fj': 'fij',
4660         'fo': 'fao',
4661         'fr': 'fra',
4662         'fy': 'fry',
4663         'ga': 'gle',
4664         'gd': 'gla',
4665         'gl': 'glg',
4666         'gn': 'grn',
4667         'gu': 'guj',
4668         'gv': 'glv',
4669         'ha': 'hau',
4670         'he': 'heb',
4671         'iw': 'heb',  # Replaced by he in 1989 revision
4672         'hi': 'hin',
4673         'ho': 'hmo',
4674         'hr': 'hrv',
4675         'ht': 'hat',
4676         'hu': 'hun',
4677         'hy': 'hye',
4678         'hz': 'her',
4679         'ia': 'ina',
4680         'id': 'ind',
4681         'in': 'ind',  # Replaced by id in 1989 revision
4682         'ie': 'ile',
4683         'ig': 'ibo',
4684         'ii': 'iii',
4685         'ik': 'ipk',
4686         'io': 'ido',
4687         'is': 'isl',
4688         'it': 'ita',
4689         'iu': 'iku',
4690         'ja': 'jpn',
4691         'jv': 'jav',
4692         'ka': 'kat',
4693         'kg': 'kon',
4694         'ki': 'kik',
4695         'kj': 'kua',
4696         'kk': 'kaz',
4697         'kl': 'kal',
4698         'km': 'khm',
4699         'kn': 'kan',
4700         'ko': 'kor',
4701         'kr': 'kau',
4702         'ks': 'kas',
4703         'ku': 'kur',
4704         'kv': 'kom',
4705         'kw': 'cor',
4706         'ky': 'kir',
4707         'la': 'lat',
4708         'lb': 'ltz',
4709         'lg': 'lug',
4710         'li': 'lim',
4711         'ln': 'lin',
4712         'lo': 'lao',
4713         'lt': 'lit',
4714         'lu': 'lub',
4715         'lv': 'lav',
4716         'mg': 'mlg',
4717         'mh': 'mah',
4718         'mi': 'mri',
4719         'mk': 'mkd',
4720         'ml': 'mal',
4721         'mn': 'mon',
4722         'mr': 'mar',
4723         'ms': 'msa',
4724         'mt': 'mlt',
4725         'my': 'mya',
4726         'na': 'nau',
4727         'nb': 'nob',
4728         'nd': 'nde',
4729         'ne': 'nep',
4730         'ng': 'ndo',
4731         'nl': 'nld',
4732         'nn': 'nno',
4733         'no': 'nor',
4734         'nr': 'nbl',
4735         'nv': 'nav',
4736         'ny': 'nya',
4737         'oc': 'oci',
4738         'oj': 'oji',
4739         'om': 'orm',
4740         'or': 'ori',
4741         'os': 'oss',
4742         'pa': 'pan',
4743         'pi': 'pli',
4744         'pl': 'pol',
4745         'ps': 'pus',
4746         'pt': 'por',
4747         'qu': 'que',
4748         'rm': 'roh',
4749         'rn': 'run',
4750         'ro': 'ron',
4751         'ru': 'rus',
4752         'rw': 'kin',
4753         'sa': 'san',
4754         'sc': 'srd',
4755         'sd': 'snd',
4756         'se': 'sme',
4757         'sg': 'sag',
4758         'si': 'sin',
4759         'sk': 'slk',
4760         'sl': 'slv',
4761         'sm': 'smo',
4762         'sn': 'sna',
4763         'so': 'som',
4764         'sq': 'sqi',
4765         'sr': 'srp',
4766         'ss': 'ssw',
4767         'st': 'sot',
4768         'su': 'sun',
4769         'sv': 'swe',
4770         'sw': 'swa',
4771         'ta': 'tam',
4772         'te': 'tel',
4773         'tg': 'tgk',
4774         'th': 'tha',
4775         'ti': 'tir',
4776         'tk': 'tuk',
4777         'tl': 'tgl',
4778         'tn': 'tsn',
4779         'to': 'ton',
4780         'tr': 'tur',
4781         'ts': 'tso',
4782         'tt': 'tat',
4783         'tw': 'twi',
4784         'ty': 'tah',
4785         'ug': 'uig',
4786         'uk': 'ukr',
4787         'ur': 'urd',
4788         'uz': 'uzb',
4789         've': 'ven',
4790         'vi': 'vie',
4791         'vo': 'vol',
4792         'wa': 'wln',
4793         'wo': 'wol',
4794         'xh': 'xho',
4795         'yi': 'yid',
4796         'ji': 'yid',  # Replaced by yi in 1989 revision
4797         'yo': 'yor',
4798         'za': 'zha',
4799         'zh': 'zho',
4800         'zu': 'zul',
4801     }
4802
4803     @classmethod
4804     def short2long(cls, code):
4805         """Convert language code from ISO 639-1 to ISO 639-2/T"""
4806         return cls._lang_map.get(code[:2])
4807
4808     @classmethod
4809     def long2short(cls, code):
4810         """Convert language code from ISO 639-2/T to ISO 639-1"""
4811         for short_name, long_name in cls._lang_map.items():
4812             if long_name == code:
4813                 return short_name
4814
4815
class ISO3166Utils(object):
    """Lookup of country names by their two-letter (ISO 3166-1 alpha-2) code"""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter (ISO 3166-1 alpha-2) country code to the
        corresponding full name

        The lookup is case-insensitive (code is upper-cased first).
        Returns None when the code is not in the table.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
5074
5075
class GeoUtils(object):
    """Generation of random IPv4 addresses inside per-country CIDR blocks"""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random dotted-quad IPv4 address from a block

        code_or_block is either a two-character code, which is upper-cased
        and looked up in _country_ip_map, or a block written as
        '<address>/<prefix length>'. The result is drawn with
        random.randint between the block's address and that address with
        all bits past the prefix set. Returns None for an unknown code.
        """
        block = code_or_block
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        network, prefix_len = block.split('/')
        lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
        # Setting every bit past the prefix gives the last address of the block
        highest = lowest | (0xffffffff >> int(prefix_len))
        packed = compat_struct_pack('!L', random.randint(lowest, highest))
        return compat_str(socket.inet_ntoa(packed))
5334
5335
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler whose proxy can be chosen per request

    A request may carry a 'Ytdl-request-proxy' header naming the proxy to use
    for that request; the header is removed once it has been read.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        for scheme in ('http', 'https'):
            # The scheme and the bound method are captured as lambda defaults
            # so each handler keeps the values of its own loop iteration
            default_open = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, default_open)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Open req through proxy, honouring a per-request override

        Returns None (nothing handled here) when no proxy applies or when the
        proxy is a SOCKS one; otherwise defers to ProxyHandler.proxy_open.
        """
        headers = req.headers
        override = headers.get('Ytdl-request-proxy')
        if override is not None:
            proxy = override
            del headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers wrap the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5359
5360
# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which has
# been released into the public domain
5363 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5364
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Values that are zero or negative yield a single zero byte.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    remaining = int(n)
    words = []
    while remaining > 0:
        # Peel off 32 bits at a time, least significant word first
        words.append(compat_struct_pack('>I', remaining & 0xffffffff))
        remaining >>= 32
    # strip off leading zeros, keeping one zero byte when nothing is left
    s = b''.join(reversed(words)).lstrip(b'\000') or b'\000'
    # add back some pad bytes
    if blocksize > 0:
        overhang = len(s) % blocksize
        if overhang:
            s = (blocksize - overhang) * b'\000' + s
    return s
5393
5394
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes to a whole number of 32-bit words
    missing = -len(s) % 4
    if missing:
        s = b'\000' * missing + s
    acc = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        acc = (acc << 32) + word
    return acc
5410
5411
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''

    # The bytes are reversed before conversion, i.e. data is read as a
    # little-endian integer
    reversed_hex = binascii.hexlify(data[::-1])
    payload = int(reversed_hex, 16)
    return '%x' % pow(payload, exponent, modulus)
5427
5428
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme (block type 2):
    [0, 2] + non-zero random padding + [0] + data

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data is longer than length - 11
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Padding octets must be non-zero: the first zero after the [0, 2] header
    # marks the end of the padding, so a zero inside it would make the
    # receiver cut the padding short and corrupt the message
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5442
5443
def encode_base_n(num, n, table=None):
    """Encode the integer num as a string of base-n digits

    @param num    integer to encode; a negative value is encoded as '-'
                  followed by the encoding of its magnitude
    @param n      base, at most the length of the digit table
    @param table  digit characters to use; defaults to the first n
                  characters of 0-9, a-z, A-Z
    @returns      the encoded string (table[0] for zero)
    @raises ValueError  if n exceeds the table length, or if n is smaller
                        than 2 while num is non-zero
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    if n < 2:
        # Repeated division by such a base never brings num down to zero
        raise ValueError('base %d is too small' % n)

    if num < 0:
        # Floor division of a negative number stops at -1, never at zero,
        # so encode the magnitude and put the sign in front
        return '-' + encode_base_n(-num, n, table)

    digits = []
    while num:
        num, digit = divmod(num, n)
        digits.append(table[digit])
    # Digits were produced least significant first
    return ''.join(reversed(digits))
5460
5461
def decode_packed_codes(code):
    """Expand packed (symbol-table obfuscated) code found in code

    PACKED_CODES_RE must capture, in order, the obfuscated payload, the
    numeric base, the symbol count and the '|'-separated symbol list.
    Each word of the payload that is the base-n encoding of an index below
    the count is replaced by the symbol at that index (or kept as is when
    that symbol is empty); any other word is left unchanged.

    Raises AttributeError if PACKED_CODES_RE does not match code.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')
    symbol_table = {}

    while count:
        count -= 1
        base_n_count = encode_base_n(count, base)
        # An empty symbol means the token stands for itself
        symbol_table[base_n_count] = symbols[count] or base_n_count

    # Words that are not tokens of the symbol table pass through untouched
    # rather than aborting the whole substitution with a KeyError
    return re.sub(
        r'\b(\w+)\b',
        lambda mobj: symbol_table.get(mobj.group(0), mobj.group(0)),
        obfuscated_code)
5478
5479
def caesar(s, alphabet, shift):
    """Shift every character of s that occurs in alphabet by shift positions

    Positions wrap around the end of alphabet; characters that are not in
    alphabet are copied unchanged. A shift of 0 returns s itself.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    substitution = {}
    for position, char in enumerate(alphabet):
        # setdefault keeps the first occurrence of a repeated character
        substitution.setdefault(char, alphabet[(position + shift) % size])
    return ''.join(substitution.get(c, c) for c in s)
5487
5488
def rot47(s):
    """Apply caesar() with a shift of 47 over the characters '!' through '~'"""
    printable = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable, 47)
5491
5492
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=value attribute list into a dict

    Values may be bare or double-quoted; the surrounding quotes of a quoted
    value are dropped. When a key occurs more than once the last value wins.
    """
    pairs = re.findall(
        r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return dict(
        (key, val[1:-1] if val.startswith('"') else val)
        for key, val in pairs)
5500
5501
def urshift(val, n):
    """Shift val right by n bits, adding 2**32 to a negative val first

    For negative values this gives the result of shifting their 32-bit
    two's complement bit pattern as an unsigned number.
    """
    if val < 0:
        val += 0x100000000
    return val >> n
5504
5505
5506 # Based on png2str() written by @gdkchan and improved by @yokrysty
5507 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG bytes into raw, unfiltered sample values

    Returns a (width, height, pixels) tuple where pixels is a list of height
    rows, each a flat list of width * 3 integers in 0..255.

    Every scanline is taken to hold 3 bytes per pixel; apart from width and
    height no IHDR field is read, so other pixel layouts are not detected.

    Raises IOError if the signature or leading IHDR chunk is missing, or if
    there is no IDAT data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    signature, remaining = png_data[:8], png_data[8:]

    if signature != b'\x89PNG\x0d\x0a\x1a\x0a' or remaining[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    formats = {1: '>B', 2: '>H', 4: '>I'}

    def unpack_integer(raw):
        # Big-endian unsigned integer of 1, 2 or 4 bytes
        return compat_struct_unpack(formats[len(raw)], raw)[0]

    # Each chunk is laid out as: length (4) | type (4) | data (length) | CRC (4)
    chunks = []
    while remaining:
        length = unpack_integer(remaining[:4])
        chunks.append({
            'type': remaining[4:8],
            'length': length,
            'data': remaining[8:8 + length]
        })
        remaining = remaining[8 + length + 4:]  # Also skips the CRC

    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        # Every scanline is preceded by one filter type byte
        row_start = y * (1 + stride)
        filter_type = decompressed_data[row_start]

        previous_row = pixels[y - 1] if y > 0 else None
        current_row = []
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[row_start + 1 + x]

            # Neighbours are the same sample of the pixel to the left
            # (3 bytes back) and of the pixel above; absent ones count as 0
            left = current_row[x - 3] if x > 2 else 0
            up = previous_row[x] if y > 0 else 0

            if filter_type == 1:  # Sub
                predictor = left
            elif filter_type == 2:  # Up
                predictor = up
            elif filter_type == 3:  # Average
                predictor = (left + up) >> 1
            elif filter_type == 4:  # Paeth
                upper_left = previous_row[x - 3] if x > 2 and y > 0 else 0

                estimate = left + up - upper_left
                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_upper_left = abs(estimate - upper_left)

                if dist_left <= dist_up and dist_left <= dist_upper_left:
                    predictor = left
                elif dist_up <= dist_upper_left:
                    predictor = up
                else:
                    predictor = upper_left
            else:
                # Filter type 0 (and anything unrecognised) leaves the byte as is
                predictor = 0

            current_row.append((color + predictor) & 0xff)

    return width, height, pixels
5611
5612
def write_xattr(path, key, value):
    """Set the extended attribute `key` of the file at `path` to `value`.

    Backends are tried in this order:
      1. the Python `xattr` module (either pyxattr or xattr), if importable;
      2. NTFS Alternate Data Streams when compat_os_name is 'nt';
      3. the `setfattr` or `xattr` command line tools.

    A pyxattr older than 0.5.0 is skipped and the remaining backends are
    tried instead of giving up straight away.

    path  -- path of the file to tag
    key   -- attribute name (must not contain ':' for the ADS backend)
    value -- attribute value as bytes; decoded as UTF-8 before being passed
             to a command line tool

    Raises XAttrMetadataError when the selected backend fails to write the
    attribute, and XAttrUnavailableError when no usable backend is found.
    """
    setxattr = None
    # Non-empty only when an outdated pyxattr was found; prepended to the
    # final error if no other backend is available either
    pyxattr_note = ''

    # Only the import itself is guarded, so that an ImportError cannot be
    # mistaken for anything else
    try:
        # try the pyxattr module...
        import xattr
    except ImportError:
        pass
    else:
        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # Too old: leave setxattr unset and fall back to the
                # other implementations below
                pyxattr_note = (
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. ' % (
                        pyxattr_required_version, xattr.__version__))
            else:
                setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

    if setxattr is not None:
        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    if compat_os_name == 'nt':
        # Write xattrs to NTFS Alternate Data Streams:
        # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
        assert ':' not in key
        assert os.path.exists(path)

        ads_fn = path + ':' + key
        try:
            with open(ads_fn, 'wb') as f:
                f.write(value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # Command line tools: setfattr is preferred over xattr
    if check_executable('setfattr', ['--version']):
        executable = 'setfattr'
        opts = ['-n', key, '-v', value.decode('utf-8')]
    elif check_executable('xattr', ['-h']):
        executable = 'xattr'
        opts = ['-w', key, value.decode('utf-8')]
    else:
        # On Unix, and can't find a usable pyxattr, setfattr, or xattr.
        if sys.platform.startswith('linux'):
            raise XAttrUnavailableError(
                pyxattr_note
                + "Couldn't find a tool to set the xattrs. "
                "Install either the python 'pyxattr' or 'xattr' "
                "modules, or the GNU 'attr' package "
                "(which contains the 'setfattr' tool).")
        raise XAttrUnavailableError(
            pyxattr_note
            + "Couldn't find a tool to set the xattrs. "
            "Install either the python 'xattr' module, "
            "or the 'xattr' binary.")

    cmd = ([encodeFilename(executable, True)]
           + [encodeArgument(o) for o in opts]
           + [encodeFilename(path, True)])

    try:
        p = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    except EnvironmentError as e:
        raise XAttrMetadataError(e.errno, e.strerror)
    stdout, stderr = p.communicate()
    stderr = stderr.decode('utf-8', 'replace')
    if p.returncode != 0:
        # The tool's stderr is the only description of the failure
        raise XAttrMetadataError(p.returncode, stderr)
5695
5696
def random_birthday(year_field, month_field, day_field):
    """Return a dict describing a random date of birth.

    The date is drawn uniformly from 1950-01-01 to 1995-12-31 (both
    inclusive). The year, month and day are stored as decimal strings
    (no zero padding) under the keys given by `year_field`, `month_field`
    and `day_field` respectively.
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    # randint() includes both ends, so `latest` itself can be picked
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))
    field_names = (year_field, month_field, day_field)
    components = (birthday.year, birthday.month, birthday.day)
    return dict(zip(field_names, (str(part) for part in components)))