2 from __future__ import unicode_literals
19 import xml.etree.ElementTree
23 import urllib.request as compat_urllib_request
24 except ImportError: # Python 2
25 import urllib2 as compat_urllib_request
28 import urllib.error as compat_urllib_error
29 except ImportError: # Python 2
30 import urllib2 as compat_urllib_error
33 import urllib.parse as compat_urllib_parse
34 except ImportError: # Python 2
35 import urllib as compat_urllib_parse
38 from urllib.parse import urlparse as compat_urllib_parse_urlparse
39 except ImportError: # Python 2
40 from urlparse import urlparse as compat_urllib_parse_urlparse
43 import urllib.parse as compat_urlparse
44 except ImportError: # Python 2
45 import urlparse as compat_urlparse
48 import urllib.response as compat_urllib_response
49 except ImportError: # Python 2
50 import urllib as compat_urllib_response
53 import http.cookiejar as compat_cookiejar
54 except ImportError: # Python 2
55 import cookielib as compat_cookiejar
58 import http.cookies as compat_cookies
59 except ImportError: # Python 2
60 import Cookie as compat_cookies
63 import html.entities as compat_html_entities
64 except ImportError: # Python 2
65 import htmlentitydefs as compat_html_entities
68 compat_html_entities_html5 = compat_html_entities.html5
69 except AttributeError:
70 # Copied from CPython 3.5.1 html/entities.py
71 compat_html_entities_html5 = {
80 'acE;': '\u223e\u0333',
100 'alefsym;': '\u2135',
115 'andslope;': '\u2a58',
121 'angmsdaa;': '\u29a8',
122 'angmsdab;': '\u29a9',
123 'angmsdac;': '\u29aa',
124 'angmsdad;': '\u29ab',
125 'angmsdae;': '\u29ac',
126 'angmsdaf;': '\u29ad',
127 'angmsdag;': '\u29ae',
128 'angmsdah;': '\u29af',
130 'angrtvb;': '\u22be',
131 'angrtvbd;': '\u299d',
134 'angzarr;': '\u237c',
137 'Aopf;': '\U0001d538',
138 'aopf;': '\U0001d552',
145 'ApplyFunction;': '\u2061',
147 'approxeq;': '\u224a',
152 'Ascr;': '\U0001d49c',
153 'ascr;': '\U0001d4b6',
157 'asympeq;': '\u224d',
166 'awconint;': '\u2233',
168 'backcong;': '\u224c',
169 'backepsilon;': '\u03f6',
170 'backprime;': '\u2035',
171 'backsim;': '\u223d',
172 'backsimeq;': '\u22cd',
173 'Backslash;': '\u2216',
178 'barwedge;': '\u2305',
180 'bbrktbrk;': '\u23b6',
186 'Because;': '\u2235',
187 'because;': '\u2235',
188 'bemptyv;': '\u29b0',
191 'Bernoullis;': '\u212c',
195 'between;': '\u226c',
196 'Bfr;': '\U0001d505',
197 'bfr;': '\U0001d51f',
199 'bigcirc;': '\u25ef',
201 'bigodot;': '\u2a00',
202 'bigoplus;': '\u2a01',
203 'bigotimes;': '\u2a02',
204 'bigsqcup;': '\u2a06',
205 'bigstar;': '\u2605',
206 'bigtriangledown;': '\u25bd',
207 'bigtriangleup;': '\u25b3',
208 'biguplus;': '\u2a04',
210 'bigwedge;': '\u22c0',
212 'blacklozenge;': '\u29eb',
213 'blacksquare;': '\u25aa',
214 'blacktriangle;': '\u25b4',
215 'blacktriangledown;': '\u25be',
216 'blacktriangleleft;': '\u25c2',
217 'blacktriangleright;': '\u25b8',
224 'bnequiv;': '\u2261\u20e5',
227 'Bopf;': '\U0001d539',
228 'bopf;': '\U0001d553',
251 'boxminus;': '\u229f',
252 'boxplus;': '\u229e',
253 'boxtimes;': '\u22a0',
282 'bscr;': '\U0001d4b7',
288 'bsolhsub;': '\u27c8',
301 'capbrcup;': '\u2a49',
305 'CapitalDifferentialD;': '\u2145',
306 'caps;': '\u2229\ufe00',
309 'Cayleys;': '\u212d',
319 'Cconint;': '\u2230',
321 'ccupssm;': '\u2a50',
327 'cemptyv;': '\u29b2',
330 'CenterDot;': '\xb7',
331 'centerdot;': '\xb7',
333 'cfr;': '\U0001d520',
337 'checkmark;': '\u2713',
343 'circlearrowleft;': '\u21ba',
344 'circlearrowright;': '\u21bb',
345 'circledast;': '\u229b',
346 'circledcirc;': '\u229a',
347 'circleddash;': '\u229d',
348 'CircleDot;': '\u2299',
350 'circledS;': '\u24c8',
351 'CircleMinus;': '\u2296',
352 'CirclePlus;': '\u2295',
353 'CircleTimes;': '\u2297',
356 'cirfnint;': '\u2a10',
358 'cirscir;': '\u29c2',
359 'ClockwiseContourIntegral;': '\u2232',
360 'CloseCurlyDoubleQuote;': '\u201d',
361 'CloseCurlyQuote;': '\u2019',
363 'clubsuit;': '\u2663',
368 'coloneq;': '\u2254',
373 'complement;': '\u2201',
374 'complexes;': '\u2102',
376 'congdot;': '\u2a6d',
377 'Congruent;': '\u2261',
380 'ContourIntegral;': '\u222e',
382 'copf;': '\U0001d554',
384 'Coproduct;': '\u2210',
390 'CounterClockwiseContourIntegral;': '\u2233',
394 'Cscr;': '\U0001d49e',
395 'cscr;': '\U0001d4b8',
401 'cudarrl;': '\u2938',
402 'cudarrr;': '\u2935',
406 'cularrp;': '\u293d',
409 'cupbrcap;': '\u2a48',
415 'cups;': '\u222a\ufe00',
417 'curarrm;': '\u293c',
418 'curlyeqprec;': '\u22de',
419 'curlyeqsucc;': '\u22df',
420 'curlyvee;': '\u22ce',
421 'curlywedge;': '\u22cf',
424 'curvearrowleft;': '\u21b6',
425 'curvearrowright;': '\u21b7',
428 'cwconint;': '\u2232',
440 'dbkarow;': '\u290f',
448 'ddagger;': '\u2021',
450 'DDotrahd;': '\u2911',
451 'ddotseq;': '\u2a77',
457 'demptyv;': '\u29b1',
459 'Dfr;': '\U0001d507',
460 'dfr;': '\U0001d521',
464 'DiacriticalAcute;': '\xb4',
465 'DiacriticalDot;': '\u02d9',
466 'DiacriticalDoubleAcute;': '\u02dd',
467 'DiacriticalGrave;': '`',
468 'DiacriticalTilde;': '\u02dc',
470 'Diamond;': '\u22c4',
471 'diamond;': '\u22c4',
472 'diamondsuit;': '\u2666',
475 'DifferentialD;': '\u2146',
476 'digamma;': '\u03dd',
481 'divideontimes;': '\u22c7',
488 'Dopf;': '\U0001d53b',
489 'dopf;': '\U0001d555',
494 'doteqdot;': '\u2251',
495 'DotEqual;': '\u2250',
496 'dotminus;': '\u2238',
497 'dotplus;': '\u2214',
498 'dotsquare;': '\u22a1',
499 'doublebarwedge;': '\u2306',
500 'DoubleContourIntegral;': '\u222f',
501 'DoubleDot;': '\xa8',
502 'DoubleDownArrow;': '\u21d3',
503 'DoubleLeftArrow;': '\u21d0',
504 'DoubleLeftRightArrow;': '\u21d4',
505 'DoubleLeftTee;': '\u2ae4',
506 'DoubleLongLeftArrow;': '\u27f8',
507 'DoubleLongLeftRightArrow;': '\u27fa',
508 'DoubleLongRightArrow;': '\u27f9',
509 'DoubleRightArrow;': '\u21d2',
510 'DoubleRightTee;': '\u22a8',
511 'DoubleUpArrow;': '\u21d1',
512 'DoubleUpDownArrow;': '\u21d5',
513 'DoubleVerticalBar;': '\u2225',
514 'DownArrow;': '\u2193',
515 'Downarrow;': '\u21d3',
516 'downarrow;': '\u2193',
517 'DownArrowBar;': '\u2913',
518 'DownArrowUpArrow;': '\u21f5',
519 'DownBreve;': '\u0311',
520 'downdownarrows;': '\u21ca',
521 'downharpoonleft;': '\u21c3',
522 'downharpoonright;': '\u21c2',
523 'DownLeftRightVector;': '\u2950',
524 'DownLeftTeeVector;': '\u295e',
525 'DownLeftVector;': '\u21bd',
526 'DownLeftVectorBar;': '\u2956',
527 'DownRightTeeVector;': '\u295f',
528 'DownRightVector;': '\u21c1',
529 'DownRightVectorBar;': '\u2957',
530 'DownTee;': '\u22a4',
531 'DownTeeArrow;': '\u21a7',
532 'drbkarow;': '\u2910',
535 'Dscr;': '\U0001d49f',
536 'dscr;': '\U0001d4b9',
547 'dwangle;': '\u29a6',
550 'dzigrarr;': '\u27ff',
572 'Efr;': '\U0001d508',
573 'efr;': '\U0001d522',
582 'Element;': '\u2208',
583 'elinters;': '\u23e7',
590 'emptyset;': '\u2205',
591 'EmptySmallSquare;': '\u25fb',
593 'EmptyVerySmallSquare;': '\u25ab',
602 'Eopf;': '\U0001d53c',
603 'eopf;': '\U0001d556',
608 'Epsilon;': '\u0395',
609 'epsilon;': '\u03b5',
612 'eqcolon;': '\u2255',
614 'eqslantgtr;': '\u2a96',
615 'eqslantless;': '\u2a95',
618 'EqualTilde;': '\u2242',
620 'Equilibrium;': '\u21cc',
622 'equivDD;': '\u2a78',
623 'eqvparsl;': '\u29e5',
645 'expectation;': '\u2130',
646 'ExponentialE;': '\u2147',
647 'exponentiale;': '\u2147',
648 'fallingdotseq;': '\u2252',
655 'Ffr;': '\U0001d509',
656 'ffr;': '\U0001d523',
658 'FilledSmallSquare;': '\u25fc',
659 'FilledVerySmallSquare;': '\u25aa',
665 'Fopf;': '\U0001d53d',
666 'fopf;': '\U0001d557',
671 'Fouriertrf;': '\u2131',
672 'fpartint;': '\u2a0d',
694 'fscr;': '\U0001d4bb',
716 'geqslant;': '\u2a7e',
720 'gesdoto;': '\u2a82',
721 'gesdotol;': '\u2a84',
722 'gesl;': '\u22db\ufe00',
724 'Gfr;': '\U0001d50a',
725 'gfr;': '\U0001d524',
737 'gnapprox;': '\u2a8a',
743 'Gopf;': '\U0001d53e',
744 'gopf;': '\U0001d558',
746 'GreaterEqual;': '\u2265',
747 'GreaterEqualLess;': '\u22db',
748 'GreaterFullEqual;': '\u2267',
749 'GreaterGreater;': '\u2aa2',
750 'GreaterLess;': '\u2277',
751 'GreaterSlantEqual;': '\u2a7e',
752 'GreaterTilde;': '\u2273',
753 'Gscr;': '\U0001d4a2',
767 'gtquest;': '\u2a7c',
768 'gtrapprox;': '\u2a86',
771 'gtreqless;': '\u22db',
772 'gtreqqless;': '\u2a8c',
773 'gtrless;': '\u2277',
775 'gvertneqq;': '\u2269\ufe00',
776 'gvnE;': '\u2269\ufe00',
785 'harrcir;': '\u2948',
792 'heartsuit;': '\u2665',
796 'hfr;': '\U0001d525',
797 'HilbertSpace;': '\u210b',
798 'hksearow;': '\u2925',
799 'hkswarow;': '\u2926',
802 'hookleftarrow;': '\u21a9',
803 'hookrightarrow;': '\u21aa',
805 'hopf;': '\U0001d559',
807 'HorizontalLine;': '\u2500',
809 'hscr;': '\U0001d4bd',
813 'HumpDownHump;': '\u224e',
814 'HumpEqual;': '\u224f',
835 'ifr;': '\U0001d526',
851 'ImaginaryI;': '\u2148',
852 'imagline;': '\u2110',
853 'imagpart;': '\u2111',
857 'Implies;': '\u21d2',
861 'infintie;': '\u29dd',
866 'integers;': '\u2124',
867 'Integral;': '\u222b',
868 'intercal;': '\u22ba',
869 'Intersection;': '\u22c2',
870 'intlarhk;': '\u2a17',
871 'intprod;': '\u2a3c',
872 'InvisibleComma;': '\u2063',
873 'InvisibleTimes;': '\u2062',
878 'Iopf;': '\U0001d540',
879 'iopf;': '\U0001d55a',
886 'iscr;': '\U0001d4be',
888 'isindot;': '\u22f5',
906 'Jfr;': '\U0001d50d',
907 'jfr;': '\U0001d527',
909 'Jopf;': '\U0001d541',
910 'jopf;': '\U0001d55b',
911 'Jscr;': '\U0001d4a5',
912 'jscr;': '\U0001d4bf',
924 'Kfr;': '\U0001d50e',
925 'kfr;': '\U0001d528',
931 'Kopf;': '\U0001d542',
932 'kopf;': '\U0001d55c',
933 'Kscr;': '\U0001d4a6',
934 'kscr;': '\U0001d4c0',
938 'laemptyv;': '\u29b4',
947 'Laplacetrf;': '\u2112',
954 'larrbfs;': '\u291f',
959 'larrsim;': '\u2973',
965 'lates;': '\u2aad\ufe00',
972 'lbrksld;': '\u298f',
973 'lbrkslu;': '\u298d',
985 'ldrdhar;': '\u2967',
986 'ldrushar;': '\u294b',
990 'LeftAngleBracket;': '\u27e8',
991 'LeftArrow;': '\u2190',
992 'Leftarrow;': '\u21d0',
993 'leftarrow;': '\u2190',
994 'LeftArrowBar;': '\u21e4',
995 'LeftArrowRightArrow;': '\u21c6',
996 'leftarrowtail;': '\u21a2',
997 'LeftCeiling;': '\u2308',
998 'LeftDoubleBracket;': '\u27e6',
999 'LeftDownTeeVector;': '\u2961',
1000 'LeftDownVector;': '\u21c3',
1001 'LeftDownVectorBar;': '\u2959',
1002 'LeftFloor;': '\u230a',
1003 'leftharpoondown;': '\u21bd',
1004 'leftharpoonup;': '\u21bc',
1005 'leftleftarrows;': '\u21c7',
1006 'LeftRightArrow;': '\u2194',
1007 'Leftrightarrow;': '\u21d4',
1008 'leftrightarrow;': '\u2194',
1009 'leftrightarrows;': '\u21c6',
1010 'leftrightharpoons;': '\u21cb',
1011 'leftrightsquigarrow;': '\u21ad',
1012 'LeftRightVector;': '\u294e',
1013 'LeftTee;': '\u22a3',
1014 'LeftTeeArrow;': '\u21a4',
1015 'LeftTeeVector;': '\u295a',
1016 'leftthreetimes;': '\u22cb',
1017 'LeftTriangle;': '\u22b2',
1018 'LeftTriangleBar;': '\u29cf',
1019 'LeftTriangleEqual;': '\u22b4',
1020 'LeftUpDownVector;': '\u2951',
1021 'LeftUpTeeVector;': '\u2960',
1022 'LeftUpVector;': '\u21bf',
1023 'LeftUpVectorBar;': '\u2958',
1024 'LeftVector;': '\u21bc',
1025 'LeftVectorBar;': '\u2952',
1030 'leqslant;': '\u2a7d',
1033 'lesdot;': '\u2a7f',
1034 'lesdoto;': '\u2a81',
1035 'lesdotor;': '\u2a83',
1036 'lesg;': '\u22da\ufe00',
1037 'lesges;': '\u2a93',
1038 'lessapprox;': '\u2a85',
1039 'lessdot;': '\u22d6',
1040 'lesseqgtr;': '\u22da',
1041 'lesseqqgtr;': '\u2a8b',
1042 'LessEqualGreater;': '\u22da',
1043 'LessFullEqual;': '\u2266',
1044 'LessGreater;': '\u2276',
1045 'lessgtr;': '\u2276',
1046 'LessLess;': '\u2aa1',
1047 'lesssim;': '\u2272',
1048 'LessSlantEqual;': '\u2a7d',
1049 'LessTilde;': '\u2272',
1050 'lfisht;': '\u297c',
1051 'lfloor;': '\u230a',
1052 'Lfr;': '\U0001d50f',
1053 'lfr;': '\U0001d529',
1059 'lharul;': '\u296a',
1066 'llcorner;': '\u231e',
1067 'Lleftarrow;': '\u21da',
1068 'llhard;': '\u296b',
1070 'Lmidot;': '\u013f',
1071 'lmidot;': '\u0140',
1072 'lmoust;': '\u23b0',
1073 'lmoustache;': '\u23b0',
1075 'lnapprox;': '\u2a89',
1084 'LongLeftArrow;': '\u27f5',
1085 'Longleftarrow;': '\u27f8',
1086 'longleftarrow;': '\u27f5',
1087 'LongLeftRightArrow;': '\u27f7',
1088 'Longleftrightarrow;': '\u27fa',
1089 'longleftrightarrow;': '\u27f7',
1090 'longmapsto;': '\u27fc',
1091 'LongRightArrow;': '\u27f6',
1092 'Longrightarrow;': '\u27f9',
1093 'longrightarrow;': '\u27f6',
1094 'looparrowleft;': '\u21ab',
1095 'looparrowright;': '\u21ac',
1097 'Lopf;': '\U0001d543',
1098 'lopf;': '\U0001d55d',
1099 'loplus;': '\u2a2d',
1100 'lotimes;': '\u2a34',
1101 'lowast;': '\u2217',
1103 'LowerLeftArrow;': '\u2199',
1104 'LowerRightArrow;': '\u2198',
1106 'lozenge;': '\u25ca',
1109 'lparlt;': '\u2993',
1111 'lrcorner;': '\u231f',
1113 'lrhard;': '\u296d',
1116 'lsaquo;': '\u2039',
1118 'lscr;': '\U0001d4c1',
1126 'lsquor;': '\u201a',
1127 'Lstrok;': '\u0141',
1128 'lstrok;': '\u0142',
1137 'lthree;': '\u22cb',
1138 'ltimes;': '\u22c9',
1139 'ltlarr;': '\u2976',
1140 'ltquest;': '\u2a7b',
1144 'ltrPar;': '\u2996',
1145 'lurdshar;': '\u294a',
1146 'luruhar;': '\u2966',
1147 'lvertneqq;': '\u2268\ufe00',
1148 'lvnE;': '\u2268\ufe00',
1153 'maltese;': '\u2720',
1156 'mapsto;': '\u21a6',
1157 'mapstodown;': '\u21a7',
1158 'mapstoleft;': '\u21a4',
1159 'mapstoup;': '\u21a5',
1160 'marker;': '\u25ae',
1161 'mcomma;': '\u2a29',
1166 'measuredangle;': '\u2221',
1167 'MediumSpace;': '\u205f',
1168 'Mellintrf;': '\u2133',
1169 'Mfr;': '\U0001d510',
1170 'mfr;': '\U0001d52a',
1176 'midcir;': '\u2af0',
1180 'minusb;': '\u229f',
1181 'minusd;': '\u2238',
1182 'minusdu;': '\u2a2a',
1183 'MinusPlus;': '\u2213',
1186 'mnplus;': '\u2213',
1187 'models;': '\u22a7',
1188 'Mopf;': '\U0001d544',
1189 'mopf;': '\U0001d55e',
1192 'mscr;': '\U0001d4c2',
1193 'mstpos;': '\u223e',
1196 'multimap;': '\u22b8',
1199 'Nacute;': '\u0143',
1200 'nacute;': '\u0144',
1201 'nang;': '\u2220\u20d2',
1203 'napE;': '\u2a70\u0338',
1204 'napid;': '\u224b\u0338',
1206 'napprox;': '\u2249',
1208 'natural;': '\u266e',
1209 'naturals;': '\u2115',
1212 'nbump;': '\u224e\u0338',
1213 'nbumpe;': '\u224f\u0338',
1215 'Ncaron;': '\u0147',
1216 'ncaron;': '\u0148',
1217 'Ncedil;': '\u0145',
1218 'ncedil;': '\u0146',
1220 'ncongdot;': '\u2a6d\u0338',
1226 'nearhk;': '\u2924',
1229 'nearrow;': '\u2197',
1230 'nedot;': '\u2250\u0338',
1231 'NegativeMediumSpace;': '\u200b',
1232 'NegativeThickSpace;': '\u200b',
1233 'NegativeThinSpace;': '\u200b',
1234 'NegativeVeryThinSpace;': '\u200b',
1235 'nequiv;': '\u2262',
1236 'nesear;': '\u2928',
1237 'nesim;': '\u2242\u0338',
1238 'NestedGreaterGreater;': '\u226b',
1239 'NestedLessLess;': '\u226a',
1241 'nexist;': '\u2204',
1242 'nexists;': '\u2204',
1243 'Nfr;': '\U0001d511',
1244 'nfr;': '\U0001d52b',
1245 'ngE;': '\u2267\u0338',
1248 'ngeqq;': '\u2267\u0338',
1249 'ngeqslant;': '\u2a7e\u0338',
1250 'nges;': '\u2a7e\u0338',
1251 'nGg;': '\u22d9\u0338',
1253 'nGt;': '\u226b\u20d2',
1256 'nGtv;': '\u226b\u0338',
1269 'nlE;': '\u2266\u0338',
1271 'nLeftarrow;': '\u21cd',
1272 'nleftarrow;': '\u219a',
1273 'nLeftrightarrow;': '\u21ce',
1274 'nleftrightarrow;': '\u21ae',
1276 'nleqq;': '\u2266\u0338',
1277 'nleqslant;': '\u2a7d\u0338',
1278 'nles;': '\u2a7d\u0338',
1280 'nLl;': '\u22d8\u0338',
1282 'nLt;': '\u226a\u20d2',
1285 'nltrie;': '\u22ec',
1286 'nLtv;': '\u226a\u0338',
1288 'NoBreak;': '\u2060',
1289 'NonBreakingSpace;': '\xa0',
1291 'nopf;': '\U0001d55f',
1295 'NotCongruent;': '\u2262',
1296 'NotCupCap;': '\u226d',
1297 'NotDoubleVerticalBar;': '\u2226',
1298 'NotElement;': '\u2209',
1299 'NotEqual;': '\u2260',
1300 'NotEqualTilde;': '\u2242\u0338',
1301 'NotExists;': '\u2204',
1302 'NotGreater;': '\u226f',
1303 'NotGreaterEqual;': '\u2271',
1304 'NotGreaterFullEqual;': '\u2267\u0338',
1305 'NotGreaterGreater;': '\u226b\u0338',
1306 'NotGreaterLess;': '\u2279',
1307 'NotGreaterSlantEqual;': '\u2a7e\u0338',
1308 'NotGreaterTilde;': '\u2275',
1309 'NotHumpDownHump;': '\u224e\u0338',
1310 'NotHumpEqual;': '\u224f\u0338',
1312 'notindot;': '\u22f5\u0338',
1313 'notinE;': '\u22f9\u0338',
1314 'notinva;': '\u2209',
1315 'notinvb;': '\u22f7',
1316 'notinvc;': '\u22f6',
1317 'NotLeftTriangle;': '\u22ea',
1318 'NotLeftTriangleBar;': '\u29cf\u0338',
1319 'NotLeftTriangleEqual;': '\u22ec',
1320 'NotLess;': '\u226e',
1321 'NotLessEqual;': '\u2270',
1322 'NotLessGreater;': '\u2278',
1323 'NotLessLess;': '\u226a\u0338',
1324 'NotLessSlantEqual;': '\u2a7d\u0338',
1325 'NotLessTilde;': '\u2274',
1326 'NotNestedGreaterGreater;': '\u2aa2\u0338',
1327 'NotNestedLessLess;': '\u2aa1\u0338',
1329 'notniva;': '\u220c',
1330 'notnivb;': '\u22fe',
1331 'notnivc;': '\u22fd',
1332 'NotPrecedes;': '\u2280',
1333 'NotPrecedesEqual;': '\u2aaf\u0338',
1334 'NotPrecedesSlantEqual;': '\u22e0',
1335 'NotReverseElement;': '\u220c',
1336 'NotRightTriangle;': '\u22eb',
1337 'NotRightTriangleBar;': '\u29d0\u0338',
1338 'NotRightTriangleEqual;': '\u22ed',
1339 'NotSquareSubset;': '\u228f\u0338',
1340 'NotSquareSubsetEqual;': '\u22e2',
1341 'NotSquareSuperset;': '\u2290\u0338',
1342 'NotSquareSupersetEqual;': '\u22e3',
1343 'NotSubset;': '\u2282\u20d2',
1344 'NotSubsetEqual;': '\u2288',
1345 'NotSucceeds;': '\u2281',
1346 'NotSucceedsEqual;': '\u2ab0\u0338',
1347 'NotSucceedsSlantEqual;': '\u22e1',
1348 'NotSucceedsTilde;': '\u227f\u0338',
1349 'NotSuperset;': '\u2283\u20d2',
1350 'NotSupersetEqual;': '\u2289',
1351 'NotTilde;': '\u2241',
1352 'NotTildeEqual;': '\u2244',
1353 'NotTildeFullEqual;': '\u2247',
1354 'NotTildeTilde;': '\u2249',
1355 'NotVerticalBar;': '\u2224',
1357 'nparallel;': '\u2226',
1358 'nparsl;': '\u2afd\u20e5',
1359 'npart;': '\u2202\u0338',
1360 'npolint;': '\u2a14',
1362 'nprcue;': '\u22e0',
1363 'npre;': '\u2aaf\u0338',
1365 'npreceq;': '\u2aaf\u0338',
1368 'nrarrc;': '\u2933\u0338',
1369 'nrarrw;': '\u219d\u0338',
1370 'nRightarrow;': '\u21cf',
1371 'nrightarrow;': '\u219b',
1373 'nrtrie;': '\u22ed',
1375 'nsccue;': '\u22e1',
1376 'nsce;': '\u2ab0\u0338',
1377 'Nscr;': '\U0001d4a9',
1378 'nscr;': '\U0001d4c3',
1379 'nshortmid;': '\u2224',
1380 'nshortparallel;': '\u2226',
1383 'nsimeq;': '\u2244',
1386 'nsqsube;': '\u22e2',
1387 'nsqsupe;': '\u22e3',
1389 'nsubE;': '\u2ac5\u0338',
1391 'nsubset;': '\u2282\u20d2',
1392 'nsubseteq;': '\u2288',
1393 'nsubseteqq;': '\u2ac5\u0338',
1395 'nsucceq;': '\u2ab0\u0338',
1397 'nsupE;': '\u2ac6\u0338',
1399 'nsupset;': '\u2283\u20d2',
1400 'nsupseteq;': '\u2289',
1401 'nsupseteqq;': '\u2ac6\u0338',
1408 'ntriangleleft;': '\u22ea',
1409 'ntrianglelefteq;': '\u22ec',
1410 'ntriangleright;': '\u22eb',
1411 'ntrianglerighteq;': '\u22ed',
1415 'numero;': '\u2116',
1417 'nvap;': '\u224d\u20d2',
1418 'nVDash;': '\u22af',
1419 'nVdash;': '\u22ae',
1420 'nvDash;': '\u22ad',
1421 'nvdash;': '\u22ac',
1422 'nvge;': '\u2265\u20d2',
1424 'nvHarr;': '\u2904',
1425 'nvinfin;': '\u29de',
1426 'nvlArr;': '\u2902',
1427 'nvle;': '\u2264\u20d2',
1429 'nvltrie;': '\u22b4\u20d2',
1430 'nvrArr;': '\u2903',
1431 'nvrtrie;': '\u22b5\u20d2',
1432 'nvsim;': '\u223c\u20d2',
1433 'nwarhk;': '\u2923',
1436 'nwarrow;': '\u2196',
1437 'nwnear;': '\u2927',
1451 'Odblac;': '\u0150',
1452 'odblac;': '\u0151',
1455 'odsold;': '\u29bc',
1459 'Ofr;': '\U0001d512',
1460 'ofr;': '\U0001d52c',
1472 'olcross;': '\u29bb',
1479 'Omicron;': '\u039f',
1480 'omicron;': '\u03bf',
1482 'ominus;': '\u2296',
1483 'Oopf;': '\U0001d546',
1484 'oopf;': '\U0001d560',
1486 'OpenCurlyDoubleQuote;': '\u201c',
1487 'OpenCurlyQuote;': '\u2018',
1495 'orderof;': '\u2134',
1500 'origof;': '\u22b6',
1502 'orslope;': '\u2a57',
1505 'Oscr;': '\U0001d4aa',
1516 'Otimes;': '\u2a37',
1517 'otimes;': '\u2297',
1518 'otimesas;': '\u2a36',
1524 'OverBar;': '\u203e',
1525 'OverBrace;': '\u23de',
1526 'OverBracket;': '\u23b4',
1527 'OverParenthesis;': '\u23dc',
1531 'parallel;': '\u2225',
1532 'parsim;': '\u2af3',
1535 'PartialD;': '\u2202',
1540 'permil;': '\u2030',
1542 'pertenk;': '\u2031',
1543 'Pfr;': '\U0001d513',
1544 'pfr;': '\U0001d52d',
1548 'phmmat;': '\u2133',
1552 'pitchfork;': '\u22d4',
1554 'planck;': '\u210f',
1555 'planckh;': '\u210e',
1556 'plankv;': '\u210f',
1558 'plusacir;': '\u2a23',
1560 'pluscir;': '\u2a22',
1561 'plusdo;': '\u2214',
1562 'plusdu;': '\u2a25',
1564 'PlusMinus;': '\xb1',
1567 'plussim;': '\u2a26',
1568 'plustwo;': '\u2a27',
1570 'Poincareplane;': '\u210c',
1571 'pointint;': '\u2a15',
1573 'popf;': '\U0001d561',
1583 'precapprox;': '\u2ab7',
1584 'preccurlyeq;': '\u227c',
1585 'Precedes;': '\u227a',
1586 'PrecedesEqual;': '\u2aaf',
1587 'PrecedesSlantEqual;': '\u227c',
1588 'PrecedesTilde;': '\u227e',
1589 'preceq;': '\u2aaf',
1590 'precnapprox;': '\u2ab9',
1591 'precneqq;': '\u2ab5',
1592 'precnsim;': '\u22e8',
1593 'precsim;': '\u227e',
1596 'primes;': '\u2119',
1599 'prnsim;': '\u22e8',
1601 'Product;': '\u220f',
1602 'profalar;': '\u232e',
1603 'profline;': '\u2312',
1604 'profsurf;': '\u2313',
1606 'Proportion;': '\u2237',
1607 'Proportional;': '\u221d',
1608 'propto;': '\u221d',
1610 'prurel;': '\u22b0',
1611 'Pscr;': '\U0001d4ab',
1612 'pscr;': '\U0001d4c5',
1615 'puncsp;': '\u2008',
1616 'Qfr;': '\U0001d514',
1617 'qfr;': '\U0001d52e',
1620 'qopf;': '\U0001d562',
1621 'qprime;': '\u2057',
1622 'Qscr;': '\U0001d4ac',
1623 'qscr;': '\U0001d4c6',
1624 'quaternions;': '\u210d',
1625 'quatint;': '\u2a16',
1627 'questeq;': '\u225f',
1633 'race;': '\u223d\u0331',
1634 'Racute;': '\u0154',
1635 'racute;': '\u0155',
1637 'raemptyv;': '\u29b3',
1642 'rangle;': '\u27e9',
1648 'rarrap;': '\u2975',
1650 'rarrbfs;': '\u2920',
1652 'rarrfs;': '\u291e',
1653 'rarrhk;': '\u21aa',
1654 'rarrlp;': '\u21ac',
1655 'rarrpl;': '\u2945',
1656 'rarrsim;': '\u2974',
1657 'Rarrtl;': '\u2916',
1658 'rarrtl;': '\u21a3',
1660 'rAtail;': '\u291c',
1661 'ratail;': '\u291a',
1663 'rationals;': '\u211a',
1671 'rbrksld;': '\u298e',
1672 'rbrkslu;': '\u2990',
1673 'Rcaron;': '\u0158',
1674 'rcaron;': '\u0159',
1675 'Rcedil;': '\u0156',
1676 'rcedil;': '\u0157',
1682 'rdldhar;': '\u2969',
1684 'rdquor;': '\u201d',
1688 'realine;': '\u211b',
1689 'realpart;': '\u211c',
1696 'ReverseElement;': '\u220b',
1697 'ReverseEquilibrium;': '\u21cb',
1698 'ReverseUpEquilibrium;': '\u296f',
1699 'rfisht;': '\u297d',
1700 'rfloor;': '\u230b',
1702 'rfr;': '\U0001d52f',
1706 'rharul;': '\u296c',
1710 'RightAngleBracket;': '\u27e9',
1711 'RightArrow;': '\u2192',
1712 'Rightarrow;': '\u21d2',
1713 'rightarrow;': '\u2192',
1714 'RightArrowBar;': '\u21e5',
1715 'RightArrowLeftArrow;': '\u21c4',
1716 'rightarrowtail;': '\u21a3',
1717 'RightCeiling;': '\u2309',
1718 'RightDoubleBracket;': '\u27e7',
1719 'RightDownTeeVector;': '\u295d',
1720 'RightDownVector;': '\u21c2',
1721 'RightDownVectorBar;': '\u2955',
1722 'RightFloor;': '\u230b',
1723 'rightharpoondown;': '\u21c1',
1724 'rightharpoonup;': '\u21c0',
1725 'rightleftarrows;': '\u21c4',
1726 'rightleftharpoons;': '\u21cc',
1727 'rightrightarrows;': '\u21c9',
1728 'rightsquigarrow;': '\u219d',
1729 'RightTee;': '\u22a2',
1730 'RightTeeArrow;': '\u21a6',
1731 'RightTeeVector;': '\u295b',
1732 'rightthreetimes;': '\u22cc',
1733 'RightTriangle;': '\u22b3',
1734 'RightTriangleBar;': '\u29d0',
1735 'RightTriangleEqual;': '\u22b5',
1736 'RightUpDownVector;': '\u294f',
1737 'RightUpTeeVector;': '\u295c',
1738 'RightUpVector;': '\u21be',
1739 'RightUpVectorBar;': '\u2954',
1740 'RightVector;': '\u21c0',
1741 'RightVectorBar;': '\u2953',
1743 'risingdotseq;': '\u2253',
1747 'rmoust;': '\u23b1',
1748 'rmoustache;': '\u23b1',
1755 'ropf;': '\U0001d563',
1756 'roplus;': '\u2a2e',
1757 'rotimes;': '\u2a35',
1758 'RoundImplies;': '\u2970',
1760 'rpargt;': '\u2994',
1761 'rppolint;': '\u2a12',
1763 'Rrightarrow;': '\u21db',
1764 'rsaquo;': '\u203a',
1766 'rscr;': '\U0001d4c7',
1771 'rsquor;': '\u2019',
1772 'rthree;': '\u22cc',
1773 'rtimes;': '\u22ca',
1777 'rtriltri;': '\u29ce',
1778 'RuleDelayed;': '\u29f4',
1779 'ruluhar;': '\u2968',
1781 'Sacute;': '\u015a',
1782 'sacute;': '\u015b',
1787 'Scaron;': '\u0160',
1788 'scaron;': '\u0161',
1792 'Scedil;': '\u015e',
1793 'scedil;': '\u015f',
1798 'scnsim;': '\u22e9',
1799 'scpolint;': '\u2a13',
1806 'searhk;': '\u2925',
1809 'searrow;': '\u2198',
1813 'seswar;': '\u2929',
1814 'setminus;': '\u2216',
1817 'Sfr;': '\U0001d516',
1818 'sfr;': '\U0001d530',
1819 'sfrown;': '\u2322',
1821 'SHCHcy;': '\u0429',
1822 'shchcy;': '\u0449',
1825 'ShortDownArrow;': '\u2193',
1826 'ShortLeftArrow;': '\u2190',
1827 'shortmid;': '\u2223',
1828 'shortparallel;': '\u2225',
1829 'ShortRightArrow;': '\u2192',
1830 'ShortUpArrow;': '\u2191',
1835 'sigmaf;': '\u03c2',
1836 'sigmav;': '\u03c2',
1838 'simdot;': '\u2a6a',
1846 'simplus;': '\u2a24',
1847 'simrarr;': '\u2972',
1849 'SmallCircle;': '\u2218',
1850 'smallsetminus;': '\u2216',
1851 'smashp;': '\u2a33',
1852 'smeparsl;': '\u29e4',
1857 'smtes;': '\u2aac\ufe00',
1858 'SOFTcy;': '\u042c',
1859 'softcy;': '\u044c',
1862 'solbar;': '\u233f',
1863 'Sopf;': '\U0001d54a',
1864 'sopf;': '\U0001d564',
1865 'spades;': '\u2660',
1866 'spadesuit;': '\u2660',
1869 'sqcaps;': '\u2293\ufe00',
1871 'sqcups;': '\u2294\ufe00',
1874 'sqsube;': '\u2291',
1875 'sqsubset;': '\u228f',
1876 'sqsubseteq;': '\u2291',
1878 'sqsupe;': '\u2292',
1879 'sqsupset;': '\u2290',
1880 'sqsupseteq;': '\u2292',
1882 'Square;': '\u25a1',
1883 'square;': '\u25a1',
1884 'SquareIntersection;': '\u2293',
1885 'SquareSubset;': '\u228f',
1886 'SquareSubsetEqual;': '\u2291',
1887 'SquareSuperset;': '\u2290',
1888 'SquareSupersetEqual;': '\u2292',
1889 'SquareUnion;': '\u2294',
1890 'squarf;': '\u25aa',
1893 'Sscr;': '\U0001d4ae',
1894 'sscr;': '\U0001d4c8',
1895 'ssetmn;': '\u2216',
1896 'ssmile;': '\u2323',
1897 'sstarf;': '\u22c6',
1901 'straightepsilon;': '\u03f5',
1902 'straightphi;': '\u03d5',
1906 'subdot;': '\u2abd',
1909 'subedot;': '\u2ac3',
1910 'submult;': '\u2ac1',
1913 'subplus;': '\u2abf',
1914 'subrarr;': '\u2979',
1915 'Subset;': '\u22d0',
1916 'subset;': '\u2282',
1917 'subseteq;': '\u2286',
1918 'subseteqq;': '\u2ac5',
1919 'SubsetEqual;': '\u2286',
1920 'subsetneq;': '\u228a',
1921 'subsetneqq;': '\u2acb',
1922 'subsim;': '\u2ac7',
1923 'subsub;': '\u2ad5',
1924 'subsup;': '\u2ad3',
1926 'succapprox;': '\u2ab8',
1927 'succcurlyeq;': '\u227d',
1928 'Succeeds;': '\u227b',
1929 'SucceedsEqual;': '\u2ab0',
1930 'SucceedsSlantEqual;': '\u227d',
1931 'SucceedsTilde;': '\u227f',
1932 'succeq;': '\u2ab0',
1933 'succnapprox;': '\u2aba',
1934 'succneqq;': '\u2ab6',
1935 'succnsim;': '\u22e9',
1936 'succsim;': '\u227f',
1937 'SuchThat;': '\u220b',
1949 'supdot;': '\u2abe',
1950 'supdsub;': '\u2ad8',
1953 'supedot;': '\u2ac4',
1954 'Superset;': '\u2283',
1955 'SupersetEqual;': '\u2287',
1956 'suphsol;': '\u27c9',
1957 'suphsub;': '\u2ad7',
1958 'suplarr;': '\u297b',
1959 'supmult;': '\u2ac2',
1962 'supplus;': '\u2ac0',
1963 'Supset;': '\u22d1',
1964 'supset;': '\u2283',
1965 'supseteq;': '\u2287',
1966 'supseteqq;': '\u2ac6',
1967 'supsetneq;': '\u228b',
1968 'supsetneqq;': '\u2acc',
1969 'supsim;': '\u2ac8',
1970 'supsub;': '\u2ad4',
1971 'supsup;': '\u2ad6',
1972 'swarhk;': '\u2926',
1975 'swarrow;': '\u2199',
1976 'swnwar;': '\u292a',
1980 'target;': '\u2316',
1984 'Tcaron;': '\u0164',
1985 'tcaron;': '\u0165',
1986 'Tcedil;': '\u0162',
1987 'tcedil;': '\u0163',
1991 'telrec;': '\u2315',
1992 'Tfr;': '\U0001d517',
1993 'tfr;': '\U0001d531',
1994 'there4;': '\u2234',
1995 'Therefore;': '\u2234',
1996 'therefore;': '\u2234',
1999 'thetasym;': '\u03d1',
2000 'thetav;': '\u03d1',
2001 'thickapprox;': '\u2248',
2002 'thicksim;': '\u223c',
2003 'ThickSpace;': '\u205f\u200a',
2004 'thinsp;': '\u2009',
2005 'ThinSpace;': '\u2009',
2007 'thksim;': '\u223c',
2014 'TildeEqual;': '\u2243',
2015 'TildeFullEqual;': '\u2245',
2016 'TildeTilde;': '\u2248',
2019 'timesb;': '\u22a0',
2020 'timesbar;': '\u2a31',
2021 'timesd;': '\u2a30',
2025 'topbot;': '\u2336',
2026 'topcir;': '\u2af1',
2027 'Topf;': '\U0001d54b',
2028 'topf;': '\U0001d565',
2029 'topfork;': '\u2ada',
2031 'tprime;': '\u2034',
2034 'triangle;': '\u25b5',
2035 'triangledown;': '\u25bf',
2036 'triangleleft;': '\u25c3',
2037 'trianglelefteq;': '\u22b4',
2038 'triangleq;': '\u225c',
2039 'triangleright;': '\u25b9',
2040 'trianglerighteq;': '\u22b5',
2041 'tridot;': '\u25ec',
2043 'triminus;': '\u2a3a',
2044 'TripleDot;': '\u20db',
2045 'triplus;': '\u2a39',
2047 'tritime;': '\u2a3b',
2048 'trpezium;': '\u23e2',
2049 'Tscr;': '\U0001d4af',
2050 'tscr;': '\U0001d4c9',
2055 'Tstrok;': '\u0166',
2056 'tstrok;': '\u0167',
2058 'twoheadleftarrow;': '\u219e',
2059 'twoheadrightarrow;': '\u21a0',
2067 'Uarrocir;': '\u2949',
2070 'Ubreve;': '\u016c',
2071 'ubreve;': '\u016d',
2079 'Udblac;': '\u0170',
2080 'udblac;': '\u0171',
2082 'ufisht;': '\u297e',
2083 'Ufr;': '\U0001d518',
2084 'ufr;': '\U0001d532',
2093 'ulcorn;': '\u231c',
2094 'ulcorner;': '\u231c',
2095 'ulcrop;': '\u230f',
2102 'UnderBrace;': '\u23df',
2103 'UnderBracket;': '\u23b5',
2104 'UnderParenthesis;': '\u23dd',
2106 'UnionPlus;': '\u228e',
2109 'Uopf;': '\U0001d54c',
2110 'uopf;': '\U0001d566',
2111 'UpArrow;': '\u2191',
2112 'Uparrow;': '\u21d1',
2113 'uparrow;': '\u2191',
2114 'UpArrowBar;': '\u2912',
2115 'UpArrowDownArrow;': '\u21c5',
2116 'UpDownArrow;': '\u2195',
2117 'Updownarrow;': '\u21d5',
2118 'updownarrow;': '\u2195',
2119 'UpEquilibrium;': '\u296e',
2120 'upharpoonleft;': '\u21bf',
2121 'upharpoonright;': '\u21be',
2123 'UpperLeftArrow;': '\u2196',
2124 'UpperRightArrow;': '\u2197',
2128 'Upsilon;': '\u03a5',
2129 'upsilon;': '\u03c5',
2131 'UpTeeArrow;': '\u21a5',
2132 'upuparrows;': '\u21c8',
2133 'urcorn;': '\u231d',
2134 'urcorner;': '\u231d',
2135 'urcrop;': '\u230e',
2139 'Uscr;': '\U0001d4b0',
2140 'uscr;': '\U0001d4ca',
2142 'Utilde;': '\u0168',
2143 'utilde;': '\u0169',
2151 'uwangle;': '\u29a7',
2152 'vangrt;': '\u299c',
2153 'varepsilon;': '\u03f5',
2154 'varkappa;': '\u03f0',
2155 'varnothing;': '\u2205',
2156 'varphi;': '\u03d5',
2158 'varpropto;': '\u221d',
2161 'varrho;': '\u03f1',
2162 'varsigma;': '\u03c2',
2163 'varsubsetneq;': '\u228a\ufe00',
2164 'varsubsetneqq;': '\u2acb\ufe00',
2165 'varsupsetneq;': '\u228b\ufe00',
2166 'varsupsetneqq;': '\u2acc\ufe00',
2167 'vartheta;': '\u03d1',
2168 'vartriangleleft;': '\u22b2',
2169 'vartriangleright;': '\u22b3',
2179 'Vdashl;': '\u2ae6',
2182 'veebar;': '\u22bb',
2184 'vellip;': '\u22ee',
2185 'Verbar;': '\u2016',
2189 'VerticalBar;': '\u2223',
2190 'VerticalLine;': '|',
2191 'VerticalSeparator;': '\u2758',
2192 'VerticalTilde;': '\u2240',
2193 'VeryThinSpace;': '\u200a',
2194 'Vfr;': '\U0001d519',
2195 'vfr;': '\U0001d533',
2197 'vnsub;': '\u2282\u20d2',
2198 'vnsup;': '\u2283\u20d2',
2199 'Vopf;': '\U0001d54d',
2200 'vopf;': '\U0001d567',
2203 'Vscr;': '\U0001d4b1',
2204 'vscr;': '\U0001d4cb',
2205 'vsubnE;': '\u2acb\ufe00',
2206 'vsubne;': '\u228a\ufe00',
2207 'vsupnE;': '\u2acc\ufe00',
2208 'vsupne;': '\u228b\ufe00',
2209 'Vvdash;': '\u22aa',
2210 'vzigzag;': '\u299a',
2213 'wedbar;': '\u2a5f',
2216 'wedgeq;': '\u2259',
2217 'weierp;': '\u2118',
2218 'Wfr;': '\U0001d51a',
2219 'wfr;': '\U0001d534',
2220 'Wopf;': '\U0001d54e',
2221 'wopf;': '\U0001d568',
2224 'wreath;': '\u2240',
2225 'Wscr;': '\U0001d4b2',
2226 'wscr;': '\U0001d4cc',
2231 'Xfr;': '\U0001d51b',
2232 'xfr;': '\U0001d535',
2242 'Xopf;': '\U0001d54f',
2243 'xopf;': '\U0001d569',
2244 'xoplus;': '\u2a01',
2245 'xotime;': '\u2a02',
2248 'Xscr;': '\U0001d4b3',
2249 'xscr;': '\U0001d4cd',
2250 'xsqcup;': '\u2a06',
2251 'xuplus;': '\u2a04',
2254 'xwedge;': '\u22c0',
2267 'Yfr;': '\U0001d51c',
2268 'yfr;': '\U0001d536',
2271 'Yopf;': '\U0001d550',
2272 'yopf;': '\U0001d56a',
2273 'Yscr;': '\U0001d4b4',
2274 'yscr;': '\U0001d4ce',
2280 'Zacute;': '\u0179',
2281 'zacute;': '\u017a',
2282 'Zcaron;': '\u017d',
2283 'zcaron;': '\u017e',
2288 'zeetrf;': '\u2128',
2289 'ZeroWidthSpace;': '\u200b',
2293 'zfr;': '\U0001d537',
2296 'zigrarr;': '\u21dd',
2298 'zopf;': '\U0001d56b',
2299 'Zscr;': '\U0001d4b5',
2300 'zscr;': '\U0001d4cf',
2306 import http.client as compat_http_client
2307 except ImportError: # Python 2
2308 import httplib as compat_http_client
2311 from urllib.error import HTTPError as compat_HTTPError
2312 except ImportError: # Python 2
2313 from urllib2 import HTTPError as compat_HTTPError
2316 from urllib.request import urlretrieve as compat_urlretrieve
2317 except ImportError: # Python 2
2318 from urllib import urlretrieve as compat_urlretrieve
2321 from html.parser import HTMLParser as compat_HTMLParser
2322 except ImportError: # Python 2
2323 from HTMLParser import HTMLParser as compat_HTMLParser
try:
    from subprocess import DEVNULL

    def compat_subprocess_get_DEVNULL():
        """Return subprocess.DEVNULL (available on Python >= 3.3)."""
        return DEVNULL
except ImportError:
    def compat_subprocess_get_DEVNULL():
        """Return a new writable file object opened on os.devnull.

        A fresh handle is opened on every call; closing it is left to
        the caller.
        """
        return open(os.path.devnull, 'w')
2332 import http.server as compat_http_server
2334 import BaseHTTPServer as compat_http_server
# Text (unicode) string type: `unicode` on Python 2, `str` on Python 3.
try:
    compat_str = unicode  # Python 2
except NameError:  # Python 3 has no `unicode` builtin
    compat_str = str
try:
    from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
    from urllib.parse import unquote as compat_urllib_parse_unquote
    from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
except ImportError:  # Python 2
    # Matches runs of ASCII characters; reuse the stdlib pattern when present.
    _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
                else re.compile(r'([\x00-\x7f]+)'))

    # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
    # implementations from cpython 3.4.3's stdlib. Python 2's version
    # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)

    def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Is it a string-like object?
            # (attribute access raises AttributeError for non-strings)
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        bits = string.split(b'%')
        if len(bits) == 1:
            # No percent sign at all: nothing to decode.
            return string
        res = [bits[0]]
        append = res.append
        for item in bits[1:]:
            try:
                # First two characters after '%' are the hex digits.
                append(compat_urllib_parse._hextochr[item[:2]])
                append(item[2:])
            except KeyError:
                # Not a valid escape: keep the text verbatim.
                append(b'%')
                append(item)
        return b''.join(res)

    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by their single-character equivalent. The optional
        encoding and errors parameters specify how to decode percent-encoded
        sequences into Unicode characters, as accepted by the bytes.decode()
        method.
        By default, percent-encoded sequences are decoded with UTF-8, and invalid
        sequences are replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            # Type check only: raises AttributeError for non-strings.
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # Odd indices hold ASCII runs (which may contain escapes); even
        # indices hold the non-ASCII text between them.
        bits = _asciire.split(string)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
            append(bits[i + 1])
        return ''.join(res)

    def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
        """Like unquote(), but also replace plus signs by spaces, as required for
        unquoting HTML form values.

        unquote_plus('%7e/abc+def') -> '~/abc def'
        """
        string = string.replace('+', ' ')
        return compat_urllib_parse_unquote(string, encoding, errors)
try:
    from urllib.parse import urlencode as compat_urllib_parse_urlencode
except ImportError:  # Python 2
    # Python 2 will choke in urlencode on mixture of byte and unicode strings.
    # Possible solutions are to either port it from python 3 with all
    # the friends or manually ensure input query contains only byte strings.
    # We will stick with latter thus recursively encoding the whole query.
    def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
        """URL-encode *query* after encoding every text string in it to bytes.

        *query* may be a dict or a sequence (of pairs); nested dicts, lists
        and tuples are walked recursively. *doseq* is forwarded to the
        stdlib urlencode; *encoding* is used for the text-to-bytes step.
        """
        def encode_elem(e):
            # Encode one element, recursing into containers.
            if isinstance(e, dict):
                e = encode_dict(e)
            elif isinstance(e, (list, tuple,)):
                list_e = encode_list(e)
                # Preserve the container type of the input.
                e = tuple(list_e) if isinstance(e, tuple) else list_e
            elif isinstance(e, compat_str):
                e = e.encode(encoding)
            return e

        def encode_dict(d):
            return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())

        def encode_list(l):
            return [encode_elem(e) for e in l]

        return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
try:
    from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError:  # Python < 3.4
    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
        """urllib handler that serves ``data:`` URLs from the URL itself."""

        def data_open(self, req):
            """Decode the data URL of *req* and return it as a response object."""
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            scheme, data = url.split(':', 1)
            mediatype, data = data.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(';base64'):
                data = binascii.a2b_base64(data)
                # Drop the trailing ';base64' marker (7 characters).
                mediatype = mediatype[:-7]

            if not mediatype:
                mediatype = 'text/plain;charset=US-ASCII'

            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
# Common base of all string types: `basestring` on Python 2, `str` on Python 3.
try:
    compat_basestring = basestring  # Python 2
except NameError:
    compat_basestring = str

# Code point -> one-character text string: `unichr` on Python 2, `chr` on Python 3.
try:
    compat_chr = unichr  # Python 2
except NameError:
    compat_chr = chr
2482 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
2483 except ImportError: # Python 2.6
2484 from xml.parsers.expat import ExpatError as compat_xml_parse_error
etree = xml.etree.ElementTree


class _TreeBuilder(etree.TreeBuilder):
    """TreeBuilder whose doctype() hook silently ignores DOCTYPE declarations."""

    def doctype(self, name, pubid, system):
        pass


if sys.version_info[0] >= 3:
    def compat_etree_fromstring(text):
        """Parse XML *text* and return the root element."""
        return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
else:
    # python 2.x tries to encode unicode strings with ascii (see the
    # XMLParser._fixtext method)
    try:
        _etree_iter = etree.Element.iter
    except AttributeError:  # Python <=2.6
        def _etree_iter(root):
            """Yield every descendant of *root*, depth-first (root excluded)."""
            for el in root.findall('*'):
                yield el
                for sub in _etree_iter(el):
                    yield sub

    # on 2.6 XML doesn't have a parser argument, function copied from CPython
    # 2.7 source
    def _XML(text, parser=None):
        """Feed *text* to *parser* (a default one if omitted); return the root."""
        if not parser:
            parser = etree.XMLParser(target=_TreeBuilder())
        parser.feed(text)
        return parser.close()

    def _element_factory(*args, **kwargs):
        """Create an Element whose byte-string attribute values are decoded."""
        el = etree.Element(*args, **kwargs)
        for k, v in el.items():
            if isinstance(v, bytes):
                el.set(k, v.decode('utf-8'))
        return el

    def compat_etree_fromstring(text):
        """Parse XML *text*; decode byte-string text/attributes as UTF-8."""
        doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
        for el in _etree_iter(doc):
            if el.text is not None and isinstance(el.text, bytes):
                el.text = el.text.decode('utf-8')
        return doc
if hasattr(etree, 'register_namespace'):
    compat_etree_register_namespace = etree.register_namespace
else:
    def compat_etree_register_namespace(prefix, uri):
        """Register a namespace prefix.
        The registry is global, and any existing mapping for either the
        given prefix or the namespace URI will be removed.
        *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
        attributes in this namespace will be serialized with prefix if possible.
        ValueError is raised if prefix is reserved or is invalid.
        """
        if re.match(r"ns\d+$", prefix):
            raise ValueError("Prefix format reserved for internal use")
        # Iterate over a snapshot because entries are deleted in the loop.
        for k, v in list(etree._namespace_map.items()):
            if k == uri or v == prefix:
                del etree._namespace_map[k]
        etree._namespace_map[uri] = prefix
if sys.version_info < (2, 7):
    # Here comes the crazy part: In 2.6, if the xpath is a unicode,
    # .//node does not match if a node is a direct child of . !
    def compat_xpath(xpath):
        """Return *xpath* as an ASCII byte string when it is a text string."""
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')
        return xpath
else:
    def compat_xpath(xpath):
        """Return *xpath* unchanged (no workaround needed on Python >= 2.7)."""
        return xpath
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Parse query string *qs* into a list of (name, value) text pairs.

        Fields are separated by '&' or ';'. Blank values are dropped unless
        *keep_blank_values* is true. With *strict_parsing*, a field without
        '=' raises ValueError instead of being skipped.
        """
        qs, _coerce_result = qs, compat_str
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError('bad query field: %r' % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                name = nv[0].replace('+', ' ')
                name = compat_urllib_parse_unquote(
                    name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = compat_urllib_parse_unquote(
                    value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse query string *qs* into a dict mapping each name to a list of values.

        Values of a repeated name are collected in order of appearance.
        Arguments have the same meaning as for _parse_qsl().
        """
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                           encoding=encoding, errors=errors)
        for name, value in pairs:
            parsed_result.setdefault(name, []).append(value)
        return parsed_result
try:
    from shlex import quote as compat_shlex_quote
except ImportError:  # Python < 3.3
    def compat_shlex_quote(s):
        """Return a shell-escaped version of the string *s*.

        Strings made only of word characters, '-', '_', '.' and '/' are
        returned unchanged. Anything else (including the empty string) is
        wrapped in single quotes, with embedded single quotes written
        as '"'"'.
        """
        if re.match(r'^[-_\w./]+$', s):
            return s
        else:
            return "'" + s.replace("'", "'\"'\"'") + "'"
try:
    # Probe shlex.split with a non-ASCII text string. The probe is written
    # with escapes (U+4E2D U+6587) so it does not depend on how this file's
    # bytes are decoded.
    args = shlex.split('\u4e2d\u6587')
    assert (isinstance(args, list) and
            isinstance(args[0], compat_str) and
            args[0] == '\u4e2d\u6587')
    compat_shlex_split = shlex.split
except (AssertionError, UnicodeEncodeError):
    # Working around shlex issue with unicode strings on some python 2
    # versions (see http://bugs.python.org/issue1548891)
    def compat_shlex_split(s, comments=False, posix=True):
        """Split *s* like shlex.split(), always returning text strings.

        Text input is encoded to UTF-8 before splitting and every resulting
        token is decoded from UTF-8 again.
        """
        if isinstance(s, compat_str):
            s = s.encode('utf-8')
        return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
# On Jython os.name is 'java'; os._name then holds the underlying OS name.
compat_os_name = os._name if os.name == 'java' else os.name


if sys.version_info >= (3, 0):
    compat_getenv = os.getenv
    compat_expanduser = os.path.expanduser

    def compat_setenv(key, value, env=os.environ):
        """Set environment variable *key* to *value* in mapping *env*."""
        env[key] = value
else:
    # Environment variables should be decoded with filesystem encoding.
    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)

    def compat_getenv(key, default=None):
        """Return environment variable *key* decoded with the filesystem encoding.

        Empty or missing values are returned as-is (possibly *default*).
        """
        from .utils import get_filesystem_encoding
        env = os.getenv(key, default)
        if env:
            env = env.decode(get_filesystem_encoding())
        return env

    def compat_setenv(key, value, env=os.environ):
        """Set *key* to *value* in *env*, encoding text with the filesystem encoding."""
        def encode(v):
            from .utils import get_filesystem_encoding
            return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
        env[encode(key)] = encode(value)

    # HACK: The default implementations of os.path.expanduser from cpython do not decode
    # environment variables with filesystem encoding. We will work around this by
    # providing adjusted implementations.
    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
    # for different platforms with correct environment variables decoding.

    if compat_os_name == 'posix':
        def compat_expanduser(path):
            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
            do nothing."""
            if not path.startswith('~'):
                return path
            # i marks the end of the "~" or "~user" prefix.
            i = path.find('/', 1)
            if i < 0:
                i = len(path)
            if i == 1:
                # Bare "~": current user's home.
                if 'HOME' not in os.environ:
                    import pwd
                    userhome = pwd.getpwuid(os.getuid()).pw_dir
                else:
                    userhome = compat_getenv('HOME')
            else:
                # "~user": look the user up in the password database.
                import pwd
                try:
                    pwent = pwd.getpwnam(path[1:i])
                except KeyError:
                    return path
                userhome = pwent.pw_dir
            userhome = userhome.rstrip('/')
            return (userhome + path[i:]) or '/'
    elif compat_os_name == 'nt' or compat_os_name == 'ce':
        def compat_expanduser(path):
            """Expand ~ and ~user constructs.

            If user or $HOME is unknown, do nothing."""
            if path[:1] != '~':
                return path
            # Advance i to the end of the "~" or "~user" prefix.
            i, n = 1, len(path)
            while i < n and path[i] not in '/\\':
                i = i + 1

            if 'HOME' in os.environ:
                userhome = compat_getenv('HOME')
            elif 'USERPROFILE' in os.environ:
                userhome = compat_getenv('USERPROFILE')
            elif 'HOMEPATH' not in os.environ:
                return path
            else:
                # compat_getenv() returns None for an unset variable rather
                # than raising KeyError, so fall back to '' explicitly.
                drive = compat_getenv('HOMEDRIVE') or ''
                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

            if i != 1:  # ~user
                userhome = os.path.join(os.path.dirname(userhome), path[1:i])

            return userhome + path[i:]
    else:
        compat_expanduser = os.path.expanduser
if sys.version_info < (3, 0):
    def compat_print(s):
        """Print text *s*, encoded with the preferred encoding.

        Characters the encoding cannot represent are written as XML
        character references instead of raising.
        """
        from .utils import preferredencoding
        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
else:
    def compat_print(s):
        """Print text *s*; *s* must be a text string."""
        assert isinstance(s, compat_str)
        print(s)
if sys.version_info < (3, 0) and sys.platform == 'win32':
    def compat_getpass(prompt, *args, **kwargs):
        """getpass.getpass() wrapper that encodes a text *prompt* first.

        The prompt is encoded with the preferred encoding before being
        handed to getpass; all other arguments are passed through.
        """
        if isinstance(prompt, compat_str):
            from .utils import preferredencoding
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
else:
    compat_getpass = getpass.getpass
# Line-reading prompt function: `raw_input` on Python 2, `input` on Python 3.
try:
    compat_input = raw_input
except NameError:  # Python 3
    compat_input = input
# Python < 2.6.5 require kwargs to be bytes
try:
    # Probe: does ** accept a dict keyed by (text) string literals?
    def _testfunc(x):
        pass
    _testfunc(**{'x': 0})
except TypeError:
    def compat_kwargs(kwargs):
        """Return a copy of *kwargs* with every key converted to bytes."""
        return dict((bytes(k), v) for k, v in kwargs.items())
else:
    def compat_kwargs(kwargs):
        """Return *kwargs* unchanged (text keys are accepted here)."""
        return kwargs
# Tuple of built-in numeric types, suitable for isinstance().
try:
    compat_numeric_types = (int, float, long, complex)
except NameError:  # Python 3
    compat_numeric_types = (int, float, complex)
if sys.version_info < (2, 7):
    def compat_socket_create_connection(address, timeout, source_address=None):
        """Connect to *address* (host, port) and return the connected socket.

        Every address returned by getaddrinfo() is tried in turn. If all
        attempts fail, the last socket.error is re-raised; if getaddrinfo()
        returned nothing, a socket.error is raised. *source_address*, when
        given, is bound before connecting.
        """
        host, port = address
        err = None
        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            sock = None
            try:
                sock = socket.socket(af, socktype, proto)
                sock.settimeout(timeout)
                if source_address:
                    sock.bind(source_address)
                sock.connect(sa)
                return sock
            except socket.error as e:
                # Remember the failure and release the half-open socket
                # before trying the next candidate address.
                err = e
                if sock is not None:
                    sock.close()
        if err is not None:
            raise err
        else:
            raise socket.error('getaddrinfo returns an empty list')
else:
    compat_socket_create_connection = socket.create_connection
# Fix https://github.com/rg3/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
    """Patch optparse.OptionGroup.add_option if it rejects text arguments.

    A throwaway parser is probed with a text option string. Only when that
    raises TypeError is add_option replaced by a wrapper that encodes text
    positional and keyword arguments to ASCII (with 'replace') first.
    """
    op = optparse.OptionParser()
    og = optparse.OptionGroup(op, 'foo')
    try:
        og.add_option('-t')
    except TypeError:
        real_add_option = optparse.OptionGroup.add_option

        def _compat_add_option(self, *args, **kwargs):
            enc = lambda v: (
                v.encode('ascii', 'replace') if isinstance(v, compat_str)
                else v)
            bargs = [enc(a) for a in args]
            bkwargs = dict(
                (k, enc(v)) for k, v in kwargs.items())
            return real_add_option(self, *bargs, **bkwargs)
        optparse.OptionGroup.add_option = _compat_add_option
if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
    compat_get_terminal_size = shutil.get_terminal_size
else:
    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

    def compat_get_terminal_size(fallback=(80, 24)):
        """Return the terminal size as a (columns, lines) named tuple.

        COLUMNS and LINES environment variables are used first. Whatever is
        missing or non-positive is taken from `stty size`, and from
        *fallback* when that command fails.
        """
        columns = compat_getenv('COLUMNS')
        if columns:
            columns = int(columns)
        else:
            columns = None
        lines = compat_getenv('LINES')
        if lines:
            lines = int(lines)
        else:
            lines = None

        if columns is None or lines is None or columns <= 0 or lines <= 0:
            try:
                sp = subprocess.Popen(
                    ['stty', 'size'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = sp.communicate()
                # `stty size` prints "<rows> <columns>".
                _lines, _columns = map(int, out.split())
            except Exception:
                # Best effort: any failure falls back to the default size.
                _columns, _lines = _terminal_size(*fallback)

            if columns is None or columns <= 0:
                columns = _columns
            if lines is None or lines <= 0:
                lines = _lines
        return _terminal_size(columns, lines)
try:
    # Probe: the `step` keyword is not accepted on Python 2.6.
    itertools.count(start=0, step=1)
    compat_itertools_count = itertools.count
except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        """Yield start, start + step, start + 2 * step, ... without end."""
        n = start
        while True:
            yield n
            n += step
2859 if sys.version_info >= (3, 0):
2860 from tokenize import tokenize as compat_tokenize_tokenize
2862 from tokenize import generate_tokens as compat_tokenize_tokenize
try:
    # Probe: does struct accept a text format string?
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
    # See https://bugs.python.org/issue19099
    def compat_struct_pack(spec, *args):
        """struct.pack() that accepts a text format string *spec*."""
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def compat_struct_unpack(spec, *args):
        """struct.unpack() that accepts a text format string *spec*."""
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    compat_struct_pack = struct.pack
    compat_struct_unpack = struct.unpack
2885 'compat_HTMLParser',
2887 'compat_basestring',
2891 'compat_etree_fromstring',
2892 'compat_etree_register_namespace',
2893 'compat_expanduser',
2894 'compat_get_terminal_size',
2897 'compat_html_entities',
2898 'compat_html_entities_html5',
2899 'compat_http_client',
2900 'compat_http_server',
2902 'compat_itertools_count',
2904 'compat_numeric_types',
2910 'compat_shlex_quote',
2911 'compat_shlex_split',
2912 'compat_socket_create_connection',
2914 'compat_struct_pack',
2915 'compat_struct_unpack',
2916 'compat_subprocess_get_DEVNULL',
2917 'compat_tokenize_tokenize',
2918 'compat_urllib_error',
2919 'compat_urllib_parse',
2920 'compat_urllib_parse_unquote',
2921 'compat_urllib_parse_unquote_plus',
2922 'compat_urllib_parse_unquote_to_bytes',
2923 'compat_urllib_parse_urlencode',
2924 'compat_urllib_parse_urlparse',
2925 'compat_urllib_request',
2926 'compat_urllib_request_DataHandler',
2927 'compat_urllib_response',
2929 'compat_urlretrieve',
2930 'compat_xml_parse_error',
2932 'workaround_optparse_bug9161',