Jeff Vandrew Jr
6 years ago
133 changed files with 184354 additions and 0 deletions
@ -0,0 +1,26 @@ |
|||
#!/bin/bash
#
# backup.sh - stop BTCPay, archive /var/lib/docker, restart BTCPay, then run
# the uploader for the configured backup provider.
#
# Usage (as root, from the directory that contains dropbox-script/):
#   . ./backup.sh
#
# Environment:
#   BACKUP_PROVIDER  Required. "Dropbox" is the only provider handled here.
#
# The script insists on being sourced. Once sourcing is confirmed, every abort
# path uses `return` instead of `exit`, because `exit` inside a sourced file
# terminates the caller's interactive shell.

if [ "$(id -u)" != "0" ]; then
  echo "This script must be run as root."
  echo "Use the command 'sudo su -' (include the trailing hyphen) and try again"
  # `return` only works when sourced; when executed it fails and we fall
  # through to `exit`.
  return 1 2>/dev/null || exit 1
fi

# `return` is legal only in a sourced file (or a function), so the subshell
# succeeds exactly when this file is being sourced. The `sourced` variable is
# left in the caller's environment, as before.
if (return 0 2>/dev/null); then
  sourced=1
else
  sourced=0
fi

if [ "$sourced" != 1 ]; then
  echo "You forgot the leading '.' followed by a space!"
  echo "Try this format: . ./backup.sh"
  exit 1
fi

if [ -z "${BACKUP_PROVIDER:-}" ]; then
  # Unset and empty are both treated as "not configured".
  echo "Set BACKUP_PROVIDER environmental variable and try again."
  return 1
elif [ "$BACKUP_PROVIDER" = "Dropbox" ]; then
  btcpay-down.sh

  # The blockchain data directory is excluded from the archive.
  tar -cvzf "$PWD/dropbox-script/backup.tar.gz" \
    --exclude='/var/lib/docker/volumes/generated_bitcoin_datadir/*' \
    /var/lib/docker

  # Bring the services back up regardless of how tar finished.
  btcpay-up.sh

  # Do not run the uploader or delete anything from the wrong directory.
  cd dropbox-script || return 1

  # Remove the local archive only after the uploader reports success.
  ./dropbox-script && rm backup.tar.gz
else
  echo "Unsupported BACKUP_PROVIDER '$BACKUP_PROVIDER'. Only 'Dropbox' is supported."
  return 1
fi
@ -0,0 +1,23 @@ |
|||
MIT License |
|||
|
|||
Copyright (c) 2019 Jeff Vandrew Jr |
|||
|
|||
All subdirectories are dependencies and covered by the licenses of their various projects. |
|||
|
|||
Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
of this software and associated documentation files (the "Software"), to deal |
|||
in the Software without restriction, including without limitation the rights |
|||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
copies of the Software, and to permit persons to whom the Software is |
|||
furnished to do so, subject to the following conditions: |
|||
|
|||
The above copyright notice and this permission notice shall be included in all |
|||
copies or substantial portions of the Software. |
|||
|
|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
SOFTWARE. |
@ -0,0 +1,3 @@ |
|||
from .core import where |
|||
|
|||
# Version identifier exported by this package (looks like a YYYY.MM.DD date stamp — confirm).
__version__ = "2019.03.09" |
@ -0,0 +1,2 @@ |
|||
from certifi import where |
|||
# Print the value returned by where() to standard output.
print(where()) |
File diff suppressed because it is too large
@ -0,0 +1,15 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
certifi.py |
|||
~~~~~~~~~~ |
|||
|
|||
This module returns the installation location of cacert.pem. |
|||
""" |
|||
import os |
|||
|
|||
|
|||
def where():
    """Return the path of the ``cacert.pem`` file that sits next to this module."""
    return os.path.join(os.path.dirname(__file__), 'cacert.pem')
@ -0,0 +1,39 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
|
|||
from .compat import PY2, PY3 |
|||
from .universaldetector import UniversalDetector |
|||
from .version import __version__, VERSION |
|||
|
|||
|
|||
def detect(byte_str):
    """
    Guess the character encoding of a byte sequence.

    :param byte_str: The raw bytes to analyse.
    :type byte_str: ``bytes`` or ``bytearray``
    :returns: The value produced by ``UniversalDetector.close()`` after the
              whole input has been fed to a fresh detector.
    :raises TypeError: If ``byte_str`` is neither ``bytes`` nor ``bytearray``.
    """
    if isinstance(byte_str, bytearray):
        payload = byte_str
    elif isinstance(byte_str, bytes):
        # Immutable bytes are converted to a bytearray before being fed in.
        payload = bytearray(byte_str)
    else:
        raise TypeError('Expected object of type bytes or bytearray, got: '
                        '{0}'.format(type(byte_str)))

    universal = UniversalDetector()
    universal.feed(payload)
    return universal.close()
@ -0,0 +1,386 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
# Big5 frequency table |
|||
# by Taiwan's Mandarin Promotion Council |
|||
# <http://www.edu.tw:81/mandr/> |
|||
# |
|||
# 128 --> 0.42261 |
|||
# 256 --> 0.57851 |
|||
# 512 --> 0.74851 |
|||
# 1024 --> 0.89384 |
|||
# 2048 --> 0.97583 |
|||
# |
|||
# Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98 |
|||
# Random Distribution Ratio = 512/(5401-512)=0.105 |
|||
# |
|||
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR |
|||
|
|||
# Typical distribution ratio for Big5; per the notes above it is about 25% of the ideal ratio (2.98).
BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75 |
|||
|
|||
#Char to FreqOrder table |
|||
# Declared size of the Big5 frequency table — presumably the number of frequency orders considered; verify against the code that consumes it.
BIG5_TABLE_SIZE = 5376 |
|||
|
|||
BIG5_CHAR_TO_FREQ_ORDER = ( |
|||
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16 |
|||
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32 |
|||
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48 |
|||
63,5010,5011, 317,1614, 75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, # 64 |
|||
3682, 3, 10,3973,1471, 29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, # 80 |
|||
4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947, 34,3556,3204, 64, 604, # 96 |
|||
5015,2501,1977,1978, 155,1991, 645, 641,1606,5016,3452, 337, 72, 406,5017, 80, # 112 |
|||
630, 238,3205,1509, 263, 939,1092,2654, 756,1440,1094,3453, 449, 69,2987, 591, # 128 |
|||
179,2096, 471, 115,2035,1844, 60, 50,2988, 134, 806,1869, 734,2036,3454, 180, # 144 |
|||
995,1607, 156, 537,2907, 688,5018, 319,1305, 779,2145, 514,2379, 298,4512, 359, # 160 |
|||
2502, 90,2716,1338, 663, 11, 906,1099,2553, 20,2441, 182, 532,1716,5019, 732, # 176 |
|||
1376,4204,1311,1420,3206, 25,2317,1056, 113, 399, 382,1950, 242,3455,2474, 529, # 192 |
|||
3276, 475,1447,3683,5020, 117, 21, 656, 810,1297,2300,2334,3557,5021, 126,4205, # 208 |
|||
706, 456, 150, 613,4513, 71,1118,2037,4206, 145,3092, 85, 835, 486,2115,1246, # 224 |
|||
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,5022,2128,2359, 347,3815, 221, # 240 |
|||
3558,3135,5023,1956,1153,4207, 83, 296,1199,3093, 192, 624, 93,5024, 822,1898, # 256 |
|||
2823,3136, 795,2065, 991,1554,1542,1592, 27, 43,2867, 859, 139,1456, 860,4514, # 272 |
|||
437, 712,3974, 164,2397,3137, 695, 211,3037,2097, 195,3975,1608,3559,3560,3684, # 288 |
|||
3976, 234, 811,2989,2098,3977,2233,1441,3561,1615,2380, 668,2077,1638, 305, 228, # 304 |
|||
1664,4515, 467, 415,5025, 262,2099,1593, 239, 108, 300, 200,1033, 512,1247,2078, # 320 |
|||
5026,5027,2176,3207,3685,2682, 593, 845,1062,3277, 88,1723,2038,3978,1951, 212, # 336 |
|||
266, 152, 149, 468,1899,4208,4516, 77, 187,5028,3038, 37, 5,2990,5029,3979, # 352 |
|||
5030,5031, 39,2524,4517,2908,3208,2079, 55, 148, 74,4518, 545, 483,1474,1029, # 368 |
|||
1665, 217,1870,1531,3138,1104,2655,4209, 24, 172,3562, 900,3980,3563,3564,4519, # 384 |
|||
32,1408,2824,1312, 329, 487,2360,2251,2717, 784,2683, 4,3039,3351,1427,1789, # 400 |
|||
188, 109, 499,5032,3686,1717,1790, 888,1217,3040,4520,5033,3565,5034,3352,1520, # 416 |
|||
3687,3981, 196,1034, 775,5035,5036, 929,1816, 249, 439, 38,5037,1063,5038, 794, # 432 |
|||
3982,1435,2301, 46, 178,3278,2066,5039,2381,5040, 214,1709,4521, 804, 35, 707, # 448 |
|||
324,3688,1601,2554, 140, 459,4210,5041,5042,1365, 839, 272, 978,2262,2580,3456, # 464 |
|||
2129,1363,3689,1423, 697, 100,3094, 48, 70,1231, 495,3139,2196,5043,1294,5044, # 480 |
|||
2080, 462, 586,1042,3279, 853, 256, 988, 185,2382,3457,1698, 434,1084,5045,3458, # 496 |
|||
314,2625,2788,4522,2335,2336, 569,2285, 637,1817,2525, 757,1162,1879,1616,3459, # 512 |
|||
287,1577,2116, 768,4523,1671,2868,3566,2526,1321,3816, 909,2418,5046,4211, 933, # 528 |
|||
3817,4212,2053,2361,1222,4524, 765,2419,1322, 786,4525,5047,1920,1462,1677,2909, # 544 |
|||
1699,5048,4526,1424,2442,3140,3690,2600,3353,1775,1941,3460,3983,4213, 309,1369, # 560 |
|||
1130,2825, 364,2234,1653,1299,3984,3567,3985,3986,2656, 525,1085,3041, 902,2001, # 576 |
|||
1475, 964,4527, 421,1845,1415,1057,2286, 940,1364,3141, 376,4528,4529,1381, 7, # 592 |
|||
2527, 983,2383, 336,1710,2684,1846, 321,3461, 559,1131,3042,2752,1809,1132,1313, # 608 |
|||
265,1481,1858,5049, 352,1203,2826,3280, 167,1089, 420,2827, 776, 792,1724,3568, # 624 |
|||
4214,2443,3281,5050,4215,5051, 446, 229, 333,2753, 901,3818,1200,1557,4530,2657, # 640 |
|||
1921, 395,2754,2685,3819,4216,1836, 125, 916,3209,2626,4531,5052,5053,3820,5054, # 656 |
|||
5055,5056,4532,3142,3691,1133,2555,1757,3462,1510,2318,1409,3569,5057,2146, 438, # 672 |
|||
2601,2910,2384,3354,1068, 958,3043, 461, 311,2869,2686,4217,1916,3210,4218,1979, # 688 |
|||
383, 750,2755,2627,4219, 274, 539, 385,1278,1442,5058,1154,1965, 384, 561, 210, # 704 |
|||
98,1295,2556,3570,5059,1711,2420,1482,3463,3987,2911,1257, 129,5060,3821, 642, # 720 |
|||
523,2789,2790,2658,5061, 141,2235,1333, 68, 176, 441, 876, 907,4220, 603,2602, # 736 |
|||
710, 171,3464, 404, 549, 18,3143,2398,1410,3692,1666,5062,3571,4533,2912,4534, # 752 |
|||
5063,2991, 368,5064, 146, 366, 99, 871,3693,1543, 748, 807,1586,1185, 22,2263, # 768 |
|||
379,3822,3211,5065,3212, 505,1942,2628,1992,1382,2319,5066, 380,2362, 218, 702, # 784 |
|||
1818,1248,3465,3044,3572,3355,3282,5067,2992,3694, 930,3283,3823,5068, 59,5069, # 800 |
|||
585, 601,4221, 497,3466,1112,1314,4535,1802,5070,1223,1472,2177,5071, 749,1837, # 816 |
|||
690,1900,3824,1773,3988,1476, 429,1043,1791,2236,2117, 917,4222, 447,1086,1629, # 832 |
|||
5072, 556,5073,5074,2021,1654, 844,1090, 105, 550, 966,1758,2828,1008,1783, 686, # 848 |
|||
1095,5075,2287, 793,1602,5076,3573,2603,4536,4223,2948,2302,4537,3825, 980,2503, # 864 |
|||
544, 353, 527,4538, 908,2687,2913,5077, 381,2629,1943,1348,5078,1341,1252, 560, # 880 |
|||
3095,5079,3467,2870,5080,2054, 973, 886,2081, 143,4539,5081,5082, 157,3989, 496, # 896 |
|||
4224, 57, 840, 540,2039,4540,4541,3468,2118,1445, 970,2264,1748,1966,2082,4225, # 912 |
|||
3144,1234,1776,3284,2829,3695, 773,1206,2130,1066,2040,1326,3990,1738,1725,4226, # 928 |
|||
279,3145, 51,1544,2604, 423,1578,2131,2067, 173,4542,1880,5083,5084,1583, 264, # 944 |
|||
610,3696,4543,2444, 280, 154,5085,5086,5087,1739, 338,1282,3096, 693,2871,1411, # 960 |
|||
1074,3826,2445,5088,4544,5089,5090,1240, 952,2399,5091,2914,1538,2688, 685,1483, # 976 |
|||
4227,2475,1436, 953,4228,2055,4545, 671,2400, 79,4229,2446,3285, 608, 567,2689, # 992 |
|||
3469,4230,4231,1691, 393,1261,1792,2401,5092,4546,5093,5094,5095,5096,1383,1672, # 1008 |
|||
3827,3213,1464, 522,1119, 661,1150, 216, 675,4547,3991,1432,3574, 609,4548,2690, # 1024 |
|||
2402,5097,5098,5099,4232,3045, 0,5100,2476, 315, 231,2447, 301,3356,4549,2385, # 1040 |
|||
5101, 233,4233,3697,1819,4550,4551,5102, 96,1777,1315,2083,5103, 257,5104,1810, # 1056 |
|||
3698,2718,1139,1820,4234,2022,1124,2164,2791,1778,2659,5105,3097, 363,1655,3214, # 1072 |
|||
5106,2993,5107,5108,5109,3992,1567,3993, 718, 103,3215, 849,1443, 341,3357,2949, # 1088 |
|||
1484,5110,1712, 127, 67, 339,4235,2403, 679,1412, 821,5111,5112, 834, 738, 351, # 1104 |
|||
2994,2147, 846, 235,1497,1881, 418,1993,3828,2719, 186,1100,2148,2756,3575,1545, # 1120 |
|||
1355,2950,2872,1377, 583,3994,4236,2581,2995,5113,1298,3699,1078,2557,3700,2363, # 1136 |
|||
78,3829,3830, 267,1289,2100,2002,1594,4237, 348, 369,1274,2197,2178,1838,4552, # 1152 |
|||
1821,2830,3701,2757,2288,2003,4553,2951,2758, 144,3358, 882,4554,3995,2759,3470, # 1168 |
|||
4555,2915,5114,4238,1726, 320,5115,3996,3046, 788,2996,5116,2831,1774,1327,2873, # 1184 |
|||
3997,2832,5117,1306,4556,2004,1700,3831,3576,2364,2660, 787,2023, 506, 824,3702, # 1200 |
|||
534, 323,4557,1044,3359,2024,1901, 946,3471,5118,1779,1500,1678,5119,1882,4558, # 1216 |
|||
165, 243,4559,3703,2528, 123, 683,4239, 764,4560, 36,3998,1793, 589,2916, 816, # 1232 |
|||
626,1667,3047,2237,1639,1555,1622,3832,3999,5120,4000,2874,1370,1228,1933, 891, # 1248 |
|||
2084,2917, 304,4240,5121, 292,2997,2720,3577, 691,2101,4241,1115,4561, 118, 662, # 1264 |
|||
5122, 611,1156, 854,2386,1316,2875, 2, 386, 515,2918,5123,5124,3286, 868,2238, # 1280 |
|||
1486, 855,2661, 785,2216,3048,5125,1040,3216,3578,5126,3146, 448,5127,1525,5128, # 1296 |
|||
2165,4562,5129,3833,5130,4242,2833,3579,3147, 503, 818,4001,3148,1568, 814, 676, # 1312 |
|||
1444, 306,1749,5131,3834,1416,1030, 197,1428, 805,2834,1501,4563,5132,5133,5134, # 1328 |
|||
1994,5135,4564,5136,5137,2198, 13,2792,3704,2998,3149,1229,1917,5138,3835,2132, # 1344 |
|||
5139,4243,4565,2404,3580,5140,2217,1511,1727,1120,5141,5142, 646,3836,2448, 307, # 1360 |
|||
5143,5144,1595,3217,5145,5146,5147,3705,1113,1356,4002,1465,2529,2530,5148, 519, # 1376 |
|||
5149, 128,2133, 92,2289,1980,5150,4003,1512, 342,3150,2199,5151,2793,2218,1981, # 1392 |
|||
3360,4244, 290,1656,1317, 789, 827,2365,5152,3837,4566, 562, 581,4004,5153, 401, # 1408 |
|||
4567,2252, 94,4568,5154,1399,2794,5155,1463,2025,4569,3218,1944,5156, 828,1105, # 1424 |
|||
4245,1262,1394,5157,4246, 605,4570,5158,1784,2876,5159,2835, 819,2102, 578,2200, # 1440 |
|||
2952,5160,1502, 436,3287,4247,3288,2836,4005,2919,3472,3473,5161,2721,2320,5162, # 1456 |
|||
5163,2337,2068, 23,4571, 193, 826,3838,2103, 699,1630,4248,3098, 390,1794,1064, # 1472 |
|||
3581,5164,1579,3099,3100,1400,5165,4249,1839,1640,2877,5166,4572,4573, 137,4250, # 1488 |
|||
598,3101,1967, 780, 104, 974,2953,5167, 278, 899, 253, 402, 572, 504, 493,1339, # 1504 |
|||
5168,4006,1275,4574,2582,2558,5169,3706,3049,3102,2253, 565,1334,2722, 863, 41, # 1520 |
|||
5170,5171,4575,5172,1657,2338, 19, 463,2760,4251, 606,5173,2999,3289,1087,2085, # 1536 |
|||
1323,2662,3000,5174,1631,1623,1750,4252,2691,5175,2878, 791,2723,2663,2339, 232, # 1552 |
|||
2421,5176,3001,1498,5177,2664,2630, 755,1366,3707,3290,3151,2026,1609, 119,1918, # 1568 |
|||
3474, 862,1026,4253,5178,4007,3839,4576,4008,4577,2265,1952,2477,5179,1125, 817, # 1584 |
|||
4254,4255,4009,1513,1766,2041,1487,4256,3050,3291,2837,3840,3152,5180,5181,1507, # 1600 |
|||
5182,2692, 733, 40,1632,1106,2879, 345,4257, 841,2531, 230,4578,3002,1847,3292, # 1616 |
|||
3475,5183,1263, 986,3476,5184, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562, # 1632 |
|||
4010,4011,2954, 967,2761,2665,1349, 592,2134,1692,3361,3003,1995,4258,1679,4012, # 1648 |
|||
1902,2188,5185, 739,3708,2724,1296,1290,5186,4259,2201,2202,1922,1563,2605,2559, # 1664 |
|||
1871,2762,3004,5187, 435,5188, 343,1108, 596, 17,1751,4579,2239,3477,3709,5189, # 1680 |
|||
4580, 294,3582,2955,1693, 477, 979, 281,2042,3583, 643,2043,3710,2631,2795,2266, # 1696 |
|||
1031,2340,2135,2303,3584,4581, 367,1249,2560,5190,3585,5191,4582,1283,3362,2005, # 1712 |
|||
240,1762,3363,4583,4584, 836,1069,3153, 474,5192,2149,2532, 268,3586,5193,3219, # 1728 |
|||
1521,1284,5194,1658,1546,4260,5195,3587,3588,5196,4261,3364,2693,1685,4262, 961, # 1744 |
|||
1673,2632, 190,2006,2203,3841,4585,4586,5197, 570,2504,3711,1490,5198,4587,2633, # 1760 |
|||
3293,1957,4588, 584,1514, 396,1045,1945,5199,4589,1968,2449,5200,5201,4590,4013, # 1776 |
|||
619,5202,3154,3294, 215,2007,2796,2561,3220,4591,3221,4592, 763,4263,3842,4593, # 1792 |
|||
5203,5204,1958,1767,2956,3365,3712,1174, 452,1477,4594,3366,3155,5205,2838,1253, # 1808 |
|||
2387,2189,1091,2290,4264, 492,5206, 638,1169,1825,2136,1752,4014, 648, 926,1021, # 1824 |
|||
1324,4595, 520,4596, 997, 847,1007, 892,4597,3843,2267,1872,3713,2405,1785,4598, # 1840 |
|||
1953,2957,3103,3222,1728,4265,2044,3714,4599,2008,1701,3156,1551, 30,2268,4266, # 1856 |
|||
5207,2027,4600,3589,5208, 501,5209,4267, 594,3478,2166,1822,3590,3479,3591,3223, # 1872 |
|||
829,2839,4268,5210,1680,3157,1225,4269,5211,3295,4601,4270,3158,2341,5212,4602, # 1888 |
|||
4271,5213,4015,4016,5214,1848,2388,2606,3367,5215,4603, 374,4017, 652,4272,4273, # 1904 |
|||
375,1140, 798,5216,5217,5218,2366,4604,2269, 546,1659, 138,3051,2450,4605,5219, # 1920 |
|||
2254, 612,1849, 910, 796,3844,1740,1371, 825,3845,3846,5220,2920,2562,5221, 692, # 1936 |
|||
444,3052,2634, 801,4606,4274,5222,1491, 244,1053,3053,4275,4276, 340,5223,4018, # 1952 |
|||
1041,3005, 293,1168, 87,1357,5224,1539, 959,5225,2240, 721, 694,4277,3847, 219, # 1968 |
|||
1478, 644,1417,3368,2666,1413,1401,1335,1389,4019,5226,5227,3006,2367,3159,1826, # 1984 |
|||
730,1515, 184,2840, 66,4607,5228,1660,2958, 246,3369, 378,1457, 226,3480, 975, # 2000 |
|||
4020,2959,1264,3592, 674, 696,5229, 163,5230,1141,2422,2167, 713,3593,3370,4608, # 2016 |
|||
4021,5231,5232,1186, 15,5233,1079,1070,5234,1522,3224,3594, 276,1050,2725, 758, # 2032 |
|||
1126, 653,2960,3296,5235,2342, 889,3595,4022,3104,3007, 903,1250,4609,4023,3481, # 2048 |
|||
3596,1342,1681,1718, 766,3297, 286, 89,2961,3715,5236,1713,5237,2607,3371,3008, # 2064 |
|||
5238,2962,2219,3225,2880,5239,4610,2505,2533, 181, 387,1075,4024, 731,2190,3372, # 2080 |
|||
5240,3298, 310, 313,3482,2304, 770,4278, 54,3054, 189,4611,3105,3848,4025,5241, # 2096 |
|||
1230,1617,1850, 355,3597,4279,4612,3373, 111,4280,3716,1350,3160,3483,3055,4281, # 2112 |
|||
2150,3299,3598,5242,2797,4026,4027,3009, 722,2009,5243,1071, 247,1207,2343,2478, # 2128 |
|||
1378,4613,2010, 864,1437,1214,4614, 373,3849,1142,2220, 667,4615, 442,2763,2563, # 2144 |
|||
3850,4028,1969,4282,3300,1840, 837, 170,1107, 934,1336,1883,5244,5245,2119,4283, # 2160 |
|||
2841, 743,1569,5246,4616,4284, 582,2389,1418,3484,5247,1803,5248, 357,1395,1729, # 2176 |
|||
3717,3301,2423,1564,2241,5249,3106,3851,1633,4617,1114,2086,4285,1532,5250, 482, # 2192 |
|||
2451,4618,5251,5252,1492, 833,1466,5253,2726,3599,1641,2842,5254,1526,1272,3718, # 2208 |
|||
4286,1686,1795, 416,2564,1903,1954,1804,5255,3852,2798,3853,1159,2321,5256,2881, # 2224 |
|||
4619,1610,1584,3056,2424,2764, 443,3302,1163,3161,5257,5258,4029,5259,4287,2506, # 2240 |
|||
3057,4620,4030,3162,2104,1647,3600,2011,1873,4288,5260,4289, 431,3485,5261, 250, # 2256 |
|||
97, 81,4290,5262,1648,1851,1558, 160, 848,5263, 866, 740,1694,5264,2204,2843, # 2272 |
|||
3226,4291,4621,3719,1687, 950,2479, 426, 469,3227,3720,3721,4031,5265,5266,1188, # 2288 |
|||
424,1996, 861,3601,4292,3854,2205,2694, 168,1235,3602,4293,5267,2087,1674,4622, # 2304 |
|||
3374,3303, 220,2565,1009,5268,3855, 670,3010, 332,1208, 717,5269,5270,3603,2452, # 2320 |
|||
4032,3375,5271, 513,5272,1209,2882,3376,3163,4623,1080,5273,5274,5275,5276,2534, # 2336 |
|||
3722,3604, 815,1587,4033,4034,5277,3605,3486,3856,1254,4624,1328,3058,1390,4035, # 2352 |
|||
1741,4036,3857,4037,5278, 236,3858,2453,3304,5279,5280,3723,3859,1273,3860,4625, # 2368 |
|||
5281, 308,5282,4626, 245,4627,1852,2480,1307,2583, 430, 715,2137,2454,5283, 270, # 2384 |
|||
199,2883,4038,5284,3606,2727,1753, 761,1754, 725,1661,1841,4628,3487,3724,5285, # 2400 |
|||
5286, 587, 14,3305, 227,2608, 326, 480,2270, 943,2765,3607, 291, 650,1884,5287, # 2416 |
|||
1702,1226, 102,1547, 62,3488, 904,4629,3489,1164,4294,5288,5289,1224,1548,2766, # 2432 |
|||
391, 498,1493,5290,1386,1419,5291,2056,1177,4630, 813, 880,1081,2368, 566,1145, # 2448 |
|||
4631,2291,1001,1035,2566,2609,2242, 394,1286,5292,5293,2069,5294, 86,1494,1730, # 2464 |
|||
4039, 491,1588, 745, 897,2963, 843,3377,4040,2767,2884,3306,1768, 998,2221,2070, # 2480 |
|||
397,1827,1195,1970,3725,3011,3378, 284,5295,3861,2507,2138,2120,1904,5296,4041, # 2496 |
|||
2151,4042,4295,1036,3490,1905, 114,2567,4296, 209,1527,5297,5298,2964,2844,2635, # 2512 |
|||
2390,2728,3164, 812,2568,5299,3307,5300,1559, 737,1885,3726,1210, 885, 28,2695, # 2528 |
|||
3608,3862,5301,4297,1004,1780,4632,5302, 346,1982,2222,2696,4633,3863,1742, 797, # 2544 |
|||
1642,4043,1934,1072,1384,2152, 896,4044,3308,3727,3228,2885,3609,5303,2569,1959, # 2560 |
|||
4634,2455,1786,5304,5305,5306,4045,4298,1005,1308,3728,4299,2729,4635,4636,1528, # 2576 |
|||
2610, 161,1178,4300,1983, 987,4637,1101,4301, 631,4046,1157,3229,2425,1343,1241, # 2592 |
|||
1016,2243,2570, 372, 877,2344,2508,1160, 555,1935, 911,4047,5307, 466,1170, 169, # 2608 |
|||
1051,2921,2697,3729,2481,3012,1182,2012,2571,1251,2636,5308, 992,2345,3491,1540, # 2624 |
|||
2730,1201,2071,2406,1997,2482,5309,4638, 528,1923,2191,1503,1874,1570,2369,3379, # 2640 |
|||
3309,5310, 557,1073,5311,1828,3492,2088,2271,3165,3059,3107, 767,3108,2799,4639, # 2656 |
|||
1006,4302,4640,2346,1267,2179,3730,3230, 778,4048,3231,2731,1597,2667,5312,4641, # 2672 |
|||
5313,3493,5314,5315,5316,3310,2698,1433,3311, 131, 95,1504,4049, 723,4303,3166, # 2688 |
|||
1842,3610,2768,2192,4050,2028,2105,3731,5317,3013,4051,1218,5318,3380,3232,4052, # 2704 |
|||
4304,2584, 248,1634,3864, 912,5319,2845,3732,3060,3865, 654, 53,5320,3014,5321, # 2720 |
|||
1688,4642, 777,3494,1032,4053,1425,5322, 191, 820,2121,2846, 971,4643, 931,3233, # 2736 |
|||
135, 664, 783,3866,1998, 772,2922,1936,4054,3867,4644,2923,3234, 282,2732, 640, # 2752 |
|||
1372,3495,1127, 922, 325,3381,5323,5324, 711,2045,5325,5326,4055,2223,2800,1937, # 2768 |
|||
4056,3382,2224,2255,3868,2305,5327,4645,3869,1258,3312,4057,3235,2139,2965,4058, # 2784 |
|||
4059,5328,2225, 258,3236,4646, 101,1227,5329,3313,1755,5330,1391,3314,5331,2924, # 2800 |
|||
2057, 893,5332,5333,5334,1402,4305,2347,5335,5336,3237,3611,5337,5338, 878,1325, # 2816 |
|||
1781,2801,4647, 259,1385,2585, 744,1183,2272,4648,5339,4060,2509,5340, 684,1024, # 2832 |
|||
4306,5341, 472,3612,3496,1165,3315,4061,4062, 322,2153, 881, 455,1695,1152,1340, # 2848 |
|||
660, 554,2154,4649,1058,4650,4307, 830,1065,3383,4063,4651,1924,5342,1703,1919, # 2864 |
|||
5343, 932,2273, 122,5344,4652, 947, 677,5345,3870,2637, 297,1906,1925,2274,4653, # 2880 |
|||
2322,3316,5346,5347,4308,5348,4309, 84,4310, 112, 989,5349, 547,1059,4064, 701, # 2896 |
|||
3613,1019,5350,4311,5351,3497, 942, 639, 457,2306,2456, 993,2966, 407, 851, 494, # 2912 |
|||
4654,3384, 927,5352,1237,5353,2426,3385, 573,4312, 680, 921,2925,1279,1875, 285, # 2928 |
|||
790,1448,1984, 719,2168,5354,5355,4655,4065,4066,1649,5356,1541, 563,5357,1077, # 2944 |
|||
5358,3386,3061,3498, 511,3015,4067,4068,3733,4069,1268,2572,3387,3238,4656,4657, # 2960 |
|||
5359, 535,1048,1276,1189,2926,2029,3167,1438,1373,2847,2967,1134,2013,5360,4313, # 2976 |
|||
1238,2586,3109,1259,5361, 700,5362,2968,3168,3734,4314,5363,4315,1146,1876,1907, # 2992 |
|||
4658,2611,4070, 781,2427, 132,1589, 203, 147, 273,2802,2407, 898,1787,2155,4071, # 3008 |
|||
4072,5364,3871,2803,5365,5366,4659,4660,5367,3239,5368,1635,3872, 965,5369,1805, # 3024 |
|||
2699,1516,3614,1121,1082,1329,3317,4073,1449,3873, 65,1128,2848,2927,2769,1590, # 3040 |
|||
3874,5370,5371, 12,2668, 45, 976,2587,3169,4661, 517,2535,1013,1037,3240,5372, # 3056 |
|||
3875,2849,5373,3876,5374,3499,5375,2612, 614,1999,2323,3877,3110,2733,2638,5376, # 3072 |
|||
2588,4316, 599,1269,5377,1811,3735,5378,2700,3111, 759,1060, 489,1806,3388,3318, # 3088 |
|||
1358,5379,5380,2391,1387,1215,2639,2256, 490,5381,5382,4317,1759,2392,2348,5383, # 3104 |
|||
4662,3878,1908,4074,2640,1807,3241,4663,3500,3319,2770,2349, 874,5384,5385,3501, # 3120 |
|||
3736,1859, 91,2928,3737,3062,3879,4664,5386,3170,4075,2669,5387,3502,1202,1403, # 3136 |
|||
3880,2969,2536,1517,2510,4665,3503,2511,5388,4666,5389,2701,1886,1495,1731,4076, # 3152 |
|||
2370,4667,5390,2030,5391,5392,4077,2702,1216, 237,2589,4318,2324,4078,3881,4668, # 3168 |
|||
4669,2703,3615,3504, 445,4670,5393,5394,5395,5396,2771, 61,4079,3738,1823,4080, # 3184 |
|||
5397, 687,2046, 935, 925, 405,2670, 703,1096,1860,2734,4671,4081,1877,1367,2704, # 3200 |
|||
3389, 918,2106,1782,2483, 334,3320,1611,1093,4672, 564,3171,3505,3739,3390, 945, # 3216 |
|||
2641,2058,4673,5398,1926, 872,4319,5399,3506,2705,3112, 349,4320,3740,4082,4674, # 3232 |
|||
3882,4321,3741,2156,4083,4675,4676,4322,4677,2408,2047, 782,4084, 400, 251,4323, # 3248 |
|||
1624,5400,5401, 277,3742, 299,1265, 476,1191,3883,2122,4324,4325,1109, 205,5402, # 3264 |
|||
2590,1000,2157,3616,1861,5403,5404,5405,4678,5406,4679,2573, 107,2484,2158,4085, # 3280 |
|||
3507,3172,5407,1533, 541,1301, 158, 753,4326,2886,3617,5408,1696, 370,1088,4327, # 3296 |
|||
4680,3618, 579, 327, 440, 162,2244, 269,1938,1374,3508, 968,3063, 56,1396,3113, # 3312 |
|||
2107,3321,3391,5409,1927,2159,4681,3016,5410,3619,5411,5412,3743,4682,2485,5413, # 3328 |
|||
2804,5414,1650,4683,5415,2613,5416,5417,4086,2671,3392,1149,3393,4087,3884,4088, # 3344 |
|||
5418,1076, 49,5419, 951,3242,3322,3323, 450,2850, 920,5420,1812,2805,2371,4328, # 3360 |
|||
1909,1138,2372,3885,3509,5421,3243,4684,1910,1147,1518,2428,4685,3886,5422,4686, # 3376 |
|||
2393,2614, 260,1796,3244,5423,5424,3887,3324, 708,5425,3620,1704,5426,3621,1351, # 3392 |
|||
1618,3394,3017,1887, 944,4329,3395,4330,3064,3396,4331,5427,3744, 422, 413,1714, # 3408 |
|||
3325, 500,2059,2350,4332,2486,5428,1344,1911, 954,5429,1668,5430,5431,4089,2409, # 3424 |
|||
4333,3622,3888,4334,5432,2307,1318,2512,3114, 133,3115,2887,4687, 629, 31,2851, # 3440 |
|||
2706,3889,4688, 850, 949,4689,4090,2970,1732,2089,4335,1496,1853,5433,4091, 620, # 3456 |
|||
3245, 981,1242,3745,3397,1619,3746,1643,3326,2140,2457,1971,1719,3510,2169,5434, # 3472 |
|||
3246,5435,5436,3398,1829,5437,1277,4690,1565,2048,5438,1636,3623,3116,5439, 869, # 3488 |
|||
2852, 655,3890,3891,3117,4092,3018,3892,1310,3624,4691,5440,5441,5442,1733, 558, # 3504 |
|||
4692,3747, 335,1549,3065,1756,4336,3748,1946,3511,1830,1291,1192, 470,2735,2108, # 3520 |
|||
2806, 913,1054,4093,5443,1027,5444,3066,4094,4693, 982,2672,3399,3173,3512,3247, # 3536 |
|||
3248,1947,2807,5445, 571,4694,5446,1831,5447,3625,2591,1523,2429,5448,2090, 984, # 3552 |
|||
4695,3749,1960,5449,3750, 852, 923,2808,3513,3751, 969,1519, 999,2049,2325,1705, # 3568 |
|||
5450,3118, 615,1662, 151, 597,4095,2410,2326,1049, 275,4696,3752,4337, 568,3753, # 3584 |
|||
3626,2487,4338,3754,5451,2430,2275, 409,3249,5452,1566,2888,3514,1002, 769,2853, # 3600 |
|||
194,2091,3174,3755,2226,3327,4339, 628,1505,5453,5454,1763,2180,3019,4096, 521, # 3616 |
|||
1161,2592,1788,2206,2411,4697,4097,1625,4340,4341, 412, 42,3119, 464,5455,2642, # 3632 |
|||
4698,3400,1760,1571,2889,3515,2537,1219,2207,3893,2643,2141,2373,4699,4700,3328, # 3648 |
|||
1651,3401,3627,5456,5457,3628,2488,3516,5458,3756,5459,5460,2276,2092, 460,5461, # 3664 |
|||
4701,5462,3020, 962, 588,3629, 289,3250,2644,1116, 52,5463,3067,1797,5464,5465, # 3680 |
|||
5466,1467,5467,1598,1143,3757,4342,1985,1734,1067,4702,1280,3402, 465,4703,1572, # 3696 |
|||
510,5468,1928,2245,1813,1644,3630,5469,4704,3758,5470,5471,2673,1573,1534,5472, # 3712 |
|||
5473, 536,1808,1761,3517,3894,3175,2645,5474,5475,5476,4705,3518,2929,1912,2809, # 3728 |
|||
5477,3329,1122, 377,3251,5478, 360,5479,5480,4343,1529, 551,5481,2060,3759,1769, # 3744 |
|||
2431,5482,2930,4344,3330,3120,2327,2109,2031,4706,1404, 136,1468,1479, 672,1171, # 3760 |
|||
3252,2308, 271,3176,5483,2772,5484,2050, 678,2736, 865,1948,4707,5485,2014,4098, # 3776 |
|||
2971,5486,2737,2227,1397,3068,3760,4708,4709,1735,2931,3403,3631,5487,3895, 509, # 3792 |
|||
2854,2458,2890,3896,5488,5489,3177,3178,4710,4345,2538,4711,2309,1166,1010, 552, # 3808 |
|||
681,1888,5490,5491,2972,2973,4099,1287,1596,1862,3179, 358, 453, 736, 175, 478, # 3824 |
|||
1117, 905,1167,1097,5492,1854,1530,5493,1706,5494,2181,3519,2292,3761,3520,3632, # 3840 |
|||
4346,2093,4347,5495,3404,1193,2489,4348,1458,2193,2208,1863,1889,1421,3331,2932, # 3856 |
|||
3069,2182,3521, 595,2123,5496,4100,5497,5498,4349,1707,2646, 223,3762,1359, 751, # 3872 |
|||
3121, 183,3522,5499,2810,3021, 419,2374, 633, 704,3897,2394, 241,5500,5501,5502, # 3888 |
|||
838,3022,3763,2277,2773,2459,3898,1939,2051,4101,1309,3122,2246,1181,5503,1136, # 3904 |
|||
2209,3899,2375,1446,4350,2310,4712,5504,5505,4351,1055,2615, 484,3764,5506,4102, # 3920 |
|||
625,4352,2278,3405,1499,4353,4103,5507,4104,4354,3253,2279,2280,3523,5508,5509, # 3936 |
|||
2774, 808,2616,3765,3406,4105,4355,3123,2539, 526,3407,3900,4356, 955,5510,1620, # 3952 |
|||
4357,2647,2432,5511,1429,3766,1669,1832, 994, 928,5512,3633,1260,5513,5514,5515, # 3968 |
|||
1949,2293, 741,2933,1626,4358,2738,2460, 867,1184, 362,3408,1392,5516,5517,4106, # 3984 |
|||
4359,1770,1736,3254,2934,4713,4714,1929,2707,1459,1158,5518,3070,3409,2891,1292, # 4000 |
|||
1930,2513,2855,3767,1986,1187,2072,2015,2617,4360,5519,2574,2514,2170,3768,2490, # 4016 |
|||
3332,5520,3769,4715,5521,5522, 666,1003,3023,1022,3634,4361,5523,4716,1814,2257, # 4032 |
|||
574,3901,1603, 295,1535, 705,3902,4362, 283, 858, 417,5524,5525,3255,4717,4718, # 4048 |
|||
3071,1220,1890,1046,2281,2461,4107,1393,1599, 689,2575, 388,4363,5526,2491, 802, # 4064 |
|||
5527,2811,3903,2061,1405,2258,5528,4719,3904,2110,1052,1345,3256,1585,5529, 809, # 4080 |
|||
5530,5531,5532, 575,2739,3524, 956,1552,1469,1144,2328,5533,2329,1560,2462,3635, # 4096 |
|||
3257,4108, 616,2210,4364,3180,2183,2294,5534,1833,5535,3525,4720,5536,1319,3770, # 4112 |
|||
3771,1211,3636,1023,3258,1293,2812,5537,5538,5539,3905, 607,2311,3906, 762,2892, # 4128 |
|||
1439,4365,1360,4721,1485,3072,5540,4722,1038,4366,1450,2062,2648,4367,1379,4723, # 4144 |
|||
2593,5541,5542,4368,1352,1414,2330,2935,1172,5543,5544,3907,3908,4724,1798,1451, # 4160 |
|||
5545,5546,5547,5548,2936,4109,4110,2492,2351, 411,4111,4112,3637,3333,3124,4725, # 4176 |
|||
1561,2674,1452,4113,1375,5549,5550, 47,2974, 316,5551,1406,1591,2937,3181,5552, # 4192 |
|||
1025,2142,3125,3182, 354,2740, 884,2228,4369,2412, 508,3772, 726,3638, 996,2433, # 4208 |
|||
3639, 729,5553, 392,2194,1453,4114,4726,3773,5554,5555,2463,3640,2618,1675,2813, # 4224 |
|||
919,2352,2975,2353,1270,4727,4115, 73,5556,5557, 647,5558,3259,2856,2259,1550, # 4240 |
|||
1346,3024,5559,1332, 883,3526,5560,5561,5562,5563,3334,2775,5564,1212, 831,1347, # 4256 |
|||
4370,4728,2331,3909,1864,3073, 720,3910,4729,4730,3911,5565,4371,5566,5567,4731, # 4272 |
|||
5568,5569,1799,4732,3774,2619,4733,3641,1645,2376,4734,5570,2938, 669,2211,2675, # 4288 |
|||
2434,5571,2893,5572,5573,1028,3260,5574,4372,2413,5575,2260,1353,5576,5577,4735, # 4304 |
|||
3183, 518,5578,4116,5579,4373,1961,5580,2143,4374,5581,5582,3025,2354,2355,3912, # 4320 |
|||
516,1834,1454,4117,2708,4375,4736,2229,2620,1972,1129,3642,5583,2776,5584,2976, # 4336 |
|||
1422, 577,1470,3026,1524,3410,5585,5586, 432,4376,3074,3527,5587,2594,1455,2515, # 4352 |
|||
2230,1973,1175,5588,1020,2741,4118,3528,4737,5589,2742,5590,1743,1361,3075,3529, # 4368 |
|||
2649,4119,4377,4738,2295, 895, 924,4378,2171, 331,2247,3076, 166,1627,3077,1098, # 4384 |
|||
5591,1232,2894,2231,3411,4739, 657, 403,1196,2377, 542,3775,3412,1600,4379,3530, # 4400 |
|||
5592,4740,2777,3261, 576, 530,1362,4741,4742,2540,2676,3776,4120,5593, 842,3913, # 4416 |
|||
5594,2814,2032,1014,4121, 213,2709,3413, 665, 621,4380,5595,3777,2939,2435,5596, # 4432 |
|||
2436,3335,3643,3414,4743,4381,2541,4382,4744,3644,1682,4383,3531,1380,5597, 724, # 4448 |
|||
2282, 600,1670,5598,1337,1233,4745,3126,2248,5599,1621,4746,5600, 651,4384,5601, # 4464 |
|||
1612,4385,2621,5602,2857,5603,2743,2312,3078,5604, 716,2464,3079, 174,1255,2710, # 4480 |
|||
4122,3645, 548,1320,1398, 728,4123,1574,5605,1891,1197,3080,4124,5606,3081,3082, # 4496 |
|||
3778,3646,3779, 747,5607, 635,4386,4747,5608,5609,5610,4387,5611,5612,4748,5613, # 4512 |
|||
3415,4749,2437, 451,5614,3780,2542,2073,4388,2744,4389,4125,5615,1764,4750,5616, # 4528 |
|||
4390, 350,4751,2283,2395,2493,5617,4391,4126,2249,1434,4127, 488,4752, 458,4392, # 4544 |
|||
4128,3781, 771,1330,2396,3914,2576,3184,2160,2414,1553,2677,3185,4393,5618,2494, # 4560 |
|||
2895,2622,1720,2711,4394,3416,4753,5619,2543,4395,5620,3262,4396,2778,5621,2016, # 4576 |
|||
2745,5622,1155,1017,3782,3915,5623,3336,2313, 201,1865,4397,1430,5624,4129,5625, # 4592 |
|||
5626,5627,5628,5629,4398,1604,5630, 414,1866, 371,2595,4754,4755,3532,2017,3127, # 4608 |
|||
4756,1708, 960,4399, 887, 389,2172,1536,1663,1721,5631,2232,4130,2356,2940,1580, # 4624 |
|||
5632,5633,1744,4757,2544,4758,4759,5634,4760,5635,2074,5636,4761,3647,3417,2896, # 4640 |
|||
4400,5637,4401,2650,3418,2815, 673,2712,2465, 709,3533,4131,3648,4402,5638,1148, # 4656 |
|||
502, 634,5639,5640,1204,4762,3649,1575,4763,2623,3783,5641,3784,3128, 948,3263, # 4672 |
|||
121,1745,3916,1110,5642,4403,3083,2516,3027,4132,3785,1151,1771,3917,1488,4133, # 4688 |
|||
1987,5643,2438,3534,5644,5645,2094,5646,4404,3918,1213,1407,2816, 531,2746,2545, # 4704 |
|||
3264,1011,1537,4764,2779,4405,3129,1061,5647,3786,3787,1867,2897,5648,2018, 120, # 4720 |
|||
4406,4407,2063,3650,3265,2314,3919,2678,3419,1955,4765,4134,5649,3535,1047,2713, # 4736 |
|||
1266,5650,1368,4766,2858, 649,3420,3920,2546,2747,1102,2859,2679,5651,5652,2000, # 4752 |
|||
5653,1111,3651,2977,5654,2495,3921,3652,2817,1855,3421,3788,5655,5656,3422,2415, # 4768 |
|||
2898,3337,3266,3653,5657,2577,5658,3654,2818,4135,1460, 856,5659,3655,5660,2899, # 4784 |
|||
2978,5661,2900,3922,5662,4408, 632,2517, 875,3923,1697,3924,2296,5663,5664,4767, # 4800 |
|||
3028,1239, 580,4768,4409,5665, 914, 936,2075,1190,4136,1039,2124,5666,5667,5668, # 4816 |
|||
5669,3423,1473,5670,1354,4410,3925,4769,2173,3084,4137, 915,3338,4411,4412,3339, # 4832 |
|||
1605,1835,5671,2748, 398,3656,4413,3926,4138, 328,1913,2860,4139,3927,1331,4414, # 4848 |
|||
3029, 937,4415,5672,3657,4140,4141,3424,2161,4770,3425, 524, 742, 538,3085,1012, # 4864 |
|||
5673,5674,3928,2466,5675, 658,1103, 225,3929,5676,5677,4771,5678,4772,5679,3267, # 4880 |
|||
1243,5680,4142, 963,2250,4773,5681,2714,3658,3186,5682,5683,2596,2332,5684,4774, # 4896 |
|||
5685,5686,5687,3536, 957,3426,2547,2033,1931,2941,2467, 870,2019,3659,1746,2780, # 4912 |
|||
2781,2439,2468,5688,3930,5689,3789,3130,3790,3537,3427,3791,5690,1179,3086,5691, # 4928 |
|||
3187,2378,4416,3792,2548,3188,3131,2749,4143,5692,3428,1556,2549,2297, 977,2901, # 4944 |
|||
2034,4144,1205,3429,5693,1765,3430,3189,2125,1271, 714,1689,4775,3538,5694,2333, # 4960 |
|||
3931, 533,4417,3660,2184, 617,5695,2469,3340,3539,2315,5696,5697,3190,5698,5699, # 4976 |
|||
3932,1988, 618, 427,2651,3540,3431,5700,5701,1244,1690,5702,2819,4418,4776,5703, # 4992 |
|||
3541,4777,5704,2284,1576, 473,3661,4419,3432, 972,5705,3662,5706,3087,5707,5708, # 5008 |
|||
4778,4779,5709,3793,4145,4146,5710, 153,4780, 356,5711,1892,2902,4420,2144, 408, # 5024 |
|||
803,2357,5712,3933,5713,4421,1646,2578,2518,4781,4782,3934,5714,3935,4422,5715, # 5040 |
|||
2416,3433, 752,5716,5717,1962,3341,2979,5718, 746,3030,2470,4783,4423,3794, 698, # 5056 |
|||
4784,1893,4424,3663,2550,4785,3664,3936,5719,3191,3434,5720,1824,1302,4147,2715, # 5072 |
|||
3937,1974,4425,5721,4426,3192, 823,1303,1288,1236,2861,3542,4148,3435, 774,3938, # 5088 |
|||
5722,1581,4786,1304,2862,3939,4787,5723,2440,2162,1083,3268,4427,4149,4428, 344, # 5104 |
|||
1173, 288,2316, 454,1683,5724,5725,1461,4788,4150,2597,5726,5727,4789, 985, 894, # 5120 |
|||
5728,3436,3193,5729,1914,2942,3795,1989,5730,2111,1975,5731,4151,5732,2579,1194, # 5136 |
|||
425,5733,4790,3194,1245,3796,4429,5734,5735,2863,5736, 636,4791,1856,3940, 760, # 5152 |
|||
1800,5737,4430,2212,1508,4792,4152,1894,1684,2298,5738,5739,4793,4431,4432,2213, # 5168 |
|||
479,5740,5741, 832,5742,4153,2496,5743,2980,2497,3797, 990,3132, 627,1815,2652, # 5184 |
|||
4433,1582,4434,2126,2112,3543,4794,5744, 799,4435,3195,5745,4795,2113,1737,3031, # 5200 |
|||
1018, 543, 754,4436,3342,1676,4796,4797,4154,4798,1489,5746,3544,5747,2624,2903, # 5216 |
|||
4155,5748,5749,2981,5750,5751,5752,5753,3196,4799,4800,2185,1722,5754,3269,3270, # 5232 |
|||
1843,3665,1715, 481, 365,1976,1857,5755,5756,1963,2498,4801,5757,2127,3666,3271, # 5248 |
|||
433,1895,2064,2076,5758, 602,2750,5759,5760,5761,5762,5763,3032,1628,3437,5764, # 5264 |
|||
3197,4802,4156,2904,4803,2519,5765,2551,2782,5766,5767,5768,3343,4804,2905,5769, # 5280 |
|||
4805,5770,2864,4806,4807,1221,2982,4157,2520,5771,5772,5773,1868,1990,5774,5775, # 5296 |
|||
5776,1896,5777,5778,4808,1897,4158, 318,5779,2095,4159,4437,5780,5781, 485,5782, # 5312 |
|||
938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328 |
|||
3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344 |
|||
890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360 |
|||
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376 |
|||
) |
|||
|
@ -0,0 +1,47 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .mbcharsetprober import MultiByteCharSetProber |
|||
from .codingstatemachine import CodingStateMachine |
|||
from .chardistribution import Big5DistributionAnalysis |
|||
from .mbcssm import BIG5_SM_MODEL |
|||
|
|||
|
|||
class Big5Prober(MultiByteCharSetProber):
    """Multi-byte prober configured for the Big5 encoding.

    Pairs the Big5 coding state machine model with the Big5 character
    distribution analyser and then resets itself so it is ready for input.
    """

    def __init__(self):
        super(Big5Prober, self).__init__()
        # State machine built from the Big5 model.
        self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
        # Frequency analyser used alongside the state machine.
        self.distribution_analyzer = Big5DistributionAnalysis()
        self.reset()

    @property
    def charset_name(self):
        """Name of the charset this prober reports."""
        return "Big5"

    @property
    def language(self):
        """Language this prober reports."""
        return "Chinese"
@ -0,0 +1,233 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE, |
|||
EUCTW_TYPICAL_DISTRIBUTION_RATIO) |
|||
from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE, |
|||
EUCKR_TYPICAL_DISTRIBUTION_RATIO) |
|||
from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE, |
|||
GB2312_TYPICAL_DISTRIBUTION_RATIO) |
|||
from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE, |
|||
BIG5_TYPICAL_DISTRIBUTION_RATIO) |
|||
from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE, |
|||
JIS_TYPICAL_DISTRIBUTION_RATIO) |
|||
|
|||
|
|||
class CharDistributionAnalysis(object):
    """Base class for character-frequency based confidence estimation.

    Subclasses supply a frequency-order table, its size, a typical
    distribution ratio, and a ``get_order`` that maps a two-byte character
    to an index ("order") into that table.
    """

    ENOUGH_DATA_THRESHOLD = 1024
    SURE_YES = 0.99
    SURE_NO = 0.01
    MINIMUM_DATA_THRESHOLD = 3

    def __init__(self):
        # Table mapping a character order (as produced by get_order) to its
        # frequency order.
        self._char_to_freq_order = None
        # Number of entries in the table above.
        self._table_size = None
        # Language-dependent constant used when computing confidence. See
        # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
        self.typical_distribution_ratio = None
        self._done = None
        self._total_chars = None
        self._freq_chars = None
        self.reset()

    def reset(self):
        """Clear all accumulated state."""
        # True once detection is finished and a conclusion has been made.
        self._done = False
        # Count of characters seen that had a valid (non-negative) order.
        self._total_chars = 0
        # Count of characters whose frequency order is below 512.
        self._freq_chars = 0

    def feed(self, char, char_len):
        """Account for one character whose byte length is already known."""
        # Only two-byte characters take part in the distribution analysis.
        if char_len != 2:
            return
        order = self.get_order(char)
        if order < 0:
            # Not a character this analyser is interested in.
            return
        self._total_chars += 1
        in_table = order < self._table_size
        if in_table and self._char_to_freq_order[order] < 512:
            self._freq_chars += 1

    def get_confidence(self):
        """Return a confidence value derived from the data fed so far."""
        total = self._total_chars
        frequent = self._freq_chars
        # Nothing (or too little) in our range of interest: negative answer.
        if total <= 0 or frequent <= self.MINIMUM_DATA_THRESHOLD:
            return self.SURE_NO

        infrequent = total - frequent
        if infrequent:
            ratio = frequent / (infrequent * self.typical_distribution_ratio)
            if ratio < self.SURE_YES:
                return ratio

        # Cap the confidence; we never want to claim 100% certainty.
        return self.SURE_YES

    def got_enough_data(self):
        """Tell whether enough characters were seen to draw a conclusion."""
        # The whole input is not needed; a certain amount of data suffices.
        return self._total_chars > self.ENOUGH_DATA_THRESHOLD

    def get_order(self, byte_str):
        """Map an encoded character to its order; the base class knows none.

        Characters are converted to a number (the "order") instead of being
        handled as raw encoded bytes, which lets several encodings of one
        language share a single frequency table.
        """
        return -1
|||
|
|||
|
|||
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis bound to the EUC-TW frequency table."""

    def __init__(self):
        super(EUCTWDistributionAnalysis, self).__init__()
        self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
        self._table_size = EUCTW_TABLE_SIZE
        self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO

    def get_order(self, byte_str):
        """Return the table order for a two-byte EUC-TW character, or -1.

        Bytes of interest:
          first byte:  0xc4 -- 0xfe
          second byte: 0xa1 -- 0xfe
        No validation is done here; the state machine has already done it.
        """
        lead = byte_str[0]
        if lead < 0xC4:
            return -1
        return 94 * (lead - 0xC4) + byte_str[1] - 0xA1
|||
|
|||
|
|||
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis bound to the EUC-KR frequency table."""

    def __init__(self):
        super(EUCKRDistributionAnalysis, self).__init__()
        self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
        self._table_size = EUCKR_TABLE_SIZE
        self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO

    def get_order(self, byte_str):
        """Return the table order for a two-byte EUC-KR character, or -1.

        Bytes of interest:
          first byte:  0xb0 -- 0xfe
          second byte: 0xa1 -- 0xfe
        No validation is done here; the state machine has already done it.
        """
        lead = byte_str[0]
        if lead < 0xB0:
            return -1
        return 94 * (lead - 0xB0) + byte_str[1] - 0xA1
|||
|
|||
|
|||
class GB2312DistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis bound to the GB2312 frequency table."""

    def __init__(self):
        super(GB2312DistributionAnalysis, self).__init__()
        self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
        self._table_size = GB2312_TABLE_SIZE
        self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO

    def get_order(self, byte_str):
        """Return the table order for a two-byte GB2312 character, or -1.

        Bytes of interest:
          first byte:  0xb0 -- 0xfe
          second byte: 0xa1 -- 0xfe
        No validation is done here; the state machine has already done it.
        """
        lead = byte_str[0]
        trail = byte_str[1]
        if lead < 0xB0 or trail < 0xA1:
            return -1
        return 94 * (lead - 0xB0) + trail - 0xA1
|||
|
|||
|
|||
class Big5DistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis bound to the Big5 frequency table."""

    def __init__(self):
        super(Big5DistributionAnalysis, self).__init__()
        self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
        self._table_size = BIG5_TABLE_SIZE
        self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO

    def get_order(self, byte_str):
        """Return the table order for a two-byte Big5 character, or -1.

        Bytes of interest:
          first byte:  0xa4 -- 0xfe
          second byte: 0x40 -- 0x7e, 0xa1 -- 0xfe
        No validation is done here; the state machine has already done it.
        """
        lead = byte_str[0]
        trail = byte_str[1]
        if lead < 0xA4:
            return -1
        row_base = 157 * (lead - 0xA4)
        if trail >= 0xA1:
            # Upper trail-byte range is placed after the 63 lower-range slots.
            return row_base + trail - 0xA1 + 63
        return row_base + trail - 0x40
|||
|
|||
|
|||
class SJISDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis for Shift-JIS, bound to the JIS frequency table."""

    def __init__(self):
        super(SJISDistributionAnalysis, self).__init__()
        self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
        self._table_size = JIS_TABLE_SIZE
        self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO

    def get_order(self, byte_str):
        """Return the table order for a two-byte Shift-JIS character, or -1.

        Bytes of interest:
          first byte:  0x81 -- 0x9f, 0xe0 -- 0xfe
          second byte: 0x40 -- 0x7e, 0x81 -- 0xfe
        No validation is done here; the state machine has already done it.
        """
        lead = byte_str[0]
        trail = byte_str[1]
        if 0x81 <= lead <= 0x9F:
            row = lead - 0x81
        elif 0xE0 <= lead <= 0xEF:
            # Second lead-byte range continues after the 31 rows of the first.
            row = lead - 0xE0 + 31
        else:
            return -1
        # Trail bytes above 0x7F are reported as "no order".
        if trail > 0x7F:
            return -1
        return 188 * row + trail - 0x40
|||
|
|||
|
|||
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis for EUC-JP, bound to the JIS frequency table."""

    def __init__(self):
        super(EUCJPDistributionAnalysis, self).__init__()
        self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
        self._table_size = JIS_TABLE_SIZE
        self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO

    def get_order(self, byte_str):
        """Return the table order for a two-byte EUC-JP character, or -1.

        Bytes of interest:
          first byte:  0xa0 -- 0xfe
          second byte: 0xa1 -- 0xfe
        No validation is done here; the state machine has already done it.
        """
        lead = byte_str[0]
        if lead < 0xA0:
            return -1
        # Rows are counted from 0xA1 even though the guard admits 0xA0.
        return 94 * (lead - 0xA1) + byte_str[1] - 0xa1
@ -0,0 +1,106 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .enums import ProbingState |
|||
from .charsetprober import CharSetProber |
|||
|
|||
|
|||
class CharSetGroupProber(CharSetProber):
    """Prober that fans input out to a list of child probers.

    Each call to ``feed`` passes the bytes to every child that is still
    active; the child with the highest confidence (or the first one that
    reports FOUND_IT) becomes the best guess, and ``charset_name`` /
    ``language`` are delegated to it.

    ``self.probers`` starts out empty and must be filled in before use
    (presumably by subclasses; not visible in this file).
    """

    def __init__(self, lang_filter=None):
        super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
        # Number of child probers that have not yet answered NOT_ME.
        self._active_num = 0
        self.probers = []
        self._best_guess_prober = None

    def reset(self):
        """Reset this prober and re-activate every (non-empty) child."""
        super(CharSetGroupProber, self).reset()
        self._active_num = 0
        for prober in self.probers:
            if prober:
                prober.reset()
                prober.active = True
                self._active_num += 1
        self._best_guess_prober = None

    @property
    def charset_name(self):
        """Charset name of the best-guess child, or None if there is none."""
        if not self._best_guess_prober:
            # No guess yet: computing the confidence selects one if possible.
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.charset_name

    @property
    def language(self):
        """Language of the best-guess child, or None if there is none."""
        if not self._best_guess_prober:
            # No guess yet: computing the confidence selects one if possible.
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.language

    def feed(self, byte_str):
        """Feed ``byte_str`` to every active child and return this
        prober's resulting state.

        Stops at the first child that reports FOUND_IT. Children that
        report NOT_ME are deactivated; once none remain active the group
        itself becomes NOT_ME.
        """
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                continue
            state = prober.feed(byte_str)
            if not state:
                # Falsy state: nothing conclusive from this child.
                continue
            if state == ProbingState.FOUND_IT:
                self._best_guess_prober = prober
                # Propagate the positive answer to the group's own state;
                # without this the FOUND_IT branch of get_confidence() can
                # never be taken and callers keep seeing the old state.
                self._state = ProbingState.FOUND_IT
                return self.state
            elif state == ProbingState.NOT_ME:
                prober.active = False
                self._active_num -= 1
                if self._active_num <= 0:
                    self._state = ProbingState.NOT_ME
                    return self.state
        return self.state

    def get_confidence(self):
        """Return the group's confidence and update the best-guess child.

        Returns 0.99 / 0.01 directly when the group state is FOUND_IT /
        NOT_ME; otherwise returns the highest confidence among the active
        children (0.0 when no child has a confidence above zero).
        """
        state = self.state
        if state == ProbingState.FOUND_IT:
            return 0.99
        elif state == ProbingState.NOT_ME:
            return 0.01
        best_conf = 0.0
        self._best_guess_prober = None
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                self.logger.debug('%s not active', prober.charset_name)
                continue
            conf = prober.get_confidence()
            self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
            if best_conf < conf:
                best_conf = conf
                self._best_guess_prober = prober
        if not self._best_guess_prober:
            return 0.0
        return best_conf
@ -0,0 +1,145 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Universal charset detector code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 2001 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# Shy Shalom - original C code |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
import logging |
|||
import re |
|||
|
|||
from .enums import ProbingState |
|||
|
|||
|
|||
class CharSetProber(object):
    """Base class for charset probers.

    Holds the probing state, the language filter and a logger, provides
    neutral defaults for the prober interface, and offers static helpers
    that strip uninteresting bytes from a buffer before analysis.
    """

    SHORTCUT_THRESHOLD = 0.95

    def __init__(self, lang_filter=None):
        self._state = None
        self.lang_filter = lang_filter
        self.logger = logging.getLogger(__name__)

    def reset(self):
        """Put the prober back into the DETECTING state."""
        self._state = ProbingState.DETECTING

    @property
    def charset_name(self):
        """The base prober has no charset to report."""
        return None

    def feed(self, buf):
        """Accept a buffer; the base implementation does nothing."""
        pass

    @property
    def state(self):
        """Current probing state."""
        return self._state

    def get_confidence(self):
        """The base prober has no confidence in anything."""
        return 0.0

    @staticmethod
    def filter_high_byte_only(buf):
        """Collapse every run of 7-bit bytes in ``buf`` into one space."""
        return re.sub(b'([\x00-\x7F])+', b' ', buf)

    @staticmethod
    def filter_international_words(buf):
        """Keep only the words of ``buf`` that contain a high byte.

        Three kinds of bytes are distinguished:
          alphabet:      English letters, a-z and A-Z
          international: bytes 0x80-0xFF
          marker:        everything else

        The buffer is treated as words delimited by markers. Words with at
        least one international byte are kept; the marker that ends such a
        word is replaced by a single ASCII space.

        This filter applies to all scripts which do not use English
        characters.
        """
        kept = bytearray()

        # A word here has at least one international byte and may carry one
        # trailing marker byte.
        pattern = b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?'

        for word in re.findall(pattern, buf):
            head, tail = word[:-1], word[-1:]
            kept.extend(head)

            # A trailing marker becomes a space: markers are used alike in
            # all languages, so their frequencies should not influence the
            # analysis.
            if tail < b'\x80' and not tail.isalpha():
                tail = b' '
            kept.extend(tail)

        return kept

    @staticmethod
    def filter_with_english_letters(buf):
        """Return the letter/high-byte runs of ``buf`` found outside tags.

        The result retains only sequences of English letters and high bytes
        that are not between ``<`` and ``>``, plus such a sequence when it
        sits immediately before a ``>``. Kept runs that were ended by a
        delimiter are followed by a single space.

        This filter can be applied to all scripts which contain both
        English characters and extended ASCII characters, but is currently
        only used by ``Latin1Prober``.
        """
        kept = bytearray()
        inside_tag = False
        run_start = 0

        for pos in range(len(buf)):
            # Slicing yields bytes (not an int) on Python 3.
            byte = buf[pos:pos + 1]

            # Track whether we are entering or leaving an HTML tag.
            if byte == b'>':
                inside_tag = False
            elif byte == b'<':
                inside_tag = True

            # Letters and extended-ASCII bytes extend the current run.
            if byte.isalpha() or not byte < b'\x80':
                continue

            # A delimiter: flush the run that precedes it unless we are in
            # a tag, then start a new run after the delimiter.
            if pos > run_start and not inside_tag:
                kept.extend(buf[run_start:pos])
                kept.extend(b' ')
            run_start = pos + 1

        # Flush whatever follows the last delimiter, if outside a tag.
        if not inside_tag:
            kept.extend(buf[run_start:])

        return kept
@ -0,0 +1 @@ |
|||
|
@ -0,0 +1,85 @@ |
|||
#!/usr/bin/env python |
|||
""" |
|||
Script which takes one or more file paths and reports on their detected |
|||
encodings |
|||
|
|||
Example:: |
|||
|
|||
% chardetect somefile someotherfile |
|||
somefile: windows-1252 with confidence 0.5 |
|||
someotherfile: ascii with confidence 1.0 |
|||
|
|||
If no paths are provided, it takes its input from stdin. |
|||
|
|||
""" |
|||
|
|||
from __future__ import absolute_import, print_function, unicode_literals |
|||
|
|||
import argparse |
|||
import sys |
|||
|
|||
from chardet import __version__ |
|||
from chardet.compat import PY2 |
|||
from chardet.universaldetector import UniversalDetector |
|||
|
|||
|
|||
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    """
    detector = UniversalDetector()
    for chunk in lines:
        detector.feed(bytearray(chunk))
        # Stop reading as soon as the detector is done -- particularly
        # useful when a BOM has been read.
        if detector.done:
            break
    detector.close()
    outcome = detector.result
    if PY2:
        name = name.decode(sys.getfilesystemencoding(), 'ignore')
    if not outcome['encoding']:
        return '{0}: no result'.format(name)
    return '{0}: {1} with confidence {2}'.format(name, outcome['encoding'],
                                                 outcome['confidence'])
|||
|
|||
|
|||
def main(argv=None):
    """
    Handles command line arguments and gets things started.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Get command line arguments.
    # The help strings are built from adjacent literals rather than a
    # backslash continuation inside the literal, which would embed the
    # continuation line's indentation in the text.
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their "
                    "detected encodings")
    parser.add_argument('input',
                        help='File whose encoding we would like to determine. '
                             '(default: stdin)',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[sys.stdin if PY2 else sys.stdin.buffer])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        if f.isatty():
            # Reading from a terminal: tell the user how to end the input.
            print("You are running chardetect interactively. Press " +
                  "CTRL-D twice at the start of a blank line to signal the " +
                  "end of your input. If you want help, run chardetect " +
                  "--help\n", file=sys.stderr)
        print(description_of(f, f.name))


if __name__ == '__main__':
    main()
@ -0,0 +1,88 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is mozilla.org code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
import logging |
|||
|
|||
from .enums import MachineState |
|||
|
|||
|
|||
class CodingStateMachine(object): |
|||
""" |
|||
A state machine to verify a byte sequence for a particular encoding. For |
|||
each byte the detector receives, it will feed that byte to every active |
|||
state machine available, one byte at a time. The state machine changes its |
|||
state based on its previous state and the byte it receives. There are 3 |
|||
states in a state machine that are of interest to an auto-detector: |
|||
|
|||
START state: This is the state to start with, or a legal byte sequence |
|||
(i.e. a valid code point) for character has been identified. |
|||
|
|||
ME state: This indicates that the state machine identified a byte sequence |
|||
that is specific to the charset it is designed for and that |
|||
there is no other possible encoding which can contain this byte |
|||
sequence. This will to lead to an immediate positive answer for |
|||
the detector. |
|||
|
|||
ERROR state: This indicates the state machine identified an illegal byte |
|||
sequence for that encoding. This will lead to an immediate |
|||
negative answer for this encoding. Detector will exclude this |
|||
encoding from consideration from here on. |
|||
""" |
|||
def __init__(self, sm): |
|||
self._model = sm |
|||
self._curr_byte_pos = 0 |
|||
self._curr_char_len = 0 |
|||
self._curr_state = None |
|||
self.logger = logging.getLogger(__name__) |
|||
self.reset() |
|||
|
|||
def reset(self): |
|||
self._curr_state = MachineState.START |
|||
|
|||
def next_state(self, c): |
|||
# for each byte we get its class |
|||
# if it is first byte, we also get byte length |
|||
byte_class = self._model['class_table'][c] |
|||
if self._curr_state == MachineState.START: |
|||
self._curr_byte_pos = 0 |
|||
self._curr_char_len = self._model['char_len_table'][byte_class] |
|||
# from byte's class and state_table, we get its next state |
|||
curr_state = (self._curr_state * self._model['class_factor'] |
|||
+ byte_class) |
|||
self._curr_state = self._model['state_table'][curr_state] |
|||
self._curr_byte_pos += 1 |
|||
return self._curr_state |
|||
|
|||
def get_current_charlen(self):
    """Return the character length most recently recorded by ``next_state``.

    The value is 0 until ``next_state`` has been called from the start state.
    """
    char_len = self._curr_char_len
    return char_len
|||
|
|||
def get_coding_state_machine(self):
    """Return the 'name' entry of the model this machine was built from."""
    model = self._model
    return model['name']
|||
|
|||
@property
def language(self):
    """The 'language' entry of the model this machine was built from."""
    model = self._model
    return model['language']
@ -0,0 +1,34 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# Contributor(s): |
|||
# Dan Blanchard |
|||
# Ian Cordasco |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
import sys |
|||
|
|||
|
|||
# Interpreter-version flags plus the string types that go with them.
PY2 = sys.version_info < (3, 0)
PY3 = not PY2

if PY2:
    # ``unicode`` only exists on Python 2; this branch never runs on 3.
    base_str = (str, unicode)
    text_type = unicode
else:
    base_str = (bytes, str)
    text_type = str
@ -0,0 +1,49 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is mozilla.org code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .chardistribution import EUCKRDistributionAnalysis |
|||
from .codingstatemachine import CodingStateMachine |
|||
from .mbcharsetprober import MultiByteCharSetProber |
|||
from .mbcssm import CP949_SM_MODEL |
|||
|
|||
|
|||
class CP949Prober(MultiByteCharSetProber):
    """Multi-byte prober that reports the "CP949" charset for Korean text."""

    def __init__(self):
        """Attach the CP949 state machine and an EUC-KR distribution analyzer."""
        super(CP949Prober, self).__init__()
        self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
        # CP949 is a superset of EUC-KR, so the EUC-KR character
        # distribution is reused here rather than a CP949-specific one.
        self.distribution_analyzer = EUCKRDistributionAnalysis()
        self.reset()

    @property
    def charset_name(self):
        """Name of the charset this prober stands for."""
        return "CP949"

    @property
    def language(self):
        """Language associated with this prober."""
        return "Korean"
@ -0,0 +1,76 @@ |
|||
""" |
|||
All of the Enums that are used throughout the chardet package. |
|||
|
|||
:author: Dan Blanchard (dan.blanchard@gmail.com) |
|||
""" |
|||
|
|||
|
|||
class InputState(object):
    """
    Enum-style namespace for the states a universal detector can be in.
    """
    # Consecutive integer codes, starting at zero.
    PURE_ASCII, ESC_ASCII, HIGH_BYTE = range(3)
|||
|
|||
|
|||
class LanguageFilter(object):
    """
    Bit flags for the language filters that can be applied to a
    ``UniversalDetector``. Flags may be combined with ``|``.
    """
    # One bit per individual filter.
    CHINESE_SIMPLIFIED = 1 << 0
    CHINESE_TRADITIONAL = 1 << 1
    JAPANESE = 1 << 2
    KOREAN = 1 << 3
    NON_CJK = 1 << 4
    # Every bit above set at once.
    ALL = 0x1F
    # Convenience unions.
    CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
    CJK = CHINESE | JAPANESE | KOREAN
|||
|
|||
|
|||
class ProbingState(object):
    """
    Enum-style namespace for the states a prober can be in.
    """
    # Consecutive integer codes, starting at zero.
    DETECTING, FOUND_IT, NOT_ME = range(3)
|||
|
|||
|
|||
class MachineState(object):
    """
    Enum-style namespace for the states a state machine can be in.
    """
    # The numeric values matter: state tables store them directly and a
    # state is multiplied by the class factor to index those tables.
    START, ERROR, ITS_ME = range(3)
|||
|
|||
|
|||
class SequenceLikelihood(object):
    """
    Enum-style namespace rating how likely a character is to follow the
    previous one.
    """
    # Consecutive integer codes, starting at zero.
    NEGATIVE, UNLIKELY, LIKELY, POSITIVE = range(4)

    @classmethod
    def get_num_categories(cls):
        """:returns: The number of likelihood categories in the enum."""
        return 4
|||
|
|||
|
|||
class CharacterCategory(object):
    """
    Enum-style namespace for the categories that language models for
    ``SingleByteCharsetProber`` put characters into.

    Anything less than CONTROL is considered a letter.
    """
    # Codes count down from 255 (UNDEFINED) to 251 (CONTROL).
    UNDEFINED, LINE_BREAK, SYMBOL, DIGIT, CONTROL = range(255, 250, -1)
@ -0,0 +1,101 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is mozilla.org code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .charsetprober import CharSetProber |
|||
from .codingstatemachine import CodingStateMachine |
|||
from .enums import LanguageFilter, ProbingState, MachineState |
|||
from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL, |
|||
ISO2022KR_SM_MODEL) |
|||
|
|||
|
|||
class EscCharSetProber(CharSetProber):
    """
    This CharSetProber uses a "code scheme" approach for detecting encodings,
    whereby easily recognizable escape or shift sequences are relied on to
    identify these encodings.
    """

    def __init__(self, lang_filter=None):
        """Build one coding state machine per encoding the filter enables.

        :param lang_filter: bitmask of ``LanguageFilter`` flags selecting
            which state machines to create. ``None`` (the default) enables
            all of them.
        """
        super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
        # The default ``None`` cannot be combined with ``&``; treat it as
        # "no restriction" instead of failing with a TypeError.
        active_filter = self.lang_filter
        if active_filter is None:
            active_filter = LanguageFilter.ALL
        self.coding_sm = []
        if active_filter & LanguageFilter.CHINESE_SIMPLIFIED:
            self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
            self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
        if active_filter & LanguageFilter.JAPANESE:
            self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
        if active_filter & LanguageFilter.KOREAN:
            self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
        self.active_sm_count = None
        self._detected_charset = None
        self._detected_language = None
        self._state = None
        self.reset()

    def reset(self):
        """Reactivate and reset every state machine and forget any result."""
        super(EscCharSetProber, self).reset()
        for coding_sm in self.coding_sm:
            if not coding_sm:
                continue
            coding_sm.active = True
            coding_sm.reset()
        self.active_sm_count = len(self.coding_sm)
        self._detected_charset = None
        self._detected_language = None

    @property
    def charset_name(self):
        """Name of the detected charset, or ``None`` if nothing was detected."""
        return self._detected_charset

    @property
    def language(self):
        """Language of the detected charset, or ``None`` if nothing was detected."""
        return self._detected_language

    def get_confidence(self):
        """Return 0.99 once a charset has been detected, otherwise 0.00."""
        if self._detected_charset:
            return 0.99
        else:
            return 0.00

    def feed(self, byte_str):
        """Run every byte through each active state machine.

        :param byte_str: iterable of byte values; each one is passed as-is
            to ``CodingStateMachine.next_state``.
        :returns: the prober state after processing.
        """
        for c in byte_str:
            for coding_sm in self.coding_sm:
                if not coding_sm or not coding_sm.active:
                    continue
                coding_state = coding_sm.next_state(c)
                if coding_state == MachineState.ERROR:
                    # This encoding is ruled out; stop feeding its machine.
                    coding_sm.active = False
                    self.active_sm_count -= 1
                    if self.active_sm_count <= 0:
                        # Every candidate encoding has been eliminated.
                        self._state = ProbingState.NOT_ME
                        return self.state
                elif coding_state == MachineState.ITS_ME:
                    # The first machine to report a positive match wins.
                    self._state = ProbingState.FOUND_IT
                    self._detected_charset = coding_sm.get_coding_state_machine()
                    self._detected_language = coding_sm.language
                    return self.state

        return self.state
@ -0,0 +1,246 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is mozilla.org code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .enums import MachineState |
|||
|
|||
# Byte -> class lookup for the HZ state machine: 256 entries, one per byte
# value, written as runs instead of a 32-row grid.
HZ_CLS = (
    (1,)                  # 00
    + (0,) * 26           # 01 - 1a
    + (1,)                # 1b (ESC)
    + (0,) * 95           # 1c - 7a
    + (4, 0, 5, 2, 0)     # 7b '{', 7c, 7d '}', 7e '~', 7f
    + (1,) * 128          # 80 - ff
)
|||
|
|||
# Flat transition table for HZ. CodingStateMachine.next_state reads entry
# [state * class_factor + byte_class], so with a class factor of 6 each
# group of six entries below holds the transitions out of one state.
HZ_ST = (
    # state 0 (MachineState.START)
    MachineState.START, MachineState.ERROR, 3,
    MachineState.START, MachineState.START, MachineState.START,
    # state 1 (MachineState.ERROR)
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    # state 2 (MachineState.ITS_ME)
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,
    # state 3
    MachineState.ERROR, MachineState.ERROR, MachineState.START,
    MachineState.START, 4, MachineState.ERROR,
    # state 4
    5, MachineState.ERROR, 6,
    MachineState.ERROR, 5, 5,
    # state 5
    4, MachineState.ERROR, 4,
    MachineState.ERROR, 4, 4,
    # state 6
    4, MachineState.ERROR, 4,
    MachineState.ERROR, 4, MachineState.ITS_ME,
    # trailing entries (indices 42 - 47)
    MachineState.START, MachineState.START, MachineState.START,
    MachineState.START, MachineState.START, MachineState.START,
)

# One character-length entry per byte class; all zero for this model.
HZ_CHAR_LEN_TABLE = (0,) * 6

HZ_SM_MODEL = {
    'class_table': HZ_CLS,
    'class_factor': 6,
    'state_table': HZ_ST,
    'char_len_table': HZ_CHAR_LEN_TABLE,
    'name': "HZ-GB-2312",
    'language': 'Chinese',
}
|||
|
|||
# Byte -> class lookup for the ISO-2022-CN state machine: 256 entries, one
# per byte value, written as runs instead of a 32-row grid.
ISO2022CN_CLS = (
    (2,)                  # 00
    + (0,) * 26           # 01 - 1a
    + (1,)                # 1b (ESC)
    + (0,) * 13           # 1c - 28
    + (3,)                # 29 ')'
    + (0,) * 25           # 2a - 42
    + (4,)                # 43 'C'
    + (0,) * 60           # 44 - 7f
    + (2,) * 128          # 80 - ff
)
|||
|
|||
# Flat transition table for ISO-2022-CN. CodingStateMachine.next_state reads
# entry [state * class_factor + byte_class], so with a class factor of 9
# each group of nine entries below holds the transitions out of one state.
ISO2022CN_ST = (
    # state 0 (MachineState.START)
    MachineState.START, 3, MachineState.ERROR,
    MachineState.START, MachineState.START, MachineState.START,
    MachineState.START, MachineState.START, MachineState.START,
    # state 1 (MachineState.ERROR)
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    # state 2 (MachineState.ITS_ME)
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,
    # state 3
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    4, MachineState.ERROR, MachineState.ERROR,
    MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME,
    # state 4
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    MachineState.ERROR, 5, 6,
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    # state 5
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR,
    # state 6
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR,
    # trailing entry (index 63)
    MachineState.START,
)

# One character-length entry per byte class; all zero for this model.
ISO2022CN_CHAR_LEN_TABLE = (0,) * 9

ISO2022CN_SM_MODEL = {
    'class_table': ISO2022CN_CLS,
    'class_factor': 9,
    'state_table': ISO2022CN_ST,
    'char_len_table': ISO2022CN_CHAR_LEN_TABLE,
    'name': "ISO-2022-CN",
    'language': 'Chinese',
}
|||
|
|||
# Byte -> class lookup for the ISO-2022-JP state machine: 256 entries, one
# per byte value, written as runs instead of a 32-row grid.
ISO2022JP_CLS = (
    (2,)                  # 00
    + (0,) * 13           # 01 - 0d
    + (2, 2)              # 0e, 0f
    + (0,) * 11           # 10 - 1a
    + (1,)                # 1b (ESC)
    + (0,) * 8            # 1c - 23
    + (7,)                # 24 '$'
    + (0,) * 3            # 25 - 27
    + (3,)                # 28 '('
    + (0,) * 23           # 29 - 3f
    + (6, 0, 4, 0, 8)     # 40 '@', 41, 42 'B', 43, 44 'D'
    + (0,) * 4            # 45 - 48
    + (9, 5)              # 49 'I', 4a 'J'
    + (0,) * 53           # 4b - 7f
    + (2,) * 128          # 80 - ff
)
|||
|
|||
# Flat transition table for ISO-2022-JP. CodingStateMachine.next_state reads
# entry [state * class_factor + byte_class], so with a class factor of 10
# each group of ten entries below holds the transitions out of one state.
ISO2022JP_ST = (
    # state 0 (MachineState.START)
    MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START,
    MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START,
    # state 1 (MachineState.ERROR)
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    # state 2 (MachineState.ITS_ME)
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,
    # state 3
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 5, MachineState.ERROR,
    MachineState.ERROR, MachineState.ERROR, 4, MachineState.ERROR, MachineState.ERROR,
    # state 4
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 6, MachineState.ITS_ME,
    MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    # state 5
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME,
    MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME,
    # state 6
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR,
    # trailing entries (indices 70 - 71)
    MachineState.START, MachineState.START,
)

# One character-length entry per byte class; all zero for this model.
ISO2022JP_CHAR_LEN_TABLE = (0,) * 10

ISO2022JP_SM_MODEL = {
    'class_table': ISO2022JP_CLS,
    'class_factor': 10,
    'state_table': ISO2022JP_ST,
    'char_len_table': ISO2022JP_CHAR_LEN_TABLE,
    'name': "ISO-2022-JP",
    'language': 'Japanese',
}
|||
|
|||
# Byte -> class lookup for the ISO-2022-KR state machine: 256 entries, one
# per byte value, written as runs instead of a 32-row grid.
ISO2022KR_CLS = (
    (2,)                  # 00
    + (0,) * 26           # 01 - 1a
    + (1,)                # 1b (ESC)
    + (0,) * 8            # 1c - 23
    + (3,)                # 24 '$'
    + (0,) * 4            # 25 - 28
    + (4,)                # 29 ')'
    + (0,) * 25           # 2a - 42
    + (5,)                # 43 'C'
    + (0,) * 60           # 44 - 7f
    + (2,) * 128          # 80 - ff
)
|||
|
|||
# Flat transition table for ISO-2022-KR. CodingStateMachine.next_state reads
# entry [state * class_factor + byte_class], so with a class factor of 6
# each group of six entries below holds the transitions out of one state.
ISO2022KR_ST = (
    # state 0 (MachineState.START)
    MachineState.START, 3, MachineState.ERROR,
    MachineState.START, MachineState.START, MachineState.START,
    # state 1 (MachineState.ERROR)
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    # state 2 (MachineState.ITS_ME)
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,
    # state 3
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    4, MachineState.ERROR, MachineState.ERROR,
    # state 4
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    MachineState.ERROR, 5, MachineState.ERROR,
    # state 5
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,
    MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME,
    # trailing entries (indices 36 - 39)
    MachineState.START, MachineState.START,
    MachineState.START, MachineState.START,
)

# One character-length entry per byte class; all zero for this model.
ISO2022KR_CHAR_LEN_TABLE = (0,) * 6

ISO2022KR_SM_MODEL = {
    'class_table': ISO2022KR_CLS,
    'class_factor': 6,
    'state_table': ISO2022KR_ST,
    'char_len_table': ISO2022KR_CHAR_LEN_TABLE,
    'name': "ISO-2022-KR",
    'language': 'Korean',
}
|||
|
|||
|
@ -0,0 +1,92 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is mozilla.org code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .enums import ProbingState, MachineState |
|||
from .mbcharsetprober import MultiByteCharSetProber |
|||
from .codingstatemachine import CodingStateMachine |
|||
from .chardistribution import EUCJPDistributionAnalysis |
|||
from .jpcntx import EUCJPContextAnalysis |
|||
from .mbcssm import EUCJP_SM_MODEL |
|||
|
|||
|
|||
class EUCJPProber(MultiByteCharSetProber):
    """Multi-byte prober for EUC-JP.

    Combines the EUC-JP coding state machine with a character distribution
    analyzer and a context analyzer; the reported confidence is the larger
    of the two analyzers' confidences.
    """

    def __init__(self):
        """Create the state machine and both analyzers, then reset."""
        super(EUCJPProber, self).__init__()
        self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
        self.distribution_analyzer = EUCJPDistributionAnalysis()
        self.context_analyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the base prober state and the context analyzer."""
        super(EUCJPProber, self).reset()
        self.context_analyzer.reset()

    @property
    def charset_name(self):
        """Name of the charset this prober stands for."""
        return "EUC-JP"

    @property
    def language(self):
        """Language associated with this prober."""
        return "Japanese"

    def feed(self, byte_str):
        """Feed a chunk of bytes to the state machine and the analyzers.

        :param byte_str: indexable, sliceable byte sequence. An empty
            sequence is accepted and leaves the carried-over byte untouched.
        :returns: the prober state after processing.
        """
        for i in range(len(byte_str)):
            # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
            coding_state = self.coding_sm.next_state(byte_str[i])
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                # Back in START: a full character has just been consumed.
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # The character may have begun in the previous chunk, so
                    # pair the carried-over byte with this chunk's first byte.
                    self._last_char[1] = byte_str[0]
                    self.context_analyzer.feed(self._last_char, char_len)
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    self.context_analyzer.feed(byte_str[i - 1:i + 1],
                                               char_len)
                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
                                                    char_len)

        # Remember the final byte for the next call. An empty chunk has no
        # final byte, and indexing it with [-1] would raise IndexError.
        if len(byte_str) > 0:
            self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            if (self.context_analyzer.got_enough_data() and
                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return the larger of the context and distribution confidences."""
        context_conf = self.context_analyzer.get_confidence()
        distrib_conf = self.distribution_analyzer.get_confidence()
        return max(context_conf, distrib_conf)
@ -0,0 +1,195 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
# Sampling from about 20M text materials include literature and computer technology |
|||
|
|||
# 128 --> 0.79 |
|||
# 256 --> 0.92 |
|||
# 512 --> 0.986 |
|||
# 1024 --> 0.99944 |
|||
# 2048 --> 0.99999 |
|||
# |
|||
# Ideal Distribution Ratio = 0.98653 / (1-0.98653) = 73.24 |
|||
# Random Distribution Ratio = 512 / (2350-512) = 0.279. |
|||
# |
|||
# Typical Distribution Ratio |
|||
|
|||
EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0 |
|||
|
|||
EUCKR_TABLE_SIZE = 2352 |
|||
|
|||
# Char to FreqOrder table , |
|||
EUCKR_CHAR_TO_FREQ_ORDER = ( |
|||
13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87, |
|||
1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398, |
|||
1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734, |
|||
945,1400,1735, 47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739, |
|||
116, 987, 813,1401, 683, 75,1204, 145,1740,1741,1742,1743, 16, 847, 667, 622, |
|||
708,1744,1745,1746, 966, 787, 304, 129,1747, 60, 820, 123, 676,1748,1749,1750, |
|||
1751, 617,1752, 626,1753,1754,1755,1756, 653,1757,1758,1759,1760,1761,1762, 856, |
|||
344,1763,1764,1765,1766, 89, 401, 418, 806, 905, 848,1767,1768,1769, 946,1205, |
|||
709,1770,1118,1771, 241,1772,1773,1774,1271,1775, 569,1776, 999,1777,1778,1779, |
|||
1780, 337, 751,1058, 28, 628, 254,1781, 177, 906, 270, 349, 891,1079,1782, 19, |
|||
1783, 379,1784, 315,1785, 629, 754,1402, 559,1786, 636, 203,1206,1787, 710, 567, |
|||
1788, 935, 814,1789,1790,1207, 766, 528,1791,1792,1208,1793,1794,1795,1796,1797, |
|||
1403,1798,1799, 533,1059,1404,1405,1156,1406, 936, 884,1080,1800, 351,1801,1802, |
|||
1803,1804,1805, 801,1806,1807,1808,1119,1809,1157, 714, 474,1407,1810, 298, 899, |
|||
885,1811,1120, 802,1158,1812, 892,1813,1814,1408, 659,1815,1816,1121,1817,1818, |
|||
1819,1820,1821,1822, 319,1823, 594, 545,1824, 815, 937,1209,1825,1826, 573,1409, |
|||
1022,1827,1210,1828,1829,1830,1831,1832,1833, 556, 722, 807,1122,1060,1834, 697, |
|||
1835, 900, 557, 715,1836,1410, 540,1411, 752,1159, 294, 597,1211, 976, 803, 770, |
|||
1412,1837,1838, 39, 794,1413, 358,1839, 371, 925,1840, 453, 661, 788, 531, 723, |
|||
544,1023,1081, 869, 91,1841, 392, 430, 790, 602,1414, 677,1082, 457,1415,1416, |
|||
1842,1843, 475, 327,1024,1417, 795, 121,1844, 733, 403,1418,1845,1846,1847, 300, |
|||
119, 711,1212, 627,1848,1272, 207,1849,1850, 796,1213, 382,1851, 519,1852,1083, |
|||
893,1853,1854,1855, 367, 809, 487, 671,1856, 663,1857,1858, 956, 471, 306, 857, |
|||
1859,1860,1160,1084,1861,1862,1863,1864,1865,1061,1866,1867,1868,1869,1870,1871, |
|||
282, 96, 574,1872, 502,1085,1873,1214,1874, 907,1875,1876, 827, 977,1419,1420, |
|||
1421, 268,1877,1422,1878,1879,1880, 308,1881, 2, 537,1882,1883,1215,1884,1885, |
|||
127, 791,1886,1273,1423,1887, 34, 336, 404, 643,1888, 571, 654, 894, 840,1889, |
|||
0, 886,1274, 122, 575, 260, 908, 938,1890,1275, 410, 316,1891,1892, 100,1893, |
|||
1894,1123, 48,1161,1124,1025,1895, 633, 901,1276,1896,1897, 115, 816,1898, 317, |
|||
1899, 694,1900, 909, 734,1424, 572, 866,1425, 691, 85, 524,1010, 543, 394, 841, |
|||
1901,1902,1903,1026,1904,1905,1906,1907,1908,1909, 30, 451, 651, 988, 310,1910, |
|||
1911,1426, 810,1216, 93,1912,1913,1277,1217,1914, 858, 759, 45, 58, 181, 610, |
|||
269,1915,1916, 131,1062, 551, 443,1000, 821,1427, 957, 895,1086,1917,1918, 375, |
|||
1919, 359,1920, 687,1921, 822,1922, 293,1923,1924, 40, 662, 118, 692, 29, 939, |
|||
887, 640, 482, 174,1925, 69,1162, 728,1428, 910,1926,1278,1218,1279, 386, 870, |
|||
217, 854,1163, 823,1927,1928,1929,1930, 834,1931, 78,1932, 859,1933,1063,1934, |
|||
1935,1936,1937, 438,1164, 208, 595,1938,1939,1940,1941,1219,1125,1942, 280, 888, |
|||
1429,1430,1220,1431,1943,1944,1945,1946,1947,1280, 150, 510,1432,1948,1949,1950, |
|||
1951,1952,1953,1954,1011,1087,1955,1433,1043,1956, 881,1957, 614, 958,1064,1065, |
|||
1221,1958, 638,1001, 860, 967, 896,1434, 989, 492, 553,1281,1165,1959,1282,1002, |
|||
1283,1222,1960,1961,1962,1963, 36, 383, 228, 753, 247, 454,1964, 876, 678,1965, |
|||
1966,1284, 126, 464, 490, 835, 136, 672, 529, 940,1088,1435, 473,1967,1968, 467, |
|||
50, 390, 227, 587, 279, 378, 598, 792, 968, 240, 151, 160, 849, 882,1126,1285, |
|||
639,1044, 133, 140, 288, 360, 811, 563,1027, 561, 142, 523,1969,1970,1971, 7, |
|||
103, 296, 439, 407, 506, 634, 990,1972,1973,1974,1975, 645,1976,1977,1978,1979, |
|||
1980,1981, 236,1982,1436,1983,1984,1089, 192, 828, 618, 518,1166, 333,1127,1985, |
|||
818,1223,1986,1987,1988,1989,1990,1991,1992,1993, 342,1128,1286, 746, 842,1994, |
|||
1995, 560, 223,1287, 98, 8, 189, 650, 978,1288,1996,1437,1997, 17, 345, 250, |
|||
423, 277, 234, 512, 226, 97, 289, 42, 167,1998, 201,1999,2000, 843, 836, 824, |
|||
532, 338, 783,1090, 182, 576, 436,1438,1439, 527, 500,2001, 947, 889,2002,2003, |
|||
2004,2005, 262, 600, 314, 447,2006, 547,2007, 693, 738,1129,2008, 71,1440, 745, |
|||
619, 688,2009, 829,2010,2011, 147,2012, 33, 948,2013,2014, 74, 224,2015, 61, |
|||
191, 918, 399, 637,2016,1028,1130, 257, 902,2017,2018,2019,2020,2021,2022,2023, |
|||
2024,2025,2026, 837,2027,2028,2029,2030, 179, 874, 591, 52, 724, 246,2031,2032, |
|||
2033,2034,1167, 969,2035,1289, 630, 605, 911,1091,1168,2036,2037,2038,1441, 912, |
|||
2039, 623,2040,2041, 253,1169,1290,2042,1442, 146, 620, 611, 577, 433,2043,1224, |
|||
719,1170, 959, 440, 437, 534, 84, 388, 480,1131, 159, 220, 198, 679,2044,1012, |
|||
819,1066,1443, 113,1225, 194, 318,1003,1029,2045,2046,2047,2048,1067,2049,2050, |
|||
2051,2052,2053, 59, 913, 112,2054, 632,2055, 455, 144, 739,1291,2056, 273, 681, |
|||
499,2057, 448,2058,2059, 760,2060,2061, 970, 384, 169, 245,1132,2062,2063, 414, |
|||
1444,2064,2065, 41, 235,2066, 157, 252, 877, 568, 919, 789, 580,2067, 725,2068, |
|||
2069,1292,2070,2071,1445,2072,1446,2073,2074, 55, 588, 66,1447, 271,1092,2075, |
|||
1226,2076, 960,1013, 372,2077,2078,2079,2080,2081,1293,2082,2083,2084,2085, 850, |
|||
2086,2087,2088,2089,2090, 186,2091,1068, 180,2092,2093,2094, 109,1227, 522, 606, |
|||
2095, 867,1448,1093, 991,1171, 926, 353,1133,2096, 581,2097,2098,2099,1294,1449, |
|||
1450,2100, 596,1172,1014,1228,2101,1451,1295,1173,1229,2102,2103,1296,1134,1452, |
|||
949,1135,2104,2105,1094,1453,1454,1455,2106,1095,2107,2108,2109,2110,2111,2112, |
|||
2113,2114,2115,2116,2117, 804,2118,2119,1230,1231, 805,1456, 405,1136,2120,2121, |
|||
2122,2123,2124, 720, 701,1297, 992,1457, 927,1004,2125,2126,2127,2128,2129,2130, |
|||
22, 417,2131, 303,2132, 385,2133, 971, 520, 513,2134,1174, 73,1096, 231, 274, |
|||
962,1458, 673,2135,1459,2136, 152,1137,2137,2138,2139,2140,1005,1138,1460,1139, |
|||
2141,2142,2143,2144, 11, 374, 844,2145, 154,1232, 46,1461,2146, 838, 830, 721, |
|||
1233, 106,2147, 90, 428, 462, 578, 566,1175, 352,2148,2149, 538,1234, 124,1298, |
|||
2150,1462, 761, 565,2151, 686,2152, 649,2153, 72, 173,2154, 460, 415,2155,1463, |
|||
2156,1235, 305,2157,2158,2159,2160,2161,2162, 579,2163,2164,2165,2166,2167, 747, |
|||
2168,2169,2170,2171,1464, 669,2172,2173,2174,2175,2176,1465,2177, 23, 530, 285, |
|||
2178, 335, 729,2179, 397,2180,2181,2182,1030,2183,2184, 698,2185,2186, 325,2187, |
|||
2188, 369,2189, 799,1097,1015, 348,2190,1069, 680,2191, 851,1466,2192,2193, 10, |
|||
2194, 613, 424,2195, 979, 108, 449, 589, 27, 172, 81,1031, 80, 774, 281, 350, |
|||
1032, 525, 301, 582,1176,2196, 674,1045,2197,2198,1467, 730, 762,2199,2200,2201, |
|||
2202,1468,2203, 993,2204,2205, 266,1070, 963,1140,2206,2207,2208, 664,1098, 972, |
|||
2209,2210,2211,1177,1469,1470, 871,2212,2213,2214,2215,2216,1471,2217,2218,2219, |
|||
2220,2221,2222,2223,2224,2225,2226,2227,1472,1236,2228,2229,2230,2231,2232,2233, |
|||
2234,2235,1299,2236,2237, 200,2238, 477, 373,2239,2240, 731, 825, 777,2241,2242, |
|||
2243, 521, 486, 548,2244,2245,2246,1473,1300, 53, 549, 137, 875, 76, 158,2247, |
|||
1301,1474, 469, 396,1016, 278, 712,2248, 321, 442, 503, 767, 744, 941,1237,1178, |
|||
1475,2249, 82, 178,1141,1179, 973,2250,1302,2251, 297,2252,2253, 570,2254,2255, |
|||
2256, 18, 450, 206,2257, 290, 292,1142,2258, 511, 162, 99, 346, 164, 735,2259, |
|||
1476,1477, 4, 554, 343, 798,1099,2260,1100,2261, 43, 171,1303, 139, 215,2262, |
|||
2263, 717, 775,2264,1033, 322, 216,2265, 831,2266, 149,2267,1304,2268,2269, 702, |
|||
1238, 135, 845, 347, 309,2270, 484,2271, 878, 655, 238,1006,1478,2272, 67,2273, |
|||
295,2274,2275, 461,2276, 478, 942, 412,2277,1034,2278,2279,2280, 265,2281, 541, |
|||
2282,2283,2284,2285,2286, 70, 852,1071,2287,2288,2289,2290, 21, 56, 509, 117, |
|||
432,2291,2292, 331, 980, 552,1101, 148, 284, 105, 393,1180,1239, 755,2293, 187, |
|||
2294,1046,1479,2295, 340,2296, 63,1047, 230,2297,2298,1305, 763,1306, 101, 800, |
|||
808, 494,2299,2300,2301, 903,2302, 37,1072, 14, 5,2303, 79, 675,2304, 312, |
|||
2305,2306,2307,2308,2309,1480, 6,1307,2310,2311,2312, 1, 470, 35, 24, 229, |
|||
2313, 695, 210, 86, 778, 15, 784, 592, 779, 32, 77, 855, 964,2314, 259,2315, |
|||
501, 380,2316,2317, 83, 981, 153, 689,1308,1481,1482,1483,2318,2319, 716,1484, |
|||
2320,2321,2322,2323,2324,2325,1485,2326,2327, 128, 57, 68, 261,1048, 211, 170, |
|||
1240, 31,2328, 51, 435, 742,2329,2330,2331, 635,2332, 264, 456,2333,2334,2335, |
|||
425,2336,1486, 143, 507, 263, 943,2337, 363, 920,1487, 256,1488,1102, 243, 601, |
|||
1489,2338,2339,2340,2341,2342,2343,2344, 861,2345,2346,2347,2348,2349,2350, 395, |
|||
2351,1490,1491, 62, 535, 166, 225,2352,2353, 668, 419,1241, 138, 604, 928,2354, |
|||
1181,2355,1492,1493,2356,2357,2358,1143,2359, 696,2360, 387, 307,1309, 682, 476, |
|||
2361,2362, 332, 12, 222, 156,2363, 232,2364, 641, 276, 656, 517,1494,1495,1035, |
|||
416, 736,1496,2365,1017, 586,2366,2367,2368,1497,2369, 242,2370,2371,2372,1498, |
|||
2373, 965, 713,2374,2375,2376,2377, 740, 982,1499, 944,1500,1007,2378,2379,1310, |
|||
1501,2380,2381,2382, 785, 329,2383,2384,1502,2385,2386,2387, 932,2388,1503,2389, |
|||
2390,2391,2392,1242,2393,2394,2395,2396,2397, 994, 950,2398,2399,2400,2401,1504, |
|||
1311,2402,2403,2404,2405,1049, 749,2406,2407, 853, 718,1144,1312,2408,1182,1505, |
|||
2409,2410, 255, 516, 479, 564, 550, 214,1506,1507,1313, 413, 239, 444, 339,1145, |
|||
1036,1508,1509,1314,1037,1510,1315,2411,1511,2412,2413,2414, 176, 703, 497, 624, |
|||
593, 921, 302,2415, 341, 165,1103,1512,2416,1513,2417,2418,2419, 376,2420, 700, |
|||
2421,2422,2423, 258, 768,1316,2424,1183,2425, 995, 608,2426,2427,2428,2429, 221, |
|||
2430,2431,2432,2433,2434,2435,2436,2437, 195, 323, 726, 188, 897, 983,1317, 377, |
|||
644,1050, 879,2438, 452,2439,2440,2441,2442,2443,2444, 914,2445,2446,2447,2448, |
|||
915, 489,2449,1514,1184,2450,2451, 515, 64, 427, 495,2452, 583,2453, 483, 485, |
|||
1038, 562, 213,1515, 748, 666,2454,2455,2456,2457, 334,2458, 780, 996,1008, 705, |
|||
1243,2459,2460,2461,2462,2463, 114,2464, 493,1146, 366, 163,1516, 961,1104,2465, |
|||
291,2466,1318,1105,2467,1517, 365,2468, 355, 951,1244,2469,1319,2470, 631,2471, |
|||
2472, 218,1320, 364, 320, 756,1518,1519,1321,1520,1322,2473,2474,2475,2476, 997, |
|||
2477,2478,2479,2480, 665,1185,2481, 916,1521,2482,2483,2484, 584, 684,2485,2486, |
|||
797,2487,1051,1186,2488,2489,2490,1522,2491,2492, 370,2493,1039,1187, 65,2494, |
|||
434, 205, 463,1188,2495, 125, 812, 391, 402, 826, 699, 286, 398, 155, 781, 771, |
|||
585,2496, 590, 505,1073,2497, 599, 244, 219, 917,1018, 952, 646,1523,2498,1323, |
|||
2499,2500, 49, 984, 354, 741,2501, 625,2502,1324,2503,1019, 190, 357, 757, 491, |
|||
95, 782, 868,2504,2505,2506,2507,2508,2509, 134,1524,1074, 422,1525, 898,2510, |
|||
161,2511,2512,2513,2514, 769,2515,1526,2516,2517, 411,1325,2518, 472,1527,2519, |
|||
2520,2521,2522,2523,2524, 985,2525,2526,2527,2528,2529,2530, 764,2531,1245,2532, |
|||
2533, 25, 204, 311,2534, 496,2535,1052,2536,2537,2538,2539,2540,2541,2542, 199, |
|||
704, 504, 468, 758, 657,1528, 196, 44, 839,1246, 272, 750,2543, 765, 862,2544, |
|||
2545,1326,2546, 132, 615, 933,2547, 732,2548,2549,2550,1189,1529,2551, 283,1247, |
|||
1053, 607, 929,2552,2553,2554, 930, 183, 872, 616,1040,1147,2555,1148,1020, 441, |
|||
249,1075,2556,2557,2558, 466, 743,2559,2560,2561, 92, 514, 426, 420, 526,2562, |
|||
2563,2564,2565,2566,2567,2568, 185,2569,2570,2571,2572, 776,1530, 658,2573, 362, |
|||
2574, 361, 922,1076, 793,2575,2576,2577,2578,2579,2580,1531, 251,2581,2582,2583, |
|||
2584,1532, 54, 612, 237,1327,2585,2586, 275, 408, 647, 111,2587,1533,1106, 465, |
|||
3, 458, 9, 38,2588, 107, 110, 890, 209, 26, 737, 498,2589,1534,2590, 431, |
|||
202, 88,1535, 356, 287,1107, 660,1149,2591, 381,1536, 986,1150, 445,1248,1151, |
|||
974,2592,2593, 846,2594, 446, 953, 184,1249,1250, 727,2595, 923, 193, 883,2596, |
|||
2597,2598, 102, 324, 539, 817,2599, 421,1041,2600, 832,2601, 94, 175, 197, 406, |
|||
2602, 459,2603,2604,2605,2606,2607, 330, 555,2608,2609,2610, 706,1108, 389,2611, |
|||
2612,2613,2614, 233,2615, 833, 558, 931, 954,1251,2616,2617,1537, 546,2618,2619, |
|||
1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628, |
|||
2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042, |
|||
670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256 |
|||
) |
|||
|
@ -0,0 +1,47 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is mozilla.org code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .mbcharsetprober import MultiByteCharSetProber |
|||
from .codingstatemachine import CodingStateMachine |
|||
from .chardistribution import EUCKRDistributionAnalysis |
|||
from .mbcssm import EUCKR_SM_MODEL |
|||
|
|||
|
|||
class EUCKRProber(MultiByteCharSetProber):
    """Multi-byte charset prober that reports the EUC-KR charset / Korean language.

    The probing logic itself comes from ``MultiByteCharSetProber``; this
    subclass only supplies the EUC-KR specific state machine model and
    distribution analyser, and the identifying name/language strings.
    """

    def __init__(self):
        """Attach the EUC-KR state machine and distribution analyser, then reset."""
        super(EUCKRProber, self).__init__()
        # State machine driven by the EUC-KR model table.
        self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
        # Distribution analysis specific to EUC-KR.
        self.distribution_analyzer = EUCKRDistributionAnalysis()
        # reset() is called last, after both collaborators above are in place.
        self.reset()

    @property
    def language(self):
        """Language name reported by this prober."""
        return "Korean"

    @property
    def charset_name(self):
        """Charset name reported by this prober."""
        return "EUC-KR"
@ -0,0 +1,387 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
# EUCTW frequency table |
|||
# Converted from big5 work |
|||
# by Taiwan's Mandarin Promotion Council |
|||
# <http://www.edu.tw:81/mandr/> |
|||
|
|||
# 128 --> 0.42261 |
|||
# 256 --> 0.57851 |
|||
# 512 --> 0.74851 |
|||
# 1024 --> 0.89384 |
|||
# 2048 --> 0.97583 |
|||
# |
|||
# Ideal Distribution Ratio = 0.74851/(1-0.74851) = 2.98 |
|||
# Random Distribution Ratio = 512/(5401-512) = 0.105 |
|||
# |
|||
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR |
|||
|
|||
# Typical distribution ratio for EUC-TW: about 25% of the ideal ratio |
# (0.25 * 2.98 ~= 0.75). |
EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75 |
|||
|
|||
# Size associated with the char-to-frequency-order table defined below. |
# NOTE(review): presumably the number of leading table entries that are |
# consulted -- confirm against the distribution analysis code. |
|||
EUCTW_TABLE_SIZE = 5376 |
|||
|
|||
EUCTW_CHAR_TO_FREQ_ORDER = ( |
|||
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742 |
|||
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758 |
|||
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774 |
|||
63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790 |
|||
3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806 |
|||
4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822 |
|||
7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838 |
|||
630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854 |
|||
179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870 |
|||
995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886 |
|||
2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902 |
|||
1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918 |
|||
3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934 |
|||
706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950 |
|||
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966 |
|||
3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982 |
|||
2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998 |
|||
437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014 |
|||
3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030 |
|||
1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046 |
|||
7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062 |
|||
266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078 |
|||
7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094 |
|||
1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110 |
|||
32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126 |
|||
188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142 |
|||
3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158 |
|||
3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174 |
|||
324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190 |
|||
2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206 |
|||
2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222 |
|||
314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238 |
|||
287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254 |
|||
3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270 |
|||
1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286 |
|||
1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302 |
|||
1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318 |
|||
2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334 |
|||
265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350 |
|||
4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366 |
|||
1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382 |
|||
7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398 |
|||
2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414 |
|||
383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430 |
|||
98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446 |
|||
523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462 |
|||
710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478 |
|||
7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494 |
|||
379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510 |
|||
1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526 |
|||
585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542 |
|||
690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558 |
|||
7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574 |
|||
1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590 |
|||
544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606 |
|||
3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622 |
|||
4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638 |
|||
3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654 |
|||
279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670 |
|||
610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686 |
|||
1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702 |
|||
4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718 |
|||
3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734 |
|||
3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750 |
|||
2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766 |
|||
7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782 |
|||
3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798 |
|||
7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814 |
|||
1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830 |
|||
2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846 |
|||
1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862 |
|||
78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878 |
|||
1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894 |
|||
4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910 |
|||
3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926 |
|||
534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942 |
|||
165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958 |
|||
626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974 |
|||
2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990 |
|||
7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006 |
|||
1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022 |
|||
2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038 |
|||
1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054 |
|||
1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070 |
|||
7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086 |
|||
7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102 |
|||
7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118 |
|||
3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134 |
|||
4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150 |
|||
1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166 |
|||
7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182 |
|||
2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198 |
|||
7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214 |
|||
3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230 |
|||
3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246 |
|||
7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262 |
|||
2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278 |
|||
7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294 |
|||
862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310 |
|||
4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326 |
|||
2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342 |
|||
7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358 |
|||
3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374 |
|||
2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390 |
|||
2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406 |
|||
294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422 |
|||
2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438 |
|||
1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454 |
|||
1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470 |
|||
2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486 |
|||
1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502 |
|||
7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518 |
|||
7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534 |
|||
2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550 |
|||
4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566 |
|||
1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582 |
|||
7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598 |
|||
829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614 |
|||
4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630 |
|||
375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646 |
|||
2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662 |
|||
444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678 |
|||
1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694 |
|||
1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710 |
|||
730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726 |
|||
3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742 |
|||
3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758 |
|||
1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774 |
|||
3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790 |
|||
7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806 |
|||
7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822 |
|||
1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838 |
|||
2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854 |
|||
1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870 |
|||
3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886 |
|||
2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902 |
|||
3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918 |
|||
2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934 |
|||
4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950 |
|||
4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966 |
|||
3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982 |
|||
97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998 |
|||
3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014 |
|||
424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030 |
|||
3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046 |
|||
3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062 |
|||
3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078 |
|||
1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094 |
|||
7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110 |
|||
199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126 |
|||
7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142 |
|||
1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158 |
|||
391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174 |
|||
4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190 |
|||
3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206 |
|||
397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222 |
|||
2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238 |
|||
2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254 |
|||
3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270 |
|||
1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286 |
|||
4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302 |
|||
2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318 |
|||
1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334 |
|||
1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350 |
|||
2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366 |
|||
3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382 |
|||
1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398 |
|||
7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414 |
|||
1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430 |
|||
4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446 |
|||
1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462 |
|||
135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478 |
|||
1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494 |
|||
3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510 |
|||
3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526 |
|||
2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542 |
|||
1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558 |
|||
4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574 |
|||
660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590 |
|||
7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606 |
|||
2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622 |
|||
3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638 |
|||
4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654 |
|||
790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670 |
|||
7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686 |
|||
7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702 |
|||
1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718 |
|||
4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734 |
|||
3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750 |
|||
2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766 |
|||
3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782 |
|||
3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798 |
|||
2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814 |
|||
1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830 |
|||
4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846 |
|||
3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862 |
|||
3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878 |
|||
2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894 |
|||
4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910 |
|||
7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926 |
|||
3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942 |
|||
2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958 |
|||
3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974 |
|||
1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990 |
|||
2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006 |
|||
3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022 |
|||
4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038 |
|||
2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054 |
|||
2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070 |
|||
7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086 |
|||
1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102 |
|||
2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118 |
|||
1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134 |
|||
3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150 |
|||
4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166 |
|||
2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182 |
|||
3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198 |
|||
3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214 |
|||
2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230 |
|||
4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246 |
|||
2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262 |
|||
3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278 |
|||
4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294 |
|||
7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310 |
|||
3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326 |
|||
194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342 |
|||
1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358 |
|||
4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374 |
|||
1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390 |
|||
4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406 |
|||
7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422 |
|||
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438 |
|||
7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454 |
|||
2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470 |
|||
1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486 |
|||
1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502 |
|||
3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518 |
|||
509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534 |
|||
552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550 |
|||
478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566 |
|||
3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582 |
|||
2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598 |
|||
751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614 |
|||
7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630 |
|||
1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646 |
|||
3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662 |
|||
7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678 |
|||
1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694 |
|||
7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710 |
|||
4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726 |
|||
1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742 |
|||
2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758 |
|||
2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774 |
|||
4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790 |
|||
802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806 |
|||
809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822 |
|||
3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838 |
|||
3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854 |
|||
1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870 |
|||
2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886 |
|||
7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902 |
|||
1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918 |
|||
1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934 |
|||
3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950 |
|||
919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966 |
|||
1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982 |
|||
4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998 |
|||
7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014 |
|||
2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030 |
|||
3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046 |
|||
516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062 |
|||
1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078 |
|||
2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094 |
|||
2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110 |
|||
7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126 |
|||
7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142 |
|||
7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158 |
|||
2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174 |
|||
2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190 |
|||
1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206 |
|||
4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222 |
|||
3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238 |
|||
3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254 |
|||
4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270 |
|||
4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286 |
|||
2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302 |
|||
2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318 |
|||
7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334 |
|||
4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350 |
|||
7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366 |
|||
2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382 |
|||
1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398 |
|||
3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414 |
|||
4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430 |
|||
2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446 |
|||
120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462 |
|||
2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478 |
|||
1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494 |
|||
2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510 |
|||
2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526 |
|||
4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542 |
|||
7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558 |
|||
1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574 |
|||
3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590 |
|||
7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606 |
|||
1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622 |
|||
8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638 |
|||
2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654 |
|||
8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670 |
|||
2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686 |
|||
2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702 |
|||
8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718 |
|||
8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734 |
|||
8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750 |
|||
408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766 |
|||
8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782 |
|||
4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798 |
|||
3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814 |
|||
8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830 |
|||
1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846 |
|||
8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862 |
|||
425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878 |
|||
1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894 |
|||
479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910 |
|||
4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926 |
|||
1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942 |
|||
4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958 |
|||
1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974 |
|||
433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990 |
|||
3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006 |
|||
4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022 |
|||
8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038 |
|||
938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054 |
|||
3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070 |
|||
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086 |
|||
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102 |
|||
) |
|||
|
@ -0,0 +1,46 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is mozilla.org code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .mbcharsetprober import MultiByteCharSetProber |
|||
from .codingstatemachine import CodingStateMachine |
|||
from .chardistribution import EUCTWDistributionAnalysis |
|||
from .mbcssm import EUCTW_SM_MODEL |
|||
|
|||
class EUCTWProber(MultiByteCharSetProber):
    """Multi-byte charset prober specialised for the EUC-TW encoding.

    Plugs the EUC-TW coding state machine model and the EUC-TW character
    distribution analyser into the generic ``MultiByteCharSetProber``.
    """

    def __init__(self):
        """Set up the EUC-TW collaborators, then call ``self.reset()``."""
        super(EUCTWProber, self).__init__()
        # State machine driven by the EUC-TW model table.
        euctw_state_machine = CodingStateMachine(EUCTW_SM_MODEL)
        self.coding_sm = euctw_state_machine
        # Character-distribution analyser for EUC-TW text.
        euctw_distribution = EUCTWDistributionAnalysis()
        self.distribution_analyzer = euctw_distribution
        self.reset()

    @property
    def language(self):
        """Language label reported by this prober."""
        return "Taiwan"

    @property
    def charset_name(self):
        """Name of the character set this prober reports."""
        return "EUC-TW"
@ -0,0 +1,283 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
# GB2312 most frequently used character table |
|||
# |
|||
# Char to FreqOrder table, from hz6763 |
|||
|
|||
# 512 --> 0.79 -- 0.79 |
|||
# 1024 --> 0.92 -- 0.13 |
|||
# 2048 --> 0.98 -- 0.06 |
|||
# 6768 --> 1.00 -- 0.02 |
|||
# |
|||
# Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79 |
|||
# Random Distribution Ratio = 512 / (3755 - 512) = 0.157 |
|||
# |
|||
# Typical Distribution Ratio is about 25% of the Ideal one, still much higher than RDR |
|||
|
|||
# Typical distribution ratio for GB2312 text: roughly 25% of the ideal ratio |
# (3.79) derived in the header comment, i.e. about 0.9. |
GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9 |
|||
|
|||
# Number of entries in GB2312_CHAR_TO_FREQ_ORDER (235 rows of 16 values in the |
# table that follows) -- NOTE(review): confirm if the table is ever regenerated. |
GB2312_TABLE_SIZE = 3760 |
|||
|
|||
GB2312_CHAR_TO_FREQ_ORDER = ( |
|||
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205, |
|||
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842, |
|||
2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409, |
|||
249,4088,1746,1873,2047,1774, 581,1813, 358,1174,3590,1014,1561,4844,2245, 670, |
|||
1636,3112, 889,1286, 953, 556,2327,3060,1290,3141, 613, 185,3477,1367, 850,3820, |
|||
1715,2428,2642,2303,2732,3041,2562,2648,3566,3946,1349, 388,3098,2091,1360,3585, |
|||
152,1687,1539, 738,1559, 59,1232,2925,2267,1388,1249,1741,1679,2960, 151,1566, |
|||
1125,1352,4271, 924,4296, 385,3166,4459, 310,1245,2850, 70,3285,2729,3534,3575, |
|||
2398,3298,3466,1960,2265, 217,3647, 864,1909,2084,4401,2773,1010,3269,5152, 853, |
|||
3051,3121,1244,4251,1895, 364,1499,1540,2313,1180,3655,2268, 562, 715,2417,3061, |
|||
544, 336,3768,2380,1752,4075, 950, 280,2425,4382, 183,2759,3272, 333,4297,2155, |
|||
1688,2356,1444,1039,4540, 736,1177,3349,2443,2368,2144,2225, 565, 196,1482,3406, |
|||
927,1335,4147, 692, 878,1311,1653,3911,3622,1378,4200,1840,2969,3149,2126,1816, |
|||
2534,1546,2393,2760, 737,2494, 13, 447, 245,2747, 38,2765,2129,2589,1079, 606, |
|||
360, 471,3755,2890, 404, 848, 699,1785,1236, 370,2221,1023,3746,2074,2026,2023, |
|||
2388,1581,2119, 812,1141,3091,2536,1519, 804,2053, 406,1596,1090, 784, 548,4414, |
|||
1806,2264,2936,1100, 343,4114,5096, 622,3358, 743,3668,1510,1626,5020,3567,2513, |
|||
3195,4115,5627,2489,2991, 24,2065,2697,1087,2719, 48,1634, 315, 68, 985,2052, |
|||
198,2239,1347,1107,1439, 597,2366,2172, 871,3307, 919,2487,2790,1867, 236,2570, |
|||
1413,3794, 906,3365,3381,1701,1982,1818,1524,2924,1205, 616,2586,2072,2004, 575, |
|||
253,3099, 32,1365,1182, 197,1714,2454,1201, 554,3388,3224,2748, 756,2587, 250, |
|||
2567,1507,1517,3529,1922,2761,2337,3416,1961,1677,2452,2238,3153, 615, 911,1506, |
|||
1474,2495,1265,1906,2749,3756,3280,2161, 898,2714,1759,3450,2243,2444, 563, 26, |
|||
3286,2266,3769,3344,2707,3677, 611,1402, 531,1028,2871,4548,1375, 261,2948, 835, |
|||
1190,4134, 353, 840,2684,1900,3082,1435,2109,1207,1674, 329,1872,2781,4055,2686, |
|||
2104, 608,3318,2423,2957,2768,1108,3739,3512,3271,3985,2203,1771,3520,1418,2054, |
|||
1681,1153, 225,1627,2929, 162,2050,2511,3687,1954, 124,1859,2431,1684,3032,2894, |
|||
585,4805,3969,2869,2704,2088,2032,2095,3656,2635,4362,2209, 256, 518,2042,2105, |
|||
3777,3657, 643,2298,1148,1779, 190, 989,3544, 414, 11,2135,2063,2979,1471, 403, |
|||
3678, 126, 770,1563, 671,2499,3216,2877, 600,1179, 307,2805,4937,1268,1297,2694, |
|||
252,4032,1448,1494,1331,1394, 127,2256, 222,1647,1035,1481,3056,1915,1048, 873, |
|||
3651, 210, 33,1608,2516, 200,1520, 415, 102, 0,3389,1287, 817, 91,3299,2940, |
|||
836,1814, 549,2197,1396,1669,2987,3582,2297,2848,4528,1070, 687, 20,1819, 121, |
|||
1552,1364,1461,1968,2617,3540,2824,2083, 177, 948,4938,2291, 110,4549,2066, 648, |
|||
3359,1755,2110,2114,4642,4845,1693,3937,3308,1257,1869,2123, 208,1804,3159,2992, |
|||
2531,2549,3361,2418,1350,2347,2800,2568,1291,2036,2680, 72, 842,1990, 212,1233, |
|||
1154,1586, 75,2027,3410,4900,1823,1337,2710,2676, 728,2810,1522,3026,4995, 157, |
|||
755,1050,4022, 710, 785,1936,2194,2085,1406,2777,2400, 150,1250,4049,1206, 807, |
|||
1910, 534, 529,3309,1721,1660, 274, 39,2827, 661,2670,1578, 925,3248,3815,1094, |
|||
4278,4901,4252, 41,1150,3747,2572,2227,4501,3658,4902,3813,3357,3617,2884,2258, |
|||
887, 538,4187,3199,1294,2439,3042,2329,2343,2497,1255, 107, 543,1527, 521,3478, |
|||
3568, 194,5062, 15, 961,3870,1241,1192,2664, 66,5215,3260,2111,1295,1127,2152, |
|||
3805,4135, 901,1164,1976, 398,1278, 530,1460, 748, 904,1054,1966,1426, 53,2909, |
|||
509, 523,2279,1534, 536,1019, 239,1685, 460,2353, 673,1065,2401,3600,4298,2272, |
|||
1272,2363, 284,1753,3679,4064,1695, 81, 815,2677,2757,2731,1386, 859, 500,4221, |
|||
2190,2566, 757,1006,2519,2068,1166,1455, 337,2654,3203,1863,1682,1914,3025,1252, |
|||
1409,1366, 847, 714,2834,2038,3209, 964,2970,1901, 885,2553,1078,1756,3049, 301, |
|||
1572,3326, 688,2130,1996,2429,1805,1648,2930,3421,2750,3652,3088, 262,1158,1254, |
|||
389,1641,1812, 526,1719, 923,2073,1073,1902, 468, 489,4625,1140, 857,2375,3070, |
|||
3319,2863, 380, 116,1328,2693,1161,2244, 273,1212,1884,2769,3011,1775,1142, 461, |
|||
3066,1200,2147,2212, 790, 702,2695,4222,1601,1058, 434,2338,5153,3640, 67,2360, |
|||
4099,2502, 618,3472,1329, 416,1132, 830,2782,1807,2653,3211,3510,1662, 192,2124, |
|||
296,3979,1739,1611,3684, 23, 118, 324, 446,1239,1225, 293,2520,3814,3795,2535, |
|||
3116, 17,1074, 467,2692,2201, 387,2922, 45,1326,3055,1645,3659,2817, 958, 243, |
|||
1903,2320,1339,2825,1784,3289, 356, 576, 865,2315,2381,3377,3916,1088,3122,1713, |
|||
1655, 935, 628,4689,1034,1327, 441, 800, 720, 894,1979,2183,1528,5289,2702,1071, |
|||
4046,3572,2399,1571,3281, 79, 761,1103, 327, 134, 758,1899,1371,1615, 879, 442, |
|||
215,2605,2579, 173,2048,2485,1057,2975,3317,1097,2253,3801,4263,1403,1650,2946, |
|||
814,4968,3487,1548,2644,1567,1285, 2, 295,2636, 97, 946,3576, 832, 141,4257, |
|||
3273, 760,3821,3521,3156,2607, 949,1024,1733,1516,1803,1920,2125,2283,2665,3180, |
|||
1501,2064,3560,2171,1592, 803,3518,1416, 732,3897,4258,1363,1362,2458, 119,1427, |
|||
602,1525,2608,1605,1639,3175, 694,3064, 10, 465, 76,2000,4846,4208, 444,3781, |
|||
1619,3353,2206,1273,3796, 740,2483, 320,1723,2377,3660,2619,1359,1137,1762,1724, |
|||
2345,2842,1850,1862, 912, 821,1866, 612,2625,1735,2573,3369,1093, 844, 89, 937, |
|||
930,1424,3564,2413,2972,1004,3046,3019,2011, 711,3171,1452,4178, 428, 801,1943, |
|||
432, 445,2811, 206,4136,1472, 730, 349, 73, 397,2802,2547, 998,1637,1167, 789, |
|||
396,3217, 154,1218, 716,1120,1780,2819,4826,1931,3334,3762,2139,1215,2627, 552, |
|||
3664,3628,3232,1405,2383,3111,1356,2652,3577,3320,3101,1703, 640,1045,1370,1246, |
|||
4996, 371,1575,2436,1621,2210, 984,4033,1734,2638, 16,4529, 663,2755,3255,1451, |
|||
3917,2257,1253,1955,2234,1263,2951, 214,1229, 617, 485, 359,1831,1969, 473,2310, |
|||
750,2058, 165, 80,2864,2419, 361,4344,2416,2479,1134, 796,3726,1266,2943, 860, |
|||
2715, 938, 390,2734,1313,1384, 248, 202, 877,1064,2854, 522,3907, 279,1602, 297, |
|||
2357, 395,3740, 137,2075, 944,4089,2584,1267,3802, 62,1533,2285, 178, 176, 780, |
|||
2440, 201,3707, 590, 478,1560,4354,2117,1075, 30, 74,4643,4004,1635,1441,2745, |
|||
776,2596, 238,1077,1692,1912,2844, 605, 499,1742,3947, 241,3053, 980,1749, 936, |
|||
2640,4511,2582, 515,1543,2162,5322,2892,2993, 890,2148,1924, 665,1827,3581,1032, |
|||
968,3163, 339,1044,1896, 270, 583,1791,1720,4367,1194,3488,3669, 43,2523,1657, |
|||
163,2167, 290,1209,1622,3378, 550, 634,2508,2510, 695,2634,2384,2512,1476,1414, |
|||
220,1469,2341,2138,2852,3183,2900,4939,2865,3502,1211,3680, 854,3227,1299,2976, |
|||
3172, 186,2998,1459, 443,1067,3251,1495, 321,1932,3054, 909, 753,1410,1828, 436, |
|||
2441,1119,1587,3164,2186,1258, 227, 231,1425,1890,3200,3942, 247, 959, 725,5254, |
|||
2741, 577,2158,2079, 929, 120, 174, 838,2813, 591,1115, 417,2024, 40,3240,1536, |
|||
1037, 291,4151,2354, 632,1298,2406,2500,3535,1825,1846,3451, 205,1171, 345,4238, |
|||
18,1163, 811, 685,2208,1217, 425,1312,1508,1175,4308,2552,1033, 587,1381,3059, |
|||
2984,3482, 340,1316,4023,3972, 792,3176, 519, 777,4690, 918, 933,4130,2981,3741, |
|||
90,3360,2911,2200,5184,4550, 609,3079,2030, 272,3379,2736, 363,3881,1130,1447, |
|||
286, 779, 357,1169,3350,3137,1630,1220,2687,2391, 747,1277,3688,2618,2682,2601, |
|||
1156,3196,5290,4034,3102,1689,3596,3128, 874, 219,2783, 798, 508,1843,2461, 269, |
|||
1658,1776,1392,1913,2983,3287,2866,2159,2372, 829,4076, 46,4253,2873,1889,1894, |
|||
915,1834,1631,2181,2318, 298, 664,2818,3555,2735, 954,3228,3117, 527,3511,2173, |
|||
681,2712,3033,2247,2346,3467,1652, 155,2164,3382, 113,1994, 450, 899, 494, 994, |
|||
1237,2958,1875,2336,1926,3727, 545,1577,1550, 633,3473, 204,1305,3072,2410,1956, |
|||
2471, 707,2134, 841,2195,2196,2663,3843,1026,4940, 990,3252,4997, 368,1092, 437, |
|||
3212,3258,1933,1829, 675,2977,2893, 412, 943,3723,4644,3294,3283,2230,2373,5154, |
|||
2389,2241,2661,2323,1404,2524, 593, 787, 677,3008,1275,2059, 438,2709,2609,2240, |
|||
2269,2246,1446, 36,1568,1373,3892,1574,2301,1456,3962, 693,2276,5216,2035,1143, |
|||
2720,1919,1797,1811,2763,4137,2597,1830,1699,1488,1198,2090, 424,1694, 312,3634, |
|||
3390,4179,3335,2252,1214, 561,1059,3243,2295,2561, 975,5155,2321,2751,3772, 472, |
|||
1537,3282,3398,1047,2077,2348,2878,1323,3340,3076, 690,2906, 51, 369, 170,3541, |
|||
1060,2187,2688,3670,2541,1083,1683, 928,3918, 459, 109,4427, 599,3744,4286, 143, |
|||
2101,2730,2490, 82,1588,3036,2121, 281,1860, 477,4035,1238,2812,3020,2716,3312, |
|||
1530,2188,2055,1317, 843, 636,1808,1173,3495, 649, 181,1002, 147,3641,1159,2414, |
|||
3750,2289,2795, 813,3123,2610,1136,4368, 5,3391,4541,2174, 420, 429,1728, 754, |
|||
1228,2115,2219, 347,2223,2733, 735,1518,3003,2355,3134,1764,3948,3329,1888,2424, |
|||
1001,1234,1972,3321,3363,1672,1021,1450,1584, 226, 765, 655,2526,3404,3244,2302, |
|||
3665, 731, 594,2184, 319,1576, 621, 658,2656,4299,2099,3864,1279,2071,2598,2739, |
|||
795,3086,3699,3908,1707,2352,2402,1382,3136,2475,1465,4847,3496,3865,1085,3004, |
|||
2591,1084, 213,2287,1963,3565,2250, 822, 793,4574,3187,1772,1789,3050, 595,1484, |
|||
1959,2770,1080,2650, 456, 422,2996, 940,3322,4328,4345,3092,2742, 965,2784, 739, |
|||
4124, 952,1358,2498,2949,2565, 332,2698,2378, 660,2260,2473,4194,3856,2919, 535, |
|||
1260,2651,1208,1428,1300,1949,1303,2942, 433,2455,2450,1251,1946, 614,1269, 641, |
|||
1306,1810,2737,3078,2912, 564,2365,1419,1415,1497,4460,2367,2185,1379,3005,1307, |
|||
3218,2175,1897,3063, 682,1157,4040,4005,1712,1160,1941,1399, 394, 402,2952,1573, |
|||
1151,2986,2404, 862, 299,2033,1489,3006, 346, 171,2886,3401,1726,2932, 168,2533, |
|||
47,2507,1030,3735,1145,3370,1395,1318,1579,3609,4560,2857,4116,1457,2529,1965, |
|||
504,1036,2690,2988,2405, 745,5871, 849,2397,2056,3081, 863,2359,3857,2096, 99, |
|||
1397,1769,2300,4428,1643,3455,1978,1757,3718,1440, 35,4879,3742,1296,4228,2280, |
|||
160,5063,1599,2013, 166, 520,3479,1646,3345,3012, 490,1937,1545,1264,2182,2505, |
|||
1096,1188,1369,1436,2421,1667,2792,2460,1270,2122, 727,3167,2143, 806,1706,1012, |
|||
1800,3037, 960,2218,1882, 805, 139,2456,1139,1521, 851,1052,3093,3089, 342,2039, |
|||
744,5097,1468,1502,1585,2087, 223, 939, 326,2140,2577, 892,2481,1623,4077, 982, |
|||
3708, 135,2131, 87,2503,3114,2326,1106, 876,1616, 547,2997,2831,2093,3441,4530, |
|||
4314, 9,3256,4229,4148, 659,1462,1986,1710,2046,2913,2231,4090,4880,5255,3392, |
|||
3274,1368,3689,4645,1477, 705,3384,3635,1068,1529,2941,1458,3782,1509, 100,1656, |
|||
2548, 718,2339, 408,1590,2780,3548,1838,4117,3719,1345,3530, 717,3442,2778,3220, |
|||
2898,1892,4590,3614,3371,2043,1998,1224,3483, 891, 635, 584,2559,3355, 733,1766, |
|||
1729,1172,3789,1891,2307, 781,2982,2271,1957,1580,5773,2633,2005,4195,3097,1535, |
|||
3213,1189,1934,5693,3262, 586,3118,1324,1598, 517,1564,2217,1868,1893,4445,3728, |
|||
2703,3139,1526,1787,1992,3882,2875,1549,1199,1056,2224,1904,2711,5098,4287, 338, |
|||
1993,3129,3489,2689,1809,2815,1997, 957,1855,3898,2550,3275,3057,1105,1319, 627, |
|||
1505,1911,1883,3526, 698,3629,3456,1833,1431, 746, 77,1261,2017,2296,1977,1885, |
|||
125,1334,1600, 525,1798,1109,2222,1470,1945, 559,2236,1186,3443,2476,1929,1411, |
|||
2411,3135,1777,3372,2621,1841,1613,3229, 668,1430,1839,2643,2916, 195,1989,2671, |
|||
2358,1387, 629,3205,2293,5256,4439, 123,1310, 888,1879,4300,3021,3605,1003,1162, |
|||
3192,2910,2010, 140,2395,2859, 55,1082,2012,2901, 662, 419,2081,1438, 680,2774, |
|||
4654,3912,1620,1731,1625,5035,4065,2328, 512,1344, 802,5443,2163,2311,2537, 524, |
|||
3399, 98,1155,2103,1918,2606,3925,2816,1393,2465,1504,3773,2177,3963,1478,4346, |
|||
180,1113,4655,3461,2028,1698, 833,2696,1235,1322,1594,4408,3623,3013,3225,2040, |
|||
3022, 541,2881, 607,3632,2029,1665,1219, 639,1385,1686,1099,2803,3231,1938,3188, |
|||
2858, 427, 676,2772,1168,2025, 454,3253,2486,3556, 230,1950, 580, 791,1991,1280, |
|||
1086,1974,2034, 630, 257,3338,2788,4903,1017, 86,4790, 966,2789,1995,1696,1131, |
|||
259,3095,4188,1308, 179,1463,5257, 289,4107,1248, 42,3413,1725,2288, 896,1947, |
|||
774,4474,4254, 604,3430,4264, 392,2514,2588, 452, 237,1408,3018, 988,4531,1970, |
|||
3034,3310, 540,2370,1562,1288,2990, 502,4765,1147, 4,1853,2708, 207, 294,2814, |
|||
4078,2902,2509, 684, 34,3105,3532,2551, 644, 709,2801,2344, 573,1727,3573,3557, |
|||
2021,1081,3100,4315,2100,3681, 199,2263,1837,2385, 146,3484,1195,2776,3949, 997, |
|||
1939,3973,1008,1091,1202,1962,1847,1149,4209,5444,1076, 493, 117,5400,2521, 972, |
|||
1490,2934,1796,4542,2374,1512,2933,2657, 413,2888,1135,2762,2314,2156,1355,2369, |
|||
766,2007,2527,2170,3124,2491,2593,2632,4757,2437, 234,3125,3591,1898,1750,1376, |
|||
1942,3468,3138, 570,2127,2145,3276,4131, 962, 132,1445,4196, 19, 941,3624,3480, |
|||
3366,1973,1374,4461,3431,2629, 283,2415,2275, 808,2887,3620,2112,2563,1353,3610, |
|||
955,1089,3103,1053, 96, 88,4097, 823,3808,1583, 399, 292,4091,3313, 421,1128, |
|||
642,4006, 903,2539,1877,2082, 596, 29,4066,1790, 722,2157, 130, 995,1569, 769, |
|||
1485, 464, 513,2213, 288,1923,1101,2453,4316, 133, 486,2445, 50, 625, 487,2207, |
|||
57, 423, 481,2962, 159,3729,1558, 491, 303, 482, 501, 240,2837, 112,3648,2392, |
|||
1783, 362, 8,3433,3422, 610,2793,3277,1390,1284,1654, 21,3823, 734, 367, 623, |
|||
193, 287, 374,1009,1483, 816, 476, 313,2255,2340,1262,2150,2899,1146,2581, 782, |
|||
2116,1659,2018,1880, 255,3586,3314,1110,2867,2137,2564, 986,2767,5185,2006, 650, |
|||
158, 926, 762, 881,3157,2717,2362,3587, 306,3690,3245,1542,3077,2427,1691,2478, |
|||
2118,2985,3490,2438, 539,2305, 983, 129,1754, 355,4201,2386, 827,2923, 104,1773, |
|||
2838,2771, 411,2905,3919, 376, 767, 122,1114, 828,2422,1817,3506, 266,3460,1007, |
|||
1609,4998, 945,2612,4429,2274, 726,1247,1964,2914,2199,2070,4002,4108, 657,3323, |
|||
1422, 579, 455,2764,4737,1222,2895,1670, 824,1223,1487,2525, 558, 861,3080, 598, |
|||
2659,2515,1967, 752,2583,2376,2214,4180, 977, 704,2464,4999,2622,4109,1210,2961, |
|||
819,1541, 142,2284, 44, 418, 457,1126,3730,4347,4626,1644,1876,3671,1864, 302, |
|||
1063,5694, 624, 723,1984,3745,1314,1676,2488,1610,1449,3558,3569,2166,2098, 409, |
|||
1011,2325,3704,2306, 818,1732,1383,1824,1844,3757, 999,2705,3497,1216,1423,2683, |
|||
2426,2954,2501,2726,2229,1475,2554,5064,1971,1794,1666,2014,1343, 783, 724, 191, |
|||
2434,1354,2220,5065,1763,2752,2472,4152, 131, 175,2885,3434, 92,1466,4920,2616, |
|||
3871,3872,3866, 128,1551,1632, 669,1854,3682,4691,4125,1230, 188,2973,3290,1302, |
|||
1213, 560,3266, 917, 763,3909,3249,1760, 868,1958, 764,1782,2097, 145,2277,3774, |
|||
4462, 64,1491,3062, 971,2132,3606,2442, 221,1226,1617, 218, 323,1185,3207,3147, |
|||
571, 619,1473,1005,1744,2281, 449,1887,2396,3685, 275, 375,3816,1743,3844,3731, |
|||
845,1983,2350,4210,1377, 773, 967,3499,3052,3743,2725,4007,1697,1022,3943,1464, |
|||
3264,2855,2722,1952,1029,2839,2467, 84,4383,2215, 820,1391,2015,2448,3672, 377, |
|||
1948,2168, 797,2545,3536,2578,2645, 94,2874,1678, 405,1259,3071, 771, 546,1315, |
|||
470,1243,3083, 895,2468, 981, 969,2037, 846,4181, 653,1276,2928, 14,2594, 557, |
|||
3007,2474, 156, 902,1338,1740,2574, 537,2518, 973,2282,2216,2433,1928, 138,2903, |
|||
1293,2631,1612, 646,3457, 839,2935, 111, 496,2191,2847, 589,3186, 149,3994,2060, |
|||
4031,2641,4067,3145,1870, 37,3597,2136,1025,2051,3009,3383,3549,1121,1016,3261, |
|||
1301, 251,2446,2599,2153, 872,3246, 637, 334,3705, 831, 884, 921,3065,3140,4092, |
|||
2198,1944, 246,2964, 108,2045,1152,1921,2308,1031, 203,3173,4170,1907,3890, 810, |
|||
1401,2003,1690, 506, 647,1242,2828,1761,1649,3208,2249,1589,3709,2931,5156,1708, |
|||
498, 666,2613, 834,3817,1231, 184,2851,1124, 883,3197,2261,3710,1765,1553,2658, |
|||
1178,2639,2351, 93,1193, 942,2538,2141,4402, 235,1821, 870,1591,2192,1709,1871, |
|||
3341,1618,4126,2595,2334, 603, 651, 69, 701, 268,2662,3411,2555,1380,1606, 503, |
|||
448, 254,2371,2646, 574,1187,2309,1770, 322,2235,1292,1801, 305, 566,1133, 229, |
|||
2067,2057, 706, 167, 483,2002,2672,3295,1820,3561,3067, 316, 378,2746,3452,1112, |
|||
136,1981, 507,1651,2917,1117, 285,4591, 182,2580,3522,1304, 335,3303,1835,2504, |
|||
1795,1792,2248, 674,1018,2106,2449,1857,2292,2845, 976,3047,1781,2600,2727,1389, |
|||
1281, 52,3152, 153, 265,3950, 672,3485,3951,4463, 430,1183, 365, 278,2169, 27, |
|||
1407,1336,2304, 209,1340,1730,2202,1852,2403,2883, 979,1737,1062, 631,2829,2542, |
|||
3876,2592, 825,2086,2226,3048,3625, 352,1417,3724, 542, 991, 431,1351,3938,1861, |
|||
2294, 826,1361,2927,3142,3503,1738, 463,2462,2723, 582,1916,1595,2808, 400,3845, |
|||
3891,2868,3621,2254, 58,2492,1123, 910,2160,2614,1372,1603,1196,1072,3385,1700, |
|||
3267,1980, 696, 480,2430, 920, 799,1570,2920,1951,2041,4047,2540,1321,4223,2469, |
|||
3562,2228,1271,2602, 401,2833,3351,2575,5157, 907,2312,1256, 410, 263,3507,1582, |
|||
996, 678,1849,2316,1480, 908,3545,2237, 703,2322, 667,1826,2849,1531,2604,2999, |
|||
2407,3146,2151,2630,1786,3711, 469,3542, 497,3899,2409, 858, 837,4446,3393,1274, |
|||
786, 620,1845,2001,3311, 484, 308,3367,1204,1815,3691,2332,1532,2557,1842,2020, |
|||
2724,1927,2333,4440, 567, 22,1673,2728,4475,1987,1858,1144,1597, 101,1832,3601, |
|||
12, 974,3783,4391, 951,1412, 1,3720, 453,4608,4041, 528,1041,1027,3230,2628, |
|||
1129, 875,1051,3291,1203,2262,1069,2860,2799,2149,2615,3278, 144,1758,3040, 31, |
|||
475,1680, 366,2685,3184, 311,1642,4008,2466,5036,1593,1493,2809, 216,1420,1668, |
|||
233, 304,2128,3284, 232,1429,1768,1040,2008,3407,2740,2967,2543, 242,2133, 778, |
|||
1565,2022,2620, 505,2189,2756,1098,2273, 372,1614, 708, 553,2846,2094,2278, 169, |
|||
3626,2835,4161, 228,2674,3165, 809,1454,1309, 466,1705,1095, 900,3423, 880,2667, |
|||
3751,5258,2317,3109,2571,4317,2766,1503,1342, 866,4447,1118, 63,2076, 314,1881, |
|||
1348,1061, 172, 978,3515,1747, 532, 511,3970, 6, 601, 905,2699,3300,1751, 276, |
|||
1467,3725,2668, 65,4239,2544,2779,2556,1604, 578,2451,1802, 992,2331,2624,1320, |
|||
3446, 713,1513,1013, 103,2786,2447,1661, 886,1702, 916, 654,3574,2031,1556, 751, |
|||
2178,2821,2179,1498,1538,2176, 271, 914,2251,2080,1325, 638,1953,2937,3877,2432, |
|||
2754, 95,3265,1716, 260,1227,4083, 775, 106,1357,3254, 426,1607, 555,2480, 772, |
|||
1985, 244,2546, 474, 495,1046,2611,1851,2061, 71,2089,1675,2590, 742,3758,2843, |
|||
3222,1433, 267,2180,2576,2826,2233,2092,3913,2435, 956,1745,3075, 856,2113,1116, |
|||
451, 3,1988,2896,1398, 993,2463,1878,2049,1341,2718,2721,2870,2108, 712,2904, |
|||
4363,2753,2324, 277,2872,2349,2649, 384, 987, 435, 691,3000, 922, 164,3939, 652, |
|||
1500,1184,4153,2482,3373,2165,4848,2335,3775,3508,3154,2806,2830,1554,2102,1664, |
|||
2530,1434,2408, 893,1547,2623,3447,2832,2242,2532,3169,2856,3223,2078, 49,3770, |
|||
3469, 462, 318, 656,2259,3250,3069, 679,1629,2758, 344,1138,1104,3120,1836,1283, |
|||
3115,2154,1437,4448, 934, 759,1999, 794,2862,1038, 533,2560,1722,2342, 855,2626, |
|||
1197,1663,4476,3127, 85,4240,2528, 25,1111,1181,3673, 407,3470,4561,2679,2713, |
|||
768,1925,2841,3986,1544,1165, 932, 373,1240,2146,1930,2673, 721,4766, 354,4333, |
|||
391,2963, 187, 61,3364,1442,1102, 330,1940,1767, 341,3809,4118, 393,2496,2062, |
|||
2211, 105, 331, 300, 439, 913,1332, 626, 379,3304,1557, 328, 689,3952, 309,1555, |
|||
931, 317,2517,3027, 325, 569, 686,2107,3084, 60,1042,1333,2794, 264,3177,4014, |
|||
1628, 258,3712, 7,4464,1176,1043,1778, 683, 114,1975, 78,1492, 383,1886, 510, |
|||
386, 645,5291,2891,2069,3305,4138,3867,2939,2603,2493,1935,1066,1848,3588,1015, |
|||
1282,1289,4609, 697,1453,3044,2666,3611,1856,2412, 54, 719,1330, 568,3778,2459, |
|||
1748, 788, 492, 551,1191,1000, 488,3394,3763, 282,1799, 348,2016,1523,3155,2390, |
|||
1049, 382,2019,1788,1170, 729,2968,3523, 897,3926,2785,2938,3292, 350,2319,3238, |
|||
1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232, |
|||
1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624, |
|||
381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189, |
|||
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, #last 512 |
|||
) |
|||
|
@ -0,0 +1,46 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is mozilla.org code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .mbcharsetprober import MultiByteCharSetProber |
|||
from .codingstatemachine import CodingStateMachine |
|||
from .chardistribution import GB2312DistributionAnalysis |
|||
from .mbcssm import GB2312_SM_MODEL |
|||
|
|||
class GB2312Prober(MultiByteCharSetProber):
    """Multi-byte charset prober that reports the "GB2312" charset.

    Plugs the GB2312 coding state machine model and the GB2312 character
    distribution analysis into the generic multi-byte prober.
    """

    def __init__(self):
        super(GB2312Prober, self).__init__()
        # Install the GB2312-specific components, then start from a clean
        # state via reset().
        state_machine = CodingStateMachine(GB2312_SM_MODEL)
        self.coding_sm = state_machine
        analyzer = GB2312DistributionAnalysis()
        self.distribution_analyzer = analyzer
        self.reset()

    @property
    def charset_name(self):
        """Name of the charset reported by this prober."""
        return "GB2312"

    @property
    def language(self):
        """Language associated with this prober."""
        return "Chinese"
@ -0,0 +1,292 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Universal charset detector code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Shy Shalom |
|||
# Portions created by the Initial Developer are Copyright (C) 2005 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .charsetprober import CharSetProber |
|||
from .enums import ProbingState |
|||
|
|||
# This prober doesn't actually recognize a language or a charset. |
|||
# It is a helper prober for the use of the Hebrew model probers |
|||
|
|||
### General ideas of the Hebrew charset recognition ### |
|||
# |
|||
# Four main charsets exist in Hebrew: |
|||
# "ISO-8859-8" - Visual Hebrew |
|||
# "windows-1255" - Logical Hebrew |
|||
# "ISO-8859-8-I" - Logical Hebrew |
|||
# "x-mac-hebrew" - ?? Logical Hebrew ?? |
|||
# |
|||
# Both "ISO" charsets use a completely identical set of code points, whereas |
|||
# "windows-1255" and "x-mac-hebrew" are two different proper supersets of |
|||
# these code points. windows-1255 defines additional characters in the range |
|||
# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific |
|||
# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6. |
|||
# x-mac-hebrew defines similar additional code points but with a different |
|||
# mapping. |
|||
# |
|||
# As far as an average Hebrew text with no diacritics is concerned, all four |
|||
# charsets are identical with respect to code points. Meaning that for the |
|||
# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters |
|||
# (including final letters). |
|||
# |
|||
# The dominant difference between these charsets is their directionality. |
|||
# "Visual" directionality means that the text is ordered as if the renderer is |
|||
# not aware of a BIDI rendering algorithm. The renderer sees the text and |
|||
# draws it from left to right. The text itself when ordered naturally is read |
|||
# backwards. A buffer of Visual Hebrew generally looks like so: |
|||
# "[last word of first line spelled backwards] [whole line ordered backwards |
|||
# and spelled backwards] [first word of first line spelled backwards] |
|||
# [end of line] [last word of second line] ... etc' " |
|||
# adding punctuation marks, numbers and English text to visual text is |
|||
# naturally also "visual" and from left to right. |
|||
# |
|||
# "Logical" directionality means the text is ordered "naturally" according to |
|||
# the order it is read. It is the responsibility of the renderer to display |
|||
# the text from right to left. A BIDI algorithm is used to place general |
|||
# punctuation marks, numbers and English text in the text. |
|||
# |
|||
# Texts in x-mac-hebrew are almost impossible to find on the Internet. From |
|||
# what little evidence I could find, it seems that its general directionality |
|||
# is Logical. |
|||
# |
|||
# To sum up all of the above, the Hebrew probing mechanism knows about two |
|||
# charsets: |
|||
# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are |
|||
# backwards while line order is natural. For charset recognition purposes |
|||
# the line order is unimportant (In fact, for this implementation, even |
|||
# word order is unimportant). |
|||
# Logical Hebrew - "windows-1255" - normal, naturally ordered text. |
|||
# |
|||
# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be |
|||
# specifically identified. |
|||
# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew |
|||
# that contains special punctuation marks or diacritics is displayed with |
|||
# some unconverted characters showing as question marks. This problem might |
|||
# be corrected using another model prober for x-mac-hebrew. Due to the fact |
|||
# that x-mac-hebrew texts are so rare, writing another model prober isn't |
|||
# worth the effort and performance hit. |
|||
# |
|||
#### The Prober #### |
|||
# |
|||
# The prober is divided between two SBCharSetProbers and a HebrewProber, |
|||
# all of which are managed, created, fed data, inquired and deleted by the |
|||
# SBCSGroupProber. The two SBCharSetProbers identify that the text is in |
|||
# fact some kind of Hebrew, Logical or Visual. The final decision about which |
|||
# one it is is made by the HebrewProber by combining final-letter scores |
|||
# with the scores of the two SBCharSetProbers to produce a final answer. |
|||
# |
|||
# The SBCSGroupProber is responsible for stripping the original text of HTML |
|||
# tags, English characters, numbers, low-ASCII punctuation characters, spaces |
|||
# and new lines. It reduces any sequence of such characters to a single space. |
|||
# The buffer fed to each prober in the SBCS group prober is pure text in |
|||
# high-ASCII. |
|||
# The two SBCharSetProbers (model probers) share the same language model: |
|||
# Win1255Model. |
|||
# The first SBCharSetProber uses the model normally as any other |
|||
# SBCharSetProber does, to recognize windows-1255, upon which this model was |
|||
# built. The second SBCharSetProber is told to make the pair-of-letter |
|||
# lookup in the language model backwards. This in practice exactly simulates |
|||
# a visual Hebrew model using the windows-1255 logical Hebrew model. |
|||
# |
|||
# The HebrewProber is not using any language model. All it does is look for |
|||
# final-letter evidence suggesting the text is either logical Hebrew or visual |
|||
# Hebrew. Disjointed from the model probers, the results of the HebrewProber |
|||
# alone are meaningless. HebrewProber always returns 0.00 as confidence |
|||
# since it never identifies a charset by itself. Instead, the pointer to the |
|||
# HebrewProber is passed to the model probers as a helper "Name Prober". |
|||
# When the Group prober receives a positive identification from any prober, |
|||
# it asks for the name of the charset identified. If the prober queried is a |
|||
# Hebrew model prober, the model prober forwards the call to the |
|||
# HebrewProber to make the final decision. In the HebrewProber, the |
|||
# decision is made according to the final-letter scores maintained and both |
|||
# model probers' scores. The answer is returned in the form of the name of the |
|||
# charset identified, either "windows-1255" or "ISO-8859-8". |
|||
|
|||
class HebrewProber(CharSetProber):
    """Helper prober that decides between Logical and Visual Hebrew.

    It keeps two counters of final-letter evidence (logical vs. visual) that
    are updated by feed(), and combines them with the confidences of the two
    model probers registered through set_model_probers() to pick the charset
    name returned by the charset_name property.
    """

    # Byte value of the word delimiter. The buffer is walked as integer byte
    # values, so the delimiter must be an integer too; comparing an element
    # with the text literal ' ' can never agree with the integer letter
    # constants below.
    SPACE = 0x20

    # windows-1255 / ISO-8859-8 code points of interest
    FINAL_KAF = 0xea
    NORMAL_KAF = 0xeb
    FINAL_MEM = 0xed
    NORMAL_MEM = 0xee
    FINAL_NUN = 0xef
    NORMAL_NUN = 0xf0
    FINAL_PE = 0xf3
    NORMAL_PE = 0xf4
    FINAL_TSADI = 0xf5
    NORMAL_TSADI = 0xf6

    # Minimum Visual vs Logical final letter score difference.
    # If the difference is below this, don't rely solely on the final letter
    # score distance.
    MIN_FINAL_CHAR_DISTANCE = 5

    # Minimum Visual vs Logical model score difference.
    # If the difference is below this, don't rely at all on the model score
    # distance.
    MIN_MODEL_DISTANCE = 0.01

    VISUAL_HEBREW_NAME = "ISO-8859-8"
    LOGICAL_HEBREW_NAME = "windows-1255"

    def __init__(self):
        super(HebrewProber, self).__init__()
        self._final_char_logical_score = None
        self._final_char_visual_score = None
        self._prev = None
        self._before_prev = None
        self._logical_prober = None
        self._visual_prober = None
        self.reset()

    def reset(self):
        """Clear the final-letter scores and the two-character history."""
        self._final_char_logical_score = 0
        self._final_char_visual_score = 0
        # The two last characters seen in the previous buffer are initialized
        # to space in order to simulate a word delimiter at the beginning of
        # the data. They are stored as integer byte values, like every
        # element feed() later records.
        self._prev = self.SPACE
        self._before_prev = self.SPACE

    def set_model_probers(self, logicalProber, visualProber):
        """Register the logical and visual model probers.

        These probers are owned by the group prober; this object only keeps
        references to them for charset_name and state.
        """
        self._logical_prober = logicalProber
        self._visual_prober = visualProber

    def is_final(self, c):
        """Return True if byte value ``c`` is one of the final-form letters."""
        return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN,
                     self.FINAL_PE, self.FINAL_TSADI]

    def is_non_final(self, c):
        """Return True if byte value ``c`` is a usable non-final letter."""
        # The normal Tsadi is not a good Non-Final letter due to words like
        # 'lechotet' (to chat) containing an apostrophe after the tsadi. This
        # apostrophe is converted to a space in FilterWithoutEnglishLetters
        # causing the Non-Final tsadi to appear at an end of a word even
        # though this is not the case in the original text.
        # The letters Pe and Kaf rarely display a related behavior of not
        # being a good Non-Final letter. Words like 'Pop', 'Winamp' and
        # 'Mubarak' for example legally end with a Non-Final Pe or Kaf.
        # However, the benefit of these letters as Non-Final letters
        # outweighs the damage since these words are quite rare.
        return c in [self.NORMAL_KAF, self.NORMAL_MEM,
                     self.NORMAL_NUN, self.NORMAL_PE]

    def feed(self, byte_str):
        """Accumulate final-letter evidence from ``byte_str``.

        Returns ProbingState.NOT_ME once both model probers have given up,
        otherwise ProbingState.DETECTING.
        """
        # Final letter analysis for logical-visual decision.
        # Look for evidence that the received buffer is either logical Hebrew
        # or visual Hebrew.
        # The following cases are checked:
        # 1) A word longer than 1 letter, ending with a final letter. This is
        #    an indication that the text is laid out "naturally" since the
        #    final letter really appears at the end. +1 for logical score.
        # 2) A word longer than 1 letter, ending with a Non-Final letter. In
        #    normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
        #    should not end with the Non-Final form of that letter.
        #    Exceptions to this rule are mentioned in is_non_final(). This is
        #    an indication that the text is laid out backwards. +1 for visual
        #    score.
        # 3) A word longer than 1 letter, starting with a final letter. Final
        #    letters should not appear at the beginning of a word. This is an
        #    indication that the text is laid out backwards. +1 for visual
        #    score.
        #
        # The visual score and logical score are accumulated throughout the
        # text and are finally checked against each other in charset_name.
        # No checking for final letters in the middle of words is done since
        # that case is not an indication for either Logical or Visual text.
        #
        # We automatically filter out all 7-bit characters (replace them with
        # spaces) so the word boundary detection works properly. [MAP]

        if self.state == ProbingState.NOT_ME:
            # Both model probers say it's not them. No reason to continue.
            return ProbingState.NOT_ME

        byte_str = self.filter_high_byte_only(byte_str)

        # Walk the buffer as integer byte values so that both the delimiter
        # test and the letter tests operate on the same type.
        # NOTE(review): assumes filter_high_byte_only() returns a bytes-like
        # object (bytes/bytearray); confirm against CharSetProber.
        for cur in bytearray(byte_str):
            if cur == self.SPACE:
                # We stand on a space - a word just ended
                if self._before_prev != self.SPACE:
                    # next-to-last char was not a space so self._prev is not
                    # a 1 letter word
                    if self.is_final(self._prev):
                        # case (1) [-2:not space][-1:final letter][cur:space]
                        self._final_char_logical_score += 1
                    elif self.is_non_final(self._prev):
                        # case (2) [-2:not space][-1:Non-Final letter]
                        #          [cur:space]
                        self._final_char_visual_score += 1
            elif (self._before_prev == self.SPACE
                  and self.is_final(self._prev)):
                # Not standing on a space.
                # case (3) [-2:space][-1:final letter][cur:not space]
                self._final_char_visual_score += 1
            self._before_prev = self._prev
            self._prev = cur

        # Forever detecting, till the end or until both model probers return
        # ProbingState.NOT_ME (handled above)
        return ProbingState.DETECTING

    @property
    def charset_name(self):
        """Return "windows-1255" (logical) or "ISO-8859-8" (visual)."""
        # Make the decision: is it Logical or Visual?
        # If the final letter score distance is dominant enough, rely on it.
        finalsub = (self._final_char_logical_score
                    - self._final_char_visual_score)
        if finalsub >= self.MIN_FINAL_CHAR_DISTANCE:
            return self.LOGICAL_HEBREW_NAME
        if finalsub <= -self.MIN_FINAL_CHAR_DISTANCE:
            return self.VISUAL_HEBREW_NAME

        # It's not dominant enough, try to rely on the model scores instead.
        modelsub = (self._logical_prober.get_confidence()
                    - self._visual_prober.get_confidence())
        if modelsub > self.MIN_MODEL_DISTANCE:
            return self.LOGICAL_HEBREW_NAME
        if modelsub < -self.MIN_MODEL_DISTANCE:
            return self.VISUAL_HEBREW_NAME

        # Still no good, back to final letter distance, maybe it'll save the
        # day.
        if finalsub < 0.0:
            return self.VISUAL_HEBREW_NAME

        # (finalsub > 0 - Logical) or (don't know what to do) default to
        # Logical.
        return self.LOGICAL_HEBREW_NAME

    @property
    def language(self):
        """Language associated with this prober."""
        return 'Hebrew'

    @property
    def state(self):
        """NOT_ME once both model probers report NOT_ME, else DETECTING."""
        # Remain active as long as any of the model probers are active.
        if (self._logical_prober.state == ProbingState.NOT_ME) and \
                (self._visual_prober.state == ProbingState.NOT_ME):
            return ProbingState.NOT_ME
        return ProbingState.DETECTING
@ -0,0 +1,325 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
# Sampled from about 20M of text materials, including literature and computer technology |
|||
# |
|||
# Japanese frequency table, applied to both S-JIS and EUC-JP |
|||
# They are sorted in order. |
|||
|
|||
# 128 --> 0.77094 |
|||
# 256 --> 0.85710 |
|||
# 512 --> 0.92635 |
|||
# 1024 --> 0.97130 |
|||
# 2048 --> 0.99431 |
|||
# |
|||
# Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58 |
|||
# Random Distribution Ratio = 512 / (2965+62+83+86-512) = 0.191 |
|||
# |
|||
# Typical Distribution Ratio, 25% of IDR |
|||
|
|||
JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0 |
|||
|
|||
# Char to FreqOrder table |
|||
JIS_TABLE_SIZE = 4368 |
|||
|
|||
JIS_CHAR_TO_FREQ_ORDER = ( |
|||
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16 |
|||
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32 |
|||
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48 |
|||
2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, # 64 |
|||
2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, # 80 |
|||
5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, # 96 |
|||
1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, # 112 |
|||
5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, # 128 |
|||
5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, # 144 |
|||
5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, # 160 |
|||
5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, # 176 |
|||
5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, # 192 |
|||
5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, # 208 |
|||
1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, # 224 |
|||
1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, # 240 |
|||
1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, # 256 |
|||
2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, # 272 |
|||
3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, # 288 |
|||
3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, # 304 |
|||
4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, # 320 |
|||
12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, # 336 |
|||
1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, # 352 |
|||
109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, # 368 |
|||
5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, # 384 |
|||
271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, # 400 |
|||
32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, # 416 |
|||
43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, # 432 |
|||
280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, # 448 |
|||
54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, # 464 |
|||
5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, # 480 |
|||
5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, # 496 |
|||
5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, # 512 |
|||
4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, # 528 |
|||
5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, # 544 |
|||
5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, # 560 |
|||
5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, # 576 |
|||
5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, # 592 |
|||
5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, # 608 |
|||
5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, # 624 |
|||
5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, # 640 |
|||
5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, # 656 |
|||
5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, # 672 |
|||
3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, # 688 |
|||
5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, # 704 |
|||
5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, # 720 |
|||
5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, # 736 |
|||
5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, # 752 |
|||
5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, # 768 |
|||
5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, # 784 |
|||
5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, # 800 |
|||
5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, # 816 |
|||
5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, # 832 |
|||
5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, # 848 |
|||
5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, # 864 |
|||
5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, # 880 |
|||
5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, # 896 |
|||
5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, # 912 |
|||
5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, # 928 |
|||
5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, # 944 |
|||
5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, # 960 |
|||
5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, # 976 |
|||
5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, # 992 |
|||
5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, # 1008 |
|||
5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, # 1024 |
|||
5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, # 1040 |
|||
5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, # 1056 |
|||
5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, # 1072 |
|||
5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, # 1088 |
|||
5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, # 1104 |
|||
5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, # 1120 |
|||
5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, # 1136 |
|||
5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, # 1152 |
|||
5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, # 1168 |
|||
5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, # 1184 |
|||
5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, # 1200 |
|||
5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, # 1216 |
|||
5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, # 1232 |
|||
5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, # 1248 |
|||
5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, # 1264 |
|||
5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, # 1280 |
|||
5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, # 1296 |
|||
6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, # 1312 |
|||
6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, # 1328 |
|||
6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, # 1344 |
|||
6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, # 1360 |
|||
6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, # 1376 |
|||
6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, # 1392 |
|||
6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, # 1408 |
|||
6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, # 1424 |
|||
4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, # 1440 |
|||
854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, # 1456 |
|||
665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, # 1472 |
|||
1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, # 1488 |
|||
1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, # 1504 |
|||
896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, # 1520 |
|||
3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, # 1536 |
|||
3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, # 1552 |
|||
804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, # 1568 |
|||
3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, # 1584 |
|||
3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, # 1600 |
|||
586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, # 1616 |
|||
2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, # 1632 |
|||
277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, # 1648 |
|||
3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, # 1664 |
|||
1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, # 1680 |
|||
380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, # 1696 |
|||
1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, # 1712 |
|||
850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, # 1728 |
|||
2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, # 1744 |
|||
2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, # 1760 |
|||
2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, # 1776 |
|||
2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, # 1792 |
|||
1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, # 1808 |
|||
1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, # 1824 |
|||
1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, # 1840 |
|||
1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, # 1856 |
|||
2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, # 1872 |
|||
1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, # 1888 |
|||
2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, # 1904 |
|||
1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, # 1920 |
|||
1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, # 1936 |
|||
1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, # 1952 |
|||
1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, # 1968 |
|||
1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, # 1984 |
|||
1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, # 2000 |
|||
606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, # 2016 |
|||
684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, # 2032 |
|||
1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, # 2048 |
|||
2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, # 2064 |
|||
2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, # 2080 |
|||
2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, # 2096 |
|||
3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, # 2112 |
|||
3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, # 2128 |
|||
884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, # 2144 |
|||
3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, # 2160 |
|||
1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, # 2176 |
|||
861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, # 2192 |
|||
2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, # 2208 |
|||
1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, # 2224 |
|||
576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, # 2240 |
|||
3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, # 2256 |
|||
4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, # 2272 |
|||
2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, # 2288 |
|||
1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, # 2304 |
|||
2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, # 2320 |
|||
1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, # 2336 |
|||
385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, # 2352 |
|||
178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, # 2368 |
|||
1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, # 2384 |
|||
2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, # 2400 |
|||
2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, # 2416 |
|||
2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, # 2432 |
|||
3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, # 2448 |
|||
1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, # 2464 |
|||
2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, # 2480 |
|||
359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, # 2496 |
|||
837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, # 2512 |
|||
855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, # 2528 |
|||
1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, # 2544 |
|||
2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, # 2560 |
|||
633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, # 2576 |
|||
1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, # 2592 |
|||
1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, # 2608 |
|||
353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, # 2624 |
|||
1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, # 2640 |
|||
1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, # 2656 |
|||
1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, # 2672 |
|||
764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, # 2688 |
|||
2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, # 2704 |
|||
278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, # 2720 |
|||
2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, # 2736 |
|||
3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, # 2752 |
|||
2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, # 2768 |
|||
1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, # 2784 |
|||
6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, # 2800 |
|||
1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, # 2816 |
|||
2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, # 2832 |
|||
1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, # 2848 |
|||
470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, # 2864 |
|||
72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, # 2880 |
|||
3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, # 2896 |
|||
3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, # 2912 |
|||
1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, # 2928 |
|||
1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, # 2944 |
|||
1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, # 2960 |
|||
1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, # 2976 |
|||
123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, # 2992 |
|||
913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, # 3008 |
|||
2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, # 3024 |
|||
900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, # 3040 |
|||
3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, # 3056 |
|||
2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, # 3072 |
|||
423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, # 3088 |
|||
1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, # 3104 |
|||
2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, # 3120 |
|||
220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, # 3136 |
|||
1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, # 3152 |
|||
745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, # 3168 |
|||
4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, # 3184 |
|||
2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, # 3200 |
|||
1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, # 3216 |
|||
666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, # 3232 |
|||
1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, # 3248 |
|||
2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, # 3264 |
|||
376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, # 3280 |
|||
6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, # 3296 |
|||
1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, # 3312 |
|||
1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, # 3328 |
|||
2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, # 3344 |
|||
3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, # 3360 |
|||
914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, # 3376 |
|||
3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, # 3392 |
|||
1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, # 3408 |
|||
674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, # 3424 |
|||
1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, # 3440 |
|||
199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, # 3456 |
|||
3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, # 3472 |
|||
370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, # 3488 |
|||
2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, # 3504 |
|||
414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, # 3520 |
|||
4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, # 3536 |
|||
2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, # 3552 |
|||
1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, # 3568 |
|||
1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, # 3584 |
|||
1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, # 3600 |
|||
166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, # 3616 |
|||
1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, # 3632 |
|||
3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, # 3648 |
|||
1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, # 3664 |
|||
3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, # 3680 |
|||
264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, # 3696 |
|||
543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, # 3712 |
|||
983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, # 3728 |
|||
2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, # 3744 |
|||
1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, # 3760 |
|||
867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, # 3776 |
|||
1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 904,3618,3537, # 3792 |
|||
894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, # 3808 |
|||
1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, # 3824 |
|||
530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, # 3840 |
|||
839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, # 3856 |
|||
480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, # 3872 |
|||
1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, # 3888 |
|||
1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, # 3904 |
|||
2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, # 3920 |
|||
4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, # 3936 |
|||
227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, # 3952 |
|||
1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, # 3968 |
|||
328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, # 3984 |
|||
1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, # 4000 |
|||
3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, # 4016 |
|||
1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, # 4032 |
|||
2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, # 4048 |
|||
2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, # 4064 |
|||
1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, # 4080 |
|||
1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, # 4096 |
|||
2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, # 4112 |
|||
455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, # 4128 |
|||
2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, # 4144 |
|||
1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, # 4160 |
|||
1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, # 4176 |
|||
1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, # 4192 |
|||
1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, # 4208 |
|||
3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, # 4224 |
|||
2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, # 4240 |
|||
2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, # 4256 |
|||
575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, # 4272 |
|||
3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, # 4288 |
|||
3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, # 4304 |
|||
1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, # 4320 |
|||
2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336 |
|||
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352 |
|||
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512 |
|||
) |
|||
|
|||
|
@ -0,0 +1,233 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
|
|||
# This is the hiragana 2-char sequence table; the number in each cell represents its frequency category |
|||
jp2CharContext = ( |
|||
(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), |
|||
(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), |
|||
(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), |
|||
(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), |
|||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), |
|||
(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), |
|||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), |
|||
(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), |
|||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), |
|||
(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), |
|||
(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), |
|||
(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), |
|||
(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), |
|||
(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), |
|||
(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), |
|||
(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), |
|||
(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), |
|||
(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), |
|||
(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), |
|||
(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), |
|||
(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), |
|||
(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), |
|||
(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), |
|||
(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), |
|||
(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), |
|||
(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), |
|||
(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), |
|||
(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), |
|||
(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), |
|||
(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), |
|||
(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), |
|||
(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), |
|||
(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), |
|||
(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), |
|||
(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), |
|||
(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), |
|||
(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), |
|||
(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), |
|||
(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), |
|||
(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), |
|||
(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), |
|||
(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), |
|||
(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), |
|||
(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), |
|||
(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), |
|||
(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), |
|||
(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), |
|||
(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), |
|||
(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), |
|||
(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), |
|||
(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), |
|||
(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), |
|||
(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), |
|||
(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), |
|||
(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), |
|||
(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), |
|||
(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), |
|||
(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), |
|||
(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), |
|||
(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), |
|||
(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), |
|||
(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), |
|||
(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), |
|||
(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), |
|||
(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), |
|||
(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), |
|||
(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), |
|||
(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), |
|||
(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2), |
|||
(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2), |
|||
(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3), |
|||
(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3), |
|||
(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5), |
|||
(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3), |
|||
(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4), |
|||
(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4), |
|||
(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4), |
|||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), |
|||
(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3), |
|||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1), |
|||
(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2), |
|||
(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3), |
|||
(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1), |
|||
) |
|||
|
|||
class JapaneseContextAnalysis(object):
    """Hiragana 2-character context analyser shared by the Japanese encodings.

    Consecutive hiragana pairs are looked up in ``jp2CharContext`` and counted
    per frequency category; the confidence is the share of observed pairs that
    did not fall into category 0.  Subclasses supply the encoding-specific
    ``get_order`` that maps raw bytes to a hiragana order.
    """

    # Number of frequency categories stored in jp2CharContext.
    NUM_OF_CATEGORY = 6
    # Confidence value returned while too few pairs have been seen.
    DONT_KNOW = -1
    # got_enough_data() becomes true above this many pairs.
    ENOUGH_REL_THRESHOLD = 100
    # feed() stops analysing once more than this many pairs were seen.
    MAX_REL_THRESHOLD = 1000
    # get_confidence() needs more than this many pairs to give an answer.
    MINIMUM_DATA_THRESHOLD = 4

    def __init__(self):
        self._total_rel = None
        self._rel_sample = None
        self._need_to_skip_char_num = None
        self._last_char_order = None
        self._done = None
        self.reset()

    def reset(self):
        """Return the analyser to its initial, empty state."""
        self._total_rel = 0  # total sequence received
        # category counters, each integer counts sequence in its category
        self._rel_sample = [0] * self.NUM_OF_CATEGORY
        # if last byte in current buffer is not the last byte of a character,
        # we need to know how many bytes to skip in next buffer
        self._need_to_skip_char_num = 0
        self._last_char_order = -1  # The order of previous char
        # If this flag is set to True, detection is done and conclusion has
        # been made
        self._done = False

    def feed(self, byte_str, num_bytes):
        """Analyse the first ``num_bytes`` bytes of ``byte_str``.

        :param byte_str: buffer to analyse; it is sliced two bytes at a time
            and each slice is handed to ``get_order``.
        :param num_bytes: number of bytes of ``byte_str`` to consume.
        :returns: None.  Updates the pair counters in place and does nothing
            once a conclusion has already been reached.
        """
        if self._done:
            return

        # The buffer we got is byte oriented, and a character may span more
        # than one buffer. In case the last one or two bytes of the previous
        # buffer did not complete a character, we recorded how many bytes are
        # needed to complete it and skip those bytes here. We could record
        # those bytes and analyse the character once it is complete, but since
        # one character will not make much difference, simply skipping it
        # simplifies our logic and improves performance.
        i = self._need_to_skip_char_num
        # The pending skip is consumed by this buffer.  Clear it (or carry the
        # remainder forward when this buffer is shorter than the skip) so that
        # later buffers do not keep dropping their leading bytes.
        self._need_to_skip_char_num = max(i - num_bytes, 0)
        while i < num_bytes:
            order, char_len = self.get_order(byte_str[i:i + 2])
            i += char_len
            if i > num_bytes:
                # Character runs past this buffer: remember how many of its
                # bytes arrive with the next one and break the pair chain.
                self._need_to_skip_char_num = i - num_bytes
                self._last_char_order = -1
            else:
                if (order != -1) and (self._last_char_order != -1):
                    self._total_rel += 1
                    if self._total_rel > self.MAX_REL_THRESHOLD:
                        self._done = True
                        break
                    self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1
                self._last_char_order = order

    def got_enough_data(self):
        """Return True once more than ENOUGH_REL_THRESHOLD pairs were seen."""
        return self._total_rel > self.ENOUGH_REL_THRESHOLD

    def get_confidence(self):
        """Return the share of pairs outside category 0, or DONT_KNOW.

        DONT_KNOW is returned until more than MINIMUM_DATA_THRESHOLD pairs
        have been counted.
        """
        # This is just one way to calculate confidence. It works well for me.
        if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
            return (self._total_rel - self._rel_sample[0]) / self._total_rel
        else:
            return self.DONT_KNOW

    def get_order(self, byte_str):
        """Return ``(order, char_len)`` for the character at the start of
        ``byte_str``.

        The base implementation recognises nothing: order -1, length 1.
        """
        return -1, 1
|||
|
|||
class SJISContextAnalysis(JapaneseContextAnalysis):
    """Context analyser for Shift_JIS (and its CP932 superset) byte streams."""

    def __init__(self):
        super(SJISContextAnalysis, self).__init__()
        # Starts as plain Shift_JIS; get_order() switches it to "CP932" when
        # it sees one of the lead bytes it checks for.
        self._charset_name = "SHIFT_JIS"

    @property
    def charset_name(self):
        """Charset name inferred so far: "SHIFT_JIS" or "CP932"."""
        return self._charset_name

    def get_order(self, byte_str):
        """Return ``(order, char_len)`` for the character starting ``byte_str``.

        :param byte_str: up to two bytes beginning at a character boundary.
        :returns: ``order`` is the hiragana index (trail byte minus 0x9F) or
            -1 when the character is not hiragana; ``char_len`` is 2 for lead
            bytes 0x81-0x9F and 0xE0-0xFC, otherwise 1.  An empty input
            yields ``(-1, 1)``.
        """
        if not byte_str:
            return -1, 1
        # find out current char's byte length
        first_char = byte_str[0]
        if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC):
            char_len = 2
            if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
                self._charset_name = "CP932"
        else:
            char_len = 1

        # return its order if it is hiragana
        if len(byte_str) > 1:
            second_char = byte_str[1]
            # Shift_JIS hiragana are 0x829F-0x82F1.  The lead byte is 0x82
            # (octal 202); comparing against decimal 202 (0xCA) never matched
            # real hiragana.
            if (first_char == 0x82) and (0x9F <= second_char <= 0xF1):
                return second_char - 0x9F, char_len

        return -1, char_len
|||
|
|||
class EUCJPContextAnalysis(JapaneseContextAnalysis):
    """Context analyser whose ``get_order`` decodes EUC-JP style lead bytes."""

    def get_order(self, byte_str):
        """Return ``(order, char_len)`` for the character starting ``byte_str``.

        ``char_len`` is 3 for lead byte 0x8F, 2 for lead byte 0x8E or
        0xA1-0xFE, and 1 otherwise.  ``order`` is the trail byte minus 0xA1
        for hiragana (lead 0xA4, trail 0xA1-0xF3) and -1 for anything else,
        including an empty input, which yields ``(-1, 1)``.
        """
        if not byte_str:
            return -1, 1

        # Work out how many bytes the current character occupies.
        lead = byte_str[0]
        if lead == 0x8F:
            width = 3
        elif lead == 0x8E or 0xA1 <= lead <= 0xFE:
            width = 2
        else:
            width = 1

        # Without a trail byte there is nothing more to classify.
        if len(byte_str) < 2:
            return -1, width

        # Only hiragana contribute an order.
        trail = byte_str[1]
        if lead == 0xA4 and 0xA1 <= trail <= 0xF3:
            return trail - 0xA1, width
        return -1, width
|||
|
|||
|
@ -0,0 +1,228 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
# 255: Control characters that usually do not exist in any text |
|||
# 254: Carriage/Return |
|||
# 253: symbol (punctuation) that does not belong to word |
|||
# 252: 0 - 9 |
|||
|
|||
# Character Mapping Table: |
|||
# This table is modified based on win1251BulgarianCharToOrderMap, so |
|||
# only numbers <64 are sure to be valid. |
|||
|
|||
Latin5_BulgarianCharToOrderMap = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 |
|||
110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 |
|||
253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 |
|||
116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 |
|||
194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80 |
|||
210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90 |
|||
81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0 |
|||
31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0 |
|||
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0 |
|||
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0 |
|||
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0 |
|||
62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0 |
|||
) |
|||
|
|||
win1251BulgarianCharToOrderMap = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 |
|||
110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 |
|||
253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 |
|||
116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 |
|||
206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80 |
|||
221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90 |
|||
88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0 |
|||
73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0 |
|||
31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0 |
|||
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0 |
|||
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0 |
|||
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0 |
|||
) |
|||
|
|||
# Model Table: |
|||
# total sequences: 100% |
|||
# first 512 sequences: 96.9392% |
|||
# first 1024 sequences: 3.0618% |
|||
# rest sequences: 0.2992% |
|||
# negative sequences: 0.0020% |
|||
BulgarianLangModel = ( |
|||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2, |
|||
3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0, |
|||
0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0, |
|||
0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0, |
|||
1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0, |
|||
0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3, |
|||
2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1, |
|||
3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2, |
|||
1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0, |
|||
3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1, |
|||
1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0, |
|||
2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2, |
|||
2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0, |
|||
3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2, |
|||
1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0, |
|||
2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2, |
|||
2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0, |
|||
3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2, |
|||
1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0, |
|||
2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2, |
|||
2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0, |
|||
2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2, |
|||
1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0, |
|||
2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2, |
|||
1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0, |
|||
3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2, |
|||
1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0, |
|||
3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1, |
|||
1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0, |
|||
2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1, |
|||
1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0, |
|||
2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2, |
|||
1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0, |
|||
2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1, |
|||
1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0, |
|||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, |
|||
1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2, |
|||
1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1, |
|||
2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2, |
|||
1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, |
|||
2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2, |
|||
1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1, |
|||
0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2, |
|||
1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1, |
|||
1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0, |
|||
1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1, |
|||
0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1, |
|||
0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, |
|||
1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, |
|||
0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1, |
|||
1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, |
|||
1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0, |
|||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
) |
|||
|
|||
# Model description for Bulgarian text encoded as ISO-8859-5. It shares the
# precedence matrix with the windows-1251 Bulgarian model; only the
# byte-to-order map and the charset name differ.
Latin5BulgarianModel = {
  'char_to_order_map': Latin5_BulgarianCharToOrderMap,
  'precedence_matrix': BulgarianLangModel,
  # Matches the "first 512 sequences" figure quoted for the model table.
  'typical_positive_ratio': 0.969392,
  'keep_english_letter': False,
  'charset_name': "ISO-8859-5",
  # Was misspelled 'Bulgairan'; now agrees with Win1251BulgarianModel.
  'language': 'Bulgarian',
}
|||
|
|||
# Model description for Bulgarian text encoded as windows-1251.
Win1251BulgarianModel = dict(
    char_to_order_map=win1251BulgarianCharToOrderMap,
    precedence_matrix=BulgarianLangModel,
    typical_positive_ratio=0.969392,
    keep_english_letter=False,
    charset_name="windows-1251",
    language='Bulgarian',
)
@ -0,0 +1,333 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
# KOI8-R language model |
|||
# Character Mapping Table: |
|||
KOI8R_char_to_order_map = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 |
|||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 |
|||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 |
|||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 |
|||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80 |
|||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90 |
|||
223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0 |
|||
238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0 |
|||
27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0 |
|||
15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0 |
|||
59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0 |
|||
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0 |
|||
) |
|||
|
|||
win1251_char_to_order_map = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 |
|||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 |
|||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 |
|||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 |
|||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, |
|||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, |
|||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, |
|||
239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253, |
|||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, |
|||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, |
|||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, |
|||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, |
|||
) |
|||
|
|||
latin5_char_to_order_map = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 |
|||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 |
|||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 |
|||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 |
|||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, |
|||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, |
|||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, |
|||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, |
|||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, |
|||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, |
|||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, |
|||
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, |
|||
) |
|||
|
|||
macCyrillic_char_to_order_map = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 |
|||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 |
|||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 |
|||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 |
|||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, |
|||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, |
|||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, |
|||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, |
|||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, |
|||
239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16, |
|||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, |
|||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, |
|||
) |
|||
|
|||
IBM855_char_to_order_map = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 |
|||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 |
|||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 |
|||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 |
|||
191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205, |
|||
206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70, |
|||
3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219, |
|||
220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229, |
|||
230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243, |
|||
8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248, |
|||
43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, |
|||
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, |
|||
) |
|||
|
|||
IBM866_char_to_order_map = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 |
|||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 |
|||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 |
|||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 |
|||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, |
|||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, |
|||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, |
|||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, |
|||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, |
|||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, |
|||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, |
|||
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, |
|||
) |
|||
|
|||
# Model Table: |
|||
# total sequences: 100% |
|||
# first 512 sequences: 97.6601% |
|||
# first 1024 sequences: 2.3389% |
|||
# rest sequences: 0.1237% |
|||
# negative sequences: 0.0009% |
|||
RussianLangModel = ( |
|||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2, |
|||
3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0, |
|||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0, |
|||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, |
|||
0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, |
|||
0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0, |
|||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0, |
|||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0, |
|||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0, |
|||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0, |
|||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0, |
|||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0, |
|||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0, |
|||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0, |
|||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, |
|||
3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0, |
|||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0, |
|||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0, |
|||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0, |
|||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0, |
|||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0, |
|||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, |
|||
2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0, |
|||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0, |
|||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0, |
|||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0, |
|||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, |
|||
3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0, |
|||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1, |
|||
1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, |
|||
2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1, |
|||
1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0, |
|||
2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1, |
|||
1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0, |
|||
3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1, |
|||
1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0, |
|||
2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2, |
|||
1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1, |
|||
1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1, |
|||
1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, |
|||
2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1, |
|||
1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0, |
|||
3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2, |
|||
1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1, |
|||
2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1, |
|||
1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0, |
|||
2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0, |
|||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1, |
|||
1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0, |
|||
1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1, |
|||
1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0, |
|||
3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1, |
|||
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1, |
|||
3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1, |
|||
1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1, |
|||
1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1, |
|||
0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0, |
|||
2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1, |
|||
1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0, |
|||
1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, |
|||
0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1, |
|||
1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, |
|||
2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2, |
|||
2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1, |
|||
1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0, |
|||
1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0, |
|||
2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0, |
|||
1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1, |
|||
0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, |
|||
2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1, |
|||
1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, |
|||
1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0, |
|||
0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1, |
|||
0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1, |
|||
0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1, |
|||
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0, |
|||
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, |
|||
1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1, |
|||
0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0, |
|||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0, |
|||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0, |
|||
0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, |
|||
) |
|||
|
|||
# Model description for Russian text encoded as KOI8-R.
Koi8rModel = dict(
    char_to_order_map=KOI8R_char_to_order_map,
    precedence_matrix=RussianLangModel,
    typical_positive_ratio=0.976601,
    keep_english_letter=False,
    charset_name="KOI8-R",
    language='Russian',
)
|||
|
|||
# Model description for Russian text encoded as windows-1251.
Win1251CyrillicModel = dict(
    char_to_order_map=win1251_char_to_order_map,
    precedence_matrix=RussianLangModel,
    typical_positive_ratio=0.976601,
    keep_english_letter=False,
    charset_name="windows-1251",
    language='Russian',
)
|||
|
|||
# Model description for Russian text encoded as ISO-8859-5.
Latin5CyrillicModel = dict(
    char_to_order_map=latin5_char_to_order_map,
    precedence_matrix=RussianLangModel,
    typical_positive_ratio=0.976601,
    keep_english_letter=False,
    charset_name="ISO-8859-5",
    language='Russian',
)
|||
|
|||
# Model description for Russian text encoded as MacCyrillic.
MacCyrillicModel = dict(
    char_to_order_map=macCyrillic_char_to_order_map,
    precedence_matrix=RussianLangModel,
    typical_positive_ratio=0.976601,
    keep_english_letter=False,
    charset_name="MacCyrillic",
    language='Russian',
)
|||
|
|||
# Model description for Russian text encoded as IBM866.
Ibm866Model = dict(
    char_to_order_map=IBM866_char_to_order_map,
    precedence_matrix=RussianLangModel,
    typical_positive_ratio=0.976601,
    keep_english_letter=False,
    charset_name="IBM866",
    language='Russian',
)
|||
|
|||
# Model description for Russian text encoded as IBM855.
Ibm855Model = dict(
    char_to_order_map=IBM855_char_to_order_map,
    precedence_matrix=RussianLangModel,
    typical_positive_ratio=0.976601,
    keep_english_letter=False,
    charset_name="IBM855",
    language='Russian',
)
@ -0,0 +1,225 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
# 255: Control characters that usually does not exist in any text |
|||
# 254: Carriage/Return |
|||
# 253: symbol (punctuation) that does not belong to word |
|||
# 252: 0 - 9 |
|||
|
|||
# Character Mapping Table: |
|||
Latin7_char_to_order_map = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 |
|||
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 |
|||
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 |
|||
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 |
|||
253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 |
|||
253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0 |
|||
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 |
|||
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 |
|||
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 |
|||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 |
|||
) |
|||
|
|||
win1253_char_to_order_map = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 |
|||
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 |
|||
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 |
|||
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 |
|||
253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 |
|||
253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0 |
|||
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 |
|||
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 |
|||
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 |
|||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 |
|||
) |
|||
|
|||
# Model Table: |
|||
# total sequences: 100% |
|||
# first 512 sequences: 98.2851% |
|||
# first 1024 sequences:1.7001% |
|||
# rest sequences: 0.0359% |
|||
# negative sequences: 0.0148% |
|||
GreekLangModel = ( |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0, |
|||
3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0, |
|||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, |
|||
0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0, |
|||
2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0, |
|||
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0, |
|||
2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, |
|||
0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0, |
|||
2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0, |
|||
0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0, |
|||
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0, |
|||
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0, |
|||
3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0, |
|||
3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0, |
|||
2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0, |
|||
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0, |
|||
0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0, |
|||
0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0, |
|||
0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2, |
|||
0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0, |
|||
0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2, |
|||
0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0, |
|||
0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2, |
|||
0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2, |
|||
0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0, |
|||
0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2, |
|||
0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0, |
|||
0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, |
|||
0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0, |
|||
0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, |
|||
0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0, |
|||
0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2, |
|||
0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2, |
|||
0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0, |
|||
0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2, |
|||
0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0, |
|||
0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2, |
|||
0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0, |
|||
0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1, |
|||
0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, |
|||
0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2, |
|||
0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2, |
|||
0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2, |
|||
0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, |
|||
0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, |
|||
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1, |
|||
0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, |
|||
0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0, |
|||
0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0, |
|||
0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
) |
|||
|
|||
Latin7GreekModel = { |
|||
'char_to_order_map': Latin7_char_to_order_map, |
|||
'precedence_matrix': GreekLangModel, |
|||
'typical_positive_ratio': 0.982851, |
|||
'keep_english_letter': False, |
|||
'charset_name': "ISO-8859-7", |
|||
'language': 'Greek', |
|||
} |
|||
|
|||
Win1253GreekModel = { |
|||
'char_to_order_map': win1253_char_to_order_map, |
|||
'precedence_matrix': GreekLangModel, |
|||
'typical_positive_ratio': 0.982851, |
|||
'keep_english_letter': False, |
|||
'charset_name': "windows-1253", |
|||
'language': 'Greek', |
|||
} |
@ -0,0 +1,200 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Universal charset detector code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Simon Montagu |
|||
# Portions created by the Initial Developer are Copyright (C) 2005 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# Shy Shalom - original C code |
|||
# Shoshannah Forbes - original C code (?) |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
# 255: Control characters that usually does not exist in any text |
|||
# 254: Carriage/Return |
|||
# 253: symbol (punctuation) that does not belong to word |
|||
# 252: 0 - 9 |
|||
|
|||
# Windows-1255 language model |
|||
# Character Mapping Table: |
|||
WIN1255_CHAR_TO_ORDER_MAP = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40 |
|||
78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50 |
|||
253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60 |
|||
66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70 |
|||
124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214, |
|||
215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221, |
|||
34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227, |
|||
106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234, |
|||
30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237, |
|||
238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250, |
|||
9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23, |
|||
12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253, |
|||
) |
|||
|
|||
# Model Table: |
|||
# total sequences: 100% |
|||
# first 512 sequences: 98.4004% |
|||
# first 1024 sequences: 1.5981% |
|||
# rest sequences: 0.087% |
|||
# negative sequences: 0.0015% |
|||
HEBREW_LANG_MODEL = ( |
|||
0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0, |
|||
3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2, |
|||
1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2, |
|||
1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3, |
|||
1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2, |
|||
1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2, |
|||
1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2, |
|||
0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2, |
|||
0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2, |
|||
1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0, |
|||
3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2, |
|||
0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1, |
|||
0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, |
|||
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0, |
|||
0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2, |
|||
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2, |
|||
0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2, |
|||
0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2, |
|||
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2, |
|||
0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2, |
|||
0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1, |
|||
0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2, |
|||
0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0, |
|||
3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2, |
|||
0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2, |
|||
0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2, |
|||
0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, |
|||
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0, |
|||
1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2, |
|||
0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, |
|||
3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0, |
|||
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, |
|||
3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3, |
|||
0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, |
|||
2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0, |
|||
0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0, |
|||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, |
|||
0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0, |
|||
0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, |
|||
0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0, |
|||
2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0, |
|||
0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1, |
|||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1, |
|||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2, |
|||
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, |
|||
3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0, |
|||
0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1, |
|||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1, |
|||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1, |
|||
1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1, |
|||
0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, |
|||
2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1, |
|||
1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1, |
|||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1, |
|||
2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, |
|||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1, |
|||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1, |
|||
2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1, |
|||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, |
|||
0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1, |
|||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1, |
|||
0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0, |
|||
) |
|||
|
|||
Win1255HebrewModel = { |
|||
'char_to_order_map': WIN1255_CHAR_TO_ORDER_MAP, |
|||
'precedence_matrix': HEBREW_LANG_MODEL, |
|||
'typical_positive_ratio': 0.984004, |
|||
'keep_english_letter': False, |
|||
'charset_name': "windows-1255", |
|||
'language': 'Hebrew', |
|||
} |
@ -0,0 +1,225 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
# 255: Control characters that usually does not exist in any text |
|||
# 254: Carriage/Return |
|||
# 253: symbol (punctuation) that does not belong to word |
|||
# 252: 0 - 9 |
|||
|
|||
# Character Mapping Table: |
|||
Latin2_HungarianCharToOrderMap = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, |
|||
46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, |
|||
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, |
|||
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, |
|||
159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174, |
|||
175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190, |
|||
191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205, |
|||
79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, |
|||
221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231, |
|||
232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241, |
|||
82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85, |
|||
245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253, |
|||
) |
|||
|
|||
win1250HungarianCharToOrderMap = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, |
|||
46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, |
|||
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, |
|||
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, |
|||
161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176, |
|||
177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190, |
|||
191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205, |
|||
81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, |
|||
221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231, |
|||
232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241, |
|||
84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87, |
|||
245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253, |
|||
) |
|||
|
|||
# Model Table: |
|||
# total sequences: 100% |
|||
# first 512 sequences: 94.7368% |
|||
# first 1024 sequences:5.2623% |
|||
# rest sequences: 0.8894% |
|||
# negative sequences: 0.0009% |
|||
HungarianLangModel = ( |
|||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, |
|||
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2, |
|||
3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, |
|||
3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3, |
|||
0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2, |
|||
0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0, |
|||
3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, |
|||
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2, |
|||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0, |
|||
2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1, |
|||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, |
|||
3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0, |
|||
1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0, |
|||
1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0, |
|||
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1, |
|||
3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1, |
|||
2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1, |
|||
2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1, |
|||
2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1, |
|||
2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0, |
|||
2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, |
|||
3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1, |
|||
2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1, |
|||
2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1, |
|||
2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1, |
|||
1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1, |
|||
1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1, |
|||
3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0, |
|||
1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1, |
|||
1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1, |
|||
2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1, |
|||
2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0, |
|||
2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1, |
|||
3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1, |
|||
2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1, |
|||
1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0, |
|||
1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0, |
|||
2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1, |
|||
2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1, |
|||
1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, |
|||
1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1, |
|||
2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0, |
|||
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0, |
|||
1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0, |
|||
2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1, |
|||
2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1, |
|||
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, |
|||
1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1, |
|||
1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1, |
|||
1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0, |
|||
0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0, |
|||
2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1, |
|||
2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1, |
|||
1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1, |
|||
2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, |
|||
1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, |
|||
1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0, |
|||
2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0, |
|||
2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1, |
|||
2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, |
|||
1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0, |
|||
2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0, |
|||
0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, |
|||
1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0, |
|||
0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0, |
|||
1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, |
|||
0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, |
|||
2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0, |
|||
) |
|||
|
|||
Latin2HungarianModel = { |
|||
'char_to_order_map': Latin2_HungarianCharToOrderMap, |
|||
'precedence_matrix': HungarianLangModel, |
|||
'typical_positive_ratio': 0.947368, |
|||
'keep_english_letter': True, |
|||
'charset_name': "ISO-8859-2", |
|||
'language': 'Hungarian', |
|||
} |
|||
|
|||
Win1250HungarianModel = { |
|||
'char_to_order_map': win1250HungarianCharToOrderMap, |
|||
'precedence_matrix': HungarianLangModel, |
|||
'typical_positive_ratio': 0.947368, |
|||
'keep_english_letter': True, |
|||
'charset_name': "windows-1250", |
|||
'language': 'Hungarian', |
|||
} |
@ -0,0 +1,199 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
# 255: Control characters that usually does not exist in any text |
|||
# 254: Carriage/Return |
|||
# 253: symbol (punctuation) that does not belong to word |
|||
# 252: 0 - 9 |
|||
|
|||
# The following result for thai was collected from a limited sample (1M). |
|||
|
|||
# Character Mapping Table: |
|||
TIS620CharToOrderMap = ( |
|||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 |
|||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 |
|||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 |
|||
253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40 |
|||
188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50 |
|||
253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60 |
|||
96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70 |
|||
209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222, |
|||
223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235, |
|||
236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57, |
|||
49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54, |
|||
45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63, |
|||
22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244, |
|||
11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247, |
|||
68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253, |
|||
) |
|||
|
|||
# Model Table: |
|||
# total sequences: 100% |
|||
# first 512 sequences: 92.6386% |
|||
# first 1024 sequences:7.3177% |
|||
# rest sequences: 1.0230% |
|||
# negative sequences: 0.0436% |
|||
ThaiLangModel = ( |
|||
0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3, |
|||
0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2, |
|||
3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3, |
|||
0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1, |
|||
3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2, |
|||
3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1, |
|||
3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2, |
|||
3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1, |
|||
3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1, |
|||
3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1, |
|||
2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1, |
|||
3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1, |
|||
0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0, |
|||
3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1, |
|||
0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, |
|||
3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2, |
|||
1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0, |
|||
3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3, |
|||
3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0, |
|||
1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2, |
|||
0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0, |
|||
2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3, |
|||
0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0, |
|||
3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1, |
|||
2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0, |
|||
3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2, |
|||
0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2, |
|||
3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, |
|||
3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0, |
|||
2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, |
|||
3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1, |
|||
2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1, |
|||
3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1, |
|||
3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0, |
|||
3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1, |
|||
3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1, |
|||
3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1, |
|||
1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2, |
|||
0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3, |
|||
0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, |
|||
3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0, |
|||
3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1, |
|||
1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0, |
|||
3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1, |
|||
3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2, |
|||
0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0, |
|||
0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0, |
|||
1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1, |
|||
1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1, |
|||
3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1, |
|||
0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, |
|||
0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0, |
|||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, |
|||
3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0, |
|||
3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1, |
|||
0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0, |
|||
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1, |
|||
0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1, |
|||
0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0, |
|||
0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1, |
|||
0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0, |
|||
3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0, |
|||
0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0, |
|||
0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1, |
|||
2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, |
|||
0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0, |
|||
3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0, |
|||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, |
|||
2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0, |
|||
1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3, |
|||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0, |
|||
1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, |
|||
1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0, |
|||
2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0, |
|||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, |
|||
1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
) |
|||
|
|||
TIS620ThaiModel = { |
|||
'char_to_order_map': TIS620CharToOrderMap, |
|||
'precedence_matrix': ThaiLangModel, |
|||
'typical_positive_ratio': 0.926386, |
|||
'keep_english_letter': False, |
|||
'charset_name': "TIS-620", |
|||
'language': 'Thai', |
|||
} |
@ -0,0 +1,193 @@ |
|||
# -*- coding: utf-8 -*- |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Communicator client code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# Özgür Baskın - Turkish Language Model |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
# 255: Control characters that usually do not exist in any text |
|||
# 254: Carriage/Return |
|||
# 253: symbol (punctuation) that does not belong to word |
|||
# 252: 0 - 9 |
|||
|
|||
# Character Mapping Table: |
|||
Latin5_TurkishCharToOrderMap = ( |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, |
|||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, |
|||
255, 23, 37, 47, 39, 29, 52, 36, 45, 53, 60, 16, 49, 20, 46, 42, |
|||
48, 69, 44, 35, 31, 51, 38, 62, 65, 43, 56,255,255,255,255,255, |
|||
255, 1, 21, 28, 12, 2, 18, 27, 25, 3, 24, 10, 5, 13, 4, 15, |
|||
26, 64, 7, 8, 9, 14, 32, 57, 58, 11, 22,255,255,255,255,255, |
|||
180,179,178,177,176,175,174,173,172,171,170,169,168,167,166,165, |
|||
164,163,162,161,160,159,101,158,157,156,155,154,153,152,151,106, |
|||
150,149,148,147,146,145,144,100,143,142,141,140,139,138,137,136, |
|||
94, 80, 93,135,105,134,133, 63,132,131,130,129,128,127,126,125, |
|||
124,104, 73, 99, 79, 85,123, 54,122, 98, 92,121,120, 91,103,119, |
|||
68,118,117, 97,116,115, 50, 90,114,113,112,111, 55, 41, 40, 86, |
|||
89, 70, 59, 78, 71, 82, 88, 33, 77, 66, 84, 83,110, 75, 61, 96, |
|||
30, 67,109, 74, 87,102, 34, 95, 81,108, 76, 72, 17, 6, 19,107, |
|||
) |
|||
|
|||
TurkishLangModel = ( |
|||
3,2,3,3,3,1,3,3,3,3,3,3,3,3,2,1,1,3,3,1,3,3,0,3,3,3,3,3,0,3,1,3, |
|||
3,2,1,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1, |
|||
3,2,2,3,3,0,3,3,3,3,3,3,3,2,3,1,0,3,3,1,3,3,0,3,3,3,3,3,0,3,0,3, |
|||
3,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,0,1,0,1, |
|||
3,3,2,3,3,0,3,3,3,3,3,3,3,2,3,1,1,3,3,0,3,3,1,2,3,3,3,3,0,3,0,3, |
|||
3,1,1,0,0,0,1,0,0,0,0,1,1,0,1,2,1,0,0,0,1,0,0,0,0,2,0,0,0,0,0,1, |
|||
3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,2,0,3,2,1,2,2,1,3,3,0,0,0,2, |
|||
2,2,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1, |
|||
3,3,3,2,3,3,1,2,3,3,3,3,3,3,3,1,3,2,1,0,3,2,0,1,2,3,3,2,1,0,0,2, |
|||
2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0, |
|||
1,0,1,3,3,1,3,3,3,3,3,3,3,1,2,0,0,2,3,0,2,3,0,0,2,2,2,3,0,3,0,1, |
|||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,0,2,3,2,3,3,1,0,0,2, |
|||
3,2,0,0,1,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,2,0,0,1, |
|||
3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,0,3,3,0,0,2,1,0,0,2,3,2,2,0,0,0,2, |
|||
2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,2,0,0,1, |
|||
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,0,1,3,2,1,1,3,2,3,2,1,0,0,2, |
|||
2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0, |
|||
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,2,0,2,3,0,0,2,2,2,2,0,0,0,2, |
|||
3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0, |
|||
3,3,3,3,3,3,3,2,2,2,2,3,2,3,3,0,3,3,1,1,2,2,0,0,2,2,3,2,0,0,1,3, |
|||
0,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, |
|||
3,3,3,2,3,3,3,2,1,2,2,3,2,3,3,0,3,2,0,0,1,1,0,1,1,2,1,2,0,0,0,1, |
|||
0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0, |
|||
3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,1,3,1,1,0,3,2,1,1,3,3,2,3,1,0,0,1, |
|||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,1, |
|||
3,2,2,3,3,0,3,3,3,3,3,3,3,2,2,1,0,3,3,1,3,3,0,1,3,3,2,3,0,3,0,3, |
|||
2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, |
|||
2,2,2,3,3,0,3,3,3,3,3,3,3,3,3,0,0,3,2,0,3,3,0,3,2,3,3,3,0,3,1,3, |
|||
2,0,0,0,0,0,0,0,0,0,0,1,0,1,2,0,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1, |
|||
3,3,3,1,2,3,3,1,0,0,1,0,0,3,3,2,3,0,0,2,0,0,2,0,2,0,0,0,2,0,2,0, |
|||
0,3,1,0,1,0,0,0,2,2,1,0,1,1,2,1,2,2,2,0,2,1,1,0,0,0,2,0,0,0,0,0, |
|||
1,2,1,3,3,0,3,3,3,3,3,2,3,0,0,0,0,2,3,0,2,3,1,0,2,3,1,3,0,3,0,2, |
|||
3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,1,3,3,2,2,3,2,2,0,1,2,3,0,1,2,1,0,1,0,0,0,1,0,2,2,0,0,0,1, |
|||
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0, |
|||
3,3,3,1,3,3,1,1,3,3,1,1,3,3,1,0,2,1,2,0,2,1,0,0,1,1,2,1,0,0,0,2, |
|||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,1,0,2,1,3,0,0,2,0,0,3,3,0,3,0,0,1,0,1,2,0,0,1,1,2,2,0,1,0, |
|||
0,1,2,1,1,0,1,0,1,1,1,1,1,0,1,1,1,2,2,1,2,0,1,0,0,0,0,0,0,1,0,0, |
|||
3,3,3,2,3,2,3,3,0,2,2,2,3,3,3,0,3,0,0,0,2,2,0,1,2,1,1,1,0,0,0,1, |
|||
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, |
|||
3,3,3,3,3,3,2,1,2,2,3,3,3,3,2,0,2,0,0,0,2,2,0,0,2,1,3,3,0,0,1,1, |
|||
1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0, |
|||
1,1,2,3,3,0,3,3,3,3,3,3,2,2,0,2,0,2,3,2,3,2,2,2,2,2,2,2,1,3,2,3, |
|||
2,0,2,1,2,2,2,2,1,1,2,2,1,2,2,1,2,0,0,2,1,1,0,2,1,0,0,1,0,0,0,1, |
|||
2,3,3,1,1,1,0,1,1,1,2,3,2,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, |
|||
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,2,2,2,3,2,3,2,2,1,3,3,3,0,2,1,2,0,2,1,0,0,1,1,1,1,1,0,0,1, |
|||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0, |
|||
3,3,3,2,3,3,3,3,3,2,3,1,2,3,3,1,2,0,0,0,0,0,0,0,3,2,1,1,0,0,0,0, |
|||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, |
|||
3,3,3,2,2,3,3,2,1,1,1,1,1,3,3,0,3,1,0,0,1,1,0,0,3,1,2,1,0,0,0,0, |
|||
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0, |
|||
3,3,3,2,2,3,2,2,2,3,2,1,1,3,3,0,3,0,0,0,0,1,0,0,3,1,1,2,0,0,0,1, |
|||
1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, |
|||
1,1,1,3,3,0,3,3,3,3,3,2,2,2,1,2,0,2,1,2,2,1,1,0,1,2,2,2,2,2,2,2, |
|||
0,0,2,1,2,1,2,1,0,1,1,3,1,2,1,1,2,0,0,2,0,1,0,1,0,1,0,0,0,1,0,1, |
|||
3,3,3,1,3,3,3,0,1,1,0,2,2,3,1,0,3,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0, |
|||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,2,0,0,2,2,1,0,0,1,0,0,3,3,1,3,0,0,1,1,0,2,0,3,0,0,0,2,0,1,1, |
|||
0,1,2,0,1,2,2,0,2,2,2,2,1,0,2,1,1,0,2,0,2,1,2,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,1,3,2,3,2,0,2,2,2,1,3,2,0,2,1,2,0,1,2,0,0,1,0,2,2,0,0,0,2, |
|||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0, |
|||
3,3,3,0,3,3,1,1,2,3,1,0,3,2,3,0,3,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0, |
|||
1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,3,3,0,3,3,2,3,3,2,2,0,0,0,0,1,2,0,1,3,0,0,0,3,1,1,0,3,0,2, |
|||
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,1,2,2,1,0,3,1,1,1,1,3,3,2,3,0,0,1,0,1,2,0,2,2,0,2,2,0,2,1, |
|||
0,2,2,1,1,1,1,0,2,1,1,0,1,1,1,1,2,1,2,1,2,0,1,0,1,0,0,0,0,0,0,0, |
|||
3,3,3,0,1,1,3,0,0,1,1,0,0,2,2,0,3,0,0,1,1,0,1,0,0,0,0,0,2,0,0,0, |
|||
0,3,1,0,1,0,1,0,2,0,0,1,0,1,0,1,1,1,2,1,1,0,2,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,0,2,0,2,0,1,1,1,0,0,3,3,0,2,0,0,1,0,0,2,1,1,0,1,0,1,0,1,0, |
|||
0,2,0,1,2,0,2,0,2,1,1,0,1,0,2,1,1,0,2,1,1,0,1,0,0,0,1,1,0,0,0,0, |
|||
3,2,3,0,1,0,0,0,0,0,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,0,2,0,0,0, |
|||
0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,0,0,2,3,0,0,1,0,1,0,2,3,2,3,0,0,1,3,0,2,1,0,0,0,0,2,0,1,0, |
|||
0,2,1,0,0,1,1,0,2,1,0,0,1,0,0,1,1,0,1,1,2,0,1,0,0,0,0,1,0,0,0,0, |
|||
3,2,2,0,0,1,1,0,0,0,0,0,0,3,1,1,1,0,0,0,0,0,1,0,0,0,0,0,2,0,1,0, |
|||
0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,3,3,0,2,3,2,2,1,2,2,1,1,2,0,1,3,2,2,2,0,0,2,2,0,0,0,1,2,1, |
|||
3,0,2,1,1,0,1,1,1,0,1,2,2,2,1,1,2,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, |
|||
0,1,1,2,3,0,3,3,3,2,2,2,2,1,0,1,0,1,0,1,2,2,0,0,2,2,1,3,1,1,2,1, |
|||
0,0,1,1,2,0,1,1,0,0,1,2,0,2,1,1,2,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0, |
|||
3,3,2,0,0,3,1,0,0,0,0,0,0,3,2,1,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0, |
|||
0,2,1,1,0,0,1,0,1,2,0,0,1,1,0,0,2,1,1,1,1,0,2,0,0,0,0,0,0,0,0,0, |
|||
3,3,2,0,0,1,0,0,0,0,1,0,0,3,3,2,2,0,0,1,0,0,2,0,1,0,0,0,2,0,1,0, |
|||
0,0,1,1,0,0,2,0,2,1,0,0,1,1,2,1,2,0,2,1,2,1,1,1,0,0,1,1,0,0,0,0, |
|||
3,3,2,0,0,2,2,0,0,0,1,1,0,2,2,1,3,1,0,1,0,1,2,0,0,0,0,0,1,0,1,0, |
|||
0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,2,0,0,0,1,0,0,1,0,0,2,3,1,2,0,0,1,0,0,2,0,0,0,1,0,2,0,2,0, |
|||
0,1,1,2,2,1,2,0,2,1,1,0,0,1,1,0,1,1,1,1,2,1,1,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,0,2,1,2,1,0,0,1,1,0,3,3,1,2,0,0,1,0,0,2,0,2,0,1,1,2,0,0,0, |
|||
0,0,1,1,1,1,2,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0, |
|||
3,3,3,0,2,2,3,2,0,0,1,0,0,2,3,1,0,0,0,0,0,0,2,0,2,0,0,0,2,0,0,0, |
|||
0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,2,3,0,0,0,0,0,0,0,1,0,0,2,2,2,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0, |
|||
0,0,2,1,1,0,1,0,2,1,1,0,0,1,1,2,1,0,2,0,2,0,1,0,0,0,2,0,0,0,0,0, |
|||
0,0,0,2,2,0,2,1,1,1,1,2,2,0,0,1,0,1,0,0,1,3,0,0,0,0,1,0,0,2,1,0, |
|||
0,0,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, |
|||
2,0,0,2,3,0,2,3,1,2,2,0,2,0,0,2,0,2,1,1,1,2,1,0,0,1,2,1,1,2,1,0, |
|||
1,0,2,0,1,0,1,1,0,0,2,2,1,2,1,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
3,3,3,0,2,1,2,0,0,0,1,0,0,3,2,0,1,0,0,1,0,0,2,0,0,0,1,2,1,0,1,0, |
|||
0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,2,2,0,2,2,1,1,0,1,1,1,1,1,0,0,1,2,1,1,1,0,1,0,0,0,1,1,1,1, |
|||
0,0,2,1,0,1,1,1,0,1,1,2,1,2,1,1,2,0,1,1,2,1,0,2,0,0,0,0,0,0,0,0, |
|||
3,2,2,0,0,2,0,0,0,0,0,0,0,2,2,0,2,0,0,1,0,0,2,0,0,0,0,0,2,0,0,0, |
|||
0,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,3,2,0,2,2,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0, |
|||
2,0,1,0,1,0,1,1,0,0,1,2,0,1,0,1,1,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0, |
|||
2,2,2,0,1,1,0,0,0,1,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,1,2,0,1,0, |
|||
0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,2,2,2,1,0,1,1,1,0,0,0,0,1,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, |
|||
1,1,2,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,1, |
|||
0,0,1,2,2,0,2,1,2,1,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,0,0,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, |
|||
2,2,2,0,0,0,1,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
2,2,2,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|||
) |
|||
|
|||
# Model descriptor tying the Latin-5 (ISO-8859-9) byte-to-order map to the
# Turkish precedence matrix, together with the tuning values stored with them.
Latin5TurkishModel = dict(
    char_to_order_map=Latin5_TurkishCharToOrderMap,
    precedence_matrix=TurkishLangModel,
    typical_positive_ratio=0.970290,
    keep_english_letter=True,
    charset_name="ISO-8859-9",
    language='Turkish',
)
@ -0,0 +1,145 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Universal charset detector code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 2001 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# Shy Shalom - original C code |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .charsetprober import CharSetProber |
|||
from .enums import ProbingState |
|||
|
|||
# Number of likelihood categories stored in Latin1ClassModel (values 0 - 3).
FREQ_CAT_NUM = 4

# Character classes, numbered consecutively from zero:
#   UDF  undefined
#   OTH  other
#   ASC  ascii capital letter
#   ASS  ascii small letter
#   ACV  accent capital vowel
#   ACO  accent capital other
#   ASV  accent small vowel
#   ASO  accent small other
UDF, OTH, ASC, ASS, ACV, ACO, ASV, ASO = range(8)
CLASS_NUM = 8  # total classes

# Class of every byte value 0x00 - 0xFF.  Uniform runs are written as
# repetitions; irregular ranges are spelled out eight entries at a time.
Latin1_CharToClass = (
    (OTH,) * 0x41                                 # 00 - 40
    + (ASC,) * 26                                 # 41 - 5A
    + (OTH,) * 6                                  # 5B - 60
    + (ASS,) * 26                                 # 61 - 7A
    + (OTH,) * 5                                  # 7B - 7F
    + (OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH)    # 80 - 87
    + (OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF)    # 88 - 8F
    + (UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH)    # 90 - 97
    + (OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO)    # 98 - 9F
    + (OTH,) * 32                                 # A0 - BF
    + (ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO)    # C0 - C7
    + (ACV,) * 8                                  # C8 - CF
    + (ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH)    # D0 - D7
    + (ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO)    # D8 - DF
    + (ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO)    # E0 - E7
    + (ASV,) * 8                                  # E8 - EF
    + (ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH)    # F0 - F7
    + (ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO)    # F8 - FF
)

# Likelihood of one character class following another, stored flat and
# indexed as (previous class * CLASS_NUM) + current class.
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
Latin1ClassModel = (
    # UDF OTH ASC ASS ACV ACO ASV ASO
    (0, 0, 0, 0, 0, 0, 0, 0)      # UDF
    + (0, 3, 3, 3, 3, 3, 3, 3)    # OTH
    + (0, 3, 3, 3, 3, 3, 3, 3)    # ASC
    + (0, 3, 3, 3, 1, 1, 3, 3)    # ASS
    + (0, 3, 3, 3, 1, 2, 1, 2)    # ACV
    + (0, 3, 3, 3, 3, 3, 3, 3)    # ACO
    + (0, 3, 1, 3, 1, 1, 1, 3)    # ASV
    + (0, 3, 1, 3, 1, 1, 3, 3)    # ASO
)
|||
|
|||
|
|||
class Latin1Prober(CharSetProber):
    """Prober reporting the charset "ISO-8859-1".

    Every byte is mapped to a character class through Latin1_CharToClass,
    and each (previous class, current class) pair is looked up in
    Latin1ClassModel.  The prober counts how often each likelihood category
    occurs and derives its confidence from those counts.
    """

    def __init__(self):
        super(Latin1Prober, self).__init__()
        # Both attributes receive their working values in reset().
        self._last_char_class = None
        self._freq_counter = None
        self.reset()

    def reset(self):
        """Clear the class history and the per-category counters."""
        self._last_char_class = OTH
        self._freq_counter = [0] * FREQ_CAT_NUM
        CharSetProber.reset(self)

    @property
    def charset_name(self):
        return "ISO-8859-1"

    @property
    def language(self):
        return ""

    def feed(self, byte_str):
        """Consume a chunk of bytes and return the resulting prober state.

        The chunk first goes through the inherited
        filter_with_english_letters() helper.  Scanning stops, and the state
        becomes NOT_ME, at the first class pair the model marks as illegal.
        """
        filtered = self.filter_with_english_letters(byte_str)
        for byte in filtered:
            current_class = Latin1_CharToClass[byte]
            model_index = (self._last_char_class * CLASS_NUM) + current_class
            likelihood = Latin1ClassModel[model_index]
            if likelihood == 0:
                # Illegal transition: this cannot be Latin-1 text.
                self._state = ProbingState.NOT_ME
                break
            self._freq_counter[likelihood] += 1
            self._last_char_class = current_class

        return self.state

    def get_confidence(self):
        """Return the confidence derived from the category counters."""
        if self.state == ProbingState.NOT_ME:
            return 0.01

        counts = self._freq_counter
        seen = sum(counts)
        if seen < 0.01:
            score = 0.0
        else:
            # "Very unlikely" pairs are penalised twenty-fold against the
            # "very likely" ones.
            score = (counts[3] - counts[1] * 20.0) / seen
        score = 0.0 if score < 0.0 else score
        # lower the confidence of latin1 so that other more accurate
        # detector can take priority.
        return score * 0.73
@ -0,0 +1,91 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Universal charset detector code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 2001 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# Shy Shalom - original C code |
|||
# Proofpoint, Inc. |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .charsetprober import CharSetProber |
|||
from .enums import ProbingState, MachineState |
|||
|
|||
|
|||
class MultiByteCharSetProber(CharSetProber):
    """Shared feed/reset logic for probers of multi-byte encodings.

    This class leaves ``coding_sm`` and ``distribution_analyzer`` as None;
    feed() and get_confidence() use both, so they have to be assigned
    (presumably by subclasses - confirm against the concrete probers)
    before use.  ``charset_name`` and ``language`` must be overridden.
    """

    def __init__(self, lang_filter=None):
        super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter)
        self.distribution_analyzer = None
        self.coding_sm = None
        # [last byte of the previous chunk, first byte of the current chunk];
        # lets a character that straddles two feed() calls be analysed.
        self._last_char = [0, 0]

    def reset(self):
        """Reset this prober and whichever helpers have been assigned."""
        super(MultiByteCharSetProber, self).reset()
        if self.coding_sm:
            self.coding_sm.reset()
        if self.distribution_analyzer:
            self.distribution_analyzer.reset()
        self._last_char = [0, 0]

    @property
    def charset_name(self):
        raise NotImplementedError

    @property
    def language(self):
        raise NotImplementedError

    def feed(self, byte_str):
        """Run a chunk of bytes through the state machine and analyser.

        :param byte_str: indexable sequence of byte values; may be empty.
        :returns: the prober state after the chunk has been processed.
        """
        # An empty chunk carries no information, and indexing byte_str[-1]
        # below would raise IndexError, so leave all state untouched.
        if len(byte_str) == 0:
            return self.state

        for i in range(len(byte_str)):
            coding_state = self.coding_sm.next_state(byte_str[i])
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                # The state machine is back at START, so the byte at i ends
                # a character; pass that character to the analyser.
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # The character may have begun in the previous chunk:
                    # pair the remembered byte with this chunk's first byte.
                    self._last_char[1] = byte_str[0]
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
                                                    char_len)

        # Remember the trailing byte for the next call.
        self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            # Shortcut: stop detecting once the analyser has enough data and
            # the confidence exceeds the threshold.
            if (self.distribution_analyzer.got_enough_data() and
                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return the confidence reported by the distribution analyser."""
        return self.distribution_analyzer.get_confidence()
@ -0,0 +1,54 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Universal charset detector code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 2001 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# Shy Shalom - original C code |
|||
# Proofpoint, Inc. |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .charsetgroupprober import CharSetGroupProber |
|||
from .utf8prober import UTF8Prober |
|||
from .sjisprober import SJISProber |
|||
from .eucjpprober import EUCJPProber |
|||
from .gb2312prober import GB2312Prober |
|||
from .euckrprober import EUCKRProber |
|||
from .cp949prober import CP949Prober |
|||
from .big5prober import Big5Prober |
|||
from .euctwprober import EUCTWProber |
|||
|
|||
|
|||
class MBCSGroupProber(CharSetGroupProber):
    """Group prober holding one instance of each multi-byte prober."""

    def __init__(self, lang_filter=None):
        super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
        # Instantiated in the order listed; each prober is built with its
        # default arguments.
        prober_classes = (
            UTF8Prober,
            SJISProber,
            EUCJPProber,
            GB2312Prober,
            EUCKRProber,
            CP949Prober,
            Big5Prober,
            EUCTWProber,
        )
        self.probers = [prober_cls() for prober_cls in prober_classes]
        self.reset()
@ -0,0 +1,572 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is mozilla.org code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .enums import MachineState |
|||
|
|||
# BIG5 |
|||
|
|||
BIG5_CLS = ( |
|||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value |
|||
1,1,1,1,1,1,0,0, # 08 - 0f |
|||
1,1,1,1,1,1,1,1, # 10 - 17 |
|||
1,1,1,0,1,1,1,1, # 18 - 1f |
|||
1,1,1,1,1,1,1,1, # 20 - 27 |
|||
1,1,1,1,1,1,1,1, # 28 - 2f |
|||
1,1,1,1,1,1,1,1, # 30 - 37 |
|||
1,1,1,1,1,1,1,1, # 38 - 3f |
|||
2,2,2,2,2,2,2,2, # 40 - 47 |
|||
2,2,2,2,2,2,2,2, # 48 - 4f |
|||
2,2,2,2,2,2,2,2, # 50 - 57 |
|||
2,2,2,2,2,2,2,2, # 58 - 5f |
|||
2,2,2,2,2,2,2,2, # 60 - 67 |
|||
2,2,2,2,2,2,2,2, # 68 - 6f |
|||
2,2,2,2,2,2,2,2, # 70 - 77 |
|||
2,2,2,2,2,2,2,1, # 78 - 7f |
|||
4,4,4,4,4,4,4,4, # 80 - 87 |
|||
4,4,4,4,4,4,4,4, # 88 - 8f |
|||
4,4,4,4,4,4,4,4, # 90 - 97 |
|||
4,4,4,4,4,4,4,4, # 98 - 9f |
|||
4,3,3,3,3,3,3,3, # a0 - a7 |
|||
3,3,3,3,3,3,3,3, # a8 - af |
|||
3,3,3,3,3,3,3,3, # b0 - b7 |
|||
3,3,3,3,3,3,3,3, # b8 - bf |
|||
3,3,3,3,3,3,3,3, # c0 - c7 |
|||
3,3,3,3,3,3,3,3, # c8 - cf |
|||
3,3,3,3,3,3,3,3, # d0 - d7 |
|||
3,3,3,3,3,3,3,3, # d8 - df |
|||
3,3,3,3,3,3,3,3, # e0 - e7 |
|||
3,3,3,3,3,3,3,3, # e8 - ef |
|||
3,3,3,3,3,3,3,3, # f0 - f7 |
|||
3,3,3,3,3,3,3,0 # f8 - ff |
|||
) |
|||
|
|||
# Big5 state table, stored flat.  Each group of eight entries is labelled
# with the range of table indexes it covers.
BIG5_ST = (
    # 00 - 07
    (MachineState.ERROR,)
    + (MachineState.START,) * 2
    + (3,)
    + (MachineState.ERROR,) * 4
    # 08 - 0f
    + (MachineState.ERROR,) * 2
    + (MachineState.ITS_ME,) * 5
    + (MachineState.ERROR,)
    # 10 - 17
    + (MachineState.ERROR,)
    + (MachineState.START,) * 7
)

# One entry per byte class 0 - 4.
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)

# Bundle of the tables above under the keys the state-machine code reads.
BIG5_SM_MODEL = dict(
    class_table=BIG5_CLS,
    class_factor=5,
    state_table=BIG5_ST,
    char_len_table=BIG5_CHAR_LEN_TABLE,
    name='Big5',
)
|||
|
|||
# CP949 |
|||
|
|||
CP949_CLS = ( |
|||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f |
|||
1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f |
|||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f |
|||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f |
|||
1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f |
|||
4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f |
|||
1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f |
|||
5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f |
|||
0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f |
|||
6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f |
|||
6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af |
|||
7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf |
|||
7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf |
|||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df |
|||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef |
|||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff |
|||
) |
|||
|
|||
# CP949 state table, stored flat: one row of ten entries (byte classes
# 0 - 9) per previous state, in the order named on each row below.
CP949_ST = (
    # previous state = MachineState.START
    (MachineState.ERROR, MachineState.START, 3, MachineState.ERROR,
     MachineState.START, MachineState.START, 4, 5, MachineState.ERROR, 6)
    # previous state = MachineState.ERROR
    + (MachineState.ERROR,) * 10
    # previous state = MachineState.ITS_ME
    + (MachineState.ITS_ME,) * 10
    # previous state = 3
    + (MachineState.ERROR,) * 2
    + (MachineState.START,) * 2
    + (MachineState.ERROR,) * 3
    + (MachineState.START,) * 3
    # previous state = 4
    + (MachineState.ERROR,) * 2
    + (MachineState.START,) * 8
    # previous state = 5
    + (MachineState.ERROR,)
    + (MachineState.START,) * 9
    # previous state = 6
    + (MachineState.ERROR,)
    + (MachineState.START,) * 4
    + (MachineState.ERROR,) * 2
    + (MachineState.START,) * 3
)

# One entry per byte class 0 - 9.
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)

# Bundle of the tables above under the keys the state-machine code reads.
CP949_SM_MODEL = dict(
    class_table=CP949_CLS,
    class_factor=10,
    state_table=CP949_ST,
    char_len_table=CP949_CHAR_LEN_TABLE,
    name='CP949',
)
|||
|
|||
# EUC-JP

# EUC-JP byte-class table (the model's 'class_table'): one class number per
# byte value; each row's trailing comment is the hex byte range it covers.
EUCJP_CLS = (
    4,4,4,4,4,4,4,4, # 00 - 07
    4,4,4,4,4,4,5,5, # 08 - 0f
    4,4,4,4,4,4,4,4, # 10 - 17
    4,4,4,5,4,4,4,4, # 18 - 1f
    4,4,4,4,4,4,4,4, # 20 - 27
    4,4,4,4,4,4,4,4, # 28 - 2f
    4,4,4,4,4,4,4,4, # 30 - 37
    4,4,4,4,4,4,4,4, # 38 - 3f
    4,4,4,4,4,4,4,4, # 40 - 47
    4,4,4,4,4,4,4,4, # 48 - 4f
    4,4,4,4,4,4,4,4, # 50 - 57
    4,4,4,4,4,4,4,4, # 58 - 5f
    4,4,4,4,4,4,4,4, # 60 - 67
    4,4,4,4,4,4,4,4, # 68 - 6f
    4,4,4,4,4,4,4,4, # 70 - 77
    4,4,4,4,4,4,4,4, # 78 - 7f
    5,5,5,5,5,5,5,5, # 80 - 87
    5,5,5,5,5,5,1,3, # 88 - 8f
    5,5,5,5,5,5,5,5, # 90 - 97
    5,5,5,5,5,5,5,5, # 98 - 9f
    5,2,2,2,2,2,2,2, # a0 - a7
    2,2,2,2,2,2,2,2, # a8 - af
    2,2,2,2,2,2,2,2, # b0 - b7
    2,2,2,2,2,2,2,2, # b8 - bf
    2,2,2,2,2,2,2,2, # c0 - c7
    2,2,2,2,2,2,2,2, # c8 - cf
    2,2,2,2,2,2,2,2, # d0 - d7
    2,2,2,2,2,2,2,2, # d8 - df
    0,0,0,0,0,0,0,0, # e0 - e7
    0,0,0,0,0,0,0,0, # e8 - ef
    0,0,0,0,0,0,0,0, # f0 - f7
    0,0,0,0,0,0,0,5 # f8 - ff
)

# EUC-JP state table (the model's 'state_table'), stored as one flat tuple.
# Each row's trailing comment is the hex range of flat indices it covers.
# Entries are MachineState constants or bare integers, which appear to be ids
# of intermediate states.
# NOTE(review): presumably looked up as state * class_factor + byte class --
# confirm against CodingStateMachine.
EUCJP_ST = (
    3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f
    3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27
)

# One entry per byte class (6 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character that starts with a
# byte of that class -- confirm.
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)

# Bundles the EUC-JP tables under the keys shared by every *_SM_MODEL dict in
# this module.
EUCJP_SM_MODEL = {'class_table': EUCJP_CLS,
                  'class_factor': 6,
                  'state_table': EUCJP_ST,
                  'char_len_table': EUCJP_CHAR_LEN_TABLE,
                  'name': 'EUC-JP'}
|||
|
|||
# EUC-KR

# EUC-KR byte-class table (the model's 'class_table'): one class number per
# byte value; each row's trailing comment is the hex byte range it covers.
EUCKR_CLS = (
    1,1,1,1,1,1,1,1, # 00 - 07
    1,1,1,1,1,1,0,0, # 08 - 0f
    1,1,1,1,1,1,1,1, # 10 - 17
    1,1,1,0,1,1,1,1, # 18 - 1f
    1,1,1,1,1,1,1,1, # 20 - 27
    1,1,1,1,1,1,1,1, # 28 - 2f
    1,1,1,1,1,1,1,1, # 30 - 37
    1,1,1,1,1,1,1,1, # 38 - 3f
    1,1,1,1,1,1,1,1, # 40 - 47
    1,1,1,1,1,1,1,1, # 48 - 4f
    1,1,1,1,1,1,1,1, # 50 - 57
    1,1,1,1,1,1,1,1, # 58 - 5f
    1,1,1,1,1,1,1,1, # 60 - 67
    1,1,1,1,1,1,1,1, # 68 - 6f
    1,1,1,1,1,1,1,1, # 70 - 77
    1,1,1,1,1,1,1,1, # 78 - 7f
    0,0,0,0,0,0,0,0, # 80 - 87
    0,0,0,0,0,0,0,0, # 88 - 8f
    0,0,0,0,0,0,0,0, # 90 - 97
    0,0,0,0,0,0,0,0, # 98 - 9f
    0,2,2,2,2,2,2,2, # a0 - a7
    2,2,2,2,2,3,3,3, # a8 - af
    2,2,2,2,2,2,2,2, # b0 - b7
    2,2,2,2,2,2,2,2, # b8 - bf
    2,2,2,2,2,2,2,2, # c0 - c7
    2,3,2,2,2,2,2,2, # c8 - cf
    2,2,2,2,2,2,2,2, # d0 - d7
    2,2,2,2,2,2,2,2, # d8 - df
    2,2,2,2,2,2,2,2, # e0 - e7
    2,2,2,2,2,2,2,2, # e8 - ef
    2,2,2,2,2,2,2,2, # f0 - f7
    2,2,2,2,2,2,2,0 # f8 - ff
)

# EUC-KR state table (the model's 'state_table'), stored as one flat tuple.
# Each row's trailing comment is the hex range of flat indices it covers.
# Entries are MachineState constants or a bare integer (here 3), which
# appears to be the id of an intermediate state.
# NOTE(review): presumably looked up as state * class_factor + byte class --
# confirm against CodingStateMachine.
EUCKR_ST = (
    MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
)

# One entry per byte class (4 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character that starts with a
# byte of that class -- confirm.
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)

# Bundles the EUC-KR tables under the keys shared by every *_SM_MODEL dict in
# this module.
EUCKR_SM_MODEL = {'class_table': EUCKR_CLS,
                  'class_factor': 4,
                  'state_table': EUCKR_ST,
                  'char_len_table': EUCKR_CHAR_LEN_TABLE,
                  'name': 'EUC-KR'}
|||
|
|||
# EUC-TW

# EUC-TW byte-class table (the model's 'class_table'): one class number per
# byte value; each row's trailing comment is the hex byte range it covers.
EUCTW_CLS = (
    2,2,2,2,2,2,2,2, # 00 - 07
    2,2,2,2,2,2,0,0, # 08 - 0f
    2,2,2,2,2,2,2,2, # 10 - 17
    2,2,2,0,2,2,2,2, # 18 - 1f
    2,2,2,2,2,2,2,2, # 20 - 27
    2,2,2,2,2,2,2,2, # 28 - 2f
    2,2,2,2,2,2,2,2, # 30 - 37
    2,2,2,2,2,2,2,2, # 38 - 3f
    2,2,2,2,2,2,2,2, # 40 - 47
    2,2,2,2,2,2,2,2, # 48 - 4f
    2,2,2,2,2,2,2,2, # 50 - 57
    2,2,2,2,2,2,2,2, # 58 - 5f
    2,2,2,2,2,2,2,2, # 60 - 67
    2,2,2,2,2,2,2,2, # 68 - 6f
    2,2,2,2,2,2,2,2, # 70 - 77
    2,2,2,2,2,2,2,2, # 78 - 7f
    0,0,0,0,0,0,0,0, # 80 - 87
    0,0,0,0,0,0,6,0, # 88 - 8f
    0,0,0,0,0,0,0,0, # 90 - 97
    0,0,0,0,0,0,0,0, # 98 - 9f
    0,3,4,4,4,4,4,4, # a0 - a7
    5,5,1,1,1,1,1,1, # a8 - af
    1,1,1,1,1,1,1,1, # b0 - b7
    1,1,1,1,1,1,1,1, # b8 - bf
    1,1,3,1,3,3,3,3, # c0 - c7
    3,3,3,3,3,3,3,3, # c8 - cf
    3,3,3,3,3,3,3,3, # d0 - d7
    3,3,3,3,3,3,3,3, # d8 - df
    3,3,3,3,3,3,3,3, # e0 - e7
    3,3,3,3,3,3,3,3, # e8 - ef
    3,3,3,3,3,3,3,3, # f0 - f7
    3,3,3,3,3,3,3,0 # f8 - ff
)

# EUC-TW state table (the model's 'state_table'), stored as one flat tuple.
# Each row's trailing comment is the hex range of flat indices it covers.
# Entries are MachineState constants or bare integers, which appear to be ids
# of intermediate states.
# NOTE(review): presumably looked up as state * class_factor + byte class --
# confirm against CodingStateMachine.
EUCTW_ST = (
    MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17
    MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
    5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27
    MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
)

# One entry per byte class (7 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character that starts with a
# byte of that class -- confirm.
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)

# Bundles the EUC-TW tables under the keys shared by every *_SM_MODEL dict in
# this module.
EUCTW_SM_MODEL = {'class_table': EUCTW_CLS,
                  'class_factor': 7,
                  'state_table': EUCTW_ST,
                  'char_len_table': EUCTW_CHAR_LEN_TABLE,
                  'name': 'x-euc-tw'}
|||
|
|||
# GB2312

# GB2312 byte-class table (the model's 'class_table'): one class number per
# byte value; each row's trailing comment is the hex byte range it covers.
GB2312_CLS = (
    1,1,1,1,1,1,1,1, # 00 - 07
    1,1,1,1,1,1,0,0, # 08 - 0f
    1,1,1,1,1,1,1,1, # 10 - 17
    1,1,1,0,1,1,1,1, # 18 - 1f
    1,1,1,1,1,1,1,1, # 20 - 27
    1,1,1,1,1,1,1,1, # 28 - 2f
    3,3,3,3,3,3,3,3, # 30 - 37
    3,3,1,1,1,1,1,1, # 38 - 3f
    2,2,2,2,2,2,2,2, # 40 - 47
    2,2,2,2,2,2,2,2, # 48 - 4f
    2,2,2,2,2,2,2,2, # 50 - 57
    2,2,2,2,2,2,2,2, # 58 - 5f
    2,2,2,2,2,2,2,2, # 60 - 67
    2,2,2,2,2,2,2,2, # 68 - 6f
    2,2,2,2,2,2,2,2, # 70 - 77
    2,2,2,2,2,2,2,4, # 78 - 7f
    5,6,6,6,6,6,6,6, # 80 - 87
    6,6,6,6,6,6,6,6, # 88 - 8f
    6,6,6,6,6,6,6,6, # 90 - 97
    6,6,6,6,6,6,6,6, # 98 - 9f
    6,6,6,6,6,6,6,6, # a0 - a7
    6,6,6,6,6,6,6,6, # a8 - af
    6,6,6,6,6,6,6,6, # b0 - b7
    6,6,6,6,6,6,6,6, # b8 - bf
    6,6,6,6,6,6,6,6, # c0 - c7
    6,6,6,6,6,6,6,6, # c8 - cf
    6,6,6,6,6,6,6,6, # d0 - d7
    6,6,6,6,6,6,6,6, # d8 - df
    6,6,6,6,6,6,6,6, # e0 - e7
    6,6,6,6,6,6,6,6, # e8 - ef
    6,6,6,6,6,6,6,6, # f0 - f7
    6,6,6,6,6,6,6,0 # f8 - ff
)

# GB2312 state table (the model's 'state_table'), stored as one flat tuple.
# Each row's trailing comment is the hex range of flat indices it covers.
# Entries are MachineState constants or bare integers, which appear to be ids
# of intermediate states.
# NOTE(review): presumably looked up as state * class_factor + byte class --
# confirm against CodingStateMachine.
GB2312_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17
    4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
    MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
)

# One character length per byte class (7 entries, matching 'class_factor'
# below).
# To be accurate, the length of class 6 can be either 2 or 4.
# But it is not necessary to discriminate between the two since
# it is used for frequency analysis only, and we are validating
# each code range there as well. So it is safe to set it to be
# 2 here.
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)

# Bundles the GB2312 tables under the keys shared by every *_SM_MODEL dict in
# this module.
GB2312_SM_MODEL = {'class_table': GB2312_CLS,
                   'class_factor': 7,
                   'state_table': GB2312_ST,
                   'char_len_table': GB2312_CHAR_LEN_TABLE,
                   'name': 'GB2312'}
|||
|
|||
# Shift_JIS

# Shift_JIS byte-class table (the model's 'class_table'): one class number per
# byte value; each row's trailing comment is the hex byte range it covers.
SJIS_CLS = (
    1,1,1,1,1,1,1,1, # 00 - 07
    1,1,1,1,1,1,0,0, # 08 - 0f
    1,1,1,1,1,1,1,1, # 10 - 17
    1,1,1,0,1,1,1,1, # 18 - 1f
    1,1,1,1,1,1,1,1, # 20 - 27
    1,1,1,1,1,1,1,1, # 28 - 2f
    1,1,1,1,1,1,1,1, # 30 - 37
    1,1,1,1,1,1,1,1, # 38 - 3f
    2,2,2,2,2,2,2,2, # 40 - 47
    2,2,2,2,2,2,2,2, # 48 - 4f
    2,2,2,2,2,2,2,2, # 50 - 57
    2,2,2,2,2,2,2,2, # 58 - 5f
    2,2,2,2,2,2,2,2, # 60 - 67
    2,2,2,2,2,2,2,2, # 68 - 6f
    2,2,2,2,2,2,2,2, # 70 - 77
    2,2,2,2,2,2,2,1, # 78 - 7f
    3,3,3,3,3,2,2,3, # 80 - 87
    3,3,3,3,3,3,3,3, # 88 - 8f
    3,3,3,3,3,3,3,3, # 90 - 97
    3,3,3,3,3,3,3,3, # 98 - 9f
    # 0xa0 is illegal in the SJIS encoding, but some pages do contain this
    # byte, so we need to be more forgiving of errors.
    2,2,2,2,2,2,2,2, # a0 - a7
    2,2,2,2,2,2,2,2, # a8 - af
    2,2,2,2,2,2,2,2, # b0 - b7
    2,2,2,2,2,2,2,2, # b8 - bf
    2,2,2,2,2,2,2,2, # c0 - c7
    2,2,2,2,2,2,2,2, # c8 - cf
    2,2,2,2,2,2,2,2, # d0 - d7
    2,2,2,2,2,2,2,2, # d8 - df
    3,3,3,3,3,3,3,3, # e0 - e7
    3,3,3,3,3,4,4,4, # e8 - ef
    3,3,3,3,3,3,3,3, # f0 - f7
    3,3,3,3,3,0,0,0) # f8 - ff


# Shift_JIS state table (the model's 'state_table'), stored as one flat tuple.
# Each row's trailing comment is the hex range of flat indices it covers.
# Entries are MachineState constants or a bare integer (here 3), which
# appears to be the id of an intermediate state.
# NOTE(review): presumably looked up as state * class_factor + byte class --
# confirm against CodingStateMachine.
SJIS_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17
)

# One entry per byte class (6 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character that starts with a
# byte of that class -- confirm.
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)

# Bundles the Shift_JIS tables under the keys shared by every *_SM_MODEL dict
# in this module. SJISProber passes this dict to CodingStateMachine.
SJIS_SM_MODEL = {'class_table': SJIS_CLS,
                 'class_factor': 6,
                 'state_table': SJIS_ST,
                 'char_len_table': SJIS_CHAR_LEN_TABLE,
                 'name': 'Shift_JIS'}
|||
|
|||
# UCS2-BE

# UCS2-BE byte-class table (the model's 'class_table'): one class number per
# byte value; each row's trailing comment is the hex byte range it covers.
UCS2BE_CLS = (
    0,0,0,0,0,0,0,0, # 00 - 07
    0,0,1,0,0,2,0,0, # 08 - 0f
    0,0,0,0,0,0,0,0, # 10 - 17
    0,0,0,3,0,0,0,0, # 18 - 1f
    0,0,0,0,0,0,0,0, # 20 - 27
    0,3,3,3,3,3,0,0, # 28 - 2f
    0,0,0,0,0,0,0,0, # 30 - 37
    0,0,0,0,0,0,0,0, # 38 - 3f
    0,0,0,0,0,0,0,0, # 40 - 47
    0,0,0,0,0,0,0,0, # 48 - 4f
    0,0,0,0,0,0,0,0, # 50 - 57
    0,0,0,0,0,0,0,0, # 58 - 5f
    0,0,0,0,0,0,0,0, # 60 - 67
    0,0,0,0,0,0,0,0, # 68 - 6f
    0,0,0,0,0,0,0,0, # 70 - 77
    0,0,0,0,0,0,0,0, # 78 - 7f
    0,0,0,0,0,0,0,0, # 80 - 87
    0,0,0,0,0,0,0,0, # 88 - 8f
    0,0,0,0,0,0,0,0, # 90 - 97
    0,0,0,0,0,0,0,0, # 98 - 9f
    0,0,0,0,0,0,0,0, # a0 - a7
    0,0,0,0,0,0,0,0, # a8 - af
    0,0,0,0,0,0,0,0, # b0 - b7
    0,0,0,0,0,0,0,0, # b8 - bf
    0,0,0,0,0,0,0,0, # c0 - c7
    0,0,0,0,0,0,0,0, # c8 - cf
    0,0,0,0,0,0,0,0, # d0 - d7
    0,0,0,0,0,0,0,0, # d8 - df
    0,0,0,0,0,0,0,0, # e0 - e7
    0,0,0,0,0,0,0,0, # e8 - ef
    0,0,0,0,0,0,0,0, # f0 - f7
    0,0,0,0,0,0,4,5 # f8 - ff
)

# UCS2-BE state table (the model's 'state_table'), stored as one flat tuple.
# Each row's trailing comment is the hex range of flat indices it covers.
# Entries are MachineState constants or bare integers, which appear to be ids
# of intermediate states.
# NOTE(review): presumably looked up as state * class_factor + byte class --
# confirm against CodingStateMachine.
UCS2BE_ST = (
    5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17
    6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f
    6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27
    5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f
    6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37
)

# One entry per byte class (6 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character that starts with a
# byte of that class -- confirm.
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)

# Bundles the UCS2-BE tables under the keys shared by every *_SM_MODEL dict in
# this module; the reported name is 'UTF-16BE'.
UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS,
                   'class_factor': 6,
                   'state_table': UCS2BE_ST,
                   'char_len_table': UCS2BE_CHAR_LEN_TABLE,
                   'name': 'UTF-16BE'}
|||
|
|||
# UCS2-LE

# UCS2-LE byte-class table (the model's 'class_table'): one class number per
# byte value; each row's trailing comment is the hex byte range it covers.
UCS2LE_CLS = (
    0,0,0,0,0,0,0,0, # 00 - 07
    0,0,1,0,0,2,0,0, # 08 - 0f
    0,0,0,0,0,0,0,0, # 10 - 17
    0,0,0,3,0,0,0,0, # 18 - 1f
    0,0,0,0,0,0,0,0, # 20 - 27
    0,3,3,3,3,3,0,0, # 28 - 2f
    0,0,0,0,0,0,0,0, # 30 - 37
    0,0,0,0,0,0,0,0, # 38 - 3f
    0,0,0,0,0,0,0,0, # 40 - 47
    0,0,0,0,0,0,0,0, # 48 - 4f
    0,0,0,0,0,0,0,0, # 50 - 57
    0,0,0,0,0,0,0,0, # 58 - 5f
    0,0,0,0,0,0,0,0, # 60 - 67
    0,0,0,0,0,0,0,0, # 68 - 6f
    0,0,0,0,0,0,0,0, # 70 - 77
    0,0,0,0,0,0,0,0, # 78 - 7f
    0,0,0,0,0,0,0,0, # 80 - 87
    0,0,0,0,0,0,0,0, # 88 - 8f
    0,0,0,0,0,0,0,0, # 90 - 97
    0,0,0,0,0,0,0,0, # 98 - 9f
    0,0,0,0,0,0,0,0, # a0 - a7
    0,0,0,0,0,0,0,0, # a8 - af
    0,0,0,0,0,0,0,0, # b0 - b7
    0,0,0,0,0,0,0,0, # b8 - bf
    0,0,0,0,0,0,0,0, # c0 - c7
    0,0,0,0,0,0,0,0, # c8 - cf
    0,0,0,0,0,0,0,0, # d0 - d7
    0,0,0,0,0,0,0,0, # d8 - df
    0,0,0,0,0,0,0,0, # e0 - e7
    0,0,0,0,0,0,0,0, # e8 - ef
    0,0,0,0,0,0,0,0, # f0 - f7
    0,0,0,0,0,0,4,5 # f8 - ff
)

# UCS2-LE state table (the model's 'state_table'), stored as one flat tuple.
# Each row's trailing comment is the hex range of flat indices it covers.
# Entries are MachineState constants or bare integers, which appear to be ids
# of intermediate states.
# NOTE(review): presumably looked up as state * class_factor + byte class --
# confirm against CodingStateMachine.
UCS2LE_ST = (
    6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17
    5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f
    7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27
    5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f
    5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37
)

# One entry per byte class (6 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character that starts with a
# byte of that class -- confirm.
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)

# Bundles the UCS2-LE tables under the keys shared by every *_SM_MODEL dict in
# this module; the reported name is 'UTF-16LE'.
UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS,
                   'class_factor': 6,
                   'state_table': UCS2LE_ST,
                   'char_len_table': UCS2LE_CHAR_LEN_TABLE,
                   'name': 'UTF-16LE'}
|||
|
|||
# UTF-8

# UTF-8 byte-class table (the model's 'class_table'): one class number per
# byte value; each row's trailing comment is the hex byte range it covers.
UTF8_CLS = (
    1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
    1,1,1,1,1,1,0,0, # 08 - 0f
    1,1,1,1,1,1,1,1, # 10 - 17
    1,1,1,0,1,1,1,1, # 18 - 1f
    1,1,1,1,1,1,1,1, # 20 - 27
    1,1,1,1,1,1,1,1, # 28 - 2f
    1,1,1,1,1,1,1,1, # 30 - 37
    1,1,1,1,1,1,1,1, # 38 - 3f
    1,1,1,1,1,1,1,1, # 40 - 47
    1,1,1,1,1,1,1,1, # 48 - 4f
    1,1,1,1,1,1,1,1, # 50 - 57
    1,1,1,1,1,1,1,1, # 58 - 5f
    1,1,1,1,1,1,1,1, # 60 - 67
    1,1,1,1,1,1,1,1, # 68 - 6f
    1,1,1,1,1,1,1,1, # 70 - 77
    1,1,1,1,1,1,1,1, # 78 - 7f
    2,2,2,2,3,3,3,3, # 80 - 87
    4,4,4,4,4,4,4,4, # 88 - 8f
    4,4,4,4,4,4,4,4, # 90 - 97
    4,4,4,4,4,4,4,4, # 98 - 9f
    5,5,5,5,5,5,5,5, # a0 - a7
    5,5,5,5,5,5,5,5, # a8 - af
    5,5,5,5,5,5,5,5, # b0 - b7
    5,5,5,5,5,5,5,5, # b8 - bf
    0,0,6,6,6,6,6,6, # c0 - c7
    6,6,6,6,6,6,6,6, # c8 - cf
    6,6,6,6,6,6,6,6, # d0 - d7
    6,6,6,6,6,6,6,6, # d8 - df
    7,8,8,8,8,8,8,8, # e0 - e7
    8,8,8,8,8,9,8,8, # e8 - ef
    10,11,11,11,11,11,11,11, # f0 - f7
    12,13,13,13,14,15,0,0 # f8 - ff
)

# UTF-8 state table (the model's 'state_table'), stored as one flat tuple.
# Each row's trailing comment is the hex range of flat indices it covers.
# Entries are MachineState constants or bare integers, which appear to be ids
# of intermediate states.
# NOTE(review): presumably looked up as state * class_factor + byte class --
# confirm against CodingStateMachine.
UTF8_ST = (
    MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07
    9, 11, 8, 7, 6, 5, 4, 3,#08-0f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f
    MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f
    MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f
    MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f
    MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af
    MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf
)

# One entry per byte class (16 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character that starts with a
# byte of that class -- confirm.
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)

# Bundles the UTF-8 tables under the keys shared by every *_SM_MODEL dict in
# this module.
UTF8_SM_MODEL = {'class_table': UTF8_CLS,
                 'class_factor': 16,
                 'state_table': UTF8_ST,
                 'char_len_table': UTF8_CHAR_LEN_TABLE,
                 'name': 'UTF-8'}
@ -0,0 +1,132 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Universal charset detector code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 2001 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# Shy Shalom - original C code |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .charsetprober import CharSetProber |
|||
from .enums import CharacterCategory, ProbingState, SequenceLikelihood |
|||
|
|||
|
|||
class SingleByteCharSetProber(CharSetProber):
    """Prober for a single-byte charset, scored against a language model.

    The model is a dict from which this class reads the keys
    'charset_name', 'language' (optional), 'keep_english_letter',
    'char_to_order_map', 'precedence_matrix' and 'typical_positive_ratio'.
    Each fed byte is mapped to an "order"; pairs of consecutive orders that
    both fall below SAMPLE_SIZE are looked up in the precedence matrix and
    tallied per likelihood category.
    """

    # Orders below this value take part in sequence (pair) counting.
    SAMPLE_SIZE = 64
    # Minimum number of counted sequences before feed() may shortcut.
    SB_ENOUGH_REL_THRESHOLD = 1024  # 0.25 * SAMPLE_SIZE^2
    POSITIVE_SHORTCUT_THRESHOLD = 0.95
    NEGATIVE_SHORTCUT_THRESHOLD = 0.05

    def __init__(self, model, reversed=False, name_prober=None):
        """Create a prober for ``model``.

        :param model: language model dict (see the class docstring).
        :param reversed: when true, every character pair is swapped before
            the precedence-matrix lookup.
        :param name_prober: optional auxiliary prober; when given,
            ``charset_name`` and ``language`` are delegated to it.
        """
        super(SingleByteCharSetProber, self).__init__()
        self._model = model
        # TRUE if we need to reverse every pair in the model lookup
        self._reversed = reversed
        # Optional auxiliary prober for name decision
        self._name_prober = name_prober
        self._last_order = None
        self._seq_counters = None
        self._total_seqs = None
        self._total_char = None
        self._freq_char = None
        self.reset()

    def reset(self):
        """Reset the base prober and clear all per-stream counters."""
        super(SingleByteCharSetProber, self).reset()
        # char order of last character
        self._last_order = 255
        self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
        self._total_seqs = 0
        self._total_char = 0
        # characters that fall in our sampling range
        self._freq_char = 0

    @property
    def charset_name(self):
        """Charset name from the name prober if set, else from the model."""
        if self._name_prober:
            return self._name_prober.charset_name
        else:
            return self._model['charset_name']

    @property
    def language(self):
        """Language from the name prober if set, else the model's
        'language' entry (None when the model has none)."""
        if self._name_prober:
            return self._name_prober.language
        else:
            return self._model.get('language')

    def feed(self, byte_str):
        """Consume ``byte_str``, update the counters and return the state.

        Unless the model sets 'keep_english_letter', the input is first
        passed through ``self.filter_international_words``. Empty input
        (before or after filtering) returns the current state unchanged.
        Once more than SB_ENOUGH_REL_THRESHOLD sequences have been counted,
        a confidence above/below the shortcut thresholds moves the prober
        to FOUND_IT / NOT_ME.
        """
        if not self._model['keep_english_letter']:
            byte_str = self.filter_international_words(byte_str)
        if not byte_str:
            return self.state
        char_to_order_map = self._model['char_to_order_map']
        for char in byte_str:
            # XXX: Order is in range 1-64, so one would think we want 0-63 here,
            #      but that leads to 27 more test failures than before.
            order = char_to_order_map[char]
            # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
            #      CharacterCategory.SYMBOL is actually 253, so we use CONTROL
            #      to make it closer to the original intent. The only difference
            #      is whether or not we count digits and control characters for
            #      _total_char purposes.
            if order < CharacterCategory.CONTROL:
                self._total_char += 1
            if order < self.SAMPLE_SIZE:
                self._freq_char += 1
                if self._last_order < self.SAMPLE_SIZE:
                    self._total_seqs += 1
                    # The precedence matrix is flat: row * SAMPLE_SIZE + col.
                    if not self._reversed:
                        offset = (self._last_order * self.SAMPLE_SIZE) + order
                    else:  # reverse the order of the letters in the lookup
                        offset = (order * self.SAMPLE_SIZE) + self._last_order
                    likelihood = self._model['precedence_matrix'][offset]
                    self._seq_counters[likelihood] += 1
            self._last_order = order

        charset_name = self._model['charset_name']
        if self.state == ProbingState.DETECTING:
            if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
                confidence = self.get_confidence()
                if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
                    self.logger.debug('%s confidence = %s, we have a winner',
                                      charset_name, confidence)
                    self._state = ProbingState.FOUND_IT
                elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
                    self.logger.debug('%s confidence = %s, below negative '
                                      'shortcut threshhold %s', charset_name,
                                      confidence,
                                      self.NEGATIVE_SHORTCUT_THRESHOLD)
                    self._state = ProbingState.NOT_ME

        return self.state

    def get_confidence(self):
        """Return the confidence for this charset.

        With no counted sequences the result is 0.01. Otherwise it is the
        share of POSITIVE sequences, divided by the model's
        'typical_positive_ratio' and scaled by ``_freq_char / _total_char``;
        values of 1.0 or more are reported as 0.99.
        """
        r = 0.01
        if self._total_seqs > 0:
            r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
                 self._total_seqs / self._model['typical_positive_ratio'])
            r = r * self._freq_char / self._total_char
            if r >= 1.0:
                r = 0.99
        return r
@ -0,0 +1,73 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Universal charset detector code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 2001 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# Shy Shalom - original C code |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .charsetgroupprober import CharSetGroupProber |
|||
from .sbcharsetprober import SingleByteCharSetProber |
|||
from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, |
|||
Latin5CyrillicModel, MacCyrillicModel, |
|||
Ibm866Model, Ibm855Model) |
|||
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel |
|||
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel |
|||
# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel |
|||
from .langthaimodel import TIS620ThaiModel |
|||
from .langhebrewmodel import Win1255HebrewModel |
|||
from .hebrewprober import HebrewProber |
|||
from .langturkishmodel import Latin5TurkishModel |
|||
|
|||
|
|||
class SBCSGroupProber(CharSetGroupProber):
    """Group prober that holds one prober per supported single-byte model,
    followed by the Hebrew prober and its two model probers."""

    def __init__(self):
        super(SBCSGroupProber, self).__init__()

        # Models that need nothing beyond a plain SingleByteCharSetProber,
        # in the order their probers are registered.
        plain_models = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
            #       after we retrain model.
            # Latin2HungarianModel,
            # Win1250HungarianModel,
            TIS620ThaiModel,
            Latin5TurkishModel,
        )
        self.probers = [SingleByteCharSetProber(lang_model)
                        for lang_model in plain_models]

        # Hebrew uses one name prober shared by a non-reversed ("logical")
        # and a reversed ("visual") model prober.
        hebrew_prober = HebrewProber()
        logical_hebrew_prober = SingleByteCharSetProber(
            Win1255HebrewModel, False, hebrew_prober)
        visual_hebrew_prober = SingleByteCharSetProber(
            Win1255HebrewModel, True, hebrew_prober)
        hebrew_prober.set_model_probers(logical_hebrew_prober,
                                        visual_hebrew_prober)
        self.probers += [hebrew_prober,
                         logical_hebrew_prober,
                         visual_hebrew_prober]

        self.reset()
@ -0,0 +1,92 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is mozilla.org code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .mbcharsetprober import MultiByteCharSetProber |
|||
from .codingstatemachine import CodingStateMachine |
|||
from .chardistribution import SJISDistributionAnalysis |
|||
from .jpcntx import SJISContextAnalysis |
|||
from .mbcssm import SJIS_SM_MODEL |
|||
from .enums import ProbingState, MachineState |
|||
|
|||
|
|||
class SJISProber(MultiByteCharSetProber): |
|||
def __init__(self): |
|||
super(SJISProber, self).__init__() |
|||
self.coding_sm = CodingStateMachine(SJIS_SM_MODEL) |
|||
self.distribution_analyzer = SJISDistributionAnalysis() |
|||
self.context_analyzer = SJISContextAnalysis() |
|||
self.reset() |
|||
|
|||
def reset(self): |
|||
super(SJISProber, self).reset() |
|||
self.context_analyzer.reset() |
|||
|
|||
@property |
|||
def charset_name(self): |
|||
return self.context_analyzer.charset_name |
|||
|
|||
@property |
|||
def language(self): |
|||
return "Japanese" |
|||
|
|||
def feed(self, byte_str): |
|||
for i in range(len(byte_str)): |
|||
coding_state = self.coding_sm.next_state(byte_str[i]) |
|||
if coding_state == MachineState.ERROR: |
|||
self.logger.debug('%s %s prober hit error at byte %s', |
|||
self.charset_name, self.language, i) |
|||
self._state = ProbingState.NOT_ME |
|||
break |
|||
elif coding_state == MachineState.ITS_ME: |
|||
self._state = ProbingState.FOUND_IT |
|||
break |
|||
elif coding_state == MachineState.START: |
|||
char_len = self.coding_sm.get_current_charlen() |
|||
if i == 0: |
|||
self._last_char[1] = byte_str[0] |
|||
self.context_analyzer.feed(self._last_char[2 - char_len:], |
|||
char_len) |
|||
self.distribution_analyzer.feed(self._last_char, char_len) |
|||
else: |
|||
self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3 |
|||
- char_len], char_len) |
|||
self.distribution_analyzer.feed(byte_str[i - 1:i + 1], |
|||
char_len) |
|||
|
|||
self._last_char[0] = byte_str[-1] |
|||
|
|||
if self.state == ProbingState.DETECTING: |
|||
if (self.context_analyzer.got_enough_data() and |
|||
(self.get_confidence() > self.SHORTCUT_THRESHOLD)): |
|||
self._state = ProbingState.FOUND_IT |
|||
|
|||
return self.state |
|||
|
|||
def get_confidence(self): |
|||
context_conf = self.context_analyzer.get_confidence() |
|||
distrib_conf = self.distribution_analyzer.get_confidence() |
|||
return max(context_conf, distrib_conf) |
@ -0,0 +1,286 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is Mozilla Universal charset detector code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 2001 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# Shy Shalom - original C code |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
""" |
|||
Module containing the UniversalDetector detector class, which is the primary |
|||
class a user of ``chardet`` should use. |
|||
|
|||
:author: Mark Pilgrim (initial port to Python) |
|||
:author: Shy Shalom (original C code) |
|||
:author: Dan Blanchard (major refactoring for 3.0) |
|||
:author: Ian Cordasco |
|||
""" |
|||
|
|||
|
|||
import codecs |
|||
import logging |
|||
import re |
|||
|
|||
from .charsetgroupprober import CharSetGroupProber |
|||
from .enums import InputState, LanguageFilter, ProbingState |
|||
from .escprober import EscCharSetProber |
|||
from .latin1prober import Latin1Prober |
|||
from .mbcsgroupprober import MBCSGroupProber |
|||
from .sbcsgroupprober import SBCSGroupProber |
|||
|
|||
|
|||
class UniversalDetector(object): |
|||
""" |
|||
The ``UniversalDetector`` class underlies the ``chardet.detect`` function |
|||
and coordinates all of the different charset probers. |
|||
|
|||
To get a ``dict`` containing an encoding and its confidence, you can simply |
|||
run: |
|||
|
|||
.. code:: |
|||
|
|||
u = UniversalDetector() |
|||
u.feed(some_bytes) |
|||
u.close() |
|||
detected = u.result |
|||
|
|||
""" |
|||
|
|||
MINIMUM_THRESHOLD = 0.20 |
|||
HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]') |
|||
ESC_DETECTOR = re.compile(b'(\033|~{)') |
|||
WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]') |
|||
ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252', |
|||
'iso-8859-2': 'Windows-1250', |
|||
'iso-8859-5': 'Windows-1251', |
|||
'iso-8859-6': 'Windows-1256', |
|||
'iso-8859-7': 'Windows-1253', |
|||
'iso-8859-8': 'Windows-1255', |
|||
'iso-8859-9': 'Windows-1254', |
|||
'iso-8859-13': 'Windows-1257'} |
|||
|
|||
def __init__(self, lang_filter=LanguageFilter.ALL): |
|||
self._esc_charset_prober = None |
|||
self._charset_probers = [] |
|||
self.result = None |
|||
self.done = None |
|||
self._got_data = None |
|||
self._input_state = None |
|||
self._last_char = None |
|||
self.lang_filter = lang_filter |
|||
self.logger = logging.getLogger(__name__) |
|||
self._has_win_bytes = None |
|||
self.reset() |
|||
|
|||
def reset(self): |
|||
""" |
|||
Reset the UniversalDetector and all of its probers back to their |
|||
initial states. This is called by ``__init__``, so you only need to |
|||
call this directly in between analyses of different documents. |
|||
""" |
|||
self.result = {'encoding': None, 'confidence': 0.0, 'language': None} |
|||
self.done = False |
|||
self._got_data = False |
|||
self._has_win_bytes = False |
|||
self._input_state = InputState.PURE_ASCII |
|||
self._last_char = b'' |
|||
if self._esc_charset_prober: |
|||
self._esc_charset_prober.reset() |
|||
for prober in self._charset_probers: |
|||
prober.reset() |
|||
|
|||
def feed(self, byte_str): |
|||
""" |
|||
Takes a chunk of a document and feeds it through all of the relevant |
|||
charset probers. |
|||
|
|||
After calling ``feed``, you can check the value of the ``done`` |
|||
attribute to see if you need to continue feeding the |
|||
``UniversalDetector`` more data, or if it has made a prediction |
|||
(in the ``result`` attribute). |
|||
|
|||
.. note:: |
|||
You should always call ``close`` when you're done feeding in your |
|||
document if ``done`` is not already ``True``. |
|||
""" |
|||
if self.done: |
|||
return |
|||
|
|||
if not len(byte_str): |
|||
return |
|||
|
|||
if not isinstance(byte_str, bytearray): |
|||
byte_str = bytearray(byte_str) |
|||
|
|||
# First check for known BOMs, since these are guaranteed to be correct |
|||
if not self._got_data: |
|||
# If the data starts with BOM, we know it is UTF |
|||
if byte_str.startswith(codecs.BOM_UTF8): |
|||
# EF BB BF UTF-8 with BOM |
|||
self.result = {'encoding': "UTF-8-SIG", |
|||
'confidence': 1.0, |
|||
'language': ''} |
|||
elif byte_str.startswith((codecs.BOM_UTF32_LE, |
|||
codecs.BOM_UTF32_BE)): |
|||
# FF FE 00 00 UTF-32, little-endian BOM |
|||
# 00 00 FE FF UTF-32, big-endian BOM |
|||
self.result = {'encoding': "UTF-32", |
|||
'confidence': 1.0, |
|||
'language': ''} |
|||
elif byte_str.startswith(b'\xFE\xFF\x00\x00'): |
|||
# FE FF 00 00 UCS-4, unusual octet order BOM (3412) |
|||
self.result = {'encoding': "X-ISO-10646-UCS-4-3412", |
|||
'confidence': 1.0, |
|||
'language': ''} |
|||
elif byte_str.startswith(b'\x00\x00\xFF\xFE'): |
|||
# 00 00 FF FE UCS-4, unusual octet order BOM (2143) |
|||
self.result = {'encoding': "X-ISO-10646-UCS-4-2143", |
|||
'confidence': 1.0, |
|||
'language': ''} |
|||
elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)): |
|||
# FF FE UTF-16, little endian BOM |
|||
# FE FF UTF-16, big endian BOM |
|||
self.result = {'encoding': "UTF-16", |
|||
'confidence': 1.0, |
|||
'language': ''} |
|||
|
|||
self._got_data = True |
|||
if self.result['encoding'] is not None: |
|||
self.done = True |
|||
return |
|||
|
|||
# If none of those matched and we've only see ASCII so far, check |
|||
# for high bytes and escape sequences |
|||
if self._input_state == InputState.PURE_ASCII: |
|||
if self.HIGH_BYTE_DETECTOR.search(byte_str): |
|||
self._input_state = InputState.HIGH_BYTE |
|||
elif self._input_state == InputState.PURE_ASCII and \ |
|||
self.ESC_DETECTOR.search(self._last_char + byte_str): |
|||
self._input_state = InputState.ESC_ASCII |
|||
|
|||
self._last_char = byte_str[-1:] |
|||
|
|||
# If we've seen escape sequences, use the EscCharSetProber, which |
|||
# uses a simple state machine to check for known escape sequences in |
|||
# HZ and ISO-2022 encodings, since those are the only encodings that |
|||
# use such sequences. |
|||
if self._input_state == InputState.ESC_ASCII: |
|||
if not self._esc_charset_prober: |
|||
self._esc_charset_prober = EscCharSetProber(self.lang_filter) |
|||
if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT: |
|||
self.result = {'encoding': |
|||
self._esc_charset_prober.charset_name, |
|||
'confidence': |
|||
self._esc_charset_prober.get_confidence(), |
|||
'language': |
|||
self._esc_charset_prober.language} |
|||
self.done = True |
|||
# If we've seen high bytes (i.e., those with values greater than 127), |
|||
# we need to do more complicated checks using all our multi-byte and |
|||
# single-byte probers that are left. The single-byte probers |
|||
# use character bigram distributions to determine the encoding, whereas |
|||
# the multi-byte probers use a combination of character unigram and |
|||
# bigram distributions. |
|||
elif self._input_state == InputState.HIGH_BYTE: |
|||
if not self._charset_probers: |
|||
self._charset_probers = [MBCSGroupProber(self.lang_filter)] |
|||
# If we're checking non-CJK encodings, use single-byte prober |
|||
if self.lang_filter & LanguageFilter.NON_CJK: |
|||
self._charset_probers.append(SBCSGroupProber()) |
|||
self._charset_probers.append(Latin1Prober()) |
|||
for prober in self._charset_probers: |
|||
if prober.feed(byte_str) == ProbingState.FOUND_IT: |
|||
self.result = {'encoding': prober.charset_name, |
|||
'confidence': prober.get_confidence(), |
|||
'language': prober.language} |
|||
self.done = True |
|||
break |
|||
if self.WIN_BYTE_DETECTOR.search(byte_str): |
|||
self._has_win_bytes = True |
|||
|
|||
def close(self): |
|||
""" |
|||
Stop analyzing the current document and come up with a final |
|||
prediction. |
|||
|
|||
:returns: The ``result`` attribute, a ``dict`` with the keys |
|||
`encoding`, `confidence`, and `language`. |
|||
""" |
|||
# Don't bother with checks if we're already done |
|||
if self.done: |
|||
return self.result |
|||
self.done = True |
|||
|
|||
if not self._got_data: |
|||
self.logger.debug('no data received!') |
|||
|
|||
# Default to ASCII if it is all we've seen so far |
|||
elif self._input_state == InputState.PURE_ASCII: |
|||
self.result = {'encoding': 'ascii', |
|||
'confidence': 1.0, |
|||
'language': ''} |
|||
|
|||
# If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD |
|||
elif self._input_state == InputState.HIGH_BYTE: |
|||
prober_confidence = None |
|||
max_prober_confidence = 0.0 |
|||
max_prober = None |
|||
for prober in self._charset_probers: |
|||
if not prober: |
|||
continue |
|||
prober_confidence = prober.get_confidence() |
|||
if prober_confidence > max_prober_confidence: |
|||
max_prober_confidence = prober_confidence |
|||
max_prober = prober |
|||
if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD): |
|||
charset_name = max_prober.charset_name |
|||
lower_charset_name = max_prober.charset_name.lower() |
|||
confidence = max_prober.get_confidence() |
|||
# Use Windows encoding name instead of ISO-8859 if we saw any |
|||
# extra Windows-specific bytes |
|||
if lower_charset_name.startswith('iso-8859'): |
|||
if self._has_win_bytes: |
|||
charset_name = self.ISO_WIN_MAP.get(lower_charset_name, |
|||
charset_name) |
|||
self.result = {'encoding': charset_name, |
|||
'confidence': confidence, |
|||
'language': max_prober.language} |
|||
|
|||
# Log all prober confidences if none met MINIMUM_THRESHOLD |
|||
if self.logger.getEffectiveLevel() == logging.DEBUG: |
|||
if self.result['encoding'] is None: |
|||
self.logger.debug('no probers hit minimum threshold') |
|||
for group_prober in self._charset_probers: |
|||
if not group_prober: |
|||
continue |
|||
if isinstance(group_prober, CharSetGroupProber): |
|||
for prober in group_prober.probers: |
|||
self.logger.debug('%s %s confidence = %s', |
|||
prober.charset_name, |
|||
prober.language, |
|||
prober.get_confidence()) |
|||
else: |
|||
self.logger.debug('%s %s confidence = %s', |
|||
prober.charset_name, |
|||
prober.language, |
|||
prober.get_confidence()) |
|||
return self.result |
@ -0,0 +1,82 @@ |
|||
######################## BEGIN LICENSE BLOCK ######################## |
|||
# The Original Code is mozilla.org code. |
|||
# |
|||
# The Initial Developer of the Original Code is |
|||
# Netscape Communications Corporation. |
|||
# Portions created by the Initial Developer are Copyright (C) 1998 |
|||
# the Initial Developer. All Rights Reserved. |
|||
# |
|||
# Contributor(s): |
|||
# Mark Pilgrim - port to Python |
|||
# |
|||
# This library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
# |
|||
# This library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with this library; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|||
# 02110-1301 USA |
|||
######################### END LICENSE BLOCK ######################### |
|||
|
|||
from .charsetprober import CharSetProber |
|||
from .enums import ProbingState, MachineState |
|||
from .codingstatemachine import CodingStateMachine |
|||
from .mbcssm import UTF8_SM_MODEL |
|||
|
|||
|
|||
|
|||
class UTF8Prober(CharSetProber): |
|||
ONE_CHAR_PROB = 0.5 |
|||
|
|||
def __init__(self): |
|||
super(UTF8Prober, self).__init__() |
|||
self.coding_sm = CodingStateMachine(UTF8_SM_MODEL) |
|||
self._num_mb_chars = None |
|||
self.reset() |
|||
|
|||
def reset(self): |
|||
super(UTF8Prober, self).reset() |
|||
self.coding_sm.reset() |
|||
self._num_mb_chars = 0 |
|||
|
|||
@property |
|||
def charset_name(self): |
|||
return "utf-8" |
|||
|
|||
@property |
|||
def language(self): |
|||
return "" |
|||
|
|||
def feed(self, byte_str): |
|||
for c in byte_str: |
|||
coding_state = self.coding_sm.next_state(c) |
|||
if coding_state == MachineState.ERROR: |
|||
self._state = ProbingState.NOT_ME |
|||
break |
|||
elif coding_state == MachineState.ITS_ME: |
|||
self._state = ProbingState.FOUND_IT |
|||
break |
|||
elif coding_state == MachineState.START: |
|||
if self.coding_sm.get_current_charlen() >= 2: |
|||
self._num_mb_chars += 1 |
|||
|
|||
if self.state == ProbingState.DETECTING: |
|||
if self.get_confidence() > self.SHORTCUT_THRESHOLD: |
|||
self._state = ProbingState.FOUND_IT |
|||
|
|||
return self.state |
|||
|
|||
def get_confidence(self): |
|||
unlike = 0.99 |
|||
if self._num_mb_chars < 6: |
|||
unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars |
|||
return 1.0 - unlike |
|||
else: |
|||
return unlike |
@ -0,0 +1,9 @@ |
|||
""" |
|||
This module exists only to simplify retrieving the version number of chardet |
|||
from within setup.py and from chardet subpackages. |
|||
|
|||
:author: Dan Blanchard (dan.blanchard@gmail.com) |
|||
""" |
|||
|
|||
__version__ = "3.0.4" |
|||
VERSION = __version__.split('.') |
@ -0,0 +1,64 @@ |
|||
#! /usr/bin/python3 |
|||
|
|||
import sys |
|||
import os |
|||
import dropbox |
|||
from dropbox.exceptions import ApiError, AuthError |
|||
|
|||
# Add OAuth2 access token here. |
|||
# You can generate one for yourself in the App Console. |
|||
# TOKEN = os.environ.get('DROPBOX_TOKEN') |
|||
TOKEN = os.environ.get('DROPBOX_TOKEN') |
|||
|
|||
LOCALFILE = 'backup.tar.gz' |
|||
|
|||
file_size = file_size = os.path.getsize(LOCALFILE) |
|||
|
|||
CHUNK_SIZE = 4 * 1024 * 1024 |
|||
|
|||
# Check for an access token |
|||
if (len(TOKEN) == 0): |
|||
sys.exit("ERROR: Looks like you didn't add your access token.") |
|||
print("Creating a Dropbox object...") |
|||
dbx = dropbox.Dropbox(TOKEN) |
|||
# Check that the access token is valid |
|||
try: |
|||
dbx.users_get_current_account() |
|||
except AuthError: |
|||
sys.exit("ERROR: Invalid access token; try re-generating an \ |
|||
access token from the app console on the web.") |
|||
with open(LOCALFILE, 'rb') as f: |
|||
# We use WriteMode=overwrite to make sure that the settings in the file |
|||
# are changed on upload |
|||
print("Uploading " + LOCALFILE + " to Dropbox ...") |
|||
if file_size <= CHUNK_SIZE: |
|||
print(dbx.files_upload(f.read(), '/backup.tar.gz')) |
|||
try: |
|||
upload_session_start_result = \ |
|||
dbx.files_upload_session_start(f.read(CHUNK_SIZE)) |
|||
cursor = dropbox.files.UploadSessionCursor( |
|||
session_id=upload_session_start_result.session_id, |
|||
offset=f.tell()) |
|||
commit = dropbox.files.CommitInfo(path='/backup.tar.gz') |
|||
|
|||
while f.tell() < file_size: |
|||
if ((file_size - f.tell()) <= CHUNK_SIZE): |
|||
print(dbx.files_upload_session_finish( |
|||
f.read(CHUNK_SIZE), cursor, commit)) |
|||
else: |
|||
dbx.files_upload_session_append(f.read(CHUNK_SIZE), |
|||
cursor.session_id, |
|||
cursor.offset) |
|||
cursor.offset = f.tell() |
|||
except ApiError as err: |
|||
# This checks for the specific error where a user doesn't have |
|||
# enough Dropbox space quota to upload this file |
|||
if (err.error.is_path() and |
|||
err.error.get_path().reason.is_insufficient_space()): |
|||
sys.exit("ERROR: Cannot back up; insufficient space.") |
|||
elif err.user_message_text: |
|||
print(err.user_message_text) |
|||
sys.exit() |
|||
else: |
|||
print(err) |
|||
sys.exit() |
@ -0,0 +1,4 @@ |
|||
from __future__ import absolute_import |
|||
|
|||
from .dropbox import __version__, Dropbox, DropboxTeam, create_session # noqa: F401 |
|||
from .oauth import DropboxOAuth2Flow, DropboxOAuth2FlowNoRedirect # noqa: F401 |
@ -0,0 +1,7 @@ |
|||
# -*- coding: utf-8 -*- |
|||
# Auto-generated by Stone, do not modify. |
|||
# @generated |
|||
# flake8: noqa |
|||
# pylint: skip-file |
|||
# If you have issues importing this module because Python recognizes it as a keyword, use async_ instead. |
|||
from async_ import * |
@ -0,0 +1,329 @@ |
|||
# -*- coding: utf-8 -*- |
|||
# Auto-generated by Stone, do not modify. |
|||
# @generated |
|||
# flake8: noqa |
|||
# pylint: skip-file |
|||
try: |
|||
from . import stone_validators as bv |
|||
from . import stone_base as bb |
|||
except (ImportError, SystemError, ValueError): |
|||
# Catch errors raised when importing a relative module when not in a package. |
|||
# This makes testing this file directly (outside of a package) easier. |
|||
import stone_validators as bv |
|||
import stone_base as bb |
|||
|
|||
class LaunchResultBase(bb.Union): |
|||
""" |
|||
Result returned by methods that launch an asynchronous job. A method who may |
|||
either launch an asynchronous job, or complete the request synchronously, |
|||
can use this union by extending it, and adding a 'complete' field with the |
|||
type of the synchronous response. See :class:`LaunchEmptyResult` for an |
|||
example. |
|||
|
|||
This class acts as a tagged union. Only one of the ``is_*`` methods will |
|||
return true. To get the associated value of a tag (if one exists), use the |
|||
corresponding ``get_*`` method. |
|||
|
|||
:ivar str async_job_id: This response indicates that the processing is |
|||
asynchronous. The string is an id that can be used to obtain the status |
|||
of the asynchronous job. |
|||
""" |
|||
|
|||
_catch_all = None |
|||
|
|||
@classmethod |
|||
def async_job_id(cls, val): |
|||
""" |
|||
Create an instance of this class set to the ``async_job_id`` tag with |
|||
value ``val``. |
|||
|
|||
:param str val: |
|||
:rtype: LaunchResultBase |
|||
""" |
|||
return cls('async_job_id', val) |
|||
|
|||
def is_async_job_id(self): |
|||
""" |
|||
Check if the union tag is ``async_job_id``. |
|||
|
|||
:rtype: bool |
|||
""" |
|||
return self._tag == 'async_job_id' |
|||
|
|||
def get_async_job_id(self): |
|||
""" |
|||
This response indicates that the processing is asynchronous. The string |
|||
is an id that can be used to obtain the status of the asynchronous job. |
|||
|
|||
Only call this if :meth:`is_async_job_id` is true. |
|||
|
|||
:rtype: str |
|||
""" |
|||
if not self.is_async_job_id(): |
|||
raise AttributeError("tag 'async_job_id' not set") |
|||
return self._value |
|||
|
|||
def _process_custom_annotations(self, annotation_type, processor): |
|||
super(LaunchResultBase, self)._process_custom_annotations(annotation_type, processor) |
|||
|
|||
def __repr__(self): |
|||
return 'LaunchResultBase(%r, %r)' % (self._tag, self._value) |
|||
|
|||
LaunchResultBase_validator = bv.Union(LaunchResultBase) |
|||
|
|||
class LaunchEmptyResult(LaunchResultBase): |
|||
""" |
|||
Result returned by methods that may either launch an asynchronous job or |
|||
complete synchronously. Upon synchronous completion of the job, no |
|||
additional information is returned. |
|||
|
|||
This class acts as a tagged union. Only one of the ``is_*`` methods will |
|||
return true. To get the associated value of a tag (if one exists), use the |
|||
corresponding ``get_*`` method. |
|||
|
|||
:ivar complete: The job finished synchronously and successfully. |
|||
""" |
|||
|
|||
# Attribute is overwritten below the class definition |
|||
complete = None |
|||
|
|||
def is_complete(self): |
|||
""" |
|||
Check if the union tag is ``complete``. |
|||
|
|||
:rtype: bool |
|||
""" |
|||
return self._tag == 'complete' |
|||
|
|||
def _process_custom_annotations(self, annotation_type, processor): |
|||
super(LaunchEmptyResult, self)._process_custom_annotations(annotation_type, processor) |
|||
|
|||
def __repr__(self): |
|||
return 'LaunchEmptyResult(%r, %r)' % (self._tag, self._value) |
|||
|
|||
LaunchEmptyResult_validator = bv.Union(LaunchEmptyResult) |
|||
|
|||
class PollArg(bb.Struct): |
|||
""" |
|||
Arguments for methods that poll the status of an asynchronous job. |
|||
|
|||
:ivar async_job_id: Id of the asynchronous job. This is the value of a |
|||
response returned from the method that launched the job. |
|||
""" |
|||
|
|||
__slots__ = [ |
|||
'_async_job_id_value', |
|||
'_async_job_id_present', |
|||
] |
|||
|
|||
_has_required_fields = True |
|||
|
|||
def __init__(self, |
|||
async_job_id=None): |
|||
self._async_job_id_value = None |
|||
self._async_job_id_present = False |
|||
if async_job_id is not None: |
|||
self.async_job_id = async_job_id |
|||
|
|||
@property |
|||
def async_job_id(self): |
|||
""" |
|||
Id of the asynchronous job. This is the value of a response returned |
|||
from the method that launched the job. |
|||
|
|||
:rtype: str |
|||
""" |
|||
if self._async_job_id_present: |
|||
return self._async_job_id_value |
|||
else: |
|||
raise AttributeError("missing required field 'async_job_id'") |
|||
|
|||
@async_job_id.setter |
|||
def async_job_id(self, val): |
|||
val = self._async_job_id_validator.validate(val) |
|||
self._async_job_id_value = val |
|||
self._async_job_id_present = True |
|||
|
|||
@async_job_id.deleter |
|||
def async_job_id(self): |
|||
self._async_job_id_value = None |
|||
self._async_job_id_present = False |
|||
|
|||
def _process_custom_annotations(self, annotation_type, processor): |
|||
super(PollArg, self)._process_custom_annotations(annotation_type, processor) |
|||
|
|||
def __repr__(self): |
|||
return 'PollArg(async_job_id={!r})'.format( |
|||
self._async_job_id_value, |
|||
) |
|||
|
|||
PollArg_validator = bv.Struct(PollArg) |
|||
|
|||
class PollResultBase(bb.Union): |
|||
""" |
|||
Result returned by methods that poll for the status of an asynchronous job. |
|||
Unions that extend this union should add a 'complete' field with a type of |
|||
the information returned upon job completion. See :class:`PollEmptyResult` |
|||
for an example. |
|||
|
|||
This class acts as a tagged union. Only one of the ``is_*`` methods will |
|||
return true. To get the associated value of a tag (if one exists), use the |
|||
corresponding ``get_*`` method. |
|||
|
|||
:ivar in_progress: The asynchronous job is still in progress. |
|||
""" |
|||
|
|||
_catch_all = None |
|||
# Attribute is overwritten below the class definition |
|||
in_progress = None |
|||
|
|||
def is_in_progress(self): |
|||
""" |
|||
Check if the union tag is ``in_progress``. |
|||
|
|||
:rtype: bool |
|||
""" |
|||
return self._tag == 'in_progress' |
|||
|
|||
def _process_custom_annotations(self, annotation_type, processor): |
|||
super(PollResultBase, self)._process_custom_annotations(annotation_type, processor) |
|||
|
|||
def __repr__(self): |
|||
return 'PollResultBase(%r, %r)' % (self._tag, self._value) |
|||
|
|||
PollResultBase_validator = bv.Union(PollResultBase) |
|||
|
|||
class PollEmptyResult(PollResultBase): |
|||
""" |
|||
Result returned by methods that poll for the status of an asynchronous job. |
|||
Upon completion of the job, no additional information is returned. |
|||
|
|||
This class acts as a tagged union. Only one of the ``is_*`` methods will |
|||
return true. To get the associated value of a tag (if one exists), use the |
|||
corresponding ``get_*`` method. |
|||
|
|||
:ivar complete: The asynchronous job has completed successfully. |
|||
""" |
|||
|
|||
# Attribute is overwritten below the class definition |
|||
complete = None |
|||
|
|||
def is_complete(self): |
|||
""" |
|||
Check if the union tag is ``complete``. |
|||
|
|||
:rtype: bool |
|||
""" |
|||
return self._tag == 'complete' |
|||
|
|||
def _process_custom_annotations(self, annotation_type, processor): |
|||
super(PollEmptyResult, self)._process_custom_annotations(annotation_type, processor) |
|||
|
|||
def __repr__(self): |
|||
return 'PollEmptyResult(%r, %r)' % (self._tag, self._value) |
|||
|
|||
PollEmptyResult_validator = bv.Union(PollEmptyResult) |
|||
|
|||
class PollError(bb.Union): |
|||
""" |
|||
Error returned by methods for polling the status of asynchronous job. |
|||
|
|||
This class acts as a tagged union. Only one of the ``is_*`` methods will |
|||
return true. To get the associated value of a tag (if one exists), use the |
|||
corresponding ``get_*`` method. |
|||
|
|||
:ivar invalid_async_job_id: The job ID is invalid. |
|||
:ivar internal_error: Something went wrong with the job on Dropbox's end. |
|||
You'll need to verify that the action you were taking succeeded, and if |
|||
not, try again. This should happen very rarely. |
|||
""" |
|||
|
|||
_catch_all = 'other' |
|||
# Attribute is overwritten below the class definition |
|||
invalid_async_job_id = None |
|||
# Attribute is overwritten below the class definition |
|||
internal_error = None |
|||
# Attribute is overwritten below the class definition |
|||
other = None |
|||
|
|||
def is_invalid_async_job_id(self): |
|||
""" |
|||
Check if the union tag is ``invalid_async_job_id``. |
|||
|
|||
:rtype: bool |
|||
""" |
|||
return self._tag == 'invalid_async_job_id' |
|||
|
|||
def is_internal_error(self): |
|||
""" |
|||
Check if the union tag is ``internal_error``. |
|||
|
|||
:rtype: bool |
|||
""" |
|||
return self._tag == 'internal_error' |
|||
|
|||
def is_other(self): |
|||
""" |
|||
Check if the union tag is ``other``. |
|||
|
|||
:rtype: bool |
|||
""" |
|||
return self._tag == 'other' |
|||
|
|||
def _process_custom_annotations(self, annotation_type, processor): |
|||
super(PollError, self)._process_custom_annotations(annotation_type, processor) |
|||
|
|||
def __repr__(self): |
|||
return 'PollError(%r, %r)' % (self._tag, self._value) |
|||
|
|||
PollError_validator = bv.Union(PollError) |
|||
|
|||
AsyncJobId_validator = bv.String(min_length=1) |
|||
LaunchResultBase._async_job_id_validator = AsyncJobId_validator |
|||
LaunchResultBase._tagmap = { |
|||
'async_job_id': LaunchResultBase._async_job_id_validator, |
|||
} |
|||
|
|||
LaunchEmptyResult._complete_validator = bv.Void() |
|||
LaunchEmptyResult._tagmap = { |
|||
'complete': LaunchEmptyResult._complete_validator, |
|||
} |
|||
LaunchEmptyResult._tagmap.update(LaunchResultBase._tagmap) |
|||
|
|||
LaunchEmptyResult.complete = LaunchEmptyResult('complete') |
|||
|
|||
PollArg._async_job_id_validator = AsyncJobId_validator |
|||
PollArg._all_field_names_ = set(['async_job_id']) |
|||
PollArg._all_fields_ = [('async_job_id', PollArg._async_job_id_validator)] |
|||
|
|||
PollResultBase._in_progress_validator = bv.Void() |
|||
PollResultBase._tagmap = { |
|||
'in_progress': PollResultBase._in_progress_validator, |
|||
} |
|||
|
|||
PollResultBase.in_progress = PollResultBase('in_progress') |
|||
|
|||
PollEmptyResult._complete_validator = bv.Void() |
|||
PollEmptyResult._tagmap = { |
|||
'complete': PollEmptyResult._complete_validator, |
|||
} |
|||
PollEmptyResult._tagmap.update(PollResultBase._tagmap) |
|||
|
|||
PollEmptyResult.complete = PollEmptyResult('complete') |
|||
|
|||
PollError._invalid_async_job_id_validator = bv.Void() |
|||
PollError._internal_error_validator = bv.Void() |
|||
PollError._other_validator = bv.Void() |
|||
PollError._tagmap = { |
|||
'invalid_async_job_id': PollError._invalid_async_job_id_validator, |
|||
'internal_error': PollError._internal_error_validator, |
|||
'other': PollError._other_validator, |
|||
} |
|||
|
|||
PollError.invalid_async_job_id = PollError('invalid_async_job_id') |
|||
PollError.internal_error = PollError('internal_error') |
|||
PollError.other = PollError('other') |
|||
|
|||
ROUTES = { |
|||
} |
|||
|
@ -0,0 +1,767 @@ |
|||
# -*- coding: utf-8 -*- |
|||
# Auto-generated by Stone, do not modify. |
|||
# @generated |
|||
# flake8: noqa |
|||
# pylint: skip-file |
|||
try: |
|||
from . import stone_validators as bv |
|||
from . import stone_base as bb |
|||
except (ImportError, SystemError, ValueError): |
|||
# Catch errors raised when importing a relative module when not in a package. |
|||
# This makes testing this file directly (outside of a package) easier. |
|||
import stone_validators as bv |
|||
import stone_base as bb |
|||
|
|||
class AccessError(bb.Union):
    """
    The account does not have permission to access the resource.

    Tagged union: exactly one ``is_*`` predicate is true for an instance, and
    a tag that carries a payload exposes it through its ``get_*`` method.

    :ivar InvalidAccountTypeError invalid_account_type: Current account type
        cannot access the resource.
    :ivar PaperAccessError paper_access_denied: Current account cannot access
        Paper.
    """

    _catch_all = 'other'
    # Placeholder only; the real instance is assigned below the class
    # definition.
    other = None

    @classmethod
    def invalid_account_type(cls, val):
        """
        Build an instance tagged ``invalid_account_type`` that carries ``val``.

        :param InvalidAccountTypeError val:
        :rtype: AccessError
        """
        return cls('invalid_account_type', val)

    @classmethod
    def paper_access_denied(cls, val):
        """
        Build an instance tagged ``paper_access_denied`` that carries ``val``.

        :param PaperAccessError val:
        :rtype: AccessError
        """
        return cls('paper_access_denied', val)

    def is_invalid_account_type(self):
        """
        Report whether the union tag is ``invalid_account_type``.

        :rtype: bool
        """
        return self._tag == 'invalid_account_type'

    def is_paper_access_denied(self):
        """
        Report whether the union tag is ``paper_access_denied``.

        :rtype: bool
        """
        return self._tag == 'paper_access_denied'

    def is_other(self):
        """
        Report whether the union tag is ``other``.

        :rtype: bool
        """
        return self._tag == 'other'

    def get_invalid_account_type(self):
        """
        Current account type cannot access the resource.

        Only call this if :meth:`is_invalid_account_type` is true.

        :rtype: InvalidAccountTypeError
        :raises AttributeError: if a different tag is set.
        """
        if self.is_invalid_account_type():
            return self._value
        raise AttributeError("tag 'invalid_account_type' not set")

    def get_paper_access_denied(self):
        """
        Current account cannot access Paper.

        Only call this if :meth:`is_paper_access_denied` is true.

        :rtype: PaperAccessError
        :raises AttributeError: if a different tag is set.
        """
        if self.is_paper_access_denied():
            return self._value
        raise AttributeError("tag 'paper_access_denied' not set")

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing class-specific to do: hand straight over to the base class.
        parent = super(AccessError, self)
        parent._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        tag_and_value = (self._tag, self._value)
        return 'AccessError(%r, %r)' % tag_and_value

AccessError_validator = bv.Union(AccessError)
|||
|
|||
class AuthError(bb.Union):
    """
    Errors occurred during authentication.

    Tagged union: exactly one ``is_*`` predicate is true for an instance, and
    a tag that carries a payload exposes it through its ``get_*`` method.

    :ivar invalid_access_token: The access token is invalid.
    :ivar invalid_select_user: The user specified in 'Dropbox-API-Select-User'
        is no longer on the team.
    :ivar invalid_select_admin: The user specified in 'Dropbox-API-Select-Admin'
        is not a Dropbox Business team admin.
    :ivar user_suspended: The user has been suspended.
    :ivar expired_access_token: The access token has expired.
    """

    _catch_all = 'other'
    # Placeholders only; each attribute is overwritten with a real instance
    # below the class definition.
    invalid_access_token = None
    invalid_select_user = None
    invalid_select_admin = None
    user_suspended = None
    expired_access_token = None
    other = None

    def is_invalid_access_token(self):
        """
        Report whether the union tag is ``invalid_access_token``.

        :rtype: bool
        """
        return self._tag == 'invalid_access_token'

    def is_invalid_select_user(self):
        """
        Report whether the union tag is ``invalid_select_user``.

        :rtype: bool
        """
        return self._tag == 'invalid_select_user'

    def is_invalid_select_admin(self):
        """
        Report whether the union tag is ``invalid_select_admin``.

        :rtype: bool
        """
        return self._tag == 'invalid_select_admin'

    def is_user_suspended(self):
        """
        Report whether the union tag is ``user_suspended``.

        :rtype: bool
        """
        return self._tag == 'user_suspended'

    def is_expired_access_token(self):
        """
        Report whether the union tag is ``expired_access_token``.

        :rtype: bool
        """
        return self._tag == 'expired_access_token'

    def is_other(self):
        """
        Report whether the union tag is ``other``.

        :rtype: bool
        """
        return self._tag == 'other'

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing class-specific to do: hand straight over to the base class.
        parent = super(AuthError, self)
        parent._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        tag_and_value = (self._tag, self._value)
        return 'AuthError(%r, %r)' % tag_and_value

AuthError_validator = bv.Union(AuthError)
|||
|
|||
class InvalidAccountTypeError(bb.Union):
    """
    Tagged union: exactly one ``is_*`` predicate is true for an instance, and
    a tag that carries a payload exposes it through its ``get_*`` method.

    :ivar endpoint: Current account type doesn't have permission to access this
        route endpoint.
    :ivar feature: Current account type doesn't have permission to access this
        feature.
    """

    _catch_all = 'other'
    # Placeholders only; each attribute is overwritten with a real instance
    # below the class definition.
    endpoint = None
    feature = None
    other = None

    def is_endpoint(self):
        """
        Report whether the union tag is ``endpoint``.

        :rtype: bool
        """
        return self._tag == 'endpoint'

    def is_feature(self):
        """
        Report whether the union tag is ``feature``.

        :rtype: bool
        """
        return self._tag == 'feature'

    def is_other(self):
        """
        Report whether the union tag is ``other``.

        :rtype: bool
        """
        return self._tag == 'other'

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing class-specific to do: hand straight over to the base class.
        parent = super(InvalidAccountTypeError, self)
        parent._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        tag_and_value = (self._tag, self._value)
        return 'InvalidAccountTypeError(%r, %r)' % tag_and_value

InvalidAccountTypeError_validator = bv.Union(InvalidAccountTypeError)
|||
|
|||
class PaperAccessError(bb.Union):
    """
    Tagged union: exactly one ``is_*`` predicate is true for an instance, and
    a tag that carries a payload exposes it through its ``get_*`` method.

    :ivar paper_disabled: Paper is disabled.
    :ivar not_paper_user: The provided user has not used Paper yet.
    """

    _catch_all = 'other'
    # Placeholders only; each attribute is overwritten with a real instance
    # below the class definition.
    paper_disabled = None
    not_paper_user = None
    other = None

    def is_paper_disabled(self):
        """
        Report whether the union tag is ``paper_disabled``.

        :rtype: bool
        """
        return self._tag == 'paper_disabled'

    def is_not_paper_user(self):
        """
        Report whether the union tag is ``not_paper_user``.

        :rtype: bool
        """
        return self._tag == 'not_paper_user'

    def is_other(self):
        """
        Report whether the union tag is ``other``.

        :rtype: bool
        """
        return self._tag == 'other'

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing class-specific to do: hand straight over to the base class.
        parent = super(PaperAccessError, self)
        parent._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        tag_and_value = (self._tag, self._value)
        return 'PaperAccessError(%r, %r)' % tag_and_value

PaperAccessError_validator = bv.Union(PaperAccessError)
|||
|
|||
class RateLimitError(bb.Struct):
    """
    Error occurred because the app is being rate limited.

    :ivar reason: The reason why the app is being rate limited.
    :ivar retry_after: The number of seconds that the app should wait before
        making another request.
    """

    __slots__ = [
        '_reason_value',
        '_reason_present',
        '_retry_after_value',
        '_retry_after_present',
    ]

    _has_required_fields = True

    def __init__(self, reason=None, retry_after=None):
        # Begin with both fields marked as unset.
        self._reason_value = None
        self._reason_present = False
        self._retry_after_value = None
        self._retry_after_present = False
        # Supplied values go through the property setters so they are checked
        # by the field validators.
        if reason is not None:
            self.reason = reason
        if retry_after is not None:
            self.retry_after = retry_after

    @property
    def reason(self):
        """
        The reason why the app is being rate limited.

        :rtype: RateLimitReason
        :raises AttributeError: if the field has not been set.
        """
        if not self._reason_present:
            raise AttributeError("missing required field 'reason'")
        return self._reason_value

    @reason.setter
    def reason(self, val):
        # validate_type_only's result is not used; ``val`` is stored as given.
        self._reason_validator.validate_type_only(val)
        self._reason_value = val
        self._reason_present = True

    @reason.deleter
    def reason(self):
        self._reason_value = None
        self._reason_present = False

    @property
    def retry_after(self):
        """
        The number of seconds that the app should wait before making another
        request. Falls back to ``1`` when the field has not been set.

        :rtype: int
        """
        return self._retry_after_value if self._retry_after_present else 1

    @retry_after.setter
    def retry_after(self, val):
        # Store whatever the validator returns, not the raw argument.
        checked = self._retry_after_validator.validate(val)
        self._retry_after_value = checked
        self._retry_after_present = True

    @retry_after.deleter
    def retry_after(self):
        self._retry_after_value = None
        self._retry_after_present = False

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing class-specific to do: hand straight over to the base class.
        parent = super(RateLimitError, self)
        parent._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        # Reads the raw slots, so unset fields show as None.
        template = 'RateLimitError(reason={!r}, retry_after={!r})'
        return template.format(
            self._reason_value,
            self._retry_after_value,
        )

RateLimitError_validator = bv.Struct(RateLimitError)
|||
|
|||
class RateLimitReason(bb.Union):
    """
    Tagged union: exactly one ``is_*`` predicate is true for an instance, and
    a tag that carries a payload exposes it through its ``get_*`` method.

    :ivar too_many_requests: You are making too many requests in the past few
        minutes.
    :ivar too_many_write_operations: There are currently too many write
        operations happening in the user's Dropbox.
    """

    _catch_all = 'other'
    # Placeholders only; each attribute is overwritten with a real instance
    # below the class definition.
    too_many_requests = None
    too_many_write_operations = None
    other = None

    def is_too_many_requests(self):
        """
        Report whether the union tag is ``too_many_requests``.

        :rtype: bool
        """
        return self._tag == 'too_many_requests'

    def is_too_many_write_operations(self):
        """
        Report whether the union tag is ``too_many_write_operations``.

        :rtype: bool
        """
        return self._tag == 'too_many_write_operations'

    def is_other(self):
        """
        Report whether the union tag is ``other``.

        :rtype: bool
        """
        return self._tag == 'other'

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing class-specific to do: hand straight over to the base class.
        parent = super(RateLimitReason, self)
        parent._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        tag_and_value = (self._tag, self._value)
        return 'RateLimitReason(%r, %r)' % tag_and_value

RateLimitReason_validator = bv.Union(RateLimitReason)
|||
|
|||
class TokenFromOAuth1Arg(bb.Struct):
    """
    :ivar oauth1_token: The supplied OAuth 1.0 access token.
    :ivar oauth1_token_secret: The token secret associated with the supplied
        access token.
    """

    __slots__ = [
        '_oauth1_token_value',
        '_oauth1_token_present',
        '_oauth1_token_secret_value',
        '_oauth1_token_secret_present',
    ]

    _has_required_fields = True

    def __init__(self, oauth1_token=None, oauth1_token_secret=None):
        # Begin with both fields marked as unset.
        self._oauth1_token_value = None
        self._oauth1_token_present = False
        self._oauth1_token_secret_value = None
        self._oauth1_token_secret_present = False
        # Supplied values go through the property setters so they are checked
        # by the field validators.
        if oauth1_token is not None:
            self.oauth1_token = oauth1_token
        if oauth1_token_secret is not None:
            self.oauth1_token_secret = oauth1_token_secret

    @property
    def oauth1_token(self):
        """
        The supplied OAuth 1.0 access token.

        :rtype: str
        :raises AttributeError: if the field has not been set.
        """
        if not self._oauth1_token_present:
            raise AttributeError("missing required field 'oauth1_token'")
        return self._oauth1_token_value

    @oauth1_token.setter
    def oauth1_token(self, val):
        # Store whatever the validator returns, not the raw argument.
        checked = self._oauth1_token_validator.validate(val)
        self._oauth1_token_value = checked
        self._oauth1_token_present = True

    @oauth1_token.deleter
    def oauth1_token(self):
        self._oauth1_token_value = None
        self._oauth1_token_present = False

    @property
    def oauth1_token_secret(self):
        """
        The token secret associated with the supplied access token.

        :rtype: str
        :raises AttributeError: if the field has not been set.
        """
        if not self._oauth1_token_secret_present:
            raise AttributeError("missing required field 'oauth1_token_secret'")
        return self._oauth1_token_secret_value

    @oauth1_token_secret.setter
    def oauth1_token_secret(self, val):
        # Store whatever the validator returns, not the raw argument.
        checked = self._oauth1_token_secret_validator.validate(val)
        self._oauth1_token_secret_value = checked
        self._oauth1_token_secret_present = True

    @oauth1_token_secret.deleter
    def oauth1_token_secret(self):
        self._oauth1_token_secret_value = None
        self._oauth1_token_secret_present = False

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing class-specific to do: hand straight over to the base class.
        parent = super(TokenFromOAuth1Arg, self)
        parent._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        # NOTE(review): the repr embeds the token and its secret verbatim, so
        # logging an instance would expose credentials -- confirm callers
        # never log it.
        template = 'TokenFromOAuth1Arg(oauth1_token={!r}, oauth1_token_secret={!r})'
        return template.format(
            self._oauth1_token_value,
            self._oauth1_token_secret_value,
        )

TokenFromOAuth1Arg_validator = bv.Struct(TokenFromOAuth1Arg)
|||
|
|||
class TokenFromOAuth1Error(bb.Union):
    """
    Tagged union: exactly one ``is_*`` predicate is true for an instance, and
    a tag that carries a payload exposes it through its ``get_*`` method.

    :ivar invalid_oauth1_token_info: Part or all of the OAuth 1.0 access token
        info is invalid.
    :ivar app_id_mismatch: The authorized app does not match the app associated
        with the supplied access token.
    """

    _catch_all = 'other'
    # Placeholders only; each attribute is overwritten with a real instance
    # below the class definition.
    invalid_oauth1_token_info = None
    app_id_mismatch = None
    other = None

    def is_invalid_oauth1_token_info(self):
        """
        Report whether the union tag is ``invalid_oauth1_token_info``.

        :rtype: bool
        """
        return self._tag == 'invalid_oauth1_token_info'

    def is_app_id_mismatch(self):
        """
        Report whether the union tag is ``app_id_mismatch``.

        :rtype: bool
        """
        return self._tag == 'app_id_mismatch'

    def is_other(self):
        """
        Report whether the union tag is ``other``.

        :rtype: bool
        """
        return self._tag == 'other'

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing class-specific to do: hand straight over to the base class.
        parent = super(TokenFromOAuth1Error, self)
        parent._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        tag_and_value = (self._tag, self._value)
        return 'TokenFromOAuth1Error(%r, %r)' % tag_and_value

TokenFromOAuth1Error_validator = bv.Union(TokenFromOAuth1Error)
|||
|
|||
class TokenFromOAuth1Result(bb.Struct):
    """
    :ivar oauth2_token: The OAuth 2.0 token generated from the supplied OAuth
        1.0 token.
    """

    __slots__ = [
        '_oauth2_token_value',
        '_oauth2_token_present',
    ]

    _has_required_fields = True

    def __init__(self, oauth2_token=None):
        # Begin with the field marked as unset.
        self._oauth2_token_value = None
        self._oauth2_token_present = False
        # A supplied value goes through the property setter so it is checked
        # by the field validator.
        if oauth2_token is not None:
            self.oauth2_token = oauth2_token

    @property
    def oauth2_token(self):
        """
        The OAuth 2.0 token generated from the supplied OAuth 1.0 token.

        :rtype: str
        :raises AttributeError: if the field has not been set.
        """
        if not self._oauth2_token_present:
            raise AttributeError("missing required field 'oauth2_token'")
        return self._oauth2_token_value

    @oauth2_token.setter
    def oauth2_token(self, val):
        # Store whatever the validator returns, not the raw argument.
        checked = self._oauth2_token_validator.validate(val)
        self._oauth2_token_value = checked
        self._oauth2_token_present = True

    @oauth2_token.deleter
    def oauth2_token(self):
        self._oauth2_token_value = None
        self._oauth2_token_present = False

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing class-specific to do: hand straight over to the base class.
        parent = super(TokenFromOAuth1Result, self)
        parent._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        # Reads the raw slot, so an unset field shows as None.
        template = 'TokenFromOAuth1Result(oauth2_token={!r})'
        return template.format(
            self._oauth2_token_value,
        )

TokenFromOAuth1Result_validator = bv.Struct(TokenFromOAuth1Result)
|||
|
|||
# Per-tag validators: the two payload tags reuse the validators of their
# payload unions, while ``other`` is validated with bv.Void().
AccessError._invalid_account_type_validator = InvalidAccountTypeError_validator
AccessError._paper_access_denied_validator = PaperAccessError_validator
AccessError._other_validator = bv.Void()

# Tag name -> validator lookup. It is assigned before the AccessError(...)
# instance below is created; keep that order.
AccessError._tagmap = {
    'invalid_account_type': AccessError._invalid_account_type_validator,
    'paper_access_denied': AccessError._paper_access_denied_validator,
    'other': AccessError._other_validator,
}

# Replace the class-level ``other = None`` placeholder with a real instance.
AccessError.other = AccessError('other')
|||
|
|||
# Each AuthError tag is validated with bv.Void().
AuthError._invalid_access_token_validator = bv.Void()
AuthError._invalid_select_user_validator = bv.Void()
AuthError._invalid_select_admin_validator = bv.Void()
AuthError._user_suspended_validator = bv.Void()
AuthError._expired_access_token_validator = bv.Void()
AuthError._other_validator = bv.Void()

# Tag name -> validator lookup. It is assigned before the AuthError(...)
# instances below are created; keep that order.
AuthError._tagmap = {
    'invalid_access_token': AuthError._invalid_access_token_validator,
    'invalid_select_user': AuthError._invalid_select_user_validator,
    'invalid_select_admin': AuthError._invalid_select_admin_validator,
    'user_suspended': AuthError._user_suspended_validator,
    'expired_access_token': AuthError._expired_access_token_validator,
    'other': AuthError._other_validator,
}

# Replace the class-level placeholders with real instances, one per tag.
AuthError.invalid_access_token = AuthError('invalid_access_token')
AuthError.invalid_select_user = AuthError('invalid_select_user')
AuthError.invalid_select_admin = AuthError('invalid_select_admin')
AuthError.user_suspended = AuthError('user_suspended')
AuthError.expired_access_token = AuthError('expired_access_token')
AuthError.other = AuthError('other')
|||
|
|||
# Each InvalidAccountTypeError tag is validated with bv.Void().
InvalidAccountTypeError._endpoint_validator = bv.Void()
InvalidAccountTypeError._feature_validator = bv.Void()
InvalidAccountTypeError._other_validator = bv.Void()

# Tag name -> validator lookup. It is assigned before the instances below are
# created; keep that order.
InvalidAccountTypeError._tagmap = {
    'endpoint': InvalidAccountTypeError._endpoint_validator,
    'feature': InvalidAccountTypeError._feature_validator,
    'other': InvalidAccountTypeError._other_validator,
}

# Replace the class-level placeholders with real instances, one per tag.
InvalidAccountTypeError.endpoint = InvalidAccountTypeError('endpoint')
InvalidAccountTypeError.feature = InvalidAccountTypeError('feature')
InvalidAccountTypeError.other = InvalidAccountTypeError('other')
|||
|
|||
# Each PaperAccessError tag is validated with bv.Void().
PaperAccessError._paper_disabled_validator = bv.Void()
PaperAccessError._not_paper_user_validator = bv.Void()
PaperAccessError._other_validator = bv.Void()

# Tag name -> validator lookup. It is assigned before the instances below are
# created; keep that order.
PaperAccessError._tagmap = {
    'paper_disabled': PaperAccessError._paper_disabled_validator,
    'not_paper_user': PaperAccessError._not_paper_user_validator,
    'other': PaperAccessError._other_validator,
}

# Replace the class-level placeholders with real instances, one per tag.
PaperAccessError.paper_disabled = PaperAccessError('paper_disabled')
PaperAccessError.not_paper_user = PaperAccessError('not_paper_user')
PaperAccessError.other = PaperAccessError('other')
|||
|
|||
# Field validators used by the RateLimitError property setters.
RateLimitError._reason_validator = RateLimitReason_validator
RateLimitError._retry_after_validator = bv.UInt64()

# Names of all fields, as a set.
RateLimitError._all_field_names_ = {
    'reason',
    'retry_after',
}

# (field name, validator) pairs, in declaration order.
RateLimitError._all_fields_ = [
    ('reason', RateLimitError._reason_validator),
    ('retry_after', RateLimitError._retry_after_validator),
]
|||
|
|||
# Each RateLimitReason tag is validated with bv.Void().
RateLimitReason._too_many_requests_validator = bv.Void()
RateLimitReason._too_many_write_operations_validator = bv.Void()
RateLimitReason._other_validator = bv.Void()

# Tag name -> validator lookup. It is assigned before the instances below are
# created; keep that order.
RateLimitReason._tagmap = {
    'too_many_requests': RateLimitReason._too_many_requests_validator,
    'too_many_write_operations': RateLimitReason._too_many_write_operations_validator,
    'other': RateLimitReason._other_validator,
}

# Replace the class-level placeholders with real instances, one per tag.
RateLimitReason.too_many_requests = RateLimitReason('too_many_requests')
RateLimitReason.too_many_write_operations = RateLimitReason('too_many_write_operations')
RateLimitReason.other = RateLimitReason('other')
|||
|
|||
# Both fields are strings with a minimum length of one character.
TokenFromOAuth1Arg._oauth1_token_validator = bv.String(min_length=1)
TokenFromOAuth1Arg._oauth1_token_secret_validator = bv.String(min_length=1)

# Names of all fields, as a set.
TokenFromOAuth1Arg._all_field_names_ = {
    'oauth1_token',
    'oauth1_token_secret',
}

# (field name, validator) pairs, in declaration order.
TokenFromOAuth1Arg._all_fields_ = [
    ('oauth1_token', TokenFromOAuth1Arg._oauth1_token_validator),
    ('oauth1_token_secret', TokenFromOAuth1Arg._oauth1_token_secret_validator),
]
|||
|
|||
# Each TokenFromOAuth1Error tag is validated with bv.Void().
TokenFromOAuth1Error._invalid_oauth1_token_info_validator = bv.Void()
TokenFromOAuth1Error._app_id_mismatch_validator = bv.Void()
TokenFromOAuth1Error._other_validator = bv.Void()

# Tag name -> validator lookup. It is assigned before the instances below are
# created; keep that order.
TokenFromOAuth1Error._tagmap = {
    'invalid_oauth1_token_info': TokenFromOAuth1Error._invalid_oauth1_token_info_validator,
    'app_id_mismatch': TokenFromOAuth1Error._app_id_mismatch_validator,
    'other': TokenFromOAuth1Error._other_validator,
}

# Replace the class-level placeholders with real instances, one per tag.
TokenFromOAuth1Error.invalid_oauth1_token_info = TokenFromOAuth1Error('invalid_oauth1_token_info')
TokenFromOAuth1Error.app_id_mismatch = TokenFromOAuth1Error('app_id_mismatch')
TokenFromOAuth1Error.other = TokenFromOAuth1Error('other')
|||
|
|||
# The single field is a string with a minimum length of one character.
TokenFromOAuth1Result._oauth2_token_validator = bv.String(min_length=1)

# Names of all fields, as a set.
TokenFromOAuth1Result._all_field_names_ = {
    'oauth2_token',
}

# (field name, validator) pairs, in declaration order.
TokenFromOAuth1Result._all_fields_ = [
    ('oauth2_token', TokenFromOAuth1Result._oauth2_token_validator),
]
|||
|
|||
# Route objects for this module.
# NOTE(review): the positional arguments look like (name, version, deprecated,
# arg validator, result validator, error validator, attributes) -- confirm
# against stone_base.Route before relying on that reading.
token_from_oauth1 = bb.Route(
    'token/from_oauth1',
    1,
    False,
    TokenFromOAuth1Arg_validator,
    TokenFromOAuth1Result_validator,
    TokenFromOAuth1Error_validator,
    {
        'host': u'api',
        'style': u'rpc',
    },
)

# This route passes bv.Void() in all three validator positions.
token_revoke = bb.Route(
    'token/revoke',
    1,
    False,
    bv.Void(),
    bv.Void(),
    bv.Void(),
    {
        'host': u'api',
        'style': u'rpc',
    },
)

# Lookup table from route name to its Route object.
ROUTES = {
    'token/from_oauth1': token_from_oauth1,
    'token/revoke': token_revoke,
}
|||
|
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,455 @@ |
|||
# -*- coding: utf-8 -*- |
|||
# Auto-generated by Stone, do not modify. |
|||
# @generated |
|||
# flake8: noqa |
|||
# pylint: skip-file |
|||
try: |
|||
from . import stone_validators as bv |
|||
from . import stone_base as bb |
|||
except (ImportError, SystemError, ValueError): |
|||
# Catch errors raised when importing a relative module when not in a package. |
|||
# This makes testing this file directly (outside of a package) easier. |
|||
import stone_validators as bv |
|||
import stone_base as bb |
|||
|
|||
class PathRoot(bb.Union):
    """
    Tagged union: exactly one ``is_*`` predicate is true for an instance, and
    a tag that carries a payload exposes it through its ``get_*`` method.

    :ivar home: Paths are relative to the authenticating user's home namespace,
        whether or not that user belongs to a team.
    :ivar str root: Paths are relative to the authenticating user's root
        namespace (This results in ``PathRootError.invalid_root`` if the
        user's root namespace has changed.).
    :ivar str namespace_id: Paths are relative to given namespace id (This
        results in ``PathRootError.no_permission`` if you don't have access
        to this namespace.).
    """

    _catch_all = 'other'
    # Placeholders only; each attribute is overwritten with a real instance
    # below the class definition.
    home = None
    other = None

    @classmethod
    def root(cls, val):
        """
        Build an instance tagged ``root`` that carries ``val``.

        :param str val:
        :rtype: PathRoot
        """
        return cls('root', val)

    @classmethod
    def namespace_id(cls, val):
        """
        Build an instance tagged ``namespace_id`` that carries ``val``.

        :param str val:
        :rtype: PathRoot
        """
        return cls('namespace_id', val)

    def is_home(self):
        """
        Report whether the union tag is ``home``.

        :rtype: bool
        """
        return self._tag == 'home'

    def is_root(self):
        """
        Report whether the union tag is ``root``.

        :rtype: bool
        """
        return self._tag == 'root'

    def is_namespace_id(self):
        """
        Report whether the union tag is ``namespace_id``.

        :rtype: bool
        """
        return self._tag == 'namespace_id'

    def is_other(self):
        """
        Report whether the union tag is ``other``.

        :rtype: bool
        """
        return self._tag == 'other'

    def get_root(self):
        """
        Paths are relative to the authenticating user's root namespace (This
        results in ``PathRootError.invalid_root`` if the user's root namespace
        has changed.).

        Only call this if :meth:`is_root` is true.

        :rtype: str
        :raises AttributeError: if a different tag is set.
        """
        if self.is_root():
            return self._value
        raise AttributeError("tag 'root' not set")

    def get_namespace_id(self):
        """
        Paths are relative to given namespace id (This results in
        ``PathRootError.no_permission`` if you don't have access to this
        namespace.).

        Only call this if :meth:`is_namespace_id` is true.

        :rtype: str
        :raises AttributeError: if a different tag is set.
        """
        if self.is_namespace_id():
            return self._value
        raise AttributeError("tag 'namespace_id' not set")

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing class-specific to do: hand straight over to the base class.
        parent = super(PathRoot, self)
        parent._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        tag_and_value = (self._tag, self._value)
        return 'PathRoot(%r, %r)' % tag_and_value

PathRoot_validator = bv.Union(PathRoot)
|||
|
|||
class PathRootError(bb.Union):
    """
    Tagged union: exactly one ``is_*`` predicate is true for an instance, and
    a tag that carries a payload exposes it through its ``get_*`` method.

    :ivar RootInfo invalid_root: The root namespace id in Dropbox-API-Path-Root
        header is not valid. The value of this error is the user's latest root
        info.
    :ivar no_permission: You don't have permission to access the namespace id in
        Dropbox-API-Path-Root header.
    """

    _catch_all = 'other'
    # Placeholders only; each attribute is overwritten with a real instance
    # below the class definition.
    no_permission = None
    other = None

    @classmethod
    def invalid_root(cls, val):
        """
        Build an instance tagged ``invalid_root`` that carries ``val``.

        :param RootInfo val:
        :rtype: PathRootError
        """
        return cls('invalid_root', val)

    def is_invalid_root(self):
        """
        Report whether the union tag is ``invalid_root``.

        :rtype: bool
        """
        return self._tag == 'invalid_root'

    def is_no_permission(self):
        """
        Report whether the union tag is ``no_permission``.

        :rtype: bool
        """
        return self._tag == 'no_permission'

    def is_other(self):
        """
        Report whether the union tag is ``other``.

        :rtype: bool
        """
        return self._tag == 'other'

    def get_invalid_root(self):
        """
        The root namespace id in Dropbox-API-Path-Root header is not valid. The
        value of this error is the user's latest root info.

        Only call this if :meth:`is_invalid_root` is true.

        :rtype: RootInfo
        :raises AttributeError: if a different tag is set.
        """
        if self.is_invalid_root():
            return self._value
        raise AttributeError("tag 'invalid_root' not set")

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing class-specific to do: hand straight over to the base class.
        parent = super(PathRootError, self)
        parent._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        tag_and_value = (self._tag, self._value)
        return 'PathRootError(%r, %r)' % tag_and_value

PathRootError_validator = bv.Union(PathRootError)
|||
|
|||
class RootInfo(bb.Struct):
    """
    Describes the root of the current user.

    :ivar root_namespace_id: Namespace ID of the user's root namespace. For a
        member of a team that has a separate team root this is the namespace
        ID of the shared team root; in every other case it equals
        ``RootInfo.home_namespace_id``.
    :ivar home_namespace_id: Namespace ID of the user's home namespace.
    """

    __slots__ = [
        '_root_namespace_id_value',
        '_root_namespace_id_present',
        '_home_namespace_id_value',
        '_home_namespace_id_present',
    ]

    _has_required_fields = True

    def __init__(self,
                 root_namespace_id=None,
                 home_namespace_id=None):
        # Every field starts out unset; the property setters below validate
        # and mark a field as present only when a value was supplied.
        self._root_namespace_id_value = None
        self._root_namespace_id_present = False
        self._home_namespace_id_value = None
        self._home_namespace_id_present = False
        if root_namespace_id is not None:
            self.root_namespace_id = root_namespace_id
        if home_namespace_id is not None:
            self.home_namespace_id = home_namespace_id

    @property
    def root_namespace_id(self):
        """
        Namespace ID of the user's root namespace: the shared team root when
        the user belongs to a team with a separate team root, otherwise the
        same value as ``RootInfo.home_namespace_id``.

        :rtype: str
        """
        if not self._root_namespace_id_present:
            raise AttributeError("missing required field 'root_namespace_id'")
        return self._root_namespace_id_value

    @root_namespace_id.setter
    def root_namespace_id(self, val):
        # Validation happens first so a rejected value leaves the field as-is.
        checked = self._root_namespace_id_validator.validate(val)
        self._root_namespace_id_value = checked
        self._root_namespace_id_present = True

    @root_namespace_id.deleter
    def root_namespace_id(self):
        self._root_namespace_id_value, self._root_namespace_id_present = None, False

    @property
    def home_namespace_id(self):
        """
        Namespace ID of the user's home namespace.

        :rtype: str
        """
        if not self._home_namespace_id_present:
            raise AttributeError("missing required field 'home_namespace_id'")
        return self._home_namespace_id_value

    @home_namespace_id.setter
    def home_namespace_id(self, val):
        # Validation happens first so a rejected value leaves the field as-is.
        checked = self._home_namespace_id_validator.validate(val)
        self._home_namespace_id_value = checked
        self._home_namespace_id_present = True

    @home_namespace_id.deleter
    def home_namespace_id(self):
        self._home_namespace_id_value, self._home_namespace_id_present = None, False

    def _process_custom_annotations(self, annotation_type, processor):
        # No annotated fields of its own; defer entirely to the base class.
        super(RootInfo, self)._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        template = 'RootInfo(root_namespace_id={!r}, home_namespace_id={!r})'
        return template.format(
            self._root_namespace_id_value,
            self._home_namespace_id_value,
        )
|||
|
|||
# RootInfo is validated as a struct tree; its tag-to-subtype tables
# (`_tag_to_subtype_`, `_pytype_to_tag_and_subtype_`) are attached further
# down in this module.
RootInfo_validator = bv.StructTree(RootInfo)
|||
|
|||
class TeamRootInfo(RootInfo):
    """
    Root info for a user who is a member of a team that has a separate root
    namespace ID.

    :ivar home_path: Path of the user's home directory beneath the shared
        team root.
    """

    __slots__ = [
        '_home_path_value',
        '_home_path_present',
    ]

    _has_required_fields = True

    def __init__(self,
                 root_namespace_id=None,
                 home_namespace_id=None,
                 home_path=None):
        # The two namespace fields are handled by RootInfo.
        super(TeamRootInfo, self).__init__(root_namespace_id,
                                           home_namespace_id)
        self._home_path_value = None
        self._home_path_present = False
        if home_path is not None:
            self.home_path = home_path

    @property
    def home_path(self):
        """
        Path of the user's home directory beneath the shared team root.

        :rtype: str
        """
        if not self._home_path_present:
            raise AttributeError("missing required field 'home_path'")
        return self._home_path_value

    @home_path.setter
    def home_path(self, val):
        # Validation happens first so a rejected value leaves the field as-is.
        checked = self._home_path_validator.validate(val)
        self._home_path_value = checked
        self._home_path_present = True

    @home_path.deleter
    def home_path(self):
        self._home_path_value, self._home_path_present = None, False

    def _process_custom_annotations(self, annotation_type, processor):
        # No annotated fields of its own; defer entirely to the base class.
        super(TeamRootInfo, self)._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        template = 'TeamRootInfo(root_namespace_id={!r}, home_namespace_id={!r}, home_path={!r})'
        return template.format(
            self._root_namespace_id_value,
            self._home_namespace_id_value,
            self._home_path_value,
        )
|||
|
|||
# Struct validator for the TeamRootInfo subtype of RootInfo.
TeamRootInfo_validator = bv.Struct(TeamRootInfo)
|||
|
|||
class UserRootInfo(RootInfo):
    """
    Root info for a user who either is not a member of a team, or belongs to
    a team that does not have a separate root namespace.
    """

    # Adds no fields beyond the ones inherited from RootInfo.
    __slots__ = []

    _has_required_fields = True

    def __init__(self,
                 root_namespace_id=None,
                 home_namespace_id=None):
        super(UserRootInfo, self).__init__(root_namespace_id,
                                           home_namespace_id)

    def _process_custom_annotations(self, annotation_type, processor):
        # No annotated fields of its own; defer entirely to the base class.
        super(UserRootInfo, self)._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        template = 'UserRootInfo(root_namespace_id={!r}, home_namespace_id={!r})'
        return template.format(
            self._root_namespace_id_value,
            self._home_namespace_id_value,
        )
|||
|
|||
# Struct validator for the UserRootInfo subtype of RootInfo.
UserRootInfo_validator = bv.Struct(UserRootInfo)

# Validators for the primitive aliases defined by this namespace.
Date_validator = bv.Timestamp(u'%Y-%m-%d')
DisplayName_validator = bv.String(min_length=1, pattern=u'[^/:?*<>"|]*')
DisplayNameLegacy_validator = bv.String()
DropboxTimestamp_validator = bv.Timestamp(u'%Y-%m-%dT%H:%M:%SZ')
EmailAddress_validator = bv.String(max_length=255, pattern=u"^['&A-Za-z0-9._%+-]+@[A-Za-z0-9-][A-Za-z0-9.-]*\\.[A-Za-z]{2,15}$")
# An ISO 639-1 code.
LanguageCode_validator = bv.String(min_length=2)
NamePart_validator = bv.String(min_length=1, max_length=100, pattern=u'[^/:?*<>"|]*')
NamespaceId_validator = bv.String(pattern=u'[-_0-9a-zA-Z:]+')
OptionalNamePart_validator = bv.String(max_length=100, pattern=u'[^/:?*<>"|]*')
SessionId_validator = bv.String()
# Shared folder IDs reuse the namespace ID validator object.
SharedFolderId_validator = NamespaceId_validator

# Per-tag validators for the PathRoot union, followed by its tag -> validator
# map.
PathRoot._home_validator = bv.Void()
PathRoot._root_validator = NamespaceId_validator
PathRoot._namespace_id_validator = NamespaceId_validator
PathRoot._other_validator = bv.Void()
PathRoot._tagmap = {
    'home': PathRoot._home_validator,
    'root': PathRoot._root_validator,
    'namespace_id': PathRoot._namespace_id_validator,
    'other': PathRoot._other_validator,
}

# Shared instances for the tags whose validator is Void.
PathRoot.home = PathRoot('home')
PathRoot.other = PathRoot('other')

# Per-tag validators for the PathRootError union, followed by its tag ->
# validator map.
PathRootError._invalid_root_validator = RootInfo_validator
PathRootError._no_permission_validator = bv.Void()
PathRootError._other_validator = bv.Void()
PathRootError._tagmap = {
    'invalid_root': PathRootError._invalid_root_validator,
    'no_permission': PathRootError._no_permission_validator,
    'other': PathRootError._other_validator,
}

# Shared instances for the tags whose validator is Void.
PathRootError.no_permission = PathRootError('no_permission')
PathRootError.other = PathRootError('other')

# Field validators and field tables for RootInfo. RootInfo is the base of the
# tree, so its "all fields" tables are the same objects as its own tables.
RootInfo._root_namespace_id_validator = NamespaceId_validator
RootInfo._home_namespace_id_validator = NamespaceId_validator
RootInfo._field_names_ = set([
    'root_namespace_id',
    'home_namespace_id',
])
RootInfo._all_field_names_ = RootInfo._field_names_
RootInfo._fields_ = [
    ('root_namespace_id', RootInfo._root_namespace_id_validator),
    ('home_namespace_id', RootInfo._home_namespace_id_validator),
]
RootInfo._all_fields_ = RootInfo._fields_

# Subtype lookup tables for the RootInfo struct tree: tag tuple -> subtype
# validator, and Python class -> (tag tuple, subtype validator).
RootInfo._tag_to_subtype_ = {
    (u'team',): TeamRootInfo_validator,
    (u'user',): UserRootInfo_validator,
}
RootInfo._pytype_to_tag_and_subtype_ = {
    TeamRootInfo: ((u'team',), TeamRootInfo_validator),
    UserRootInfo: ((u'user',), UserRootInfo_validator),
}
RootInfo._is_catch_all_ = True

# TeamRootInfo adds `home_path` on top of the fields inherited from RootInfo.
TeamRootInfo._home_path_validator = bv.String()
TeamRootInfo._field_names_ = set(['home_path'])
TeamRootInfo._all_field_names_ = RootInfo._all_field_names_.union(TeamRootInfo._field_names_)
TeamRootInfo._fields_ = [('home_path', TeamRootInfo._home_path_validator)]
TeamRootInfo._all_fields_ = RootInfo._all_fields_ + TeamRootInfo._fields_

# UserRootInfo declares no fields of its own.
UserRootInfo._field_names_ = set([])
UserRootInfo._all_field_names_ = RootInfo._all_field_names_.union(UserRootInfo._field_names_)
UserRootInfo._fields_ = []
UserRootInfo._all_fields_ = RootInfo._all_fields_ + UserRootInfo._fields_

# This namespace defines no routes.
ROUTES = {
}
|||
|
@ -0,0 +1,175 @@ |
|||
# -*- coding: utf-8 -*- |
|||
# Auto-generated by Stone, do not modify. |
|||
# @generated |
|||
# flake8: noqa |
|||
# pylint: skip-file |
|||
try: |
|||
from . import stone_validators as bv |
|||
from . import stone_base as bb |
|||
except (ImportError, SystemError, ValueError): |
|||
# Catch errors raised when importing a relative module when not in a package. |
|||
# This makes testing this file directly (outside of a package) easier. |
|||
import stone_validators as bv |
|||
import stone_base as bb |
|||
|
|||
try: |
|||
from . import ( |
|||
common, |
|||
) |
|||
except (ImportError, SystemError, ValueError): |
|||
import common |
|||
|
|||
class DeleteManualContactsArg(bb.Struct):
    """
    :ivar email_addresses: The manually added contacts that should be
        deleted, given as a list.
    """

    __slots__ = [
        '_email_addresses_value',
        '_email_addresses_present',
    ]

    _has_required_fields = True

    def __init__(self,
                 email_addresses=None):
        # The field starts out unset; the property setter validates and marks
        # it as present only when a value was supplied.
        self._email_addresses_value = None
        self._email_addresses_present = False
        if email_addresses is not None:
            self.email_addresses = email_addresses

    @property
    def email_addresses(self):
        """
        The manually added contacts that should be deleted.

        :rtype: list of [str]
        """
        if not self._email_addresses_present:
            raise AttributeError("missing required field 'email_addresses'")
        return self._email_addresses_value

    @email_addresses.setter
    def email_addresses(self, val):
        # Validation happens first so a rejected value leaves the field as-is.
        checked = self._email_addresses_validator.validate(val)
        self._email_addresses_value = checked
        self._email_addresses_present = True

    @email_addresses.deleter
    def email_addresses(self):
        self._email_addresses_value, self._email_addresses_present = None, False

    def _process_custom_annotations(self, annotation_type, processor):
        # No annotated fields of its own; defer entirely to the base class.
        super(DeleteManualContactsArg, self)._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        template = 'DeleteManualContactsArg(email_addresses={!r})'
        return template.format(
            self._email_addresses_value,
        )
|||
|
|||
# Struct validator for DeleteManualContactsArg; used below as the argument
# validator of the `delete_manual_contacts_batch` route.
DeleteManualContactsArg_validator = bv.Struct(DeleteManualContactsArg)
|||
|
|||
class DeleteManualContactsError(bb.Union):
    """
    Tagged union: exactly one of the ``is_*`` methods returns true for any
    instance. When a tag carries a value, read it through the matching
    ``get_*`` method.

    :ivar list of [str] contacts_not_found: Contacts from this list can't be
        deleted. Make sure the list only has manually added contacts. The
        deletion was cancelled.
    """

    _catch_all = 'other'
    # Placeholder only; the real instance is assigned after the class body.
    other = None

    @classmethod
    def contacts_not_found(cls, val):
        """
        Build an instance tagged ``contacts_not_found`` that carries ``val``.

        :param list of [str] val:
        :rtype: DeleteManualContactsError
        """
        return cls('contacts_not_found', val)

    def is_contacts_not_found(self):
        """
        Tell whether the union tag is ``contacts_not_found``.

        :rtype: bool
        """
        return self._tag == 'contacts_not_found'

    def is_other(self):
        """
        Tell whether the union tag is ``other``.

        :rtype: bool
        """
        return self._tag == 'other'

    def get_contacts_not_found(self):
        """
        Contacts from this list can't be deleted. Make sure the list only has
        manually added contacts. The deletion was cancelled.

        Only call this if :meth:`is_contacts_not_found` is true.

        :rtype: list of [str]
        """
        if self.is_contacts_not_found():
            return self._value
        raise AttributeError("tag 'contacts_not_found' not set")

    def _process_custom_annotations(self, annotation_type, processor):
        # No annotated values of its own; defer entirely to the base class.
        super(DeleteManualContactsError, self)._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        return 'DeleteManualContactsError(%r, %r)' % (self._tag, self._value)
|||
|
|||
# Union validator for DeleteManualContactsError; used below as the error
# validator of the `delete_manual_contacts_batch` route.
DeleteManualContactsError_validator = bv.Union(DeleteManualContactsError)

# Field validator and field tables for DeleteManualContactsArg: a list whose
# items are checked with the shared e-mail address validator.
DeleteManualContactsArg._email_addresses_validator = bv.List(common.EmailAddress_validator)
DeleteManualContactsArg._all_field_names_ = set(['email_addresses'])
DeleteManualContactsArg._all_fields_ = [('email_addresses', DeleteManualContactsArg._email_addresses_validator)]

# Per-tag validators for DeleteManualContactsError, followed by its tag ->
# validator map.
DeleteManualContactsError._contacts_not_found_validator = bv.List(common.EmailAddress_validator)
DeleteManualContactsError._other_validator = bv.Void()
DeleteManualContactsError._tagmap = {
    'contacts_not_found': DeleteManualContactsError._contacts_not_found_validator,
    'other': DeleteManualContactsError._other_validator,
}

# Shared instance for the `other` tag, whose validator is Void.
DeleteManualContactsError.other = DeleteManualContactsError('other')

# Route definitions. Positional arguments appear to be: name, version,
# deprecated flag (TODO confirm against stone_base.Route), argument
# validator, result validator, error validator, and route attributes.
# This route uses Void for its argument, result, and error validators.
delete_manual_contacts = bb.Route(
    'delete_manual_contacts',
    1,
    False,
    bv.Void(),
    bv.Void(),
    bv.Void(),
    {'host': u'api',
     'style': u'rpc'},
)
# This route takes a DeleteManualContactsArg and reports failures as a
# DeleteManualContactsError; its result validator is Void.
delete_manual_contacts_batch = bb.Route(
    'delete_manual_contacts_batch',
    1,
    False,
    DeleteManualContactsArg_validator,
    bv.Void(),
    DeleteManualContactsError_validator,
    {'host': u'api',
     'style': u'rpc'},
)

# Route name -> Route object for every route defined in this namespace.
ROUTES = {
    'delete_manual_contacts': delete_manual_contacts,
    'delete_manual_contacts_batch': delete_manual_contacts_batch,
}
|||
|
@ -0,0 +1,609 @@ |
|||
# Names exported by `from <this module> import *`.
__all__ = [
    'Dropbox',
    'DropboxTeam',
    'create_session',
]

# This should always be 0.0.0 in master. Only update this after tagging
# before release.
# The value is embedded in the User-Agent header built by _DropboxTransport.
__version__ = '9.3.0'
|||
|
|||
import contextlib |
|||
import json |
|||
import logging |
|||
import random |
|||
import time |
|||
|
|||
import requests |
|||
import six |
|||
|
|||
from . import files, stone_serializers |
|||
from .auth import ( |
|||
AuthError_validator, |
|||
RateLimitError_validator, |
|||
) |
|||
from .common import ( |
|||
PathRoot, |
|||
PathRoot_validator, |
|||
PathRootError_validator |
|||
) |
|||
from .base import DropboxBase |
|||
from .base_team import DropboxTeamBase |
|||
from .exceptions import ( |
|||
ApiError, |
|||
AuthError, |
|||
BadInputError, |
|||
HttpError, |
|||
PathRootError, |
|||
InternalServerError, |
|||
RateLimitError, |
|||
) |
|||
from .session import ( |
|||
API_HOST, |
|||
API_CONTENT_HOST, |
|||
API_NOTIFICATION_HOST, |
|||
HOST_API, |
|||
HOST_CONTENT, |
|||
HOST_NOTIFY, |
|||
pinned_session, |
|||
) |
|||
|
|||
# Name of the request header that carries the serialized PathRoot
# (set by _DropboxTransport.with_path_root).
PATH_ROOT_HEADER = 'Dropbox-API-Path-Root'
# HTTP status that request_json_string turns into a PathRootError.
HTTP_STATUS_INVALID_PATH_ROOT = 422
|||
|
|||
class RouteResult(object):
    """Holds the outcome of a route call that completed successfully."""

    def __init__(self, obj_result, http_resp=None):
        """
        :param str obj_result: Serialized JSON result of the route, excluding
            any binary payload.
        :param requests.models.Response http_resp: Raw HTTP response, kept so
            that the binary body of the response can be streamed from it.
        """
        result_is_text = isinstance(obj_result, six.string_types)
        assert result_is_text, \
            'obj_result: expected string, got %r' % type(obj_result)
        # A missing response is allowed; anything else must be a real
        # requests Response object.
        assert http_resp is None or \
            isinstance(http_resp, requests.models.Response), \
            'http_resp: expected requests.models.Response, got %r' % \
            type(http_resp)
        self.obj_result = obj_result
        self.http_resp = http_resp
|||
|
|||
class RouteErrorResult(object):
    """Holds the outcome of a route call that ended in a route error."""

    def __init__(self, request_id, obj_result):
        """
        :param str request_id: Identifier of the failed request. It can be
            given to Dropbox Support to locate the exact request that
            returned the error.
        :param str obj_result: Result of the route, excluding any binary
            payload.
        """
        self.request_id, self.obj_result = request_id, obj_result
|||
|
|||
def create_session(max_connections=8, proxies=None):
    """
    Builds a session that several :class:`Dropbox` and :class:`DropboxTeam`
    instances can share, giving them a common connection pool and common
    proxy parameters.

    :param int max_connections: Maximum connection pool size.
    :param dict proxies: See the `requests module
        <http://docs.python-requests.org/en/latest/user/advanced/#proxies>`_
        for more details.
    :rtype: :class:`requests.sessions.Session`. `See the requests module
        <http://docs.python-requests.org/en/latest/user/advanced/#session-objects>`_
        for more details.
    """
    # We only need as many pool_connections as we have unique hostnames.
    shared = pinned_session(pool_maxsize=max_connections)
    if not proxies:
        return shared
    shared.proxies = proxies
    return shared
|||
|
|||
class _DropboxTransport(object): |
|||
""" |
|||
Responsible for implementing the wire protocol for making requests to the |
|||
Dropbox API. |
|||
""" |
|||
|
|||
_API_VERSION = '2' |
|||
|
|||
# Download style means that the route argument goes in a Dropbox-API-Arg |
|||
# header, and the result comes back in a Dropbox-API-Result header. The |
|||
# HTTP response body contains a binary payload. |
|||
_ROUTE_STYLE_DOWNLOAD = 'download' |
|||
|
|||
# Upload style means that the route argument goes in a Dropbox-API-Arg |
|||
# header. The HTTP request body contains a binary payload. The result |
|||
# comes back in a Dropbox-API-Result header. |
|||
_ROUTE_STYLE_UPLOAD = 'upload' |
|||
|
|||
# RPC style means that the argument and result of a route are contained in |
|||
# the HTTP body. |
|||
_ROUTE_STYLE_RPC = 'rpc' |
|||
|
|||
# This is the default longest time we'll block on receiving data from the server |
|||
_DEFAULT_TIMEOUT = 30 |
|||
|
|||
def __init__(self, |
|||
oauth2_access_token, |
|||
max_retries_on_error=4, |
|||
max_retries_on_rate_limit=None, |
|||
user_agent=None, |
|||
session=None, |
|||
headers=None, |
|||
timeout=_DEFAULT_TIMEOUT): |
|||
""" |
|||
:param str oauth2_access_token: OAuth2 access token for making client |
|||
requests. |
|||
|
|||
:param int max_retries_on_error: On 5xx errors, the number of times to |
|||
retry. |
|||
:param Optional[int] max_retries_on_rate_limit: On 429 errors, the |
|||
number of times to retry. If `None`, always retries. |
|||
:param str user_agent: The user agent to use when making requests. This |
|||
helps us identify requests coming from your application. We |
|||
recommend you use the format "AppName/Version". If set, we append |
|||
"/OfficialDropboxPythonSDKv2/__version__" to the user_agent, |
|||
:param session: If not provided, a new session (connection pool) is |
|||
created. To share a session across multiple clients, use |
|||
:func:`create_session`. |
|||
:type session: :class:`requests.sessions.Session` |
|||
:param dict headers: Additional headers to add to requests. |
|||
:param Optional[float] timeout: Maximum duration in seconds that |
|||
client will wait for any single packet from the |
|||
server. After the timeout the client will give up on |
|||
connection. If `None`, client will wait forever. Defaults |
|||
to 30 seconds. |
|||
""" |
|||
assert len(oauth2_access_token) > 0, \ |
|||
'OAuth2 access token cannot be empty.' |
|||
assert headers is None or isinstance(headers, dict), \ |
|||
'Expected dict, got %r' % headers |
|||
self._oauth2_access_token = oauth2_access_token |
|||
|
|||
self._max_retries_on_error = max_retries_on_error |
|||
self._max_retries_on_rate_limit = max_retries_on_rate_limit |
|||
if session: |
|||
assert isinstance(session, requests.sessions.Session), \ |
|||
'Expected requests.sessions.Session, got %r' % session |
|||
self._session = session |
|||
else: |
|||
self._session = create_session() |
|||
self._headers = headers |
|||
|
|||
base_user_agent = 'OfficialDropboxPythonSDKv2/' + __version__ |
|||
if user_agent: |
|||
self._raw_user_agent = user_agent |
|||
self._user_agent = '{}/{}'.format(user_agent, base_user_agent) |
|||
else: |
|||
self._raw_user_agent = None |
|||
self._user_agent = base_user_agent |
|||
|
|||
self._logger = logging.getLogger('dropbox') |
|||
|
|||
self._host_map = {HOST_API: API_HOST, |
|||
HOST_CONTENT: API_CONTENT_HOST, |
|||
HOST_NOTIFY: API_NOTIFICATION_HOST} |
|||
|
|||
self._timeout = timeout |
|||
|
|||
def clone( |
|||
self, |
|||
oauth2_access_token=None, |
|||
max_retries_on_error=None, |
|||
max_retries_on_rate_limit=None, |
|||
user_agent=None, |
|||
session=None, |
|||
headers=None, |
|||
timeout=None): |
|||
""" |
|||
Creates a new copy of the Dropbox client with the same defaults unless modified by |
|||
arguments to clone() |
|||
|
|||
See constructor for original parameter descriptions. |
|||
|
|||
:return: New instance of Dropbox clent |
|||
:rtype: Dropbox |
|||
""" |
|||
|
|||
return self.__class__( |
|||
oauth2_access_token or self._oauth2_access_token, |
|||
max_retries_on_error or self._max_retries_on_error, |
|||
max_retries_on_rate_limit or self._max_retries_on_rate_limit, |
|||
user_agent or self._user_agent, |
|||
session or self._session, |
|||
headers or self._headers, |
|||
timeout or self._timeout |
|||
) |
|||
|
|||
def request(self, |
|||
route, |
|||
namespace, |
|||
request_arg, |
|||
request_binary, |
|||
timeout=None): |
|||
""" |
|||
Makes a request to the Dropbox API and in the process validates that |
|||
the route argument and result are the expected data types. The |
|||
request_arg is converted to JSON based on the arg_data_type. Likewise, |
|||
the response is deserialized from JSON and converted to an object based |
|||
on the {result,error}_data_type. |
|||
|
|||
:param host: The Dropbox API host to connect to. |
|||
:param route: The route to make the request to. |
|||
:type route: :class:`.datatypes.stone_base.Route` |
|||
:param request_arg: Argument for the route that conforms to the |
|||
validator specified by route.arg_type. |
|||
:param request_binary: String or file pointer representing the binary |
|||
payload. Use None if there is no binary payload. |
|||
:param Optional[float] timeout: Maximum duration in seconds |
|||
that client will wait for any single packet from the |
|||
server. After the timeout the client will give up on |
|||
connection. If `None`, will use default timeout set on |
|||
Dropbox object. Defaults to `None`. |
|||
:return: The route's result. |
|||
""" |
|||
host = route.attrs['host'] or 'api' |
|||
route_name = namespace + '/' + route.name |
|||
if route.version > 1: |
|||
route_name += '_v{}'.format(route.version) |
|||
route_style = route.attrs['style'] or 'rpc' |
|||
serialized_arg = stone_serializers.json_encode(route.arg_type, |
|||
request_arg) |
|||
|
|||
if (timeout is None and |
|||
route == files.list_folder_longpoll): |
|||
# The client normally sends a timeout value to the |
|||
# longpoll route. The server will respond after |
|||
# <timeout> + random(0, 90) seconds. We increase the |
|||
# socket timeout to the longpoll timeout value plus 90 |
|||
# seconds so that we don't cut the server response short |
|||
# due to a shorter socket timeout. |
|||
# NB: This is done here because base.py is auto-generated |
|||
timeout = request_arg.timeout + 90 |
|||
|
|||
res = self.request_json_string_with_retry(host, |
|||
route_name, |
|||
route_style, |
|||
serialized_arg, |
|||
request_binary, |
|||
timeout=timeout) |
|||
decoded_obj_result = json.loads(res.obj_result) |
|||
if isinstance(res, RouteResult): |
|||
returned_data_type = route.result_type |
|||
obj = decoded_obj_result |
|||
elif isinstance(res, RouteErrorResult): |
|||
returned_data_type = route.error_type |
|||
obj = decoded_obj_result['error'] |
|||
user_message = decoded_obj_result.get('user_message') |
|||
user_message_text = user_message and user_message.get('text') |
|||
user_message_locale = user_message and user_message.get('locale') |
|||
else: |
|||
raise AssertionError('Expected RouteResult or RouteErrorResult, ' |
|||
'but res is %s' % type(res)) |
|||
|
|||
deserialized_result = stone_serializers.json_compat_obj_decode( |
|||
returned_data_type, obj, strict=False) |
|||
|
|||
if isinstance(res, RouteErrorResult): |
|||
raise ApiError(res.request_id, |
|||
deserialized_result, |
|||
user_message_text, |
|||
user_message_locale) |
|||
elif route_style == self._ROUTE_STYLE_DOWNLOAD: |
|||
return (deserialized_result, res.http_resp) |
|||
else: |
|||
return deserialized_result |
|||
|
|||
def request_json_object(self, |
|||
host, |
|||
route_name, |
|||
route_style, |
|||
request_arg, |
|||
request_binary, |
|||
timeout=None): |
|||
""" |
|||
Makes a request to the Dropbox API, taking a JSON-serializable Python |
|||
object as an argument, and returning one as a response. |
|||
|
|||
:param host: The Dropbox API host to connect to. |
|||
:param route_name: The name of the route to invoke. |
|||
:param route_style: The style of the route. |
|||
:param str request_arg: A JSON-serializable Python object representing |
|||
the argument for the route. |
|||
:param Optional[bytes] request_binary: Bytes representing the binary |
|||
payload. Use None if there is no binary payload. |
|||
:param Optional[float] timeout: Maximum duration in seconds |
|||
that client will wait for any single packet from the |
|||
server. After the timeout the client will give up on |
|||
connection. If `None`, will use default timeout set on |
|||
Dropbox object. Defaults to `None`. |
|||
:return: The route's result as a JSON-serializable Python object. |
|||
""" |
|||
serialized_arg = json.dumps(request_arg) |
|||
res = self.request_json_string_with_retry(host, |
|||
route_name, |
|||
route_style, |
|||
serialized_arg, |
|||
request_binary, |
|||
timeout=timeout) |
|||
# This can throw a ValueError if the result is not deserializable, |
|||
# but that would be completely unexpected. |
|||
deserialized_result = json.loads(res.obj_result) |
|||
if isinstance(res, RouteResult) and res.http_resp is not None: |
|||
return (deserialized_result, res.http_resp) |
|||
else: |
|||
return deserialized_result |
|||
|
|||
def request_json_string_with_retry(self, |
|||
host, |
|||
route_name, |
|||
route_style, |
|||
request_json_arg, |
|||
request_binary, |
|||
timeout=None): |
|||
""" |
|||
See :meth:`request_json_object` for description of parameters. |
|||
|
|||
:param request_json_arg: A string representing the serialized JSON |
|||
argument to the route. |
|||
""" |
|||
attempt = 0 |
|||
rate_limit_errors = 0 |
|||
while True: |
|||
self._logger.info('Request to %s', route_name) |
|||
try: |
|||
return self.request_json_string(host, |
|||
route_name, |
|||
route_style, |
|||
request_json_arg, |
|||
request_binary, |
|||
timeout=timeout) |
|||
except InternalServerError as e: |
|||
attempt += 1 |
|||
if attempt <= self._max_retries_on_error: |
|||
# Use exponential backoff |
|||
backoff = 2**attempt * random.random() |
|||
self._logger.info( |
|||
'HttpError status_code=%s: Retrying in %.1f seconds', |
|||
e.status_code, backoff) |
|||
time.sleep(backoff) |
|||
else: |
|||
raise |
|||
except RateLimitError as e: |
|||
rate_limit_errors += 1 |
|||
if (self._max_retries_on_rate_limit is None or |
|||
self._max_retries_on_rate_limit >= rate_limit_errors): |
|||
# Set default backoff to 5 seconds. |
|||
backoff = e.backoff if e.backoff is not None else 5.0 |
|||
self._logger.info( |
|||
'Ratelimit: Retrying in %.1f seconds.', backoff) |
|||
time.sleep(backoff) |
|||
else: |
|||
raise |
|||
|
|||
def request_json_string(self, |
|||
host, |
|||
func_name, |
|||
route_style, |
|||
request_json_arg, |
|||
request_binary, |
|||
timeout=None): |
|||
""" |
|||
See :meth:`request_json_string_with_retry` for description of |
|||
parameters. |
|||
""" |
|||
if host not in self._host_map: |
|||
raise ValueError('Unknown value for host: %r' % host) |
|||
|
|||
if not isinstance(request_binary, (six.binary_type, type(None))): |
|||
# Disallow streams and file-like objects even though the underlying |
|||
# requests library supports them. This is to prevent incorrect |
|||
# behavior when a non-rewindable stream is read from, but the |
|||
# request fails and needs to be re-tried at a later time. |
|||
raise TypeError('expected request_binary as binary type, got %s' % |
|||
type(request_binary)) |
|||
|
|||
# Fully qualified hostname |
|||
fq_hostname = self._host_map[host] |
|||
url = self._get_route_url(fq_hostname, func_name) |
|||
|
|||
headers = {'User-Agent': self._user_agent} |
|||
if host != HOST_NOTIFY: |
|||
headers['Authorization'] = 'Bearer %s' % self._oauth2_access_token |
|||
if self._headers: |
|||
headers.update(self._headers) |
|||
|
|||
# The contents of the body of the HTTP request |
|||
body = None |
|||
# Whether the response should be streamed incrementally, or buffered |
|||
# entirely. If stream is True, the caller is responsible for closing |
|||
# the HTTP response. |
|||
stream = False |
|||
|
|||
if route_style == self._ROUTE_STYLE_RPC: |
|||
headers['Content-Type'] = 'application/json' |
|||
body = request_json_arg |
|||
elif route_style == self._ROUTE_STYLE_DOWNLOAD: |
|||
headers['Dropbox-API-Arg'] = request_json_arg |
|||
stream = True |
|||
elif route_style == self._ROUTE_STYLE_UPLOAD: |
|||
headers['Content-Type'] = 'application/octet-stream' |
|||
headers['Dropbox-API-Arg'] = request_json_arg |
|||
body = request_binary |
|||
else: |
|||
raise ValueError('Unknown operation style: %r' % route_style) |
|||
|
|||
if timeout is None: |
|||
timeout = self._timeout |
|||
|
|||
r = self._session.post(url, |
|||
headers=headers, |
|||
data=body, |
|||
stream=stream, |
|||
verify=True, |
|||
timeout=timeout, |
|||
) |
|||
|
|||
request_id = r.headers.get('x-dropbox-request-id') |
|||
if r.status_code >= 500: |
|||
raise InternalServerError(request_id, r.status_code, r.text) |
|||
elif r.status_code == 400: |
|||
raise BadInputError(request_id, r.text) |
|||
elif r.status_code == 401: |
|||
assert r.headers.get('content-type') == 'application/json', ( |
|||
'Expected content-type to be application/json, got %r' % |
|||
r.headers.get('content-type')) |
|||
err = stone_serializers.json_compat_obj_decode( |
|||
AuthError_validator, r.json()['error']) |
|||
raise AuthError(request_id, err) |
|||
elif r.status_code == HTTP_STATUS_INVALID_PATH_ROOT: |
|||
err = stone_serializers.json_compat_obj_decode( |
|||
PathRootError_validator, r.json()['error']) |
|||
raise PathRootError(request_id, err) |
|||
elif r.status_code == 429: |
|||
err = None |
|||
if r.headers.get('content-type') == 'application/json': |
|||
err = stone_serializers.json_compat_obj_decode( |
|||
RateLimitError_validator, r.json()['error']) |
|||
retry_after = err.retry_after |
|||
else: |
|||
retry_after_str = r.headers.get('retry-after') |
|||
if retry_after_str is not None: |
|||
retry_after = int(retry_after_str) |
|||
else: |
|||
retry_after = None |
|||
raise RateLimitError(request_id, err, retry_after) |
|||
elif 200 <= r.status_code <= 299: |
|||
if route_style == self._ROUTE_STYLE_DOWNLOAD: |
|||
raw_resp = r.headers['dropbox-api-result'] |
|||
else: |
|||
assert r.headers.get('content-type') == 'application/json', ( |
|||
'Expected content-type to be application/json, got %r' % |
|||
r.headers.get('content-type')) |
|||
raw_resp = r.content.decode('utf-8') |
|||
if route_style == self._ROUTE_STYLE_DOWNLOAD: |
|||
return RouteResult(raw_resp, r) |
|||
else: |
|||
return RouteResult(raw_resp) |
|||
elif r.status_code in (403, 404, 409): |
|||
raw_resp = r.content.decode('utf-8') |
|||
return RouteErrorResult(request_id, raw_resp) |
|||
else: |
|||
raise HttpError(request_id, r.status_code, r.text) |
|||
|
|||
def _get_route_url(self, hostname, route_name): |
|||
"""Returns the URL of the route. |
|||
|
|||
:param str hostname: Hostname to make the request to. |
|||
:param str route_name: Name of the route. |
|||
:rtype: str |
|||
""" |
|||
return 'https://{hostname}/{version}/{route_name}'.format( |
|||
hostname=hostname, |
|||
version=Dropbox._API_VERSION, |
|||
route_name=route_name, |
|||
) |
|||
|
|||
def _save_body_to_file(self, download_path, http_resp, chunksize=2**16): |
|||
""" |
|||
Saves the body of an HTTP response to a file. |
|||
|
|||
:param str download_path: Local path to save data to. |
|||
:param http_resp: The HTTP response whose body will be saved. |
|||
:type http_resp: :class:`requests.models.Response` |
|||
:rtype: None |
|||
""" |
|||
with open(download_path, 'wb') as f: |
|||
with contextlib.closing(http_resp): |
|||
for c in http_resp.iter_content(chunksize): |
|||
f.write(c) |
|||
|
|||
def with_path_root(self, path_root):
    """
    Clone this client with the Dropbox-API-Path-Root header set to the
    serialized form of ``path_root``.

    For more information, see
    https://www.dropbox.com/developers/reference/namespace-guide#pathrootmodes

    :param PathRoot path_root: instance of PathRoot to serialize into the
        headers field
    :return: The result of ``self.clone`` called with the new header.
    :rtype: Dropbox
    :raises ValueError: If ``path_root`` is not a ``PathRoot`` instance.
    """
    if isinstance(path_root, PathRoot):
        serialized_root = stone_serializers.json_encode(PathRoot_validator, path_root)
        return self.clone(headers={PATH_ROOT_HEADER: serialized_root})

    raise ValueError("path_root must be an instance of PathRoot")
|||
|
|||
class Dropbox(_DropboxTransport, DropboxBase):
    """
    Client for making Dropbox API requests with a user's access token.
    Methods of this class are meant to act on the corresponding user's
    Dropbox.

    The class adds nothing of its own; it only combines its two bases.
    """
|||
|
|||
class DropboxTeam(_DropboxTransport, DropboxTeamBase):
    """
    Client for making Dropbox API requests with a team's access token.
    Methods of this class are meant to act on the team, but there is also
    an :meth:`as_user` method for assuming a team member's identity.
    """

    def as_admin(self, team_member_id):
        """
        Allows a team credential to assume the identity of an administrator
        on the team and perform operations on any team-owned content.

        :param str team_member_id: team member id of administrator to
            perform actions with
        :return: A :class:`Dropbox` object that can be used to query on
            behalf of this admin of the team.
        :rtype: Dropbox
        """
        header_name = 'Dropbox-API-Select-Admin'
        return self._get_dropbox_client_with_select_header(header_name, team_member_id)

    def as_user(self, team_member_id):
        """
        Allows a team credential to assume the identity of a member of the
        team.

        :param str team_member_id: team member id of team member to perform
            actions with
        :return: A :class:`Dropbox` object that can be used to query on
            behalf of this member of the team.
        :rtype: Dropbox
        """
        header_name = 'Dropbox-API-Select-User'
        return self._get_dropbox_client_with_select_header(header_name, team_member_id)

    def _get_dropbox_client_with_select_header(self, select_header_name, team_member_id):
        """
        Build a :class:`Dropbox` client that carries one extra "select" header.

        :param str select_header_name: Header name used to select users
        :param str team_member_id: team member id of team member to perform
            actions with
        :return: A :class:`Dropbox` object constructed from this client's
            token, retry settings, timeout, user agent and session, plus
            the extended headers.
        :rtype: Dropbox
        """
        # Work on a copy so this client's own headers are left untouched.
        if self._headers:
            new_headers = self._headers.copy()
        else:
            new_headers = {}
        new_headers[select_header_name] = team_member_id

        client_options = dict(
            max_retries_on_error=self._max_retries_on_error,
            max_retries_on_rate_limit=self._max_retries_on_rate_limit,
            timeout=self._timeout,
            user_agent=self._raw_user_agent,
            session=self._session,
            headers=new_headers,
        )
        return Dropbox(self._oauth2_access_token, **client_options)
@ -0,0 +1,100 @@ |
|||
class DropboxException(Exception):
    """Base class for every error raised while making an API request."""

    def __init__(self, request_id, *args, **kwargs):
        # The request_id is what Dropbox Support needs to locate the exact
        # failing request; it is stored both in ``args`` (via the base
        # initializer) and as an attribute.
        forwarded = (request_id,) + args
        super(DropboxException, self).__init__(*forwarded, **kwargs)
        self.request_id = request_id

    def __str__(self):
        # str() and repr() intentionally render the same text.
        return repr(self)
|||
|
|||
|
|||
class ApiError(DropboxException):
    """Errors produced by the Dropbox API."""

    def __init__(self, request_id, error, user_message_text, user_message_locale):
        """
        :param (str) request_id: A request_id can be shared with Dropbox
            Support to pinpoint the exact request that returns an error.
        :param error: An instance of the error data type for the route.
        :param (str) user_message_text: A human-readable message that can
            be displayed to the end user. Is None, if unavailable.
        :param (str) user_message_locale: The locale of
            ``user_message_text``, if present.
        """
        # Only request_id and error are forwarded to the base initializer;
        # the user-message fields live solely as attributes.
        super(ApiError, self).__init__(request_id, error)
        self.error = error
        self.user_message_text = user_message_text
        self.user_message_locale = user_message_locale

    def __repr__(self):
        template = 'ApiError({0!r}, {1})'
        return template.format(self.request_id, self.error)
|||
|
|||
|
|||
class HttpError(DropboxException):
    """Errors produced at the HTTP layer."""

    def __init__(self, request_id, status_code, body):
        """
        :param request_id: Identifier of the failing request.
        :param status_code: HTTP status code of the response.
        :param body: Response body, stored as given.
        """
        super(HttpError, self).__init__(request_id, status_code, body)
        self.status_code = status_code
        self.body = body

    def __repr__(self):
        template = 'HttpError({0!r}, {1}, {2!r})'
        return template.format(self.request_id, self.status_code, self.body)
|||
|
|||
|
|||
class PathRootError(HttpError):
    """Error caused by an invalid path root."""

    def __init__(self, request_id, error=None):
        """
        :param request_id: Identifier of the failing request.
        :param error: Decoded error payload, if any.
        """
        # The status code is fixed at 422 and no body is stored.
        super(PathRootError, self).__init__(request_id, 422, None)
        self.error = error

    def __repr__(self):
        return 'PathRootError(%r, %r)' % (self.request_id, self.error)
|||
|
|||
|
|||
class BadInputError(HttpError):
    """Errors due to bad input parameters to an API Operation."""

    def __init__(self, request_id, message):
        """
        :param request_id: Identifier of the failing request.
        :param message: Error message; also passed to the base class as
            the body, with the status code fixed at 400.
        """
        super(BadInputError, self).__init__(request_id, 400, message)
        self.message = message

    def __repr__(self):
        return 'BadInputError(%r, %r)' % (self.request_id, self.message)
|||
|
|||
|
|||
class AuthError(HttpError):
    """Errors due to invalid authentication credentials."""

    def __init__(self, request_id, error):
        """
        :param request_id: Identifier of the failing request.
        :param error: Decoded error payload.
        """
        # The status code is fixed at 401 and no body is stored.
        super(AuthError, self).__init__(request_id, 401, None)
        self.error = error

    def __repr__(self):
        return 'AuthError(%r, %r)' % (self.request_id, self.error)
|||
|
|||
|
|||
class RateLimitError(HttpError):
    """Error caused by rate limiting."""

    def __init__(self, request_id, error=None, backoff=None):
        """
        :param request_id: Identifier of the failing request.
        :param error: Decoded error payload, if any.
        :param backoff: Back-off value supplied by the caller, if any.
        """
        # The status code is fixed at 429 and no body is stored.
        super(RateLimitError, self).__init__(request_id, 429, None)
        self.error = error
        self.backoff = backoff

    def __repr__(self):
        fields = (self.request_id, self.error, self.backoff)
        return 'RateLimitError(%r, %r, %r)' % fields
|||
|
|||
|
|||
class InternalServerError(HttpError):
    """Errors due to a problem on Dropbox."""

    def __repr__(self):
        template = 'InternalServerError({0!r}, {1}, {2!r})'
        return template.format(self.request_id, self.status_code, self.body)
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,515 @@ |
|||
# Names exported by a star-import of this module (kept alphabetical).
__all__ = [
    'BadRequestException', 'BadStateException', 'CsrfException',
    'DropboxOAuth2Flow', 'DropboxOAuth2FlowNoRedirect',
    'NotApprovedException',
    'OAuth2FlowNoRedirectResult', 'OAuth2FlowResult',
    'ProviderException',
]
|||
|
|||
import base64 |
|||
import os |
|||
import six |
|||
import urllib |
|||
|
|||
from .session import ( |
|||
API_HOST, |
|||
WEB_HOST, |
|||
pinned_session, |
|||
) |
|||
|
|||
# Version-neutral aliases for URL quoting and form encoding.
if six.PY3:
    # ``import urllib`` alone does not load the ``parse`` submodule on
    # Python 3, so import it explicitly instead of relying on some other
    # package having imported it first.
    import urllib.parse
    url_path_quote = urllib.parse.quote  # pylint: disable=no-member,useless-suppression
    url_encode = urllib.parse.urlencode  # pylint: disable=no-member,useless-suppression
else:
    url_path_quote = urllib.quote  # pylint: disable=no-member,useless-suppression
    url_encode = urllib.urlencode  # pylint: disable=no-member,useless-suppression
|||
|
|||
|
|||
class OAuth2FlowNoRedirectResult(object):
    """
    Authorization information for an OAuth2Flow performed with no redirect.
    """

    def __init__(self, access_token, account_id, user_id):
        """
        Args:
            access_token (str): Token to be used to authenticate later
                requests.
            account_id (str): The Dropbox user's account ID.
            user_id (str): Deprecated (use account_id instead).
        """
        self.access_token, self.account_id, self.user_id = (
            access_token, account_id, user_id)

    def __repr__(self):
        fields = (self.access_token, self.account_id, self.user_id)
        return 'OAuth2FlowNoRedirectResult(%r, %r, %r)' % fields
|||
|
|||
|
|||
class OAuth2FlowResult(OAuth2FlowNoRedirectResult):
    """
    Authorization information for an OAuth2Flow with redirect.
    """

    def __init__(self, access_token, account_id, user_id, url_state):
        """
        Same as OAuth2FlowNoRedirectResult but with url_state.

        Args:
            url_state (str): The url state that was set by
                :meth:`DropboxOAuth2Flow.start`.
        """
        super(OAuth2FlowResult, self).__init__(access_token, account_id, user_id)
        self.url_state = url_state

    @classmethod
    def from_no_redirect_result(cls, result, url_state):
        """Build an instance from a no-redirect result plus ``url_state``."""
        assert isinstance(result, OAuth2FlowNoRedirectResult)
        inherited = (result.access_token, result.account_id, result.user_id)
        return cls(*(inherited + (url_state,)))

    def __repr__(self):
        fields = (self.access_token, self.account_id, self.user_id, self.url_state)
        return 'OAuth2FlowResult(%r, %r, %r, %r)' % fields
|||
|
|||
|
|||
class DropboxOAuth2FlowBase(object):
    """Shared URL-building and token-exchange logic for the OAuth 2 flows."""

    def __init__(self, consumer_key, consumer_secret, locale=None):
        self.consumer_key = consumer_key
        self.consumer_secret = consumer_secret
        self.locale = locale
        self.requests_session = pinned_session()

    def _get_authorize_url(self, redirect_uri, state):
        """Build the authorize URL on the web host.

        ``redirect_uri`` and ``state`` are only added to the query string
        when they are not None.
        """
        query = {'response_type': 'code', 'client_id': self.consumer_key}
        for key, value in (('redirect_uri', redirect_uri), ('state', state)):
            if value is not None:
                query[key] = value

        return self.build_url('/oauth2/authorize', query, WEB_HOST)

    def _finish(self, code, redirect_uri):
        """Exchange an authorization code for an access token.

        POSTs to ``/oauth2/token`` and raises via ``raise_for_status`` on
        an HTTP error status.

        :rtype: OAuth2FlowNoRedirectResult
        """
        token_url = self.build_url('/oauth2/token')
        form = {
            'grant_type': 'authorization_code',
            'code': code,
            'client_id': self.consumer_key,
            'client_secret': self.consumer_secret,
        }
        if self.locale is not None:
            form['locale'] = self.locale
        if redirect_uri is not None:
            form['redirect_uri'] = redirect_uri

        response = self.requests_session.post(token_url, data=form)
        response.raise_for_status()
        payload = response.json()

        # When the payload has a team_id it is reported as the account id.
        account_id = payload['team_id'] if 'team_id' in payload else payload['account_id']

        return OAuth2FlowNoRedirectResult(
            payload['access_token'], account_id, payload['uid'])

    def build_path(self, target, params=None):
        """Build the path component for an API URL.

        The target is URL-quoted and the parameters, plus the locale when
        one is set, are urlencoded and appended as a query string.

        :param str target: A target url (e.g. '/files') to build upon.
        :param dict params: Optional dictionary of parameters (name to value).
        :return: The path and parameters components of an API URL.
        :rtype: str
        """
        if six.PY2 and isinstance(target, six.text_type):
            target = target.encode('utf8')
        quoted_target = url_path_quote(target)

        # Copy so the caller's mapping is never mutated.
        query = (params or {}).copy()
        if self.locale:
            query['locale'] = self.locale

        if not query:
            return quoted_target
        return "%s?%s" % (quoted_target, _params_to_urlencoded(query))

    def build_url(self, target, params=None, host=API_HOST):
        """Build an API URL.

        This method adds scheme and hostname to the path returned from
        build_path.

        :param str target: A target url (e.g. '/files') to build upon.
        :param dict params: Optional dictionary of parameters (name to value).
        :param str host: Hostname to use; defaults to ``API_HOST``.
        :return: The full API URL.
        :rtype: str
        """
        path = self.build_path(target, params)
        return "https://%s%s" % (host, path)
|||
|
|||
|
|||
class DropboxOAuth2FlowNoRedirect(DropboxOAuth2FlowBase):
    """
    OAuth 2 authorization helper for apps that can't provide a redirect URI
    (such as the command-line example apps).

    Example::

        from dropbox import DropboxOAuth2FlowNoRedirect

        auth_flow = DropboxOAuth2FlowNoRedirect(APP_KEY, APP_SECRET)

        authorize_url = auth_flow.start()
        print("1. Go to: " + authorize_url)
        print("2. Click \\"Allow\\" (you might have to log in first).")
        print("3. Copy the authorization code.")
        auth_code = input("Enter the authorization code here: ").strip()

        try:
            oauth_result = auth_flow.finish(auth_code)
        except Exception as e:
            print('Error: %s' % (e,))
            raise SystemExit(1)

        dbx = Dropbox(oauth_result.access_token)
    """

    def __init__(self, consumer_key, consumer_secret, locale=None):  # noqa: E501; pylint: disable=useless-super-delegation
        """
        Construct an instance.

        :param str consumer_key: Your API app's "app key".
        :param str consumer_secret: Your API app's "app secret".
        :param str locale: The locale of the user of your application. For
            example "en" or "en_US". Some API calls return localized data
            and error messages; this setting tells the server which locale
            to use. By default, the server uses "en_US".
        """
        # pylint: disable=useless-super-delegation
        base = super(DropboxOAuth2FlowNoRedirect, self)
        base.__init__(consumer_key, consumer_secret, locale)

    def start(self):
        """
        Starts the OAuth 2 authorization process.

        :return: The URL for a page on Dropbox's website. This page will
            let the user "approve" your app, which gives your app
            permission to access the user's Dropbox account. Tell the user
            to visit this URL and approve your app.
        """
        # No redirect URI and no state in this flow.
        return self._get_authorize_url(None, None)

    def finish(self, code):
        """
        If the user approves your app, they will be presented with an
        "authorization code". Have the user copy/paste that authorization
        code into your app and then call this method to get an access token.

        :param str code: The authorization code shown to the user when they
            approved your app.
        :rtype: OAuth2FlowNoRedirectResult
        :raises: The same exceptions as :meth:`DropboxOAuth2Flow.finish()`.
        """
        return self._finish(code, None)
|||
|
|||
|
|||
class DropboxOAuth2Flow(DropboxOAuth2FlowBase):
    """
    OAuth 2 authorization helper. Use this for web apps.

    OAuth 2 has a two-step authorization process. The first step is having
    the user authorize your app. The second involves getting an OAuth 2
    access token from Dropbox.

    Example::

        from dropbox import DropboxOAuth2Flow

        def get_dropbox_auth_flow(web_app_session):
            redirect_uri = "https://my-web-server.org/dropbox-auth-finish"
            return DropboxOAuth2Flow(
                APP_KEY, APP_SECRET, redirect_uri, web_app_session,
                "dropbox-auth-csrf-token")

        # URL handler for /dropbox-auth-start
        def dropbox_auth_start(web_app_session, request):
            authorize_url = get_dropbox_auth_flow(web_app_session).start()
            redirect_to(authorize_url)

        # URL handler for /dropbox-auth-finish
        def dropbox_auth_finish(web_app_session, request):
            try:
                oauth_result = \\
                        get_dropbox_auth_flow(web_app_session).finish(
                            request.query_params)
            except BadRequestException as e:
                http_status(400)
            except BadStateException as e:
                # Start the auth flow again.
                redirect_to("/dropbox-auth-start")
            except CsrfException as e:
                http_status(403)
            except NotApprovedException as e:
                flash('Not approved? Why not?')
                return redirect_to("/home")
            except ProviderException as e:
                logger.log("Auth error: %s" % (e,))
                http_status(403)

    """

    def __init__(self, consumer_key, consumer_secret, redirect_uri, session,
                 csrf_token_session_key, locale=None):
        """
        Construct an instance.

        :param str consumer_key: Your API app's "app key".
        :param str consumer_secret: Your API app's "app secret".
        :param str redirect_uri: The URI that the Dropbox server will
            redirect the user to after the user finishes authorizing your
            app. This URI must be HTTPS-based and pre-registered with the
            Dropbox servers, though localhost URIs are allowed without
            pre-registration and can be either HTTP or HTTPS.
        :param dict session: A dict-like object that represents the current
            user's web session (will be used to save the CSRF token).
        :param str csrf_token_session_key: The key to use when storing the
            CSRF token in the session (for example:
            "dropbox-auth-csrf-token").
        :param str locale: The locale of the user of your application. For
            example "en" or "en_US". Some API calls return localized data
            and error messages; this setting tells the server which locale
            to use. By default, the server uses "en_US".
        """
        super(DropboxOAuth2Flow, self).__init__(consumer_key, consumer_secret, locale)
        self.redirect_uri = redirect_uri
        self.session = session
        self.csrf_token_session_key = csrf_token_session_key

    def start(self, url_state=None):
        """
        Starts the OAuth 2 authorization process.

        This function builds an "authorization URL". You should redirect
        your user's browser to this URL, which will give them an
        opportunity to grant your app access to their Dropbox account. When
        the user completes this process, they will be automatically
        redirected to the ``redirect_uri`` you passed in to the constructor.

        This function will also save a CSRF token to
        ``session[csrf_token_session_key]`` (as provided to the
        constructor). This CSRF token will be checked on :meth:`finish()`
        to prevent request forgery.

        :param str url_state: Any data that you would like to keep in the
            URL through the authorization process. This exact value will be
            returned to you by :meth:`finish()`.
        :return: The URL for a page on Dropbox's website. This page will
            let the user "approve" your app, which gives your app
            permission to access the user's Dropbox account. Tell the user
            to visit this URL and approve your app.
        """
        random_bytes = os.urandom(16)
        csrf_token = base64.urlsafe_b64encode(random_bytes).decode('ascii')

        # The state parameter is "<csrf token>" or "<csrf token>|<url_state>".
        if url_state is None:
            state = csrf_token
        else:
            state = csrf_token + ("|" + url_state)
        self.session[self.csrf_token_session_key] = csrf_token

        return self._get_authorize_url(self.redirect_uri, state)

    def finish(self, query_params):
        """
        Call this after the user has visited the authorize URL (see
        :meth:`start()`), approved your app and was redirected to your
        redirect URI.

        :param dict query_params: The query parameters on the GET request
            to your redirect URI.
        :rtype: OAuth2FlowResult
        :raises: :class:`BadRequestException` If the redirect URL was
            missing parameters or if the given parameters were not valid.
        :raises: :class:`BadStateException` If there's no CSRF token in the
            session.
        :raises: :class:`CsrfException` If the ``state`` query parameter
            doesn't contain the CSRF token from the user's session.
        :raises: :class:`NotApprovedException` If the user chose not to
            approve your app.
        :raises: :class:`ProviderException` If Dropbox redirected to your
            redirect URI with some unexpected error identifier and error
            message.
        """
        # --- Well-formedness of the request ---
        state = query_params.get('state')
        if state is None:
            raise BadRequestException("Missing query parameter 'state'.")

        error = query_params.get('error')
        error_description = query_params.get('error_description')
        code = query_params.get('code')

        has_error = error is not None
        has_code = code is not None
        if has_error and has_code:
            raise BadRequestException(
                "Query parameters 'code' and 'error' are both set; "
                "only one must be set.")
        if not (has_error or has_code):
            raise BadRequestException(
                "Neither query parameter 'code' or 'error' is set.")

        # --- CSRF token ---
        session_key = self.csrf_token_session_key
        if session_key not in self.session:
            raise BadStateException('Missing CSRF token in session.')
        csrf_token_from_session = self.session[session_key]
        if len(csrf_token_from_session) <= 20:
            raise AssertionError('CSRF token unexpectedly short: %r' %
                                 csrf_token_from_session)

        # Everything before the first '|' is the token; the rest, if a
        # separator is present at all, is the caller's url_state.
        given_csrf_token, separator, remainder = state.partition('|')
        url_state = remainder if separator else None

        if not _safe_equals(csrf_token_from_session, given_csrf_token):
            raise CsrfException('expected %r, got %r' %
                                (csrf_token_from_session, given_csrf_token))

        # The token is consumed once it has been matched.
        del self.session[session_key]

        # --- Error identifier ---
        if has_error:
            if error != 'access_denied':
                # All other errors
                full_message = error
                if error_description is not None:
                    full_message += ": " + error_description
                raise ProviderException(full_message)

            # The user clicked "Deny"
            if error_description is None:
                raise NotApprovedException(
                    'No additional description from Dropbox')
            raise NotApprovedException(
                'Additional description from Dropbox: %s' %
                error_description)

        # If everything went ok, make the network call to get an access token.
        no_redirect_result = self._finish(code, self.redirect_uri)
        return OAuth2FlowResult.from_no_redirect_result(
            no_redirect_result, url_state)
|||
|
|||
|
|||
class BadRequestException(Exception):
    """
    Raised when the redirect URL was missing parameters or the given
    parameters were not valid.

    The recommended action is to show an HTTP 400 error page.
    """
|||
|
|||
|
|||
class BadStateException(Exception):
    """
    Raised when all the parameters are correct, but there's no CSRF token
    in the session. This probably means that the session expired.

    The recommended action is to redirect the user's browser to try the
    approval process again.
    """
|||
|
|||
|
|||
class CsrfException(Exception):
    """
    Raised when the given 'state' parameter doesn't contain the CSRF token
    from the user's session. This is blocked to prevent CSRF attacks.

    The recommended action is to respond with an HTTP 403 error page.
    """
|||
|
|||
|
|||
class NotApprovedException(Exception):
    """
    Raised when the user chose not to approve your app.
    """
|||
|
|||
|
|||
class ProviderException(Exception):
    """
    Raised when Dropbox redirected to your redirect URI with some
    unexpected error identifier and error message.

    The recommended action is to log the error, tell the user something
    went wrong, and let them try again.
    """
|||
|
|||
|
|||
def _safe_equals(a, b): |
|||
if len(a) != len(b): |
|||
return False |
|||
res = 0 |
|||
for ca, cb in zip(a, b): |
|||
res |= ord(ca) ^ ord(cb) |
|||
return res == 0 |
|||
|
|||
|
|||
def _params_to_urlencoded(params):
    """
    Returns a application/x-www-form-urlencoded ``str`` representing the
    key/value pairs in ``params``.

    Keys and values are converted to UTF-8 byte strings before being passed
    to ``url_encode``: byte strings are used as they are, text is
    utf8-encoded, and anything else is ``str()``'d first and then encoded.
    """
    def _to_utf8(value):
        if isinstance(value, six.binary_type):
            return value
        if not isinstance(value, six.text_type):
            value = str(value)
        return value.encode('utf-8')

    encoded_pairs = dict(
        (_to_utf8(key), _to_utf8(value))
        for key, value in six.iteritems(params)
    )
    return url_encode(encoded_pairs)
File diff suppressed because it is too large
@ -0,0 +1,110 @@ |
|||
# -*- coding: utf-8 -*- |
|||
# Auto-generated by Stone, do not modify. |
|||
# @generated |
|||
# flake8: noqa |
|||
# pylint: skip-file |
|||
try: |
|||
from . import stone_validators as bv |
|||
from . import stone_base as bb |
|||
except (ImportError, SystemError, ValueError): |
|||
# Catch errors raised when importing a relative module when not in a package. |
|||
# This makes testing this file directly (outside of a package) easier. |
|||
import stone_validators as bv |
|||
import stone_base as bb |
|||
|
|||
class PlatformType(bb.Union):
    """
    Possible platforms on which a user may view content.

    This class acts as a tagged union. Only one of the ``is_*`` methods
    will return true. To get the associated value of a tag (if one exists),
    use the corresponding ``get_*`` method.

    :ivar web: The content was viewed on the web.
    :ivar mobile: The content was viewed on a mobile client.
    :ivar desktop: The content was viewed on a desktop client.
    :ivar unknown: The content was viewed on an unknown platform.
    """

    _catch_all = 'other'
    # The five placeholders below are overwritten below the class
    # definition.
    web = None
    mobile = None
    desktop = None
    unknown = None
    other = None

    def is_web(self):
        """
        Report whether this union holds the ``web`` tag.

        :rtype: bool
        """
        return self._tag == 'web'

    def is_mobile(self):
        """
        Report whether this union holds the ``mobile`` tag.

        :rtype: bool
        """
        return self._tag == 'mobile'

    def is_desktop(self):
        """
        Report whether this union holds the ``desktop`` tag.

        :rtype: bool
        """
        return self._tag == 'desktop'

    def is_unknown(self):
        """
        Report whether this union holds the ``unknown`` tag.

        :rtype: bool
        """
        return self._tag == 'unknown'

    def is_other(self):
        """
        Report whether this union holds the ``other`` tag.

        :rtype: bool
        """
        return self._tag == 'other'

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing specific to this union; defer to the base class.
        parent = super(PlatformType, self)
        parent._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        return 'PlatformType({0!r}, {1!r})'.format(self._tag, self._value)
|||
|
|||
PlatformType_validator = bv.Union(PlatformType)

# Every tag uses a Void validator.
PlatformType._web_validator = bv.Void()
PlatformType._mobile_validator = bv.Void()
PlatformType._desktop_validator = bv.Void()
PlatformType._unknown_validator = bv.Void()
PlatformType._other_validator = bv.Void()

# Tag name -> validator lookup table.
PlatformType._tagmap = dict(
    web=PlatformType._web_validator,
    mobile=PlatformType._mobile_validator,
    desktop=PlatformType._desktop_validator,
    unknown=PlatformType._unknown_validator,
    other=PlatformType._other_validator,
)

# Replace the class-level placeholders with one instance per tag.
PlatformType.web = PlatformType('web')
PlatformType.mobile = PlatformType('mobile')
PlatformType.desktop = PlatformType('desktop')
PlatformType.unknown = PlatformType('unknown')
PlatformType.other = PlatformType('other')

ROUTES = {}
|||
|
@ -0,0 +1,51 @@ |
|||
import pkg_resources |
|||
import os |
|||
import ssl |
|||
|
|||
import requests |
|||
from requests.adapters import HTTPAdapter |
|||
from urllib3.poolmanager import PoolManager |
|||
|
|||
# Default short hostname for RPC-style routes.
HOST_API = 'api'

# Default short hostname for upload and download-style routes.
HOST_CONTENT = 'content'

# Default short hostname for longpoll routes.
HOST_NOTIFY = 'notify'

# Default short hostname for the Dropbox website.
HOST_WWW = 'www'

# Domain suffixes. A specific override wins over the shared DROPBOX_DOMAIN
# override, which in turn wins over the built-in default.
API_DOMAIN = os.getenv(
    'DROPBOX_API_DOMAIN', os.getenv('DROPBOX_DOMAIN', '.dropboxapi.com'))

WEB_DOMAIN = os.getenv(
    'DROPBOX_WEB_DOMAIN', os.getenv('DROPBOX_DOMAIN', '.dropbox.com'))

# Full hostnames: short name + domain unless overridden per host.
API_HOST = os.getenv('DROPBOX_API_HOST', HOST_API + API_DOMAIN)
API_CONTENT_HOST = os.getenv('DROPBOX_API_CONTENT_HOST', HOST_CONTENT + API_DOMAIN)
API_NOTIFICATION_HOST = os.getenv('DROPBOX_API_NOTIFY_HOST', HOST_NOTIFY + API_DOMAIN)
WEB_HOST = os.getenv('DROPBOX_WEB_HOST', HOST_WWW + WEB_DOMAIN)
|||
|
|||
# Certificate bundle file shipped next to this module.
_TRUSTED_CERT_FILE = pkg_resources.resource_filename(__name__, 'trusted-certs.crt')


# TODO(kelkabany): We probably only want to instantiate this once so that even
# if multiple Dropbox objects are instantiated, they all share the same pool.
class _SSLAdapter(HTTPAdapter):
    """HTTP adapter whose pool manager requires certificates and uses
    ``_TRUSTED_CERT_FILE`` as its CA file."""

    def init_poolmanager(self, connections, maxsize, block=False, **_):
        # Extra keyword arguments are accepted but not forwarded.
        pool_options = dict(
            num_pools=connections,
            maxsize=maxsize,
            block=block,
            cert_reqs=ssl.CERT_REQUIRED,
            ca_certs=_TRUSTED_CERT_FILE,
        )
        self.poolmanager = PoolManager(**pool_options)
|||
|
|||
def pinned_session(pool_maxsize=8):
    """Create a requests session whose ``https://`` traffic goes through
    an ``_SSLAdapter``.

    :param int pool_maxsize: Passed to the adapter as ``pool_maxsize``.
    :return: The configured session.
    """
    adapter = _SSLAdapter(pool_connections=4, pool_maxsize=pool_maxsize)
    session = requests.session()
    session.mount('https://', adapter)
    return session
File diff suppressed because it is too large
@ -0,0 +1,152 @@ |
|||
""" |
|||
Helpers for representing Stone data types in Python. |
|||
|
|||
This module should be dropped into a project that requires the use of Stone. In |
|||
the future, this could be imported from a pre-installed Python package, rather |
|||
than being added to a project. |
|||
""" |
|||
|
|||
from __future__ import absolute_import, unicode_literals |
|||
|
|||
import functools |
|||
|
|||
try: |
|||
from . import stone_validators as bv |
|||
except (ImportError, SystemError, ValueError): |
|||
# Catch errors raised when importing a relative module when not in a package. |
|||
# This makes testing this file directly (outside of a package) easier. |
|||
import stone_validators as bv # type: ignore |
|||
|
|||
_MYPY = False |
|||
if _MYPY: |
|||
import typing # noqa: F401 # pylint: disable=import-error,unused-import,useless-suppression |
|||
|
|||
# Base class for all annotation types.
class AnnotationType(object):
    pass


# Type variables used in the type comments of _process_custom_annotations;
# only defined when _MYPY is true.
if _MYPY:
    T = typing.TypeVar('T', bound=AnnotationType)
    U = typing.TypeVar('U')
|||
|
|||
# Base class for all classes representing Stone structs.
class Struct(object):
    def _process_custom_annotations(self, annotation_type, processor):
        # type: (typing.Type[T], typing.Callable[[T, U], U]) -> None
        # No-op at this level.
        return None
|||
|
|||
class Union(object):
    """Base class for tagged unions: an instance holds a tag and the value
    associated with that tag.

    Tags are looked up in ``_tagmap`` and in any ``_<name>_tagmap`` attribute
    named by ``_permissioned_tagmaps``.
    """
    # TODO(kelkabany): Possible optimization is to remove _value if a
    # union is composed of only symbols.
    __slots__ = ['_tag', '_value']
    _tagmap = {}  # type: typing.Dict[typing.Text, bv.Validator]
    _permissioned_tagmaps = set()  # type: typing.Set[typing.Text]

    def __init__(self, tag, value=None):
        """Validate ``value`` against the validator registered for ``tag``
        and store both.

        Raises:
            AssertionError: If ``tag`` is in none of the tag maps, or a Void
                member is given a non-None value.
        """
        candidate_maps = ['_tagmap']
        candidate_maps.extend(
            '_{}_tagmap'.format(map_name) for map_name in self._permissioned_tagmaps)

        validator = None
        # Every map is scanned; when several contain the tag the last one wins.
        for attr_name in candidate_maps:
            if tag in getattr(self, attr_name):
                validator = getattr(self, attr_name)[tag]
        assert validator is not None, 'Invalid tag %r.' % tag

        if isinstance(validator, bv.Void):
            assert value is None, 'Void type union member must have None value.'
        elif isinstance(validator, (bv.Struct, bv.Union)):
            # Composite members only get a type check here.
            validator.validate_type_only(value)
        else:
            validator.validate(value)

        self._tag = tag
        self._value = value

    def __eq__(self, other):
        """Equal when ``other`` is a Union, one operand's class is a subclass
        of the other's, and both tag and value match."""
        if not isinstance(other, Union):
            return False
        # If one union extends another, the common fields should be
        # comparable, so accept a subclass relation in either direction.
        related = isinstance(self, other.__class__) or isinstance(other, self.__class__)
        if not related:
            return False
        return self._tag == other._tag and self._value == other._value

    def __ne__(self, other):
        """Negation of ``__eq__``."""
        return not (self == other)

    def __hash__(self):
        """Hash of the ``(tag, value)`` pair."""
        return hash((self._tag, self._value))

    def _process_custom_annotations(self, annotation_type, processor):
        # type: (typing.Type[T], typing.Callable[[T, U], U]) -> None
        """Hook for custom-annotation processing; the base version does nothing."""
        return None

    @classmethod
    def _is_tag_present(cls, tag, caller_permissions):
        """Return True if ``tag`` is in ``_tagmap`` or in a permissioned tag
        map named by ``caller_permissions.permissions``."""
        assert tag, 'tag value should not be None'

        if tag in cls._tagmap:
            return True

        return any(
            tag in getattr(cls, '_{}_tagmap'.format(permission), ())
            for permission in caller_permissions.permissions
        )

    @classmethod
    def _get_val_data_type(cls, tag, caller_permissions):
        """Return the validator for ``tag``, preferring a permissioned tag map
        over ``_tagmap``.

        Raises:
            KeyError: If the tag is in no permissioned map and not in ``_tagmap``.
        """
        assert tag, 'tag value should not be None'

        for permission in caller_permissions.permissions:
            attr_name = '_{}_tagmap'.format(permission)
            if not hasattr(cls, attr_name):
                continue
            permissioned_map = getattr(cls, attr_name)
            if tag in permissioned_map:
                return permissioned_map[tag]

        return cls._tagmap[tag]
|||
|
|||
class Route(object):
    """Plain record describing a route: its name, version, deprecation
    marker, argument/result/error types, and an attribute dict."""

    def __init__(self, name, version, deprecated, arg_type, result_type, error_type, attrs):
        """Store every argument on the instance under the same name.

        Raises:
            AssertionError: If ``attrs`` is not a dict.
        """
        self.name, self.version, self.deprecated = name, version, deprecated
        self.arg_type, self.result_type, self.error_type = arg_type, result_type, error_type
        assert isinstance(attrs, dict), 'Expected dict, got %r' % attrs
        self.attrs = attrs

    def __repr__(self):
        """Return ``Route(...)`` listing the repr of each field in constructor order."""
        fields = (
            self.name,
            self.version,
            self.deprecated,
            self.arg_type,
            self.result_type,
            self.error_type,
            self.attrs,
        )
        return 'Route({})'.format(', '.join(repr(field) for field in fields))
|||
|
|||
# helper functions used when constructing custom annotation processors |
|||
|
|||
# put this here so that every other file doesn't need to import functools
# ``partially_apply`` is simply an alias for ``functools.partial``.
partially_apply = functools.partial
|||
|
|||
def make_struct_annotation_processor(annotation_type, processor):
    """Return a function that runs ``processor`` over a struct's custom
    annotations of ``annotation_type`` and hands the struct back.

    The returned function passes ``None`` through untouched.
    """
    def process_struct(struct):
        if struct is not None:
            struct._process_custom_annotations(annotation_type, processor)
        return struct
    return process_struct
|||
|
|||
def make_list_annotation_processor(processor):
    """Return a function that applies ``processor`` to every item of a list
    and returns the results as a new list.

    The returned function passes ``None`` through untouched.
    """
    def process_list(list_):
        if list_ is None:
            return list_
        processed = []
        for item in list_:
            processed.append(processor(item))
        return processed
    return process_list
|||
|
|||
def make_map_value_annotation_processor(processor):
    """Return a function that applies ``processor`` to every value of a
    mapping and returns a new dict with the same keys.

    The returned function passes ``None`` through untouched.
    """
    def process_map(map_):
        if map_ is None:
            return map_
        return dict((key, processor(value)) for key, value in map_.items())
    return process_map
File diff suppressed because it is too large
@ -0,0 +1,673 @@ |
|||
""" |
|||
Defines classes to represent each Stone type in Python. These classes should |
|||
be used to validate Python objects and normalize them for a given type. |
|||
|
|||
The data types defined here should not be specific to an RPC or serialization |
|||
format. |
|||
|
|||
This module should be dropped into a project that requires the use of Stone. In |
|||
the future, this could be imported from a pre-installed Python package, rather |
|||
than being added to a project. |
|||
""" |
|||
|
|||
from __future__ import absolute_import, unicode_literals |
|||
|
|||
from abc import ABCMeta, abstractmethod |
|||
import datetime |
|||
import hashlib |
|||
import math |
|||
import numbers |
|||
import re |
|||
import six |
|||
|
|||
_MYPY = False |
|||
if _MYPY: |
|||
import typing # noqa: F401 # pylint: disable=import-error,unused-import,useless-suppression |
|||
|
|||
# See <http://python3porting.com/differences.html#buffer>
# Tuple of types treated as binary data: Python 3 uses ``memoryview`` where
# Python 2 uses ``buffer``.
if six.PY3:
    _binary_types = (bytes, memoryview)  # noqa: E501,F821 # pylint: disable=undefined-variable,useless-suppression
else:
    _binary_types = (bytes, buffer)  # noqa: E501,F821 # pylint: disable=undefined-variable,useless-suppression
|||
|
|||
|
|||
class ValidationError(Exception):
    """Raised when a value doesn't pass validation by its validator."""

    def __init__(self, message, parent=None):
        """
        Args:
            message (str): Error message detailing validation failure.
            parent (str): Adds the parent as the closest reference point for
                the error. Use :meth:`add_parent` to add more.
        """
        super(ValidationError, self).__init__(message)
        self.message = message
        # Stored innermost-first; a falsy parent is not recorded.
        self._parents = [parent] if parent else []

    def add_parent(self, parent):
        """
        Args:
            parent (str): Adds the parent to the top of the tree of references
                that lead to the validator that failed.
        """
        self._parents.append(parent)

    def __str__(self):
        """
        Returns:
            str: A descriptive message of the validation error that may also
                include the path to the validator that failed.
        """
        if not self._parents:
            return self.message
        # Parents were appended innermost-first, so reverse for display.
        path = '.'.join(reversed(self._parents))
        return '{}: {}'.format(path, self.message)

    def __repr__(self):
        # Not a perfect repr, but includes the error location information.
        return 'ValidationError(%r)' % six.text_type(self)
|||
|
|||
|
|||
def generic_type_name(v):
    """Return a descriptive type name that isn't Python specific. For example,
    an int value will return 'integer' rather than 'int'."""
    # Booleans are integers too, so they must be recognised first.
    if isinstance(v, bool):
        return 'boolean'
    # Integrals are reals too, so they must precede the real-number check.
    if isinstance(v, numbers.Integral):
        return 'integer'
    if isinstance(v, numbers.Real):
        return 'float'
    if isinstance(v, (tuple, list)):
        return 'list'
    if isinstance(v, six.string_types):
        return 'string'
    if v is None:
        return 'null'
    # Anything else is reported under its Python class name.
    return type(v).__name__
|||
|
|||
|
|||
class Validator(object):
    """All primitive and composite data types should be a subclass of this."""
    __metaclass__ = ABCMeta

    @abstractmethod
    def validate(self, val):
        """Validates that val is of this data type.

        Returns: A normalized value if validation succeeds.
        Raises: ValidationError
        """

    def has_default(self):
        """Report whether this validator can supply a default; the base answer is False."""
        return False

    def get_default(self):
        """Always raises AssertionError: the base validator has no default."""
        raise AssertionError('No default available.')
|||
|
|||
|
|||
class Primitive(Validator):
    """A basic type that is defined by Stone."""
    # Marker base class only; ``validate`` is left to subclasses.
    # pylint: disable=abstract-method
|||
|
|||
|
|||
class Boolean(Primitive):
    """Validator that accepts only ``bool`` instances."""

    def validate(self, val):
        """Return ``val`` unchanged if it is a bool.

        Raises:
            ValidationError: If ``val`` is not a bool.
        """
        if isinstance(val, bool):
            return val
        raise ValidationError('%r is not a valid boolean' % val)
|||
|
|||
|
|||
class Integer(Primitive):
    """
    Do not use this class directly. Extend it and specify a 'minimum' and
    'maximum' value as class variables for a more restrictive integer range.
    """
    minimum = None  # type: typing.Optional[int]
    maximum = None  # type: typing.Optional[int]

    def __init__(self, min_value=None, max_value=None):
        """
        A more restrictive minimum or maximum value can be specified than the
        range inherent to the defined type.

        Args:
            min_value: Optional integral lower bound; must not be below the
                class-level ``minimum``. Overrides ``minimum`` on this instance.
            max_value: Optional integral upper bound; must not be above the
                class-level ``maximum``. Overrides ``maximum`` on this instance.

        Raises:
            AssertionError: If a bound is not integral or falls outside the
                range of the type.
        """
        if min_value is not None:
            assert isinstance(min_value, numbers.Integral), \
                'min_value must be an integral number'
            assert min_value >= self.minimum, \
                'min_value cannot be less than the minimum value for this ' \
                'type (%d < %d)' % (min_value, self.minimum)
            self.minimum = min_value
        if max_value is not None:
            assert isinstance(max_value, numbers.Integral), \
                'max_value must be an integral number'
            # The failing condition is max_value > maximum, so the message
            # reports it with '>' (it previously printed '<').
            assert max_value <= self.maximum, \
                'max_value cannot be greater than the maximum value for ' \
                'this type (%d > %d)' % (max_value, self.maximum)
            self.maximum = max_value

    def validate(self, val):
        """Return ``val`` unchanged if it is an integral number within
        ``[minimum, maximum]``.

        Raises:
            ValidationError: If ``val`` is not integral or is out of range.
        """
        if not isinstance(val, numbers.Integral):
            raise ValidationError('expected integer, got %s'
                                  % generic_type_name(val))
        elif not (self.minimum <= val <= self.maximum):
            raise ValidationError('%d is not within range [%d, %d]'
                                  % (val, self.minimum, self.maximum))
        return val

    def __repr__(self):
        """Return the class name followed by ``()``; bounds are not included."""
        return '%s()' % self.__class__.__name__
|||
|
|||
|
|||
class Int32(Integer):
    """Integer validator bounded to the signed 32-bit range."""
    minimum = -(2 ** 31)
    maximum = (2 ** 31) - 1
|||
|
|||
|
|||
class UInt32(Integer):
    """Integer validator bounded to the unsigned 32-bit range."""
    minimum = 0
    maximum = (2 ** 32) - 1
|||
|
|||
|
|||
class Int64(Integer):
    """Integer validator bounded to the signed 64-bit range."""
    minimum = -(2 ** 63)
    maximum = (2 ** 63) - 1
|||
|
|||
|
|||
class UInt64(Integer):
    """Integer validator bounded to the unsigned 64-bit range."""
    minimum = 0
    maximum = (2 ** 64) - 1
|||
|
|||
|
|||
class Real(Primitive):
    """
    Do not use this class directly. Extend it and optionally set a 'minimum'
    and 'maximum' value to enforce a range that's a subset of the Python float
    implementation. Python floats are doubles.
    """
    minimum = None  # type: typing.Optional[float]
    maximum = None  # type: typing.Optional[float]

    def __init__(self, min_value=None, max_value=None):
        """
        A more restrictive minimum or maximum value can be specified than the
        range inherent to the defined type.

        Args:
            min_value: Optional real lower bound; converted to float and
                stored as ``minimum`` on this instance.
            max_value: Optional real upper bound; converted to float and
                stored as ``maximum`` on this instance.

        Raises:
            AssertionError: If a bound is not a real number, cannot be
                represented as a float, or falls outside the class-level range.
        """
        if min_value is not None:
            assert isinstance(min_value, numbers.Real), \
                'min_value must be a real number'
            if not isinstance(min_value, float):
                try:
                    min_value = float(min_value)
                except OverflowError:
                    raise AssertionError('min_value is too small for a float')
            if self.minimum is not None and min_value < self.minimum:
                raise AssertionError('min_value cannot be less than the '
                                     'minimum value for this type (%f < %f)' %
                                     (min_value, self.minimum))
            self.minimum = min_value
        if max_value is not None:
            assert isinstance(max_value, numbers.Real), \
                'max_value must be a real number'
            if not isinstance(max_value, float):
                try:
                    max_value = float(max_value)
                except OverflowError:
                    raise AssertionError('max_value is too large for a float')
            if self.maximum is not None and max_value > self.maximum:
                # The failing condition is max_value > maximum, so the message
                # reports it with '>' (it previously printed '<').
                raise AssertionError('max_value cannot be greater than the '
                                     'maximum value for this type (%f > %f)' %
                                     (max_value, self.maximum))
            self.maximum = max_value

    def validate(self, val):
        """Return ``val`` as a float if it is a finite real number within the
        configured bounds.

        Raises:
            ValidationError: If ``val`` is not a real number, overflows a
                float, is NaN or infinite, or is outside the bounds.
        """
        if not isinstance(val, numbers.Real):
            raise ValidationError('expected real number, got %s' %
                                  generic_type_name(val))
        if not isinstance(val, float):
            # This checks for the case where a number is passed in with a
            # magnitude larger than supported by float64.
            try:
                val = float(val)
            except OverflowError:
                raise ValidationError('too large for float')
        if math.isnan(val) or math.isinf(val):
            raise ValidationError('%f values are not supported' % val)
        if self.minimum is not None and val < self.minimum:
            raise ValidationError('%f is not greater than %f' %
                                  (val, self.minimum))
        if self.maximum is not None and val > self.maximum:
            raise ValidationError('%f is not less than %f' %
                                  (val, self.maximum))
        return val

    def __repr__(self):
        """Return the class name followed by ``()``; bounds are not included."""
        return '%s()' % self.__class__.__name__
|||
|
|||
|
|||
class Float32(Real):
    # Maximum and minimums from the IEEE 754-1985 standard
    # The bounds are kept as arithmetic expressions so the resulting float
    # values stay exactly as originally computed.
    minimum = -3.40282 * 10**38
    maximum = 3.40282 * 10**38
|||
|
|||
|
|||
class Float64(Real):
    """Real validator that sets no ``minimum`` or ``maximum`` of its own."""
|||
|
|||
|
|||
class String(Primitive):
    """Represents a unicode string."""

    def __init__(self, min_length=None, max_length=None, pattern=None):
        """
        Args:
            min_length: Optional minimum number of characters (integral, >= 0).
            max_length: Optional maximum number of characters (integral, > 0).
            pattern: Optional regular expression source; the whole string must
                match it.

        Raises:
            AssertionError: If a constraint is malformed or the pattern does
                not compile.
        """
        if min_length is not None:
            assert isinstance(min_length, numbers.Integral), \
                'min_length must be an integral number'
            assert min_length >= 0, 'min_length must be >= 0'
        if max_length is not None:
            assert isinstance(max_length, numbers.Integral), \
                'max_length must be an integral number'
            assert max_length > 0, 'max_length must be > 0'
        if min_length and max_length:
            assert max_length >= min_length, 'max_length must be >= min_length'
        if pattern is not None:
            assert isinstance(pattern, six.string_types), \
                'pattern must be a string'

        self.min_length = min_length
        self.max_length = max_length
        self.pattern = pattern
        self.pattern_re = None

        if pattern:
            try:
                # Anchor at both ends so the pattern must match the whole value.
                self.pattern_re = re.compile(r"\A(?:" + pattern + r")\Z")
            except re.error as e:
                raise AssertionError('Regex {!r} failed: {}'.format(
                    pattern, e.args[0]))

    def validate(self, val):
        """
        A unicode string of the correct length and pattern will pass validation.
        In PY2, we enforce that a str type must be valid utf-8, and a unicode
        string will be returned.

        Raises:
            ValidationError: If ``val`` is not a string, is not valid utf-8
                (PY2 str), violates a length bound, or does not match the
                pattern.
        """
        if not isinstance(val, six.string_types):
            raise ValidationError("'%s' expected to be a string, got %s"
                                  % (val, generic_type_name(val)))
        if not six.PY3 and isinstance(val, str):
            try:
                val = val.decode('utf-8')
            except UnicodeDecodeError:
                # The message previously carried an unformatted '%s'. Use %r so
                # the offending value is shown while undecodable bytes stay
                # ASCII-safe inside the unicode message.
                raise ValidationError('%r was not valid utf-8' % (val,))

        if self.max_length is not None and len(val) > self.max_length:
            raise ValidationError("'%s' must be at most %d characters, got %d"
                                  % (val, self.max_length, len(val)))
        if self.min_length is not None and len(val) < self.min_length:
            raise ValidationError("'%s' must be at least %d characters, got %d"
                                  % (val, self.min_length, len(val)))

        if self.pattern and not self.pattern_re.match(val):
            raise ValidationError("'%s' did not match pattern '%s'"
                                  % (val, self.pattern))
        return val
|||
|
|||
|
|||
class Bytes(Primitive):
    """Validator for binary values with optional length bounds."""

    def __init__(self, min_length=None, max_length=None):
        """
        Args:
            min_length: Optional minimum number of bytes (integral, >= 0).
            max_length: Optional maximum number of bytes (integral, > 0).

        Raises:
            AssertionError: If a bound is malformed or the bounds conflict.
        """
        have_min = min_length is not None
        have_max = max_length is not None
        if have_min:
            assert isinstance(min_length, numbers.Integral), \
                'min_length must be an integral number'
            assert min_length >= 0, 'min_length must be >= 0'
        if have_max:
            assert isinstance(max_length, numbers.Integral), \
                'max_length must be an integral number'
            assert max_length > 0, 'max_length must be > 0'
        if have_min and have_max:
            assert max_length >= min_length, 'max_length must be >= min_length'

        self.min_length = min_length
        self.max_length = max_length

    def validate(self, val):
        """Return ``val`` unchanged if it is one of ``_binary_types`` and
        satisfies the length bounds.

        Raises:
            ValidationError: On a wrong type or a length violation.
        """
        if not isinstance(val, _binary_types):
            raise ValidationError("expected bytes type, got %s"
                                  % generic_type_name(val))
        if self.max_length is not None and len(val) > self.max_length:
            raise ValidationError("'%s' must have at most %d bytes, got %d"
                                  % (val, self.max_length, len(val)))
        if self.min_length is not None and len(val) < self.min_length:
            raise ValidationError("'%s' has fewer than %d bytes, got %d"
                                  % (val, self.min_length, len(val)))
        return val
|||
|
|||
|
|||
class Timestamp(Primitive):
    """Note that while a format is specified, it isn't used in validation
    since a native Python datetime object is preferred. The format, however,
    can and should be used by serializers."""

    def __init__(self, fmt):
        """fmt must be composed of format codes that the C standard (1989)
        supports, most notably in its strftime() function.

        Raises:
            AssertionError: If ``fmt`` is not a text string.
        """
        assert isinstance(fmt, six.text_type), 'format must be a string'
        self.format = fmt

    def validate(self, val):
        """Return ``val`` unchanged if it is a datetime that is naive or has
        a zero UTC offset.

        Raises:
            ValidationError: If ``val`` is not a datetime, or carries a
                non-zero UTC offset.
        """
        if not isinstance(val, datetime.datetime):
            raise ValidationError('expected timestamp, got %s'
                                  % generic_type_name(val))
        # datetime.utcoffset() yields None both when tzinfo is unset and when
        # the tzinfo reports no offset; calling tzinfo.utcoffset() directly
        # and chaining .total_seconds() crashed in the latter case.
        offset = val.utcoffset()
        if offset is not None and offset.total_seconds() != 0:
            raise ValidationError('timestamp should have either a UTC '
                                  'timezone or none set at all')
        return val
|||
|
|||
|
|||
class Composite(Validator):
    """Validator for a type that builds on other primitive and composite
    types."""
    # Marker base class only; ``validate`` is left to subclasses.
    # pylint: disable=abstract-method
|||
|
|||
|
|||
class List(Composite):
    """Assumes list contents are homogeneous with respect to types."""

    def __init__(self, item_validator, min_items=None, max_items=None):
        """Every list item will be validated with item_validator.

        Args:
            item_validator: Validator applied to each item.
            min_items: Optional minimum item count (integral, >= 0).
            max_items: Optional maximum item count (integral, > 0).

        Raises:
            AssertionError: If a bound is malformed or the bounds conflict.
        """
        self.item_validator = item_validator
        have_min = min_items is not None
        have_max = max_items is not None
        if have_min:
            assert isinstance(min_items, numbers.Integral), \
                'min_items must be an integral number'
            assert min_items >= 0, 'min_items must be >= 0'
        if have_max:
            assert isinstance(max_items, numbers.Integral), \
                'max_items must be an integral number'
            assert max_items > 0, 'max_items must be > 0'
        if have_min and have_max:
            assert max_items >= min_items, 'max_items must be >= min_items'

        self.min_items = min_items
        self.max_items = max_items

    def validate(self, val):
        """Return a new list holding each item of ``val`` as normalised by
        the item validator.

        Raises:
            ValidationError: If ``val`` is not a list or tuple, or its length
                violates a bound; item validators may raise as well.
        """
        if not isinstance(val, (tuple, list)):
            raise ValidationError('%r is not a valid list' % val)
        if self.max_items is not None and len(val) > self.max_items:
            raise ValidationError('%r has more than %s items'
                                  % (val, self.max_items))
        if self.min_items is not None and len(val) < self.min_items:
            raise ValidationError('%r has fewer than %s items'
                                  % (val, self.min_items))
        validated = []
        for item in val:
            validated.append(self.item_validator.validate(item))
        return validated
|||
|
|||
|
|||
class Map(Composite):
    """Assumes map keys and values are homogeneous with respect to types."""

    def __init__(self, key_validator, value_validator):
        """
        Every Map key/value pair will be validated with item_validator.
        key validators must be a subclass of a String validator
        """
        self.key_validator, self.value_validator = key_validator, value_validator

    def validate(self, val):
        """Return a new dict whose keys and values are the normalised results
        of the key and value validators.

        Raises:
            ValidationError: If ``val`` is not a dict; the key and value
                validators may raise as well.
        """
        if isinstance(val, dict):
            return {
                self.key_validator.validate(k): self.value_validator.validate(v)
                for k, v in val.items()
            }
        raise ValidationError('%r is not a valid dict' % val)
|||
|
|||
|
|||
class Struct(Composite):
    """Validator for instances of a generated Stone struct class."""

    def __init__(self, definition):
        """
        Args:
            definition (class): A generated class representing a Stone struct
                from a spec. Must have a _fields_ attribute with the following
                structure:

                _fields_ = [(field_name, validator), ...]

                where
                    field_name: Name of the field (str).
                    validator: Validator object.
        """
        super(Struct, self).__init__()
        self.definition = definition

    def validate(self, val):
        """
        For a val to pass validation, val must be of the correct type and have
        all required fields present.
        """
        # Type first, then field presence.
        self.validate_type_only(val)
        self.validate_fields_only(val)
        return val

    def validate_with_permissions(self, val, caller_permissions):
        """
        For a val to pass validation, val must be of the correct type and have
        all required permissioned fields present. Should only be called
        for callers with extra permissions.
        """
        self.validate(val)
        self.validate_fields_only_with_permissions(val, caller_permissions)
        return val

    def validate_fields_only(self, val):
        """
        To pass field validation, no required field should be missing.

        This method assumes that the contents of each field have already been
        validated on assignment, so it's merely a presence check.

        FIXME(kelkabany): Since the definition object does not maintain a list
        of which fields are required, all fields are scanned.
        """
        for field_name in self.definition._all_field_names_:
            if hasattr(val, field_name):
                continue
            raise ValidationError("missing required field '%s'" %
                                  field_name)

    def validate_fields_only_with_permissions(self, val, caller_permissions):
        """
        To pass field validation, no required field should be missing.
        This method assumes that the contents of each field have already been
        validated on assignment, so it's merely a presence check.
        Should only be called for callers with extra permissions.
        """
        self.validate_fields_only(val)

        # check if type has been patched
        for extra_permission in caller_permissions.permissions:
            attr_name = '_all_{}_field_names_'.format(extra_permission)
            permissioned_fields = getattr(self.definition, attr_name, set())
            for field_name in permissioned_fields:
                if hasattr(val, field_name):
                    continue
                raise ValidationError("missing required field '%s'" % field_name)

    def validate_type_only(self, val):
        """
        Use this when you only want to validate that the type of an object
        is correct, but not yet validate each field.
        """
        # Since the definition maintains the list of fields for serialization,
        # we're okay with a subclass that might have extra information. This
        # makes it easier to return one subclass for two routes, one of which
        # relies on the parent class.
        if isinstance(val, self.definition):
            return
        raise ValidationError('expected type %s, got %s' %
            (self.definition.__name__, generic_type_name(val)))

    def has_default(self):
        """A struct has a default exactly when its definition has no required fields."""
        return not self.definition._has_required_fields

    def get_default(self):
        """Return a fresh instance of the definition built with no arguments.

        Raises:
            AssertionError: If the definition has required fields.
        """
        assert not self.definition._has_required_fields, 'No default available.'
        return self.definition()
|||
|
|||
|
|||
class StructTree(Struct):
    """Validator for structs with enumerated subtypes.

    NOTE: validate_fields_only() validates the fields known to this base
    struct, but does not do any validation specific to the subtype.
    """

    # See PyCQA/pylint#1043 for why this is disabled; this should show up
    # as a useless-suppression (and can be removed) once a fix is released
    def __init__(self, definition):  # pylint: disable=useless-super-delegation
        """Delegate to ``Struct.__init__`` with the same definition."""
        super(StructTree, self).__init__(definition)
|||
|
|||
|
|||
class Union(Composite):
    """Validator for instances of a generated Stone union class."""

    def __init__(self, definition):
        """
        Args:
            definition (class): A generated class representing a Stone union
                from a spec. Must have a _tagmap attribute with the following
                structure:

                _tagmap = {field_name: validator, ...}

                where
                    field_name (str): Tag name.
                    validator (Validator): Tag value validator.
        """
        self.definition = definition

    def validate(self, val):
        """
        For a val to pass validation, it must have a _tag set. This assumes
        that the object validated that _tag is a valid tag, and that any
        associated value has also been validated.
        """
        self.validate_type_only(val)
        # A missing ``_tag`` attribute and a ``_tag`` of None are treated alike.
        if getattr(val, '_tag', None) is None:
            raise ValidationError('no tag set')
        return val

    def validate_type_only(self, val):
        """
        Use this when you only want to validate that the type of an object
        is correct, but not yet validate each field.

        We check whether val is a Python parent class of the definition. This
        is because Union subtyping works in the opposite direction of Python
        inheritance. For example, if a union U2 extends U1 in Python, this
        validator will accept U1 in places where U2 is expected.
        """
        if issubclass(self.definition, type(val)):
            return
        raise ValidationError('expected type %s or subtype, got %s' %
            (self.definition.__name__, generic_type_name(val)))
|||
|
|||
|
|||
class Void(Primitive):
    """Validator whose only acceptable value is ``None``."""

    def validate(self, val):
        """Return None when ``val`` is None.

        Raises:
            ValidationError: If ``val`` is anything other than None.
        """
        if val is None:
            return None
        raise ValidationError('expected NoneType, got %s' %
                              generic_type_name(val))

    def has_default(self):
        """Void always has a default."""
        return True

    def get_default(self):
        """The default for Void is None."""
        return None
|||
|
|||
|
|||
class Nullable(Validator):
    """Wraps another validator so that ``None`` is also accepted."""

    def __init__(self, validator):
        """
        Args:
            validator: The wrapped Primitive or Composite validator; it may
                be neither a Nullable nor a Void.

        Raises:
            AssertionError: If ``validator`` breaks one of those rules.
        """
        assert isinstance(validator, (Primitive, Composite)), \
            'validator must be for a primitive or composite type'
        assert not isinstance(validator, Nullable), \
            'nullables cannot be stacked'
        assert not isinstance(validator, Void), \
            'void cannot be made nullable'
        self.validator = validator

    def validate(self, val):
        """Return None for None; otherwise the wrapped validator's result."""
        return None if val is None else self.validator.validate(val)

    def validate_type_only(self, val):
        """Use this only if Nullable is wrapping a Composite."""
        return None if val is None else self.validator.validate_type_only(val)

    def has_default(self):
        """A nullable always has a default."""
        return True

    def get_default(self):
        """The default for a nullable is None."""
        return None
|||
|
|||
class Redactor(object):
    """Base class for redactors driven by an optional regular expression."""

    def __init__(self, regex):
        """
        Args:
            regex: What parts of the field to redact.
        """
        self.regex = regex

    @abstractmethod
    def apply(self, val):
        """Redacts information from annotated field.
        Returns: A redacted version of the string provided.
        """

    def _get_matches(self, val):
        """Return ``re.search(self.regex, val)``, or None when no regex is
        configured or ``val`` is of a type the search rejects with TypeError."""
        if self.regex:
            try:
                return re.search(self.regex, val)
            except TypeError:
                pass
        return None
|||
|
|||
|
|||
class HashRedactor(Redactor):
    """Redactor that replaces a value with its MD5 hex digest, optionally
    followed by the regex groups that are kept."""

    def apply(self, val):
        """Redact ``val``.

        Returns:
            ``'<md5> (<blotted>)'`` when the regex matches and the value can
            be hashed; only the blotted groups when it matches but cannot be
            hashed; the bare hash when there is no match; None when neither a
            hash nor a match is available.
        """
        matches = self._get_matches(val)

        # Numbers are hashed via their string form.
        val_to_hash = str(val) if isinstance(val, (int, float)) else val

        try:
            # add string literal to ensure unicode
            hashed = hashlib.md5(val_to_hash.encode('utf-8')).hexdigest() + ''
        except (AttributeError, ValueError):
            # The exception spec must be a tuple: the previous list form could
            # not catch anything, so an unhashable value (one with no
            # ``encode``) escaped as an error instead of yielding no hash.
            hashed = None

        if matches:
            blotted = '***'.join(matches.groups())
            if hashed:
                return '{} ({})'.format(hashed, blotted)
            return blotted
        return hashed
|||
|
|||
|
|||
class BlotRedactor(Redactor):
    """Redactor that blots out everything except the regex groups."""

    def apply(self, val):
        """Return the matched groups joined by ``***``, or a fixed run of
        asterisks when the regex does not match."""
        matches = self._get_matches(val)
        if not matches:
            return '********'
        return '***'.join(matches.groups())
File diff suppressed because it is too large
@ -0,0 +1,562 @@ |
|||
# -*- coding: utf-8 -*- |
|||
# Auto-generated by Stone, do not modify. |
|||
# @generated |
|||
# flake8: noqa |
|||
# pylint: skip-file |
|||
try: |
|||
from . import stone_validators as bv |
|||
from . import stone_base as bb |
|||
except (ImportError, SystemError, ValueError): |
|||
# Catch errors raised when importing a relative module when not in a package. |
|||
# This makes testing this file directly (outside of a package) easier. |
|||
import stone_validators as bv |
|||
import stone_base as bb |
|||
|
|||
try: |
|||
from . import ( |
|||
common, |
|||
) |
|||
except (ImportError, SystemError, ValueError): |
|||
import common |
|||
|
|||
class GroupManagementType(bb.Union):
    """
    The group type determines how a group is managed.

    This class acts as a tagged union. Only one of the ``is_*`` methods will
    return true. To get the associated value of a tag (if one exists), use the
    corresponding ``get_*`` method.

    :ivar user_managed: A group which is managed by selected users.
    :ivar company_managed: A group which is managed by team admins only.
    :ivar system_managed: A group which is managed automatically by Dropbox.
    """

    _catch_all = 'other'
    # The four attributes below are overwritten below the class definition.
    user_managed = None
    company_managed = None
    system_managed = None
    other = None

    def is_user_managed(self):
        """
        Check if the union tag is ``user_managed``.

        :rtype: bool
        """
        return 'user_managed' == self._tag

    def is_company_managed(self):
        """
        Check if the union tag is ``company_managed``.

        :rtype: bool
        """
        return 'company_managed' == self._tag

    def is_system_managed(self):
        """
        Check if the union tag is ``system_managed``.

        :rtype: bool
        """
        return 'system_managed' == self._tag

    def is_other(self):
        """
        Check if the union tag is ``other``.

        :rtype: bool
        """
        return 'other' == self._tag

    def _process_custom_annotations(self, annotation_type, processor):
        """Delegate to the base union's annotation hook."""
        super(GroupManagementType, self)._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        """Return ``GroupManagementType(<tag>, <value>)`` using reprs."""
        tag_and_value = (self._tag, self._value)
        return 'GroupManagementType(%r, %r)' % tag_and_value
|||
|
|||
# Module-level union validator bound to the GroupManagementType class.
GroupManagementType_validator = bv.Union(GroupManagementType)
|||
|
|||
class GroupSummary(bb.Struct):
    """
    Summary record describing one group.

    ``group_name``, ``group_id`` and ``group_management_type`` are required:
    reading them before they are set raises ``AttributeError``. The other
    fields are optional and read as ``None`` while unset.

    :ivar group_external_id: External ID of the group; an arbitrary ID that
        an admin can attach to a group.
    :ivar member_count: Number of members in the group.
    :ivar group_management_type: Who is allowed to manage the group.
    """

    __slots__ = [
        '_group_name_value',
        '_group_name_present',
        '_group_id_value',
        '_group_id_present',
        '_group_external_id_value',
        '_group_external_id_present',
        '_member_count_value',
        '_member_count_present',
        '_group_management_type_value',
        '_group_management_type_present',
    ]

    _has_required_fields = True

    def __init__(self,
                 group_name=None,
                 group_id=None,
                 group_management_type=None,
                 group_external_id=None,
                 member_count=None):
        # Every field starts out unset.
        self._group_name_value, self._group_name_present = None, False
        self._group_id_value, self._group_id_present = None, False
        self._group_external_id_value, self._group_external_id_present = None, False
        self._member_count_value, self._member_count_present = None, False
        self._group_management_type_value, self._group_management_type_present = None, False

        # Supplied values are routed through the property setters so they are
        # validated; the order matches the storage order above.
        supplied = (
            ('group_name', group_name),
            ('group_id', group_id),
            ('group_external_id', group_external_id),
            ('member_count', member_count),
            ('group_management_type', group_management_type),
        )
        for field_name, field_value in supplied:
            if field_value is not None:
                setattr(self, field_name, field_value)

    @property
    def group_name(self):
        """
        Required field.

        :rtype: str
        """
        if not self._group_name_present:
            raise AttributeError("missing required field 'group_name'")
        return self._group_name_value

    @group_name.setter
    def group_name(self, val):
        self._group_name_value = self._group_name_validator.validate(val)
        self._group_name_present = True

    @group_name.deleter
    def group_name(self):
        self._group_name_value, self._group_name_present = None, False

    @property
    def group_id(self):
        """
        Required field.

        :rtype: str
        """
        if not self._group_id_present:
            raise AttributeError("missing required field 'group_id'")
        return self._group_id_value

    @group_id.setter
    def group_id(self, val):
        self._group_id_value = self._group_id_validator.validate(val)
        self._group_id_present = True

    @group_id.deleter
    def group_id(self):
        self._group_id_value, self._group_id_present = None, False

    @property
    def group_external_id(self):
        """
        External ID of the group; an arbitrary ID that an admin can attach to
        a group. ``None`` while unset.

        :rtype: str
        """
        return self._group_external_id_value if self._group_external_id_present else None

    @group_external_id.setter
    def group_external_id(self, val):
        # Assigning None is the same as deleting the field.
        if val is not None:
            self._group_external_id_value = self._group_external_id_validator.validate(val)
            self._group_external_id_present = True
        else:
            del self.group_external_id

    @group_external_id.deleter
    def group_external_id(self):
        self._group_external_id_value, self._group_external_id_present = None, False

    @property
    def member_count(self):
        """
        Number of members in the group. ``None`` while unset.

        :rtype: int
        """
        return self._member_count_value if self._member_count_present else None

    @member_count.setter
    def member_count(self, val):
        # Assigning None is the same as deleting the field.
        if val is not None:
            self._member_count_value = self._member_count_validator.validate(val)
            self._member_count_present = True
        else:
            del self.member_count

    @member_count.deleter
    def member_count(self):
        self._member_count_value, self._member_count_present = None, False

    @property
    def group_management_type(self):
        """
        Who is allowed to manage the group. Required field.

        :rtype: GroupManagementType
        """
        if not self._group_management_type_present:
            raise AttributeError("missing required field 'group_management_type'")
        return self._group_management_type_value

    @group_management_type.setter
    def group_management_type(self, val):
        # Only the type is checked here; the value is stored as given.
        self._group_management_type_validator.validate_type_only(val)
        self._group_management_type_value = val
        self._group_management_type_present = True

    @group_management_type.deleter
    def group_management_type(self):
        self._group_management_type_value, self._group_management_type_present = None, False

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing struct-specific to add; defer entirely to the base class.
        super(GroupSummary, self)._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        template = 'GroupSummary(group_name={!r}, group_id={!r}, group_management_type={!r}, group_external_id={!r}, member_count={!r})'
        return template.format(
            self._group_name_value,
            self._group_id_value,
            self._group_management_type_value,
            self._group_external_id_value,
            self._member_count_value,
        )

GroupSummary_validator = bv.Struct(GroupSummary)
|||
|
|||
class GroupType(bb.Union):
    """
    Describes how a group is created and managed.

    Tagged union: exactly one of the ``is_*`` predicates is true for any
    instance. Where a tag carries a value, it is read with the matching
    ``get_*`` method.

    :ivar team: A group to which team members are automatically added.
        Applicable to `team folders <https://www.dropbox.com/help/986>`_ only.
    :ivar user_managed: A group is created and managed by a user.
    """

    _catch_all = 'other'
    # The three placeholders below are replaced with real instances after the
    # class definition.
    team = None
    user_managed = None
    other = None

    def is_team(self):
        """
        Tell whether this union holds the ``team`` tag.

        :rtype: bool
        """
        return self._tag == 'team'

    def is_user_managed(self):
        """
        Tell whether this union holds the ``user_managed`` tag.

        :rtype: bool
        """
        return self._tag == 'user_managed'

    def is_other(self):
        """
        Tell whether this union holds the ``other`` tag.

        :rtype: bool
        """
        return self._tag == 'other'

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing union-specific to add; defer entirely to the base class.
        super(GroupType, self)._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        return 'GroupType({!r}, {!r})'.format(self._tag, self._value)

GroupType_validator = bv.Union(GroupType)
|||
|
|||
class MemberSpaceLimitType(bb.Union):
    """
    Kind of space limit imposed on a team member.

    Tagged union: exactly one of the ``is_*`` predicates is true for any
    instance. Where a tag carries a value, it is read with the matching
    ``get_*`` method.

    :ivar off: The team member does not have imposed space limit.
    :ivar alert_only: The team member has soft imposed space limit - the
        limit is used for display and for notifications.
    :ivar stop_sync: The team member has hard imposed space limit - Dropbox
        file sync will stop after the limit is reached.
    """

    _catch_all = 'other'
    # The four placeholders below are replaced with real instances after the
    # class definition.
    off = None
    alert_only = None
    stop_sync = None
    other = None

    def is_off(self):
        """
        Tell whether this union holds the ``off`` tag.

        :rtype: bool
        """
        return self._tag == 'off'

    def is_alert_only(self):
        """
        Tell whether this union holds the ``alert_only`` tag.

        :rtype: bool
        """
        return self._tag == 'alert_only'

    def is_stop_sync(self):
        """
        Tell whether this union holds the ``stop_sync`` tag.

        :rtype: bool
        """
        return self._tag == 'stop_sync'

    def is_other(self):
        """
        Tell whether this union holds the ``other`` tag.

        :rtype: bool
        """
        return self._tag == 'other'

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing union-specific to add; defer entirely to the base class.
        super(MemberSpaceLimitType, self)._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        return 'MemberSpaceLimitType({!r}, {!r})'.format(self._tag, self._value)

MemberSpaceLimitType_validator = bv.Union(MemberSpaceLimitType)
|||
|
|||
class TimeRange(bb.Struct):
    """
    A time range whose two bounds are both optional.

    :ivar start_time: Optional starting time (inclusive).
    :ivar end_time: Optional ending time (exclusive).
    """

    __slots__ = [
        '_start_time_value',
        '_start_time_present',
        '_end_time_value',
        '_end_time_present',
    ]

    _has_required_fields = False

    def __init__(self,
                 start_time=None,
                 end_time=None):
        # Both bounds start out unset.
        self._start_time_value, self._start_time_present = None, False
        self._end_time_value, self._end_time_present = None, False

        # Supplied values go through the property setters so they are
        # validated, start bound first.
        for field_name, field_value in (('start_time', start_time),
                                        ('end_time', end_time)):
            if field_value is not None:
                setattr(self, field_name, field_value)

    @property
    def start_time(self):
        """
        Optional starting time (inclusive). ``None`` while unset.

        :rtype: datetime.datetime
        """
        return self._start_time_value if self._start_time_present else None

    @start_time.setter
    def start_time(self, val):
        # Assigning None is the same as deleting the field.
        if val is not None:
            self._start_time_value = self._start_time_validator.validate(val)
            self._start_time_present = True
        else:
            del self.start_time

    @start_time.deleter
    def start_time(self):
        self._start_time_value, self._start_time_present = None, False

    @property
    def end_time(self):
        """
        Optional ending time (exclusive). ``None`` while unset.

        :rtype: datetime.datetime
        """
        return self._end_time_value if self._end_time_present else None

    @end_time.setter
    def end_time(self, val):
        # Assigning None is the same as deleting the field.
        if val is not None:
            self._end_time_value = self._end_time_validator.validate(val)
            self._end_time_present = True
        else:
            del self.end_time

    @end_time.deleter
    def end_time(self):
        self._end_time_value, self._end_time_present = None, False

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing struct-specific to add; defer entirely to the base class.
        super(TimeRange, self)._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        template = 'TimeRange(start_time={!r}, end_time={!r})'
        return template.format(self._start_time_value, self._end_time_value)

TimeRange_validator = bv.Struct(TimeRange)
|||
|
|||
# Validators for this namespace's string aliases. Only MemberExternalId
# carries a length constraint.
GroupExternalId_validator = bv.String()
GroupId_validator = bv.String()
MemberExternalId_validator = bv.String(max_length=64)
ResellerId_validator = bv.String()
TeamMemberId_validator = bv.String()

# GroupManagementType: one bv.Void() validator per tag, collected in _tagmap
# under the tag's name.
GroupManagementType._user_managed_validator = bv.Void()
GroupManagementType._company_managed_validator = bv.Void()
GroupManagementType._system_managed_validator = bv.Void()
GroupManagementType._other_validator = bv.Void()
GroupManagementType._tagmap = {
    'user_managed': GroupManagementType._user_managed_validator,
    'company_managed': GroupManagementType._company_managed_validator,
    'system_managed': GroupManagementType._system_managed_validator,
    'other': GroupManagementType._other_validator,
}

# Replace the class-level attributes with one instance per tag.
GroupManagementType.user_managed = GroupManagementType('user_managed')
GroupManagementType.company_managed = GroupManagementType('company_managed')
GroupManagementType.system_managed = GroupManagementType('system_managed')
GroupManagementType.other = GroupManagementType('other')

# GroupSummary: per-field validators (used by the property setters), followed
# by the field-name set and the ordered (name, validator) list.
GroupSummary._group_name_validator = bv.String()
GroupSummary._group_id_validator = GroupId_validator
GroupSummary._group_external_id_validator = bv.Nullable(GroupExternalId_validator)
GroupSummary._member_count_validator = bv.Nullable(bv.UInt32())
GroupSummary._group_management_type_validator = GroupManagementType_validator
GroupSummary._all_field_names_ = set([
    'group_name',
    'group_id',
    'group_external_id',
    'member_count',
    'group_management_type',
])
GroupSummary._all_fields_ = [
    ('group_name', GroupSummary._group_name_validator),
    ('group_id', GroupSummary._group_id_validator),
    ('group_external_id', GroupSummary._group_external_id_validator),
    ('member_count', GroupSummary._member_count_validator),
    ('group_management_type', GroupSummary._group_management_type_validator),
]

# GroupType: one bv.Void() validator per tag, collected in _tagmap.
GroupType._team_validator = bv.Void()
GroupType._user_managed_validator = bv.Void()
GroupType._other_validator = bv.Void()
GroupType._tagmap = {
    'team': GroupType._team_validator,
    'user_managed': GroupType._user_managed_validator,
    'other': GroupType._other_validator,
}

# Replace the `None` placeholders declared in the class body with one
# instance per tag.
GroupType.team = GroupType('team')
GroupType.user_managed = GroupType('user_managed')
GroupType.other = GroupType('other')

# MemberSpaceLimitType: one bv.Void() validator per tag, collected in _tagmap.
MemberSpaceLimitType._off_validator = bv.Void()
MemberSpaceLimitType._alert_only_validator = bv.Void()
MemberSpaceLimitType._stop_sync_validator = bv.Void()
MemberSpaceLimitType._other_validator = bv.Void()
MemberSpaceLimitType._tagmap = {
    'off': MemberSpaceLimitType._off_validator,
    'alert_only': MemberSpaceLimitType._alert_only_validator,
    'stop_sync': MemberSpaceLimitType._stop_sync_validator,
    'other': MemberSpaceLimitType._other_validator,
}

# Replace the `None` placeholders declared in the class body with one
# instance per tag.
MemberSpaceLimitType.off = MemberSpaceLimitType('off')
MemberSpaceLimitType.alert_only = MemberSpaceLimitType('alert_only')
MemberSpaceLimitType.stop_sync = MemberSpaceLimitType('stop_sync')
MemberSpaceLimitType.other = MemberSpaceLimitType('other')

# TimeRange: both bounds are nullable timestamps validated by the shared
# validator from the `common` namespace.
TimeRange._start_time_validator = bv.Nullable(common.DropboxTimestamp_validator)
TimeRange._end_time_validator = bv.Nullable(common.DropboxTimestamp_validator)
TimeRange._all_field_names_ = set([
    'start_time',
    'end_time',
])
TimeRange._all_fields_ = [
    ('start_time', TimeRange._start_time_validator),
    ('end_time', TimeRange._end_time_validator),
]

# This namespace defines no routes.
ROUTES = {
}
|||
|
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,88 @@ |
|||
# -*- coding: utf-8 -*- |
|||
# Auto-generated by Stone, do not modify. |
|||
# @generated |
|||
# flake8: noqa |
|||
# pylint: skip-file |
|||
""" |
|||
This namespace contains common data types used within the users namespace. |
|||
""" |
|||
|
|||
try: |
|||
from . import stone_validators as bv |
|||
from . import stone_base as bb |
|||
except (ImportError, SystemError, ValueError): |
|||
# Catch errors raised when importing a relative module when not in a package. |
|||
# This makes testing this file directly (outside of a package) easier. |
|||
import stone_validators as bv |
|||
import stone_base as bb |
|||
|
|||
class AccountType(bb.Union):
    """
    The type of account a user has.

    Tagged union: exactly one of the ``is_*`` predicates is true for any
    instance. Where a tag carries a value, it is read with the matching
    ``get_*`` method.

    :ivar basic: The basic account type.
    :ivar pro: The Dropbox Pro account type.
    :ivar business: The Dropbox Business account type.
    """

    _catch_all = None
    # The three placeholders below are replaced with real instances after the
    # class definition.
    basic = None
    pro = None
    business = None

    def is_basic(self):
        """
        Tell whether this union holds the ``basic`` tag.

        :rtype: bool
        """
        return self._tag == 'basic'

    def is_pro(self):
        """
        Tell whether this union holds the ``pro`` tag.

        :rtype: bool
        """
        return self._tag == 'pro'

    def is_business(self):
        """
        Tell whether this union holds the ``business`` tag.

        :rtype: bool
        """
        return self._tag == 'business'

    def _process_custom_annotations(self, annotation_type, processor):
        # Nothing union-specific to add; defer entirely to the base class.
        super(AccountType, self)._process_custom_annotations(annotation_type, processor)

    def __repr__(self):
        return 'AccountType({!r}, {!r})'.format(self._tag, self._value)

AccountType_validator = bv.Union(AccountType)
|||
|
|||
# Account IDs are validated as strings of exactly 40 characters.
AccountId_validator = bv.String(min_length=40, max_length=40)

# AccountType: one bv.Void() validator per tag, collected in _tagmap under
# the tag's name.
AccountType._basic_validator = bv.Void()
AccountType._pro_validator = bv.Void()
AccountType._business_validator = bv.Void()
AccountType._tagmap = {
    'basic': AccountType._basic_validator,
    'pro': AccountType._pro_validator,
    'business': AccountType._business_validator,
}

# Replace the `None` placeholders declared in the class body with one
# instance per tag.
AccountType.basic = AccountType('basic')
AccountType.pro = AccountType('pro')
AccountType.business = AccountType('business')

# This namespace defines no routes.
ROUTES = {
}
|||
|
@ -0,0 +1,2 @@ |
|||
from .package_data import __version__ |
|||
from .core import * |
@ -0,0 +1,118 @@ |
|||
from .core import encode, decode, alabel, ulabel, IDNAError |
|||
import codecs |
|||
import re |
|||
|
|||
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]') |
|||
|
|||
class Codec(codecs.Codec):
    """Stateless IDNA codec built on the module-level encode()/decode()."""

    def encode(self, data, errors='strict'):
        """
        Encode a domain name to its ASCII form.

        :param data: domain name to encode.
        :param errors: must be ``'strict'``; no other handler is supported.
        :returns: ``(encoded_bytes, number_of_input_items_consumed)``.
        :raises IDNAError: for an unsupported ``errors`` value, or whatever
            the module-level ``encode`` raises.
        """
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            # Return bytes for empty input too, matching the bytes produced
            # for non-empty input (on Python 2 b"" is the same object type as
            # the previous "").
            return b"", 0

        return encode(data), len(data)

    def decode(self, data, errors='strict'):
        """
        Decode an ASCII domain name to its Unicode form.

        :param data: domain name to decode.
        :param errors: must be ``'strict'``; no other handler is supported.
        :returns: ``(decoded_text, number_of_input_items_consumed)``.
        :raises IDNAError: for an unsupported ``errors`` value, or whatever
            the module-level ``decode`` raises.
        """
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return u"", 0

        return decode(data), len(data)
|||
|
|||
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    """Incremental IDNA encoder that emits only complete labels."""

    def _buffer_encode(self, data, errors, final):
        """
        Encode as many complete labels of ``data`` as possible.

        :param data: buffered text to encode.
        :param errors: must be ``'strict'``.
        :param final: true when no further input will follow; otherwise the
            last (possibly unfinished) label is left unconsumed.
        :returns: ``(encoded_bytes, number_of_characters_consumed)``.
        :raises IDNAError: for an unsupported ``errors`` value, or whatever
            ``alabel`` raises.
        """
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return (b"", 0)

        labels = _unicode_dots_re.split(data)
        # alabel() returns bytes, so the separator and trailing dot must be
        # bytes as well; joining bytes with a text "." fails on Python 3.
        trailing_dot = b''
        if labels:
            if not labels[-1]:
                trailing_dot = b'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = b'.'

        result = []
        size = 0
        for label in labels:
            result.append(alabel(label))
            # Count one separator between consecutive labels.
            if size:
                size += 1
            size += len(label)

        # Join with U+002E
        result = b".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)
|||
|
|||
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental IDNA decoder that emits only complete labels."""

    def _buffer_decode(self, data, errors, final):
        """
        Decode as many complete labels of ``data`` as possible.

        :param data: buffered input; either text or ASCII-only bytes.
        :param errors: must be ``'strict'``.
        :param final: true when no further input will follow; otherwise the
            last (possibly unfinished) label is left unconsumed.
        :returns: ``(decoded_text, number_of_input_items_consumed)``.
        :raises IDNAError: for an unsupported ``errors`` value, or whatever
            ``ulabel`` raises.
        :raises UnicodeDecodeError: if byte input is not pure ASCII.
        """
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return (u"", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        # type(u'') is the text type on both Python 2 and 3; the name
        # `unicode` is not defined in this module on Python 3.
        if isinstance(data, type(u'')):
            labels = _unicode_dots_re.split(data)
        else:
            # Must be ASCII: decoding both validates that and yields text,
            # instead of str(bytes), which produces a repr on Python 3.
            data = bytes(data).decode("ascii")
            labels = data.split(u".")

        trailing_dot = u''
        if labels:
            if not labels[-1]:
                trailing_dot = u'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = u'.'

        result = []
        size = 0
        for label in labels:
            result.append(ulabel(label))
            # Count one separator between consecutive labels.
            if size:
                size += 1
            size += len(label)

        result = u".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)
|||
|
|||
|
|||
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that takes its encode/decode methods from ``Codec``."""
|||
|
|||
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that takes its encode/decode methods from ``Codec``."""
|||
|
|||
def getregentry():
    """Build the ``codecs.CodecInfo`` entry describing the 'idna' codec."""
    encoder = Codec().encode
    decoder = Codec().decode
    return codecs.CodecInfo(
        name='idna',
        encode=encoder,
        decode=decoder,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
@ -0,0 +1,12 @@ |
|||
from .core import * |
|||
from .codec import * |
|||
|
|||
def ToASCII(label):
    """Compatibility alias: encode ``label`` with the package's ``encode``."""
    ascii_form = encode(label)
    return ascii_form
|||
|
|||
def ToUnicode(label):
    """Compatibility alias: decode ``label`` with the package's ``decode``."""
    unicode_form = decode(label)
    return unicode_form
|||
|
|||
def nameprep(s):
    """
    Placeholder kept for API compatibility; always fails.

    :raises NotImplementedError: unconditionally.
    """
    message = "IDNA 2008 does not utilise nameprep protocol"
    raise NotImplementedError(message)
|||
|
@ -0,0 +1,396 @@ |
|||
from . import idnadata |
|||
import bisect |
|||
import unicodedata |
|||
import re |
|||
import sys |
|||
from .intranges import intranges_contain |
|||
|
|||
# Combining-class value that valid_contextj() compares against the class of
# the character preceding a joiner.
_virama_combining_class = 9
# Prefix ulabel() strips from, and alabel() prepends to, Punycode labels.
_alabel_prefix = b'xn--'
# Matches any of the four dot characters accepted as label separators when
# encode()/decode() run in non-strict mode.
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')

# Python 3 has no `unicode`/`unichr` builtins; alias them so the rest of the
# module can use the Python 2 names.
if sys.version_info[0] == 3:
    unicode = str
    unichr = chr
|||
|
|||
class IDNAError(UnicodeError):
    """Root of the exception hierarchy for every IDNA-encoding problem."""
|||
|
|||
|
|||
class IDNABidiError(IDNAError):
    """Raised when a label violates the bidirectional-text requirements."""
|||
|
|||
|
|||
class InvalidCodepoint(IDNAError):
    """Raised when a disallowed or unallocated codepoint is encountered."""
|||
|
|||
|
|||
class InvalidCodepointContext(IDNAError):
    """Raised when a codepoint is not valid in the context where it appears."""
|||
|
|||
|
|||
def _combining_class(cp):
    """
    Return the canonical combining class of the code point ``cp``.

    A class of 0 is only trusted when the character has a name in
    ``unicodedata``.

    :raises ValueError: when the class is 0 and the name lookup finds nothing.
    """
    char = unichr(cp)
    combining = unicodedata.combining(char)
    if combining == 0 and not unicodedata.name(char):
        raise ValueError("Unknown character in unicodedata")
    return combining
|||
|
|||
def _is_script(cp, script):
    """Tell whether character ``cp`` falls in the ranges of ``script``."""
    script_ranges = idnadata.scripts[script]
    return intranges_contain(ord(cp), script_ranges)
|||
|
|||
def _punycode(s): |
|||
return s.encode('punycode') |
|||
|
|||
def _unot(s): |
|||
return 'U+{0:04X}'.format(s) |
|||
|
|||
|
|||
def valid_label_length(label):
    """Return True when ``label`` is at most 63 items long, else False."""
    return len(label) <= 63
|||
|
|||
|
|||
def valid_string_length(label, trailing_dot):
    """
    Return True when ``label`` fits the overall length limit.

    The limit is 254 when ``trailing_dot`` is truthy and 253 otherwise.
    """
    limit = 254 if trailing_dot else 253
    return len(label) <= limit
|||
|
|||
|
|||
def check_bidi(label, check_ltr=False):
    """
    Validate ``label`` against the six Bidi rules.

    The rules are skipped (and True returned) when the label has no
    right-to-left character, unless ``check_ltr`` is true.

    :param label: text label to check.
    :param check_ltr: apply the rules even to labels without RTL characters.
    :returns: True when the label passes.
    :raises IDNABidiError: when a character has no known directionality or
        any rule is violated.
    """
    # Bidi rules should only be applied if string contains RTL characters
    has_rtl = False
    for position, char in enumerate(label, 1):
        bidi_class = unicodedata.bidirectional(char)
        if bidi_class == '':
            # String likely comes from a newer version of Unicode
            raise IDNABidiError('Unknown directionality in label {0} at position {1}'.format(repr(label), position))
        if bidi_class in ('R', 'AL', 'AN'):
            has_rtl = True
    if not (has_rtl or check_ltr):
        return True

    # Bidi rule 1: the first character fixes the label's direction.
    first_class = unicodedata.bidirectional(label[0])
    if first_class in ('R', 'AL'):
        rtl = True
    elif first_class == 'L':
        rtl = False
    else:
        raise IDNABidiError('First codepoint in label {0} must be directionality L, R or AL'.format(repr(label)))

    ends_validly = False
    numeral_class = False
    for position, char in enumerate(label, 1):
        bidi_class = unicodedata.bidirectional(char)

        if rtl:
            # Bidi rule 2
            if bidi_class not in ('R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM'):
                raise IDNABidiError('Invalid direction for codepoint at position {0} in a right-to-left label'.format(position))
            # Bidi rule 3: NSM characters leave the current verdict alone.
            if bidi_class in ('R', 'AL', 'EN', 'AN'):
                ends_validly = True
            elif bidi_class != 'NSM':
                ends_validly = False
            # Bidi rule 4: only one kind of numeral per label.
            if bidi_class in ('AN', 'EN'):
                if not numeral_class:
                    numeral_class = bidi_class
                elif numeral_class != bidi_class:
                    raise IDNABidiError('Can not mix numeral types in a right-to-left label')
        else:
            # Bidi rule 5
            if bidi_class not in ('L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM'):
                raise IDNABidiError('Invalid direction for codepoint at position {0} in a left-to-right label'.format(position))
            # Bidi rule 6: NSM characters leave the current verdict alone.
            if bidi_class in ('L', 'EN'):
                ends_validly = True
            elif bidi_class != 'NSM':
                ends_validly = False

    if not ends_validly:
        raise IDNABidiError('Label ends with illegal codepoint directionality')

    return True
|||
|
|||
|
|||
def check_initial_combiner(label):
    """
    Reject labels whose first character is a combining mark.

    :returns: True when the first character's general category does not
        start with 'M'.
    :raises IDNAError: when it does.
    """
    first_category = unicodedata.category(label[0])
    if first_category[0] == 'M':
        raise IDNAError('Label begins with an illegal combining character')
    return True
|||
|
|||
|
|||
def check_hyphen_ok(label):
    """
    Enforce the hyphen placement restrictions on ``label``.

    :returns: True when the label passes.
    :raises IDNAError: when positions 3 and 4 are both hyphens, or the label
        starts or ends with a hyphen.
    """
    third_and_fourth = label[2:4]
    if third_and_fourth == '--':
        raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')
    if '-' in (label[0], label[-1]):
        raise IDNAError('Label must not start or end with a hyphen')
    return True
|||
|
|||
|
|||
def check_nfc(label):
    """
    Require ``label`` to already be in Normalization Form C.

    Returns nothing on success.

    :raises IDNAError: when NFC-normalising the label changes it.
    """
    normalised = unicodedata.normalize('NFC', label)
    if normalised != label:
        raise IDNAError('Label must be in Normalization Form C')
|||
|
|||
|
|||
def valid_contextj(label, pos):
    """
    Check the CONTEXTJ rule for the joiner at ``label[pos]``.

    For U+200C (ZERO WIDTH NON-JOINER) the joiner is valid when it directly
    follows a virama, or when it sits between a left context of joining type
    L or D and a right context of joining type R or D, with only
    transparent (type T) characters in between. For U+200D (ZERO WIDTH
    JOINER) it is valid only directly after a virama.

    :param label: text label containing the joiner.
    :param pos: index of the joiner within ``label``.
    :returns: True when the joiner is allowed there; False otherwise,
        including when ``label[pos]`` is neither joiner.
    :raises ValueError: propagated from ``_combining_class`` for characters
        unknown to ``unicodedata``.
    """
    cp_value = ord(label[pos])

    if cp_value == 0x200c:

        if pos > 0:
            if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
                return True

        # Scan left: skip transparent characters, accept the first L/D.
        ok = False
        for i in range(pos-1, -1, -1):
            joining_type = idnadata.joining_types.get(ord(label[i]))
            if joining_type == ord('T'):
                continue
            if joining_type in [ord('L'), ord('D')]:
                ok = True
            # Any other character ends the scan: only type T may separate
            # the joiner from its L/D context.
            break

        if not ok:
            return False

        # Scan right: skip transparent characters, accept the first R/D.
        ok = False
        for i in range(pos+1, len(label)):
            joining_type = idnadata.joining_types.get(ord(label[i]))
            if joining_type == ord('T'):
                continue
            if joining_type in [ord('R'), ord('D')]:
                ok = True
            # Same as above: stop at the first non-transparent character.
            break
        return ok

    if cp_value == 0x200d:

        if pos > 0:
            if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
                return True
        return False

    else:

        return False
|||
|
|||
|
|||
def valid_contexto(label, pos, exception=False):
    """
    Check the CONTEXTO rule for the character at ``label[pos]``.

    Rules implemented, keyed by code point:

    * U+00B7: must sit between two U+006C characters.
    * U+0375: the following character must be Greek.
    * U+05F3, U+05F4: the preceding character must be Hebrew.
    * U+30FB: the label must contain a Hiragana, Katakana or Han character.
    * U+0660..U+0669: the label must contain no U+06F0..U+06F9 digit.
    * U+06F0..U+06F9: the label must contain no U+0660..U+0669 digit.

    :param label: text label containing the character.
    :param pos: index of the character within ``label``.
    :param exception: unused; kept for interface compatibility.
    :returns: True when the character is allowed there, False otherwise
        (including for characters that have no rule here).
    """
    cp_value = ord(label[pos])

    if cp_value == 0x00b7:
        if 0 < pos < len(label)-1:
            if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c:
                return True
        return False

    elif cp_value == 0x0375:
        if pos < len(label)-1 and len(label) > 1:
            return _is_script(label[pos + 1], 'Greek')
        return False

    elif cp_value == 0x05f3 or cp_value == 0x05f4:
        if pos > 0:
            return _is_script(label[pos - 1], 'Hebrew')
        return False

    elif cp_value == 0x30fb:
        for cp in label:
            if cp == u'\u30fb':
                continue
            if _is_script(cp, 'Hiragana') or _is_script(cp, 'Katakana') or _is_script(cp, 'Han'):
                return True
        return False

    elif 0x660 <= cp_value <= 0x669:
        for cp in label:
            if 0x6f0 <= ord(cp) <= 0x06f9:
                return False
        return True

    elif 0x6f0 <= cp_value <= 0x6f9:
        for cp in label:
            if 0x660 <= ord(cp) <= 0x0669:
                return False
        return True

    # No rule covers this code point: report it as not valid explicitly
    # rather than falling off the end and returning None.
    return False
|||
|
|||
|
|||
def check_label(label):
    """
    Validate a single U-label, raising on the first problem found.

    Byte input is decoded as UTF-8 first. The label must be non-empty, in
    NFC, satisfy the hyphen and initial-combiner checks, consist only of
    PVALID code points or CONTEXTJ/CONTEXTO code points that are valid in
    their context, and finally pass ``check_bidi``.

    :param label: text, ``bytes`` or ``bytearray`` label.
    :raises InvalidCodepointContext: for a joiner or CONTEXTO character that
        is not allowed where it appears.
    :raises InvalidCodepoint: for a code point in none of the allowed classes.
    :raises IDNAError: for an empty label, for a joiner next to a character
        unknown to ``unicodedata``, or from any of the helper checks.
    """
    if isinstance(label, (bytes, bytearray)):
        label = label.decode('utf-8')
    if len(label) == 0:
        raise IDNAError('Empty Label')

    check_nfc(label)
    check_hyphen_ok(label)
    check_initial_combiner(label)

    for (pos, cp) in enumerate(label):
        cp_value = ord(cp)
        if intranges_contain(cp_value, idnadata.codepoint_classes['PVALID']):
            continue
        elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTJ']):
            # Only the rule evaluation sits inside the try. The exception
            # raised for a failed rule derives from ValueError (through
            # UnicodeError), so raising it inside the try would have it
            # caught below and reported with the wrong type and message.
            try:
                joiner_ok = valid_contextj(label, pos)
            except ValueError:
                raise IDNAError('Unknown codepoint adjacent to joiner {0} at position {1} in {2}'.format(
                    _unot(cp_value), pos+1, repr(label)))
            if not joiner_ok:
                raise InvalidCodepointContext('Joiner {0} not allowed at position {1} in {2}'.format(
                    _unot(cp_value), pos+1, repr(label)))
        elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTO']):
            if not valid_contexto(label, pos):
                raise InvalidCodepointContext('Codepoint {0} not allowed at position {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))
        else:
            raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label)))

    check_bidi(label)
|||
|
|||
|
|||
def alabel(label):
    """
    Convert one label to its A-label (ASCII bytes) form.

    A label that encodes to ASCII is validated by passing it through
    ``ulabel`` and returned as those ASCII bytes. Any other label is checked
    with ``check_label``, Punycode-encoded and prefixed with
    ``_alabel_prefix``.

    :param label: text label to convert.
    :returns: the label as bytes.
    :raises IDNAError: 'Label too long' when the result fails
        ``valid_label_length``; 'No Input' when a falsy label reaches the
        slow path; validation helpers may raise as well.
    """
    try:
        label = label.encode('ascii')
        # Called for validation only; the return value is discarded.
        ulabel(label)
        if not valid_label_length(label):
            raise IDNAError('Label too long')
        return label
    except UnicodeEncodeError:
        # Not pure ASCII: fall through to the Punycode path below.
        pass

    if not label:
        raise IDNAError('No Input')

    label = unicode(label)
    check_label(label)
    label = _punycode(label)
    label = _alabel_prefix + label

    if not valid_label_length(label):
        raise IDNAError('Label too long')

    return label
|||
|
|||
|
|||
def ulabel(label):
    """
    Convert one label to its U-label (Unicode text) form.

    Non-ASCII text is validated with ``check_label`` and returned unchanged.
    ASCII input is lower-cased; without the ``_alabel_prefix`` it is
    validated and returned as text, with the prefix the remainder is
    Punycode-decoded, validated and returned.

    :param label: text, ``bytes`` or ``bytearray`` label.
    :returns: the label as text.
    :raises IDNAError: when the Punycode payload cannot be decoded, or from
        ``check_label``.
    """
    if not isinstance(label, (bytes, bytearray)):
        try:
            label = label.encode('ascii')
        except UnicodeEncodeError:
            check_label(label)
            return label

    label = label.lower()
    if label.startswith(_alabel_prefix):
        label = label[len(_alabel_prefix):]
    else:
        check_label(label)
        return label.decode('ascii')

    try:
        label = label.decode('punycode')
    except UnicodeError:
        # Report malformed Punycode through the package's own exception type
        # (still a UnicodeError subclass) instead of the codec's bare error.
        raise IDNAError('Invalid A-label')
    check_label(label)
    return label
|||
|
|||
|
|||
def uts46_remap(domain, std3_rules=True, transitional=False):
    """Re-map the characters in the string according to UTS46 processing.

    Each character is looked up in the ``uts46data`` table. Depending on the
    row's status code the character is kept, replaced by the row's
    replacement, dropped (status "I") or rejected. The accumulated output is
    returned NFC-normalised.

    :param domain: text to remap.
    :param std3_rules: when false, status "3" rows are kept (no replacement)
        or replaced instead of being rejected.
    :param transitional: when true, status "D" rows use their replacement
        instead of being kept.
    :raises InvalidCodepoint: for a character that is rejected or whose
        table lookup fails.
    """
    from .uts46data import uts46data
    output = u""
    try:
        for pos, char in enumerate(domain):
            code_point = ord(char)
            # Code points below 256 index the table directly; larger ones are
            # located by bisecting on (code_point, "Z") and stepping back one
            # row.
            uts46row = uts46data[code_point if code_point < 256 else
                bisect.bisect_left(uts46data, (code_point, "Z")) - 1]
            status = uts46row[1]
            replacement = uts46row[2] if len(uts46row) == 3 else None
            if (status == "V" or
                    (status == "D" and not transitional) or
                    (status == "3" and not std3_rules and replacement is None)):
                output += char
            elif replacement is not None and (status == "M" or
                    (status == "3" and not std3_rules) or
                    (status == "D" and transitional)):
                output += replacement
            elif status != "I":
                # Rejected character: reuse the IndexError handler below so
                # lookup failures and rejections produce the same error.
                raise IndexError()
        return unicodedata.normalize("NFC", output)
    except IndexError:
        raise InvalidCodepoint(
            "Codepoint {0} not allowed at position {1} in {2}".format(
            _unot(code_point), pos + 1, repr(domain)))
|||
|
|||
|
|||
def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False):
    """
    Encode a domain name to its ASCII (A-label) form.

    :param s: domain name; ``bytes``/``bytearray`` input is decoded as ASCII.
    :param strict: split labels on '.' only, instead of on any character
        matched by ``_unicode_dots_re``.
    :param uts46: pre-process the name with ``uts46_remap``.
    :param std3_rules: forwarded to ``uts46_remap``.
    :param transitional: forwarded to ``uts46_remap``.
    :returns: the encoded domain as bytes; a trailing dot is preserved.
    :raises IDNAError: for an empty domain, an empty label, an over-long
        result, or from ``alabel``.
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode("ascii")
    if uts46:
        s = uts46_remap(s, std3_rules, transitional)

    labels = s.split('.') if strict else _unicode_dots_re.split(s)
    if not labels or labels == ['']:
        raise IDNAError('Empty domain')

    # An empty final label means the name ended with a dot.
    trailing_dot = labels[-1] == ''
    if trailing_dot:
        del labels[-1]

    encoded_labels = []
    for label in labels:
        a_label = alabel(label)
        if not a_label:
            raise IDNAError('Empty label')
        encoded_labels.append(a_label)
    if trailing_dot:
        encoded_labels.append(b'')

    domain = b'.'.join(encoded_labels)
    if not valid_string_length(domain, trailing_dot):
        raise IDNAError('Domain too long')
    return domain
|||
|
|||
|
|||
def decode(s, strict=False, uts46=False, std3_rules=False):
    """
    Decode a domain name to its Unicode (U-label) form.

    :param s: domain name; ``bytes``/``bytearray`` input is decoded as ASCII.
    :param strict: split labels on '.' only, instead of on any character
        matched by ``_unicode_dots_re``.
    :param uts46: pre-process the name with ``uts46_remap`` (always
        non-transitional).
    :param std3_rules: forwarded to ``uts46_remap``.
    :returns: the decoded domain as text; a trailing dot is preserved.
    :raises IDNAError: for an empty domain, an empty label, or from
        ``ulabel``.
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode("ascii")
    if uts46:
        s = uts46_remap(s, std3_rules, False)

    labels = s.split(u'.') if strict else _unicode_dots_re.split(s)
    if not labels or labels == ['']:
        raise IDNAError('Empty domain')

    # An empty final label means the name ended with a dot.
    trailing_dot = not labels[-1]
    if trailing_dot:
        del labels[-1]

    decoded_labels = []
    for label in labels:
        u_label = ulabel(label)
        if not u_label:
            raise IDNAError('Empty label')
        decoded_labels.append(u_label)
    if trailing_dot:
        decoded_labels.append(u'')

    return u'.'.join(decoded_labels)
File diff suppressed because it is too large
@ -0,0 +1,53 @@ |
|||
""" |
|||
Given a list of integers, made up of (hopefully) a small number of long runs |
|||
of consecutive integers, compute a representation of the form |
|||
((start1, end1), (start2, end2) ...). Then answer the question "was x present |
|||
in the original list?" in time O(log(# runs)). |
|||
""" |
|||
|
|||
import bisect |
|||
|
|||
def intranges_from_list(list_):
    """Collapse a list of integers into a tuple of encoded half-open ranges.

    The input is sorted and every maximal run of consecutive integers becomes
    one range ``[start, end)``.  Each range is stored as the single integer
    produced by ``_encode_range(start, end)`` rather than as a tuple.
    """
    ordered = sorted(list_)
    total = len(ordered)
    encoded = []
    run_start = 0  # index of the first element of the run being built
    for idx, value in enumerate(ordered):
        has_next = idx + 1 < total
        if has_next and ordered[idx + 1] - 1 == value:
            # The next value extends the current run; keep scanning.
            continue
        encoded.append(_encode_range(ordered[run_start], value + 1))
        run_start = idx + 1

    return tuple(encoded)
|||
|
|||
def _encode_range(start, end): |
|||
return (start << 32) | end |
|||
|
|||
def _decode_range(r): |
|||
return (r >> 32), (r & ((1 << 32) - 1)) |
|||
|
|||
|
|||
def intranges_contain(int_, ranges):
    """Return True if ``int_`` lies inside one of the encoded ``ranges``.

    ``ranges`` is searched with ``bisect`` using ``int_`` encoded as the start
    of a range whose end is 0.
    """
    probe = _encode_range(int_, 0)
    idx = bisect.bisect_left(ranges, probe)

    # The range just before the insertion point may begin at or below int_
    # and still cover it (start <= int_ < end).
    if idx > 0:
        start, end = _decode_range(ranges[idx - 1])
        if start <= int_ < end:
            return True

    # Otherwise the range at the insertion point matches only when it begins
    # exactly at int_.
    if idx < len(ranges):
        start = _decode_range(ranges[idx])[0]
        if start == int_:
            return True

    return False
@ -0,0 +1,2 @@ |
|||
# Package version string.
__version__ = '2.8'
|||
|
File diff suppressed because it is too large
@ -0,0 +1,131 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
# __ |
|||
# /__) _ _ _ _ _/ _ |
|||
# / ( (- (/ (/ (- _) / _) |
|||
# / |
|||
|
|||
""" |
|||
Requests HTTP Library |
|||
~~~~~~~~~~~~~~~~~~~~~ |
|||
|
|||
Requests is an HTTP library, written in Python, for human beings. Basic GET |
|||
usage: |
|||
|
|||
>>> import requests |
|||
>>> r = requests.get('https://www.python.org') |
|||
>>> r.status_code |
|||
200 |
|||
>>> b'Python is a programming language' in r.content
|||
True |
|||
|
|||
... or POST: |
|||
|
|||
>>> payload = dict(key1='value1', key2='value2') |
|||
>>> r = requests.post('https://httpbin.org/post', data=payload) |
|||
>>> print(r.text) |
|||
{ |
|||
... |
|||
"form": { |
|||
"key2": "value2", |
|||
"key1": "value1" |
|||
}, |
|||
... |
|||
} |
|||
|
|||
The other HTTP methods are supported - see `requests.api`. Full documentation |
|||
is at <http://python-requests.org>. |
|||
|
|||
:copyright: (c) 2017 by Kenneth Reitz. |
|||
:license: Apache 2.0, see LICENSE for more details. |
|||
""" |
|||
|
|||
import urllib3 |
|||
import chardet |
|||
import warnings |
|||
from .exceptions import RequestsDependencyWarning |
|||
|
|||
|
|||
def check_compatibility(urllib3_version, chardet_version):
    """Assert that the given urllib3 and chardet versions are supported.

    :param urllib3_version: dotted version string reported by urllib3.
    :param chardet_version: dotted version string reported by chardet.
    :raises AssertionError: if a version falls outside the accepted range.
    :raises ValueError: if a version does not split into the expected number
        of parts or a part is not an integer.
    """
    urllib3_parts = urllib3_version.split('.')
    # A bare 'dev' version means urllib3 was installed from git.
    assert urllib3_parts != ['dev']

    # Sometimes, urllib3 only reports its version as 16.1.
    if len(urllib3_parts) == 2:
        urllib3_parts.append('0')

    # urllib3: major must be 1 and minor must lie in 21..24; the patch
    # component is parsed but not range-checked.
    u_major, u_minor, u_patch = urllib3_parts
    u_major, u_minor, u_patch = [int(p) for p in (u_major, u_minor, u_patch)]
    assert u_major == 1
    assert 21 <= u_minor
    assert u_minor <= 24

    # chardet: only the first three components are considered; accepted
    # versions have major 3, minor 0 and patch of at least 2.
    c_major, c_minor, c_patch = chardet_version.split('.')[:3]
    c_major, c_minor, c_patch = [int(p) for p in (c_major, c_minor, c_patch)]
    assert c_major == 3
    assert c_minor < 1
    assert c_patch >= 2
|||
|
|||
|
|||
def _check_cryptography(cryptography_version): |
|||
# cryptography < 1.3.4 |
|||
try: |
|||
cryptography_version = list(map(int, cryptography_version.split('.'))) |
|||
except ValueError: |
|||
return |
|||
|
|||
if cryptography_version < [1, 3, 4]: |
|||
warning = 'Old version of cryptography ({}) may cause slowdown.'.format(cryptography_version) |
|||
warnings.warn(warning, RequestsDependencyWarning) |
|||
|
|||
# Check imported dependencies for compatibility.
# A failed check is reported as a RequestsDependencyWarning instead of an
# error, so importing the package still succeeds with unsupported versions.
try:
    check_compatibility(urllib3.__version__, chardet.__version__)
except (AssertionError, ValueError):
    warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
                  "version!".format(urllib3.__version__, chardet.__version__),
                  RequestsDependencyWarning)

# Attempt to enable urllib3's SNI support, if possible
# Both imports below are optional: if either pyopenssl or cryptography is
# missing, the ImportError is swallowed and setup continues without them.
try:
    from urllib3.contrib import pyopenssl
    pyopenssl.inject_into_urllib3()

    # Check cryptography version
    from cryptography import __version__ as cryptography_version
    _check_cryptography(cryptography_version)
except ImportError:
    pass

# urllib3's DependencyWarnings should be silenced.
from urllib3.exceptions import DependencyWarning
warnings.simplefilter('ignore', DependencyWarning)

# Re-export the package metadata and the public API at package level.
from .__version__ import __title__, __description__, __url__, __version__
from .__version__ import __build__, __author__, __author_email__, __license__
from .__version__ import __copyright__, __cake__

from . import utils
from . import packages
from .models import Request, Response, PreparedRequest
from .api import request, get, head, post, patch, put, delete, options
from .sessions import session, Session
from .status_codes import codes
from .exceptions import (
    RequestException, Timeout, URLRequired,
    TooManyRedirects, HTTPError, ConnectionError,
    FileModeWarning, ConnectTimeout, ReadTimeout
)

# Set default logging handler to avoid "No handler found" warnings.
import logging
from logging import NullHandler

logging.getLogger(__name__).addHandler(NullHandler())

# FileModeWarnings go off per the default.
# append=True adds this filter at the end of the warnings filter list.
warnings.simplefilter('default', FileModeWarning, append=True)
@ -0,0 +1,14 @@ |
|||
# .-. .-. .-. . . .-. .-. .-. .-. |
|||
# |( |- |.| | | |- `-. | `-. |
|||
# ' ' `-' `-`.`-' `-' `-' ' `-' |
|||
|
|||
# Package metadata constants.
__title__ = 'requests'
__description__ = 'Python HTTP for Humans.'
__url__ = 'http://python-requests.org'
__version__ = '2.21.0'
# Hex literal whose digits spell out the version string above (2.21.0).
__build__ = 0x022100
__author__ = 'Kenneth Reitz'
__author_email__ = 'me@kennethreitz.org'
__license__ = 'Apache 2.0'
__copyright__ = 'Copyright 2018 Kenneth Reitz'
# Unicode escapes: U+2728, U+1F370, U+2728 separated by spaces.
__cake__ = u'\u2728 \U0001f370 \u2728'
@ -0,0 +1,42 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
requests._internal_utils |
|||
~~~~~~~~~~~~~~ |
|||
|
|||
Provides utility functions that are consumed internally by Requests |
|||
which depend on extremely few external helpers (such as compat) |
|||
""" |
|||
|
|||
from .compat import is_py2, builtin_str, str |
|||
|
|||
|
|||
def to_native_string(string, encoding='ascii'):
    """Return ``string`` as the interpreter's native string type.

    A value that is already a ``builtin_str`` is returned unchanged.
    Otherwise it is encoded on Python 2 and decoded on Python 3, using
    ``encoding`` (ASCII unless told otherwise).
    """
    if isinstance(string, builtin_str):
        return string

    if is_py2:
        return string.encode(encoding)
    return string.decode(encoding)
|||
|
|||
|
|||
def unicode_is_ascii(u_string):
    """Report whether a unicode string consists solely of ASCII characters.

    :param str u_string: unicode string to check. Must be unicode
        and not Python 2 `str`.
    :rtype: bool
    """
    assert isinstance(u_string, str)
    try:
        u_string.encode('ascii')
    except UnicodeEncodeError:
        return False
    else:
        return True
@ -0,0 +1,533 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
requests.adapters |
|||
~~~~~~~~~~~~~~~~~ |
|||
|
|||
This module contains the transport adapters that Requests uses to define |
|||
and maintain connections. |
|||
""" |
|||
|
|||
import os.path |
|||
import socket |
|||
|
|||
from urllib3.poolmanager import PoolManager, proxy_from_url |
|||
from urllib3.response import HTTPResponse |
|||
from urllib3.util import parse_url |
|||
from urllib3.util import Timeout as TimeoutSauce |
|||
from urllib3.util.retry import Retry |
|||
from urllib3.exceptions import ClosedPoolError |
|||
from urllib3.exceptions import ConnectTimeoutError |
|||
from urllib3.exceptions import HTTPError as _HTTPError |
|||
from urllib3.exceptions import MaxRetryError |
|||
from urllib3.exceptions import NewConnectionError |
|||
from urllib3.exceptions import ProxyError as _ProxyError |
|||
from urllib3.exceptions import ProtocolError |
|||
from urllib3.exceptions import ReadTimeoutError |
|||
from urllib3.exceptions import SSLError as _SSLError |
|||
from urllib3.exceptions import ResponseError |
|||
from urllib3.exceptions import LocationValueError |
|||
|
|||
from .models import Response |
|||
from .compat import urlparse, basestring |
|||
from .utils import (DEFAULT_CA_BUNDLE_PATH, extract_zipped_paths, |
|||
get_encoding_from_headers, prepend_scheme_if_needed, |
|||
get_auth_from_url, urldefragauth, select_proxy) |
|||
from .structures import CaseInsensitiveDict |
|||
from .cookies import extract_cookies_to_jar |
|||
from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError, |
|||
ProxyError, RetryError, InvalidSchema, InvalidProxyURL, |
|||
InvalidURL) |
|||
from .auth import _basic_auth_str |
|||
|
|||
# SOCKS proxy support is optional.  When urllib3's SOCKS module cannot be
# imported, install a stand-in with the same name that raises InvalidSchema
# as soon as it is called.
try:
    from urllib3.contrib.socks import SOCKSProxyManager
except ImportError:
    def SOCKSProxyManager(*args, **kwargs):
        raise InvalidSchema("Missing dependencies for SOCKS support.")

# Default for HTTPAdapter's ``pool_block`` argument.
DEFAULT_POOLBLOCK = False
# Default for both ``pool_connections`` and ``pool_maxsize``.
DEFAULT_POOLSIZE = 10
# Default for ``max_retries``.
DEFAULT_RETRIES = 0
# Timeout passed to ``_get_conn`` when HTTPAdapter.send streams a chunked body.
DEFAULT_POOL_TIMEOUT = None
|||
|
|||
|
|||
class BaseAdapter(object):
    """The Base Transport Adapter.

    Declares the interface of a transport adapter; ``send`` and ``close``
    raise ``NotImplementedError`` here and are meant to be overridden.
    """

    def __init__(self):
        super(BaseAdapter, self).__init__()

    def send(self, request, stream=False, timeout=None, verify=True,
             cert=None, proxies=None):
        """Send a PreparedRequest and return a Response.

        :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
        :param stream: (optional) Whether to stream the request content.
        :param timeout: (optional) How long to wait for the server to send
            data before giving up, as a float, or a :ref:`(connect timeout,
            read timeout) <timeouts>` tuple.
        :type timeout: float or tuple
        :param verify: (optional) Either a boolean, in which case it controls
            whether we verify the server's TLS certificate, or a string, in
            which case it must be a path to a CA bundle to use
        :param cert: (optional) Any user-provided SSL certificate to be trusted.
        :param proxies: (optional) The proxies dictionary to apply to the request.
        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def close(self):
        """Clean up adapter specific items.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()
|||
|
|||
|
|||
class HTTPAdapter(BaseAdapter):
    """The built-in HTTP Adapter for urllib3.

    Provides a general-case interface for Requests sessions to contact HTTP and
    HTTPS urls by implementing the Transport Adapter interface. This class will
    usually be created by the :class:`Session <Session>` class under the
    covers.

    :param pool_connections: The number of urllib3 connection pools to cache.
    :param pool_maxsize: The maximum number of connections to save in the pool.
    :param max_retries: The maximum number of retries each connection
        should attempt. Note, this applies only to failed DNS lookups, socket
        connections and connection timeouts, never to requests where data has
        made it to the server. By default, Requests does not retry failed
        connections. If you need granular control over the conditions under
        which we retry a request, import urllib3's ``Retry`` class and pass
        that instead.
    :param pool_block: Whether the connection pool should block for connections.

    Usage::

      >>> import requests
      >>> s = requests.Session()
      >>> a = requests.adapters.HTTPAdapter(max_retries=3)
      >>> s.mount('http://', a)
    """
    # Attributes captured by __getstate__ when the adapter is pickled.
    __attrs__ = ['max_retries', 'config', '_pool_connections', '_pool_maxsize',
                 '_pool_block']

    def __init__(self, pool_connections=DEFAULT_POOLSIZE,
                 pool_maxsize=DEFAULT_POOLSIZE, max_retries=DEFAULT_RETRIES,
                 pool_block=DEFAULT_POOLBLOCK):
        if max_retries == DEFAULT_RETRIES:
            # Default: no retries at all, and read errors are never retried.
            self.max_retries = Retry(0, read=False)
        else:
            self.max_retries = Retry.from_int(max_retries)
        self.config = {}
        # Cache of proxy managers, keyed by proxy URL.
        self.proxy_manager = {}

        super(HTTPAdapter, self).__init__()

        self._pool_connections = pool_connections
        self._pool_maxsize = pool_maxsize
        self._pool_block = pool_block

        self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block)

    def __getstate__(self):
        """Return the picklable state: only the attributes in ``__attrs__``."""
        return {attr: getattr(self, attr, None) for attr in self.__attrs__}

    def __setstate__(self, state):
        """Restore pickled state and rebuild the (unpickled) pool manager."""
        # Can't handle by adding 'proxy_manager' to self.__attrs__ because
        # self.poolmanager uses a lambda function, which isn't pickleable.
        self.proxy_manager = {}
        self.config = {}

        for attr, value in state.items():
            setattr(self, attr, value)

        self.init_poolmanager(self._pool_connections, self._pool_maxsize,
                              block=self._pool_block)

    def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs):
        """Initializes a urllib3 PoolManager.

        This method should not be called from user code, and is only
        exposed for use when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param connections: The number of urllib3 connection pools to cache.
        :param maxsize: The maximum number of connections to save in the pool.
        :param block: Block when no free connections are available.
        :param pool_kwargs: Extra keyword arguments used to initialize the Pool Manager.
        """
        # save these values for pickling
        self._pool_connections = connections
        self._pool_maxsize = maxsize
        self._pool_block = block

        self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize,
                                       block=block, strict=True, **pool_kwargs)

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        """Return urllib3 ProxyManager for the given proxy.

        Managers are created on first use and cached per proxy URL.

        This method should not be called from user code, and is only
        exposed for use when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param proxy: The proxy to return a urllib3 ProxyManager for.
        :param proxy_kwargs: Extra keyword arguments used to configure the Proxy Manager.
        :returns: ProxyManager
        :rtype: urllib3.ProxyManager
        """
        if proxy in self.proxy_manager:
            manager = self.proxy_manager[proxy]
        elif proxy.lower().startswith('socks'):
            # SOCKS proxies take their credentials as explicit arguments.
            username, password = get_auth_from_url(proxy)
            manager = self.proxy_manager[proxy] = SOCKSProxyManager(
                proxy,
                username=username,
                password=password,
                num_pools=self._pool_connections,
                maxsize=self._pool_maxsize,
                block=self._pool_block,
                **proxy_kwargs
            )
        else:
            # Other proxies carry their credentials in the proxy headers.
            proxy_headers = self.proxy_headers(proxy)
            manager = self.proxy_manager[proxy] = proxy_from_url(
                proxy,
                proxy_headers=proxy_headers,
                num_pools=self._pool_connections,
                maxsize=self._pool_maxsize,
                block=self._pool_block,
                **proxy_kwargs)

        return manager

    def cert_verify(self, conn, url, verify, cert):
        """Configure TLS verification and the client certificate on ``conn``.

        This method should not be called from user code, and is only exposed
        for use when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param conn: The urllib3 connection object associated with the cert.
        :param url: The requested URL.
        :param verify: Either a boolean, in which case it controls whether we verify
            the server's TLS certificate, or a string, in which case it must be a path
            to a CA bundle to use
        :param cert: Client certificate: either a single path string used as
            the certificate file, or a sequence whose first two items are the
            certificate file and the key file.
        :raises IOError: if the CA bundle, certificate file or key file does
            not exist.
        """
        if url.lower().startswith('https') and verify:

            cert_loc = None

            # Allow self-specified cert location.
            if verify is not True:
                cert_loc = verify

            if not cert_loc:
                cert_loc = extract_zipped_paths(DEFAULT_CA_BUNDLE_PATH)

            if not cert_loc or not os.path.exists(cert_loc):
                raise IOError("Could not find a suitable TLS CA certificate bundle, "
                              "invalid path: {}".format(cert_loc))

            conn.cert_reqs = 'CERT_REQUIRED'

            # A directory is passed as ca_cert_dir, a file as ca_certs.
            if not os.path.isdir(cert_loc):
                conn.ca_certs = cert_loc
            else:
                conn.ca_cert_dir = cert_loc
        else:
            conn.cert_reqs = 'CERT_NONE'
            conn.ca_certs = None
            conn.ca_cert_dir = None

        if cert:
            if not isinstance(cert, basestring):
                conn.cert_file = cert[0]
                conn.key_file = cert[1]
            else:
                conn.cert_file = cert
                conn.key_file = None
            if conn.cert_file and not os.path.exists(conn.cert_file):
                raise IOError("Could not find the TLS certificate file, "
                              "invalid path: {}".format(conn.cert_file))
            if conn.key_file and not os.path.exists(conn.key_file):
                raise IOError("Could not find the TLS key file, "
                              "invalid path: {}".format(conn.key_file))

    def build_response(self, req, resp):
        """Builds a :class:`Response <requests.Response>` object from a urllib3
        response. This should not be called from user code, and is only exposed
        for use when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`

        :param req: The :class:`PreparedRequest <PreparedRequest>` used to generate the response.
        :param resp: The urllib3 response object.
        :rtype: requests.Response
        """
        response = Response()

        # Fallback to None if there's no status_code, for whatever reason.
        response.status_code = getattr(resp, 'status', None)

        # Make headers case-insensitive.
        response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {}))

        # Set encoding.
        response.encoding = get_encoding_from_headers(response.headers)
        response.raw = resp
        response.reason = response.raw.reason

        # The response URL is always exposed as text; byte URLs are decoded
        # as UTF-8.
        if isinstance(req.url, bytes):
            response.url = req.url.decode('utf-8')
        else:
            response.url = req.url

        # Add new cookies from the server.
        extract_cookies_to_jar(response.cookies, req, resp)

        # Give the Response some context.
        response.request = req
        response.connection = self

        return response

    def get_connection(self, url, proxies=None):
        """Returns a urllib3 connection for the given URL. This should not be
        called from user code, and is only exposed for use when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param url: The URL to connect to.
        :param proxies: (optional) A Requests-style dictionary of proxies used on this request.
        :rtype: urllib3.ConnectionPool
        :raises InvalidProxyURL: if the selected proxy URL has no host.
        """
        proxy = select_proxy(url, proxies)

        if proxy:
            proxy = prepend_scheme_if_needed(proxy, 'http')
            proxy_url = parse_url(proxy)
            if not proxy_url.host:
                raise InvalidProxyURL("Please check proxy URL. It is malformed"
                                      " and could be missing the host.")
            proxy_manager = self.proxy_manager_for(proxy)
            conn = proxy_manager.connection_from_url(url)
        else:
            # Only scheme should be lower case
            parsed = urlparse(url)
            url = parsed.geturl()
            conn = self.poolmanager.connection_from_url(url)

        return conn

    def close(self):
        """Disposes of any internal state.

        Currently, this closes the PoolManager and any active ProxyManager,
        which closes any pooled connections.
        """
        self.poolmanager.clear()
        for proxy in self.proxy_manager.values():
            proxy.clear()

    def request_url(self, request, proxies):
        """Obtain the url to use when making the final request.

        If the message is being sent through a HTTP proxy, the full URL has to
        be used. Otherwise, we should only use the path portion of the URL.

        This should not be called from user code, and is only exposed for use
        when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
        :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs.
        :rtype: str
        """
        proxy = select_proxy(request.url, proxies)
        scheme = urlparse(request.url).scheme

        is_proxied_http_request = (proxy and scheme != 'https')
        using_socks_proxy = False
        if proxy:
            proxy_scheme = urlparse(proxy).scheme.lower()
            using_socks_proxy = proxy_scheme.startswith('socks')

        # Default to the path-only form; switch to the full URL only for
        # non-HTTPS requests sent through a non-SOCKS proxy.
        url = request.path_url
        if is_proxied_http_request and not using_socks_proxy:
            url = urldefragauth(request.url)

        return url

    def add_headers(self, request, **kwargs):
        """Add any headers needed by the connection. As of v2.0 this does
        nothing by default, but is left for overriding by users that subclass
        the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        This should not be called from user code, and is only exposed for use
        when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param request: The :class:`PreparedRequest <PreparedRequest>` to add headers to.
        :param kwargs: The keyword arguments from the call to send().
        """
        pass

    def proxy_headers(self, proxy):
        """Returns a dictionary of the headers to add to any request sent
        through a proxy. This works with urllib3 magic to ensure that they are
        correctly sent to the proxy, rather than in a tunnelled request if
        CONNECT is being used.

        This should not be called from user code, and is only exposed for use
        when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param proxy: The url of the proxy being used for this request.
        :rtype: dict
        """
        headers = {}
        username, password = get_auth_from_url(proxy)

        # Only add credentials when the proxy URL actually carries a username.
        if username:
            headers['Proxy-Authorization'] = _basic_auth_str(username,
                                                             password)

        return headers

    def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None):
        """Sends PreparedRequest object. Returns Response object.

        :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
        :param stream: (optional) Whether to stream the request content.
        :param timeout: (optional) How long to wait for the server to send
            data before giving up, as a float, or a :ref:`(connect timeout,
            read timeout) <timeouts>` tuple.
        :type timeout: float or tuple or urllib3 Timeout object
        :param verify: (optional) Either a boolean, in which case it controls whether
            we verify the server's TLS certificate, or a string, in which case it
            must be a path to a CA bundle to use
        :param cert: (optional) Client certificate, handed to
            :meth:`cert_verify`: a path string, or a (cert, key) pair.
        :param proxies: (optional) The proxies dictionary to apply to the request.
        :rtype: requests.Response
        :raises InvalidURL: if no connection can be located for the URL.
        :raises ValueError: if ``timeout`` is a tuple that is not a
            (connect, read) pair.
        :raises ConnectionError: (or one of the more specific ConnectTimeout,
            ReadTimeout, RetryError, ProxyError, SSLError) when the underlying
            urllib3 call fails.
        """

        try:
            conn = self.get_connection(request.url, proxies)
        except LocationValueError as e:
            raise InvalidURL(e, request=request)

        self.cert_verify(conn, request.url, verify, cert)
        url = self.request_url(request, proxies)
        self.add_headers(request, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies)

        # A body without a Content-Length header is sent with chunked
        # transfer encoding.
        chunked = not (request.body is None or 'Content-Length' in request.headers)

        if isinstance(timeout, tuple):
            try:
                connect, read = timeout
                timeout = TimeoutSauce(connect=connect, read=read)
            except ValueError:
                # this may raise a string formatting error.
                err = ("Invalid timeout {}. Pass a (connect, read) "
                       "timeout tuple, or a single float to set "
                       "both timeouts to the same value".format(timeout))
                raise ValueError(err)
        elif isinstance(timeout, TimeoutSauce):
            pass
        else:
            timeout = TimeoutSauce(connect=timeout, read=timeout)

        try:
            if not chunked:
                resp = conn.urlopen(
                    method=request.method,
                    url=url,
                    body=request.body,
                    headers=request.headers,
                    redirect=False,
                    assert_same_host=False,
                    preload_content=False,
                    decode_content=False,
                    retries=self.max_retries,
                    timeout=timeout
                )

            # Send the request.
            else:
                if hasattr(conn, 'proxy_pool'):
                    conn = conn.proxy_pool

                low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT)

                try:
                    low_conn.putrequest(request.method,
                                        url,
                                        skip_accept_encoding=True)

                    for header, value in request.headers.items():
                        low_conn.putheader(header, value)

                    low_conn.endheaders()

                    # Write each piece of the body as one chunk: hex length,
                    # CRLF, data, CRLF; a zero-length chunk ends the body.
                    for i in request.body:
                        low_conn.send(hex(len(i))[2:].encode('utf-8'))
                        low_conn.send(b'\r\n')
                        low_conn.send(i)
                        low_conn.send(b'\r\n')
                    low_conn.send(b'0\r\n\r\n')

                    # Receive the response from the server
                    try:
                        # For Python 2.7, use buffering of HTTP responses
                        r = low_conn.getresponse(buffering=True)
                    except TypeError:
                        # For compatibility with Python 3.3+
                        r = low_conn.getresponse()

                    resp = HTTPResponse.from_httplib(
                        r,
                        pool=conn,
                        connection=low_conn,
                        preload_content=False,
                        decode_content=False
                    )
                except:
                    # If we hit any problems here, clean up the connection.
                    # Then, reraise so that we can handle the actual exception.
                    # (The bare except is deliberate: cleanup must run for
                    # every exception type, and the exception is re-raised.)
                    low_conn.close()
                    raise

        except (ProtocolError, socket.error) as err:
            raise ConnectionError(err, request=request)

        except MaxRetryError as e:
            if isinstance(e.reason, ConnectTimeoutError):
                # TODO: Remove this in 3.0.0: see #2811
                if not isinstance(e.reason, NewConnectionError):
                    raise ConnectTimeout(e, request=request)

            if isinstance(e.reason, ResponseError):
                raise RetryError(e, request=request)

            if isinstance(e.reason, _ProxyError):
                raise ProxyError(e, request=request)

            if isinstance(e.reason, _SSLError):
                # This branch is for urllib3 v1.22 and later.
                raise SSLError(e, request=request)

            raise ConnectionError(e, request=request)

        except ClosedPoolError as e:
            raise ConnectionError(e, request=request)

        except _ProxyError as e:
            # Attach the request like every other translated exception here.
            raise ProxyError(e, request=request)

        except (_SSLError, _HTTPError) as e:
            if isinstance(e, _SSLError):
                # This branch is for urllib3 versions earlier than v1.22
                raise SSLError(e, request=request)
            elif isinstance(e, ReadTimeoutError):
                raise ReadTimeout(e, request=request)
            else:
                raise

        return self.build_response(request, resp)
@ -0,0 +1,158 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
requests.api |
|||
~~~~~~~~~~~~ |
|||
|
|||
This module implements the Requests API. |
|||
|
|||
:copyright: (c) 2012 by Kenneth Reitz. |
|||
:license: Apache2, see LICENSE for more details. |
|||
""" |
|||
|
|||
from . import sessions |
|||
|
|||
|
|||
def request(method, url, **kwargs):
    """Constructs and sends a :class:`Request <Request>`.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary, list of tuples or bytes to send
        in the query string for the :class:`Request`.
    :param data: (optional) Dictionary, list of tuples, bytes, or file-like
        object to send in the body of the :class:`Request`.
    :param json: (optional) A JSON serializable Python object to send in the body of the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
    :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': file-tuple}``) for multipart encoding upload.
        ``file-tuple`` can be a 2-tuple ``('filename', fileobj)``, 3-tuple ``('filename', fileobj, 'content_type')``
        or a 4-tuple ``('filename', fileobj, 'content_type', custom_headers)``, where ``'content-type'`` is a string
        defining the content type of the given file and ``custom_headers`` a dict-like object containing additional headers
        to add for the file.
    :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) How many seconds to wait for the server to send data
        before giving up, as a float, or a :ref:`(connect timeout, read
        timeout) <timeouts>` tuple.
    :type timeout: float or tuple
    :param allow_redirects: (optional) Boolean. Enable/disable GET/OPTIONS/POST/PUT/PATCH/DELETE/HEAD redirection. Defaults to ``True``.
    :type allow_redirects: bool
    :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
    :param verify: (optional) Either a boolean, in which case it controls whether we verify
            the server's TLS certificate, or a string, in which case it must be a path
            to a CA bundle to use. Defaults to ``True``.
    :param stream: (optional) if ``False``, the response content will be immediately downloaded.
    :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response

    Usage::

      >>> import requests
      >>> req = requests.request('GET', 'https://httpbin.org/get')
      >>> req
      <Response [200]>
    """

    # The session is used as a context manager so that it is always closed;
    # this avoids leaving sockets open, which can trigger a ResourceWarning
    # in some cases and look like a memory leak in others.
    with sessions.Session() as one_shot_session:
        response = one_shot_session.request(method=method, url=url, **kwargs)
    return response
|||
|
|||
|
|||
def get(url, params=None, **kwargs):
    r"""Sends a GET request.

    Redirects are followed unless the caller passes ``allow_redirects``.

    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary, list of tuples or bytes to send
        in the query string for the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """

    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = True
    return request('get', url, params=params, **kwargs)
|||
|
|||
|
|||
def options(url, **kwargs):
    r"""Sends an OPTIONS request.

    Redirects are followed unless the caller passes ``allow_redirects``.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """

    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = True
    return request('options', url, **kwargs)
|||
|
|||
|
|||
def head(url, **kwargs):
    r"""Send a HEAD request and return the resulting response.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    # Unlike get()/options(), redirects are NOT followed by default here.
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = False
    return request('head', url, **kwargs)
|||
|
|||
|
|||
def post(url, data=None, json=None, **kwargs):
    r"""Send a POST request and return the resulting response.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, list of tuples, bytes, or file-like
        object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    # Fold the named body arguments into the keyword set handed to request().
    kwargs.update(data=data, json=json)
    return request('post', url, **kwargs)
|||
|
|||
|
|||
def put(url, data=None, **kwargs):
    r"""Send a PUT request and return the resulting response.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, list of tuples, bytes, or file-like
        object to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes. ``json`` is
        not a named parameter of this function; pass it as a keyword argument
        and it is forwarded unchanged.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    kwargs['data'] = data
    return request('put', url, **kwargs)
|||
|
|||
|
|||
def patch(url, data=None, **kwargs):
    r"""Send a PATCH request and return the resulting response.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, list of tuples, bytes, or file-like
        object to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes. ``json`` is
        not a named parameter of this function; pass it as a keyword argument
        and it is forwarded unchanged.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    kwargs['data'] = data
    return request('patch', url, **kwargs)
|||
|
|||
|
|||
def delete(url, **kwargs):
    r"""Send a DELETE request and return the resulting response.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    http_method = 'delete'
    return request(http_method, url, **kwargs)
@ -0,0 +1,305 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
requests.auth |
|||
~~~~~~~~~~~~~ |
|||
|
|||
This module contains the authentication handlers for Requests. |
|||
""" |
|||
|
|||
import os |
|||
import re |
|||
import time |
|||
import hashlib |
|||
import threading |
|||
import warnings |
|||
|
|||
from base64 import b64encode |
|||
|
|||
from .compat import urlparse, str, basestring |
|||
from .cookies import extract_cookies_to_jar |
|||
from ._internal_utils import to_native_string |
|||
from .utils import parse_dict_header |
|||
|
|||
CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' |
|||
CONTENT_TYPE_MULTI_PART = 'multipart/form-data' |
|||
|
|||
|
|||
def _basic_auth_str(username, password): |
|||
"""Returns a Basic Auth string.""" |
|||
|
|||
# "I want us to put a big-ol' comment on top of it that |
|||
# says that this behaviour is dumb but we need to preserve |
|||
# it because people are relying on it." |
|||
# - Lukasa |
|||
# |
|||
# These are here solely to maintain backwards compatibility |
|||
# for things like ints. This will be removed in 3.0.0. |
|||
if not isinstance(username, basestring): |
|||
warnings.warn( |
|||
"Non-string usernames will no longer be supported in Requests " |
|||
"3.0.0. Please convert the object you've passed in ({!r}) to " |
|||
"a string or bytes object in the near future to avoid " |
|||
"problems.".format(username), |
|||
category=DeprecationWarning, |
|||
) |
|||
username = str(username) |
|||
|
|||
if not isinstance(password, basestring): |
|||
warnings.warn( |
|||
"Non-string passwords will no longer be supported in Requests " |
|||
"3.0.0. Please convert the object you've passed in ({!r}) to " |
|||
"a string or bytes object in the near future to avoid " |
|||
"problems.".format(password), |
|||
category=DeprecationWarning, |
|||
) |
|||
password = str(password) |
|||
# -- End Removal -- |
|||
|
|||
if isinstance(username, str): |
|||
username = username.encode('latin1') |
|||
|
|||
if isinstance(password, str): |
|||
password = password.encode('latin1') |
|||
|
|||
authstr = 'Basic ' + to_native_string( |
|||
b64encode(b':'.join((username, password))).strip() |
|||
) |
|||
|
|||
return authstr |
|||
|
|||
|
|||
class AuthBase(object):
    """Common parent of every auth implementation.

    Subclasses override ``__call__``; the implementation here only raises.
    """

    def __call__(self, r):
        message = 'Auth hooks must be callable.'
        raise NotImplementedError(message)
|||
|
|||
|
|||
class HTTPBasicAuth(AuthBase):
    """Auth hook that sets a Basic ``Authorization`` header on a request."""

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __eq__(self, other):
        # Both comparisons are always evaluated; objects lacking the
        # attributes compare against None.
        same_username = self.username == getattr(other, 'username', None)
        same_password = self.password == getattr(other, 'password', None)
        return bool(same_username and same_password)

    def __ne__(self, other):
        return not (self == other)

    def __call__(self, r):
        header_value = _basic_auth_str(self.username, self.password)
        r.headers['Authorization'] = header_value
        return r
|||
|
|||
|
|||
class HTTPProxyAuth(HTTPBasicAuth):
    """Auth hook that sets a Basic ``Proxy-Authorization`` header on a request."""

    def __call__(self, r):
        header_value = _basic_auth_str(self.username, self.password)
        r.headers['Proxy-Authorization'] = header_value
        return r
|||
|
|||
|
|||
class HTTPDigestAuth(AuthBase):
    """Auth hook that answers HTTP Digest challenges for a request.

    Challenge data, the nonce counter, the saved body position and the
    401 counter are all kept on a ``threading.local`` instance.
    """

    def __init__(self, username, password):
        self.username = username
        self.password = password
        # Per-thread storage for everything that changes between requests.
        self._thread_local = threading.local()

    def init_per_thread_state(self):
        """Create the per-thread attributes the first time a thread uses us."""
        state = self._thread_local
        if hasattr(state, 'init'):
            return
        state.init = True
        state.last_nonce = ''
        state.nonce_count = 0
        state.chal = {}
        state.pos = None
        state.num_401_calls = None

    def build_digest_header(self, method, url):
        """Compute the ``Authorization`` header value for the stored challenge.

        Returns ``None`` when the challenge names an algorithm outside
        MD5 / MD5-SESS / SHA / SHA-256 / SHA-512, or a ``qop`` that does not
        include ``auth``.

        :rtype: str
        """
        state = self._thread_local
        challenge = state.chal
        realm = challenge['realm']
        nonce = challenge['nonce']
        qop = challenge.get('qop')
        algorithm = challenge.get('algorithm')
        opaque = challenge.get('opaque')

        _algorithm = 'MD5' if algorithm is None else algorithm.upper()

        # Map the (upper-cased) algorithm token onto a hashlib constructor.
        constructors = {
            'MD5': hashlib.md5,
            'MD5-SESS': hashlib.md5,
            'SHA': hashlib.sha1,
            'SHA-256': hashlib.sha256,
            'SHA-512': hashlib.sha512,
        }
        constructor = constructors.get(_algorithm)
        if constructor is None:
            return None

        def hash_utf8(x):
            # Text is hashed as its UTF-8 encoding; bytes are hashed as-is.
            if isinstance(x, str):
                x = x.encode('utf-8')
            return constructor(x).hexdigest()

        def KD(s, d):
            return hash_utf8("%s:%s" % (s, d))

        # XXX not implemented yet
        entdig = None
        p_parsed = urlparse(url)
        #: path is request-uri defined in RFC 2616 which should not be empty
        path = p_parsed.path or "/"
        if p_parsed.query:
            path += '?' + p_parsed.query

        A1 = '%s:%s:%s' % (self.username, realm, self.password)
        A2 = '%s:%s' % (method, path)
        HA1 = hash_utf8(A1)
        HA2 = hash_utf8(A2)

        # The counter restarts whenever the server hands out a new nonce.
        if nonce == state.last_nonce:
            state.nonce_count += 1
        else:
            state.nonce_count = 1
        ncvalue = '%08x' % state.nonce_count

        # Client nonce: counter + server nonce + wall-clock text + 8 random bytes.
        seed = b''.join([
            str(state.nonce_count).encode('utf-8'),
            nonce.encode('utf-8'),
            time.ctime().encode('utf-8'),
            os.urandom(8),
        ])
        cnonce = hashlib.sha1(seed).hexdigest()[:16]

        if _algorithm == 'MD5-SESS':
            HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce))

        if not qop:
            respdig = KD(HA1, "%s:%s" % (nonce, HA2))
        elif qop == 'auth' or 'auth' in qop.split(','):
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', HA2)
            respdig = KD(HA1, noncebit)
        else:
            # XXX handle auth-int.
            return None

        state.last_nonce = nonce

        # XXX should the partial digests be encoded too?
        parts = [
            'username="%s", realm="%s", nonce="%s", uri="%s", '
            'response="%s"' % (self.username, realm, nonce, path, respdig)
        ]
        if opaque:
            parts.append(', opaque="%s"' % opaque)
        if algorithm:
            parts.append(', algorithm="%s"' % algorithm)
        if entdig:
            parts.append(', digest="%s"' % entdig)
        if qop:
            parts.append(', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce))

        return 'Digest %s' % (''.join(parts))

    def handle_redirect(self, r, **kwargs):
        """Reset num_401_calls counter on redirects."""
        if r.is_redirect:
            self._thread_local.num_401_calls = 1

    def handle_401(self, r, **kwargs):
        """Response hook: retry once with a Digest header when challenged.

        :rtype: requests.Response
        """
        state = self._thread_local

        # If response is not 4xx, do not auth
        # See https://github.com/requests/requests/issues/3772
        if not 400 <= r.status_code < 500:
            state.num_401_calls = 1
            return r

        if state.pos is not None:
            # Rewind the body to the position recorded in __call__ so it can
            # be sent again.
            r.request.body.seek(state.pos)

        s_auth = r.headers.get('www-authenticate', '')
        if not ('digest' in s_auth.lower() and state.num_401_calls < 2):
            state.num_401_calls = 1
            return r

        state.num_401_calls += 1
        pat = re.compile(r'digest ', flags=re.IGNORECASE)
        state.chal = parse_dict_header(pat.sub('', s_auth, count=1))

        # Consume content and release the original connection
        # to allow our new request to reuse the same one.
        r.content
        r.close()
        prep = r.request.copy()
        extract_cookies_to_jar(prep._cookies, r.request, r.raw)
        prep.prepare_cookies(prep._cookies)

        prep.headers['Authorization'] = self.build_digest_header(
            prep.method, prep.url)
        _r = r.connection.send(prep, **kwargs)
        _r.history.append(r)
        _r.request = prep
        return _r

    def __call__(self, r):
        # Initialize per-thread state, if needed
        self.init_per_thread_state()
        state = self._thread_local

        # If we have a saved nonce, skip the 401
        if state.last_nonce:
            r.headers['Authorization'] = self.build_digest_header(r.method, r.url)

        try:
            state.pos = r.body.tell()
        except AttributeError:
            # The body has no tell(); make sure a position left over from an
            # earlier file-like body on this auth object is not reused.
            state.pos = None

        r.register_hook('response', self.handle_401)
        r.register_hook('response', self.handle_redirect)
        state.num_401_calls = 1
        return r

    def __eq__(self, other):
        # Both comparisons are always evaluated; objects lacking the
        # attributes compare against None.
        same_username = self.username == getattr(other, 'username', None)
        same_password = self.password == getattr(other, 'password', None)
        return bool(same_username and same_password)

    def __ne__(self, other):
        return not (self == other)
@ -0,0 +1,18 @@ |
|||
#!/usr/bin/env python |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
requests.certs |
|||
~~~~~~~~~~~~~~ |
|||
|
|||
This module returns the preferred default CA certificate bundle. There is |
|||
only one — the one from the certifi package. |
|||
|
|||
If you are packaging Requests, e.g., for a Linux distribution or a managed |
|||
environment, you can change the definition of where() to return a separately |
|||
packaged CA bundle. |
|||
""" |
|||
from certifi import where |
|||
|
|||
# When executed as a script, print the CA bundle path reported by where().
if __name__ == '__main__':
    bundle_path = where()
    print(bundle_path)
@ -0,0 +1,70 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
requests.compat |
|||
~~~~~~~~~~~~~~~ |
|||
|
|||
This module handles import compatibility issues between Python 2 and |
|||
Python 3. |
|||
""" |
|||
|
|||
import chardet |
|||
|
|||
import sys |
|||
|
|||
# ------- |
|||
# Pythons |
|||
# ------- |
|||
|
|||
# Syntax sugar. |
|||
# Shorthand for the running interpreter's version tuple.
_ver = sys.version_info

#: True when running under Python 2.x.
is_py2 = (_ver.major == 2)

#: True when running under Python 3.x.
is_py3 = (_ver.major == 3)

# Use simplejson when it is installed, the standard json module otherwise.
try:
    import simplejson as json
except ImportError:
    import json

# ---------
# Specifics
# ---------
# Each branch binds the same set of public names so that the rest of the
# package can import them from here regardless of interpreter version.

if is_py2:
    from urllib import (
        quote,
        unquote,
        quote_plus,
        unquote_plus,
        urlencode,
        getproxies,
        proxy_bypass,
        proxy_bypass_environment,
        getproxies_environment,
    )
    from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag
    from urllib2 import parse_http_list
    import cookielib
    from Cookie import Morsel
    from StringIO import StringIO
    from collections import Callable, Mapping, MutableMapping, OrderedDict

    # Order matters: the native str must be captured before str is rebound.
    builtin_str = str
    bytes = str
    str = unicode
    basestring = basestring
    numeric_types = (int, long, float)
    integer_types = (int, long)

elif is_py3:
    from urllib.parse import (
        urlparse,
        urlunparse,
        urljoin,
        urlsplit,
        urlencode,
        quote,
        unquote,
        quote_plus,
        unquote_plus,
        urldefrag,
    )
    from urllib.request import (
        parse_http_list,
        getproxies,
        proxy_bypass,
        proxy_bypass_environment,
        getproxies_environment,
    )
    from http import cookiejar as cookielib
    from http.cookies import Morsel
    from io import StringIO
    from collections import OrderedDict
    from collections.abc import Callable, Mapping, MutableMapping

    builtin_str = str
    str = str
    bytes = bytes
    basestring = (str, bytes)
    numeric_types = (int, float)
    integer_types = (int,)
@ -0,0 +1,549 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
requests.cookies |
|||
~~~~~~~~~~~~~~~~ |
|||
|
|||
Compatibility code to be able to use `cookielib.CookieJar` with requests. |
|||
|
|||
requests.utils imports from here, so be careful with imports. |
|||
""" |
|||
|
|||
import copy |
|||
import time |
|||
import calendar |
|||
|
|||
from ._internal_utils import to_native_string |
|||
from .compat import cookielib, urlparse, urlunparse, Morsel, MutableMapping |
|||
|
|||
try: |
|||
import threading |
|||
except ImportError: |
|||
import dummy_threading as threading |
|||
|
|||
|
|||
class MockRequest(object):
    """Adapter that exposes a `requests.Request` through the `urllib2.Request`
    interface.

    `cookielib.CookieJar` drives its cookie-policy decisions (whether a cookie
    may be set for the request's domain, and so on) through that interface.

    The wrapped request is never modified. Headers the jar wants to add are
    collected separately and can be read back with `get_new_headers()`; the
    module-level `get_cookie_header` is the usual way to consume them.
    """

    def __init__(self, request):
        self._r = request
        self._new_headers = {}
        self.type = urlparse(self._r.url).scheme

    def get_type(self):
        return self.type

    def get_host(self):
        return urlparse(self._r.url).netloc

    def get_origin_req_host(self):
        return self.get_host()

    def get_full_url(self):
        # An explicit, non-empty Host header overrides the network location
        # of the request URL; otherwise the URL is returned untouched.
        if self._r.headers.get('Host'):
            host = to_native_string(self._r.headers['Host'], encoding='utf-8')
            parsed = urlparse(self._r.url)
            components = (
                parsed.scheme,
                host,
                parsed.path,
                parsed.params,
                parsed.query,
                parsed.fragment,
            )
            return urlunparse(components)
        return self._r.url

    def is_unverifiable(self):
        return True

    def has_header(self, name):
        if name in self._r.headers:
            return True
        return name in self._new_headers

    def get_header(self, name, default=None):
        # Headers on the wrapped request win over headers added by the jar.
        fallback = self._new_headers.get(name, default)
        return self._r.headers.get(name, fallback)

    def add_header(self, key, val):
        """cookielib has no legitimate use for this method; add it back if you find one."""
        raise NotImplementedError("Cookie headers should be added with add_unredirected_header()")

    def add_unredirected_header(self, name, value):
        self._new_headers[name] = value

    def get_new_headers(self):
        return self._new_headers

    @property
    def unverifiable(self):
        return self.is_unverifiable()

    @property
    def origin_req_host(self):
        return self.get_origin_req_host()

    @property
    def host(self):
        return self.get_host()
|||
|
|||
|
|||
class MockResponse(object):
    """Wraps a `httplib.HTTPMessage` to mimic a `urllib.addinfourl`.

    In other words: expose the parsed HTTP headers of a server response the
    way `cookielib` expects to see them.
    """

    def __init__(self, headers):
        """Make a MockResponse for `cookielib` to read.

        :param headers: a httplib.HTTPMessage or analogous carrying the headers
        """
        self._headers = headers

    def info(self):
        """Return the wrapped headers object."""
        return self._headers

    def getheaders(self, name):
        """Return whatever the wrapped object's ``getheaders(name)`` returns.

        The result used to be computed and then dropped, so callers always
        received ``None``.
        """
        return self._headers.getheaders(name)
|||
|
|||
|
|||
def extract_cookies_to_jar(jar, request, response):
    """Copy the cookies carried by a response into a CookieJar.

    Does nothing when ``response`` has no truthy ``_original_response``.

    :param jar: cookielib.CookieJar (not necessarily a RequestsCookieJar)
    :param request: our own requests.Request object
    :param response: urllib3.HTTPResponse object
    """
    # _original_response is the wrapped httplib.HTTPResponse object.
    original = getattr(response, '_original_response', None)
    if not original:
        return

    mock_request = MockRequest(request)
    # The HTTPMessage holding the headers goes into the response mock.
    mock_response = MockResponse(original.msg)
    jar.extract_cookies(mock_response, mock_request)
|||
|
|||
|
|||
def get_cookie_header(jar, request):
    """Ask ``jar`` which ``Cookie`` header it would send with ``request``.

    :return: the header value, or None when the jar added no Cookie header.
    :rtype: str
    """
    mock_request = MockRequest(request)
    jar.add_cookie_header(mock_request)
    added_headers = mock_request.get_new_headers()
    return added_headers.get('Cookie')
|||
|
|||
|
|||
def remove_cookie_by_name(cookiejar, name, domain=None, path=None):
    """Remove every cookie called ``name`` from ``cookiejar``.

    ``domain`` and ``path`` narrow the match when given; left as None they
    match any domain / any path.

    Wraps CookieJar.clear(), is O(n).
    """
    # Collect the matches first so the jar is not modified while iterating.
    doomed = [
        (cookie.domain, cookie.path, cookie.name)
        for cookie in cookiejar
        if cookie.name == name
        and (domain is None or domain == cookie.domain)
        and (path is None or path == cookie.path)
    ]

    for cookie_domain, cookie_path, cookie_name in doomed:
        cookiejar.clear(cookie_domain, cookie_path, cookie_name)
|||
|
|||
|
|||
class CookieConflictError(RuntimeError):
    """More than one cookie in the jar satisfies the lookup criteria.

    Call .get and .set with explicit domain and path arguments to say which
    cookie is meant.
    """
|||
|
|||
|
|||
class RequestsCookieJar(cookielib.CookieJar, MutableMapping):
    """Compatibility class; is a cookielib.CookieJar, but exposes a dict
    interface.

    This is the CookieJar we create by default for requests and sessions that
    don't specify one, since some clients may expect response.cookies and
    session.cookies to support dict operations.

    Requests does not use the dict interface internally; it's just for
    compatibility with external client code. All requests code should work
    out of the box with externally provided instances of ``CookieJar``, e.g.
    ``LWPCookieJar`` and ``FileCookieJar``.

    Unlike a regular CookieJar, this class is pickleable.

    .. warning:: dictionary operations that are normally O(1) may be O(n).
    """

    def get(self, name, default=None, domain=None, path=None):
        """Dict-like get() that also supports optional domain and path args in
        order to resolve naming collisions from using one cookie jar over
        multiple domains.

        Returns ``default`` when no cookie matches; a
        :class:`CookieConflictError` from the lookup is not caught here.

        .. warning:: operation is O(n), not O(1).
        """
        try:
            return self._find_no_duplicates(name, domain, path)
        except KeyError:
            return default

    def set(self, name, value, **kwargs):
        """Dict-like set() that also supports optional domain and path args in
        order to resolve naming collisions from using one cookie jar over
        multiple domains.

        :return: the cookie that was stored, or None when ``value`` is None
            (in which case matching cookies are removed instead).
        """
        # support client code that unsets cookies by assignment of a None value:
        if value is None:
            remove_cookie_by_name(self, name, domain=kwargs.get('domain'), path=kwargs.get('path'))
            return

        if isinstance(value, Morsel):
            c = morsel_to_cookie(value)
        else:
            c = create_cookie(name, value, **kwargs)
        self.set_cookie(c)
        return c

    def iterkeys(self):
        """Dict-like iterkeys() that returns an iterator of names of cookies
        from the jar.

        .. seealso:: itervalues() and iteritems().
        """
        for cookie in iter(self):
            yield cookie.name

    def keys(self):
        """Dict-like keys() that returns a list of names of cookies from the
        jar.

        .. seealso:: values() and items().
        """
        return list(self.iterkeys())

    def itervalues(self):
        """Dict-like itervalues() that returns an iterator of values of cookies
        from the jar.

        .. seealso:: iterkeys() and iteritems().
        """
        for cookie in iter(self):
            yield cookie.value

    def values(self):
        """Dict-like values() that returns a list of values of cookies from the
        jar.

        .. seealso:: keys() and items().
        """
        return list(self.itervalues())

    def iteritems(self):
        """Dict-like iteritems() that returns an iterator of name-value tuples
        from the jar.

        .. seealso:: iterkeys() and itervalues().
        """
        for cookie in iter(self):
            yield cookie.name, cookie.value

    def items(self):
        """Dict-like items() that returns a list of name-value tuples from the
        jar. Allows client-code to call ``dict(RequestsCookieJar)`` and get a
        vanilla python dict of key value pairs.

        .. seealso:: keys() and values().
        """
        return list(self.iteritems())

    def list_domains(self):
        """Utility method to list all the domains in the jar."""
        domains = []
        for cookie in iter(self):
            if cookie.domain not in domains:
                domains.append(cookie.domain)
        return domains

    def list_paths(self):
        """Utility method to list all the paths in the jar."""
        paths = []
        for cookie in iter(self):
            if cookie.path not in paths:
                paths.append(cookie.path)
        return paths

    def multiple_domains(self):
        """Returns True if there are multiple domains in the jar.
        Returns False otherwise.

        Cookies whose domain is None are ignored. The previous implementation
        returned True as soon as one domain was seen twice, which reported
        the opposite of what this docstring promises.

        :rtype: bool
        """
        distinct_domains = set()
        for cookie in iter(self):
            if cookie.domain is None:
                continue
            distinct_domains.add(cookie.domain)
            if len(distinct_domains) > 1:
                return True
        return False  # zero or one distinct domain in the jar

    def get_dict(self, domain=None, path=None):
        """Takes as an argument an optional domain and path and returns a plain
        old Python dict of name-value pairs of cookies that meet the
        requirements.

        :rtype: dict
        """
        dictionary = {}
        for cookie in iter(self):
            if (
                (domain is None or cookie.domain == domain) and
                (path is None or cookie.path == path)
            ):
                dictionary[cookie.name] = cookie.value
        return dictionary

    def __contains__(self, name):
        # Several cookies sharing the name still means the name is present.
        try:
            return super(RequestsCookieJar, self).__contains__(name)
        except CookieConflictError:
            return True

    def __getitem__(self, name):
        """Dict-like __getitem__() for compatibility with client code. Throws
        exception if there are more than one cookie with name. In that case,
        use the more explicit get() method instead.

        .. warning:: operation is O(n), not O(1).
        """
        return self._find_no_duplicates(name)

    def __setitem__(self, name, value):
        """Dict-like __setitem__ for compatibility with client code.

        Delegates to :meth:`set` without domain or path arguments, so
        assigning ``None`` removes the cookies with that name.
        """
        self.set(name, value)

    def __delitem__(self, name):
        """Deletes a cookie given a name. Wraps the module-level
        ``remove_cookie_by_name()``.
        """
        remove_cookie_by_name(self, name)

    def set_cookie(self, cookie, *args, **kwargs):
        # For values wrapped in double quotes, drop backslash-escaped quotes
        # before handing the cookie to the base class.
        if hasattr(cookie.value, 'startswith') and cookie.value.startswith('"') and cookie.value.endswith('"'):
            cookie.value = cookie.value.replace('\\"', '')
        return super(RequestsCookieJar, self).set_cookie(cookie, *args, **kwargs)

    def update(self, other):
        """Updates this jar with cookies from another CookieJar or dict-like"""
        if isinstance(other, cookielib.CookieJar):
            # Copy each cookie so the two jars do not share cookie objects.
            for cookie in other:
                self.set_cookie(copy.copy(cookie))
        else:
            super(RequestsCookieJar, self).update(other)

    def _find(self, name, domain=None, path=None):
        """Requests uses this method internally to get cookie values.

        If there are conflicting cookies, _find arbitrarily chooses one.
        See _find_no_duplicates if you want an exception thrown if there are
        conflicting cookies.

        :param name: a string containing name of cookie
        :param domain: (optional) string containing domain of cookie
        :param path: (optional) string containing path of cookie
        :return: cookie.value
        """
        for cookie in iter(self):
            if cookie.name == name:
                if domain is None or cookie.domain == domain:
                    if path is None or cookie.path == path:
                        return cookie.value

        raise KeyError('name=%r, domain=%r, path=%r' % (name, domain, path))

    def _find_no_duplicates(self, name, domain=None, path=None):
        """Both ``__getitem__`` and ``get`` call this function.

        :param name: a string containing name of cookie
        :param domain: (optional) string containing domain of cookie
        :param path: (optional) string containing path of cookie
        :raises KeyError: if cookie is not found
        :raises CookieConflictError: if there are multiple cookies
            that match name and optionally domain and path
        :return: cookie.value
        """
        toReturn = None
        for cookie in iter(self):
            if cookie.name == name:
                if domain is None or cookie.domain == domain:
                    if path is None or cookie.path == path:
                        if toReturn is not None:  # if there are multiple cookies that meet passed in criteria
                            raise CookieConflictError('There are multiple cookies with name, %r' % (name))
                        toReturn = cookie.value  # we will eventually return this as long as no cookie conflict

        # Compare against None rather than truthiness: a cookie whose value
        # is the empty string is still a match and must not become KeyError.
        if toReturn is not None:
            return toReturn
        raise KeyError('name=%r, domain=%r, path=%r' % (name, domain, path))

    def __getstate__(self):
        """Unlike a normal CookieJar, this class is pickleable."""
        state = self.__dict__.copy()
        # remove the unpickleable RLock object
        state.pop('_cookies_lock')
        return state

    def __setstate__(self, state):
        """Unlike a normal CookieJar, this class is pickleable."""
        self.__dict__.update(state)
        # __getstate__ dropped the lock, so a fresh one is created here.
        if '_cookies_lock' not in self.__dict__:
            self._cookies_lock = threading.RLock()

    def copy(self):
        """Return a copy of this RequestsCookieJar."""
        new_cj = RequestsCookieJar()
        new_cj.set_policy(self.get_policy())
        new_cj.update(self)
        return new_cj

    def get_policy(self):
        """Return the CookiePolicy instance used."""
        return self._policy
|||
|
|||
|
|||
def _copy_cookie_jar(jar): |
|||
if jar is None: |
|||
return None |
|||
|
|||
if hasattr(jar, 'copy'): |
|||
# We're dealing with an instance of RequestsCookieJar |
|||
return jar.copy() |
|||
# We're dealing with a generic CookieJar instance |
|||
new_jar = copy.copy(jar) |
|||
new_jar.clear() |
|||
for cookie in jar: |
|||
new_jar.set_cookie(copy.copy(cookie)) |
|||
return new_jar |
|||
|
|||
|
|||
def create_cookie(name, value, **kwargs):
    """Build a ``cookielib.Cookie`` from a name, a value and optional overrides.

    Without overrides the cookie gets the empty domain ``''`` and path ``'/'``
    and is therefore sent on every request (sometimes called a "supercookie").

    :raises TypeError: if ``kwargs`` contains a key that is not one of the
        cookie fields listed below.
    """
    result = dict(
        version=0,
        name=name,
        value=value,
        port=None,
        domain='',
        path='/',
        secure=False,
        expires=None,
        discard=True,
        comment=None,
        comment_url=None,
        rest={'HttpOnly': None},
        rfc2109=False,
    )

    badargs = set(kwargs) - set(result)
    if badargs:
        raise TypeError(
            'create_cookie() got unexpected keyword arguments: %s' % list(badargs))

    result.update(kwargs)

    # The *_specified flags are derived from the final field values.
    domain = result['domain']
    result.update(
        port_specified=bool(result['port']),
        domain_specified=bool(domain),
        domain_initial_dot=domain.startswith('.'),
        path_specified=bool(result['path']),
    )

    return cookielib.Cookie(**result)
|||
|
|||
|
|||
def morsel_to_cookie(morsel):
    """Turn a Morsel into a Cookie holding that morsel's single key/value pair.

    ``max-age`` takes precedence over ``expires`` when both are set.

    :raises TypeError: if ``max-age`` is set but is not an integer.
    """
    expires = None
    max_age = morsel['max-age']
    if max_age:
        try:
            # Relative lifetime -> absolute epoch seconds.
            expires = int(time.time() + int(max_age))
        except ValueError:
            raise TypeError('max-age: %s must be integer' % max_age)
    elif morsel['expires']:
        time_template = '%a, %d-%b-%Y %H:%M:%S GMT'
        parsed_expiry = time.strptime(morsel['expires'], time_template)
        expires = calendar.timegm(parsed_expiry)

    cookie_fields = dict(
        comment=morsel['comment'],
        comment_url=bool(morsel['comment']),
        discard=False,
        domain=morsel['domain'],
        expires=expires,
        name=morsel.key,
        path=morsel['path'],
        port=None,
        rest={'HttpOnly': morsel['httponly']},
        rfc2109=False,
        secure=bool(morsel['secure']),
        value=morsel.value,
        version=morsel['version'] or 0,
    )
    return create_cookie(**cookie_fields)
|||
|
|||
|
|||
def cookiejar_from_dict(cookie_dict, cookiejar=None, overwrite=True):
    """Returns a CookieJar from a key/value dictionary.

    :param cookie_dict: Dict of key/values to insert into CookieJar.
        ``None`` is accepted and leaves the jar untouched.
    :param cookiejar: (optional) A cookiejar to add the cookies to. A new
        ``RequestsCookieJar`` is created when omitted.
    :param overwrite: (optional) If False, will not replace cookies
        already in the jar with new ones.
    :rtype: CookieJar
    """
    if cookiejar is None:
        cookiejar = RequestsCookieJar()

    if cookie_dict is not None:
        # The names already present only matter when we must not overwrite,
        # so skip the jar scan otherwise. A set keeps each membership test
        # O(1) instead of rescanning a list for every incoming name.
        if overwrite:
            names_from_jar = set()
        else:
            names_from_jar = {cookie.name for cookie in cookiejar}

        for name in cookie_dict:
            if overwrite or (name not in names_from_jar):
                cookiejar.set_cookie(create_cookie(name, cookie_dict[name]))

    return cookiejar
|||
|
|||
|
|||
def merge_cookies(cookiejar, cookies):
    """Merge ``cookies`` into ``cookiejar`` and hand back the resulting jar.

    :param cookiejar: CookieJar object that receives the cookies.
    :param cookies: Dictionary or CookieJar object to be added. Any other
        type is ignored and the jar is returned unchanged.
    :raises ValueError: if ``cookiejar`` is not a CookieJar.
    :rtype: CookieJar
    """
    if not isinstance(cookiejar, cookielib.CookieJar):
        raise ValueError('You can only merge into CookieJar')

    if isinstance(cookies, dict):
        # Dict entries never replace cookies the jar already holds.
        return cookiejar_from_dict(
            cookies, cookiejar=cookiejar, overwrite=False)

    if isinstance(cookies, cookielib.CookieJar):
        try:
            cookiejar.update(cookies)
        except AttributeError:
            # Jars without an ``update`` method: copy cookie by cookie.
            for source_cookie in cookies:
                cookiejar.set_cookie(source_cookie)

    return cookiejar
@ -0,0 +1,126 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
requests.exceptions |
|||
~~~~~~~~~~~~~~~~~~~ |
|||
|
|||
This module contains the set of Requests' exceptions. |
|||
""" |
|||
from urllib3.exceptions import HTTPError as BaseHTTPError |
|||
|
|||
|
|||
class RequestException(IOError):
    """Base error raised when something ambiguous goes wrong while handling
    a request.
    """

    def __init__(self, *args, **kwargs):
        """Store the optional ``request`` and ``response`` keyword arguments.

        Both keywords are removed from ``kwargs`` before the remaining
        arguments are forwarded to ``IOError``. When no request is given but
        the response carries one, that request is adopted.
        """
        self.response = kwargs.pop('response', None)
        self.request = kwargs.pop('request', None)

        can_borrow_request = (
            self.response is not None
            and not self.request
            and hasattr(self.response, 'request')
        )
        if can_borrow_request:
            self.request = self.response.request

        super(RequestException, self).__init__(*args, **kwargs)
|||
|
|||
|
|||
class HTTPError(RequestException):
    """An HTTP error occurred.

    Derives from :exc:`RequestException`, so it accepts the same optional
    ``request`` and ``response`` keyword arguments.
    """
|||
|
|||
|
|||
class ConnectionError(RequestException):
    """A Connection error occurred.

    Note: this class derives from :exc:`RequestException`; it is distinct
    from Python 3's built-in exception of the same name.
    """
|||
|
|||
|
|||
class ProxyError(ConnectionError):
    """A proxy error occurred.

    Subclass of this module's :exc:`ConnectionError`.
    """
|||
|
|||
|
|||
class SSLError(ConnectionError):
    """An SSL error occurred.

    Subclass of this module's :exc:`ConnectionError`.
    """
|||
|
|||
|
|||
class Timeout(RequestException):
    """The request timed out.

    Catching this error will catch both
    :exc:`~requests.exceptions.ConnectTimeout` and
    :exc:`~requests.exceptions.ReadTimeout` errors, since both derive from
    this class.
    """
|||
|
|||
|
|||
class ConnectTimeout(ConnectionError, Timeout):
    """The request timed out while trying to connect to the remote server.

    Requests that produced this error are safe to retry.

    Inherits from both :exc:`ConnectionError` and :exc:`Timeout`, so a
    handler for either one catches it.
    """
|||
|
|||
|
|||
class ReadTimeout(Timeout):
    """The server did not send any data in the allotted amount of time.

    Subclass of :exc:`Timeout`.
    """
|||
|
|||
|
|||
class URLRequired(RequestException):
    """A valid URL is required to make a request.

    Subclass of :exc:`RequestException`.
    """
|||
|
|||
|
|||
class TooManyRedirects(RequestException):
    """Too many redirects.

    Subclass of :exc:`RequestException`.
    """
|||
|
|||
|
|||
class MissingSchema(RequestException, ValueError):
    """The URL scheme (e.g. http or https) is missing.

    Also a :exc:`ValueError`, so ``except ValueError`` catches it.
    """
|||
|
|||
|
|||
class InvalidSchema(RequestException, ValueError):
    """See defaults.py for valid schemas.

    NOTE(review): no ``defaults.py`` is visible from this module; confirm
    that the reference above is still current.

    Also a :exc:`ValueError`, so ``except ValueError`` catches it.
    """
|||
|
|||
|
|||
class InvalidURL(RequestException, ValueError):
    """The URL provided was somehow invalid.

    Also a :exc:`ValueError`, so ``except ValueError`` catches it.
    """
|||
|
|||
|
|||
class InvalidHeader(RequestException, ValueError):
    """The header value provided was somehow invalid.

    Also a :exc:`ValueError`, so ``except ValueError`` catches it.
    """
|||
|
|||
|
|||
class InvalidProxyURL(InvalidURL):
    """The proxy URL provided is invalid.

    Subclass of :exc:`InvalidURL` (and therefore also a :exc:`ValueError`).
    """
|||
|
|||
|
|||
class ChunkedEncodingError(RequestException):
    """The server declared chunked encoding but sent an invalid chunk.

    Subclass of :exc:`RequestException`.
    """
|||
|
|||
|
|||
class ContentDecodingError(RequestException, BaseHTTPError):
    """Failed to decode response content.

    Also derives from urllib3's ``HTTPError`` (imported in this module as
    ``BaseHTTPError``), so handlers for that type catch it too.
    """
|||
|
|||
|
|||
class StreamConsumedError(RequestException, TypeError):
    """The content for this response was already consumed.

    Also a :exc:`TypeError`, so ``except TypeError`` catches it.
    """
|||
|
|||
|
|||
class RetryError(RequestException):
    """Custom retries logic failed.

    Subclass of :exc:`RequestException`.
    """
|||
|
|||
|
|||
class UnrewindableBodyError(RequestException):
    """Requests encountered an error when trying to rewind a body.

    Subclass of :exc:`RequestException`.
    """
|||
|
|||
# Warnings |
|||
|
|||
|
|||
class RequestsWarning(Warning):
    """Base warning for Requests.

    The other warning classes in this module derive from it.
    """
    pass
|||
|
|||
|
|||
class FileModeWarning(RequestsWarning, DeprecationWarning):
    """A file was opened in text mode, but Requests determined its binary length.

    Also a :exc:`DeprecationWarning`, so filters for that category apply.
    """
    pass
|||
|
|||
|
|||
class RequestsDependencyWarning(RequestsWarning):
    """An imported dependency doesn't match the expected version range.

    Subclass of :exc:`RequestsWarning`.
    """
    pass
@ -0,0 +1,119 @@ |
|||
"""Module containing bug report helper(s).""" |
|||
from __future__ import print_function |
|||
|
|||
import json |
|||
import platform |
|||
import sys |
|||
import ssl |
|||
|
|||
import idna |
|||
import urllib3 |
|||
import chardet |
|||
|
|||
from . import __version__ as requests_version |
|||
|
|||
try: |
|||
from urllib3.contrib import pyopenssl |
|||
except ImportError: |
|||
pyopenssl = None |
|||
OpenSSL = None |
|||
cryptography = None |
|||
else: |
|||
import OpenSSL |
|||
import cryptography |
|||
|
|||
|
|||
def _implementation(): |
|||
"""Return a dict with the Python implementation and version. |
|||
|
|||
Provide both the name and the version of the Python implementation |
|||
currently running. For example, on CPython 2.7.5 it will return |
|||
{'name': 'CPython', 'version': '2.7.5'}. |
|||
|
|||
This function works best on CPython and PyPy: in particular, it probably |
|||
doesn't work for Jython or IronPython. Future investigation should be done |
|||
to work out the correct shape of the code for those platforms. |
|||
""" |
|||
implementation = platform.python_implementation() |
|||
|
|||
if implementation == 'CPython': |
|||
implementation_version = platform.python_version() |
|||
elif implementation == 'PyPy': |
|||
implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major, |
|||
sys.pypy_version_info.minor, |
|||
sys.pypy_version_info.micro) |
|||
if sys.pypy_version_info.releaselevel != 'final': |
|||
implementation_version = ''.join([ |
|||
implementation_version, sys.pypy_version_info.releaselevel |
|||
]) |
|||
elif implementation == 'Jython': |
|||
implementation_version = platform.python_version() # Complete Guess |
|||
elif implementation == 'IronPython': |
|||
implementation_version = platform.python_version() # Complete Guess |
|||
else: |
|||
implementation_version = 'Unknown' |
|||
|
|||
return {'name': implementation, 'version': implementation_version} |
|||
|
|||
|
|||
def info():
    """Collect environment details for a bug report.

    Returns a dict describing the platform, the Python implementation, the
    system SSL library, and the versions of requests and of the imported
    dependencies (urllib3, chardet, idna, and pyOpenSSL/cryptography when
    available).
    """
    try:
        platform_info = {
            'system': platform.system(),
            'release': platform.release(),
        }
    except IOError:
        platform_info = {'system': 'Unknown', 'release': 'Unknown'}

    if OpenSSL:
        pyopenssl_info = {
            'version': OpenSSL.__version__,
            'openssl_version': '%x' % OpenSSL.SSL.OPENSSL_VERSION_NUMBER,
        }
    else:
        pyopenssl_info = {'version': None, 'openssl_version': ''}

    system_ssl = ssl.OPENSSL_VERSION_NUMBER
    if system_ssl is not None:
        system_ssl_version = '%x' % system_ssl
    else:
        system_ssl_version = ''

    return {
        'platform': platform_info,
        'implementation': _implementation(),
        'system_ssl': {'version': system_ssl_version},
        'using_pyopenssl': pyopenssl is not None,
        'pyOpenSSL': pyopenssl_info,
        'urllib3': {'version': urllib3.__version__},
        'chardet': {'version': chardet.__version__},
        # ``cryptography`` may be None; getattr then yields the '' default.
        'cryptography': {'version': getattr(cryptography, '__version__', '')},
        'idna': {'version': getattr(idna, '__version__', '')},
        'requests': {'version': requests_version},
    }
|||
|
|||
|
|||
def main():
    """Print the bug-report information as indented, key-sorted JSON."""
    report = json.dumps(info(), sort_keys=True, indent=2)
    print(report)
|||
|
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,34 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
requests.hooks |
|||
~~~~~~~~~~~~~~ |
|||
|
|||
This module provides the capabilities for the Requests hooks system. |
|||
|
|||
Available hooks: |
|||
|
|||
``response``: |
|||
The response generated from a Request. |
|||
""" |
|||
HOOKS = ['response'] |
|||
|
|||
|
|||
def default_hooks():
    """Return a new dict mapping every event in ``HOOKS`` to an empty list."""
    return dict((event, []) for event in HOOKS)
|||
|
|||
# TODO: response is the only one |
|||
|
|||
|
|||
def dispatch_hook(key, hooks, hook_data, **kwargs):
    """Run the hooks registered under ``key`` against ``hook_data``.

    :param key: Event name to look up in ``hooks``.
    :param hooks: Mapping of event name to a callable or an iterable of
        callables; may be ``None`` or empty.
    :param hook_data: Value passed to each hook in turn.
    :param kwargs: Extra keyword arguments forwarded to every hook.
    :return: ``hook_data`` as last replaced by a hook; a hook returning
        ``None`` leaves the current value in place.
    """
    registered = (hooks or {}).get(key)
    if not registered:
        return hook_data

    # A single callable is treated as a one-element list.
    if hasattr(registered, '__call__'):
        registered = [registered]

    for callback in registered:
        outcome = callback(hook_data, **kwargs)
        if outcome is not None:
            hook_data = outcome
    return hook_data
@ -0,0 +1,953 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
requests.models |
|||
~~~~~~~~~~~~~~~ |
|||
|
|||
This module contains the primary objects that power Requests. |
|||
""" |
|||
|
|||
import datetime |
|||
import sys |
|||
|
|||
# Import encoding now, to avoid implicit import later. |
|||
# Implicit import within threads may cause LookupError when standard library is in a ZIP, |
|||
# such as in Embedded Python. See https://github.com/requests/requests/issues/3578. |
|||
import encodings.idna |
|||
|
|||
from urllib3.fields import RequestField |
|||
from urllib3.filepost import encode_multipart_formdata |
|||
from urllib3.util import parse_url |
|||
from urllib3.exceptions import ( |
|||
DecodeError, ReadTimeoutError, ProtocolError, LocationParseError) |
|||
|
|||
from io import UnsupportedOperation |
|||
from .hooks import default_hooks |
|||
from .structures import CaseInsensitiveDict |
|||
|
|||
from .auth import HTTPBasicAuth |
|||
from .cookies import cookiejar_from_dict, get_cookie_header, _copy_cookie_jar |
|||
from .exceptions import ( |
|||
HTTPError, MissingSchema, InvalidURL, ChunkedEncodingError, |
|||
ContentDecodingError, ConnectionError, StreamConsumedError) |
|||
from ._internal_utils import to_native_string, unicode_is_ascii |
|||
from .utils import ( |
|||
guess_filename, get_auth_from_url, requote_uri, |
|||
stream_decode_response_unicode, to_key_val_list, parse_header_links, |
|||
iter_slices, guess_json_utf, super_len, check_header_validity) |
|||
from .compat import ( |
|||
Callable, Mapping, |
|||
cookielib, urlunparse, urlsplit, urlencode, str, bytes, |
|||
is_py2, chardet, builtin_str, basestring) |
|||
from .compat import json as complexjson |
|||
from .status_codes import codes |
|||
|
|||
#: The set of HTTP status codes that indicate an automatically |
|||
#: processable redirect. |
|||
REDIRECT_STATI = ( |
|||
codes.moved, # 301 |
|||
codes.found, # 302 |
|||
codes.other, # 303 |
|||
codes.temporary_redirect, # 307 |
|||
codes.permanent_redirect, # 308 |
|||
) |
|||
|
|||
DEFAULT_REDIRECT_LIMIT = 30 |
|||
CONTENT_CHUNK_SIZE = 10 * 1024 |
|||
ITER_CHUNK_SIZE = 512 |
|||
|
|||
|
|||
class RequestEncodingMixin(object):
    """Mixin providing URL-path extraction and body-encoding helpers."""

    @property
    def path_url(self):
        """Return the path plus query string of ``self.url``.

        An empty path is reported as ``'/'``; a non-empty query is appended
        after a ``'?'``.
        """
        parts = urlsplit(self.url)

        pieces = [parts.path or '/']
        if parts.query:
            pieces.extend(['?', parts.query])

        return ''.join(pieces)

    @staticmethod
    def _encode_params(data):
        """Form-encode ``data`` when it is a dict or a list of 2-tuples.

        Strings, bytes, file-like objects (anything with ``read``) and
        non-iterables are returned untouched. Order is retained if data is
        a list of 2-tuples but arbitrary if parameters are supplied as a
        dict. Values that are ``None`` are dropped.
        """
        if isinstance(data, (str, bytes)) or hasattr(data, 'read'):
            return data
        if not hasattr(data, '__iter__'):
            return data

        pairs = []
        for key, values in to_key_val_list(data):
            # A scalar value is handled like a one-element sequence.
            if isinstance(values, basestring) or not hasattr(values, '__iter__'):
                values = [values]
            for value in values:
                if value is None:
                    continue
                encoded_key = key.encode('utf-8') if isinstance(key, str) else key
                encoded_value = value.encode('utf-8') if isinstance(value, str) else value
                pairs.append((encoded_key, encoded_value))
        return urlencode(pairs, doseq=True)

    @staticmethod
    def _encode_files(files, data):
        """Build the body for a multipart/form-data request.

        ``files`` and ``data`` may each be a dict or a list of tuples. Order
        is retained for lists of tuples but arbitrary for dicts.
        A file entry's value may be a bare file object/content, or a
        2-tuple (filename, fileobj), 3-tuple (filename, fileobj, contentype)
        or 4-tuple (filename, fileobj, contentype, custom_headers).

        :raises ValueError: if ``files`` is empty or ``data`` is a string.
        :return: ``(body, content_type)``.
        """
        if not files:
            raise ValueError("Files must be provided.")
        if isinstance(data, basestring):
            raise ValueError("Data must not be a string.")

        parts = []
        form_fields = to_key_val_list(data or {})
        file_fields = to_key_val_list(files or {})

        # Plain form fields come first.
        for field, values in form_fields:
            if isinstance(values, basestring) or not hasattr(values, '__iter__'):
                values = [values]
            for value in values:
                if value is None:
                    continue
                # Don't call str() on bytestrings: in Py3 it all goes wrong.
                if not isinstance(value, bytes):
                    value = str(value)

                field_name = field.decode('utf-8') if isinstance(field, bytes) else field
                field_value = value.encode('utf-8') if isinstance(value, str) else value
                parts.append((field_name, field_value))

        # Then one multipart section per file entry.
        for (name, spec) in file_fields:
            content_type = None
            headers = None
            if isinstance(spec, (tuple, list)):
                # Explicit filename (and optionally type / headers).
                if len(spec) == 2:
                    filename, fileobj = spec
                elif len(spec) == 3:
                    filename, fileobj, content_type = spec
                else:
                    filename, fileobj, content_type, headers = spec
            else:
                filename = guess_filename(spec) or name
                fileobj = spec

            if fileobj is None:
                # Entries without content are skipped entirely.
                continue

            is_raw = isinstance(fileobj, (str, bytes, bytearray))
            if not is_raw and hasattr(fileobj, 'read'):
                payload = fileobj.read()
            else:
                payload = fileobj

            section = RequestField(name=name, data=payload, filename=filename, headers=headers)
            section.make_multipart(content_type=content_type)
            parts.append(section)

        body, body_content_type = encode_multipart_formdata(parts)

        return body, body_content_type
|||
|
|||
|
|||
class RequestHooksMixin(object):
    """Mixin that manages the per-event hook lists stored in ``self.hooks``."""

    def register_hook(self, event, hook):
        """Attach ``hook`` to ``event``.

        ``hook`` may be a single callable or an iterable of hooks; in the
        latter case only the callable members are added.

        :raises ValueError: if ``event`` is not a key of ``self.hooks``.
        """
        if event not in self.hooks:
            raise ValueError('Unsupported event specified, with event name "%s"' % (event))

        if isinstance(hook, Callable):
            self.hooks[event].append(hook)
            return

        if hasattr(hook, '__iter__'):
            self.hooks[event].extend(
                candidate for candidate in hook if isinstance(candidate, Callable))

    def deregister_hook(self, event, hook):
        """Remove a previously registered hook.

        Returns True if the hook existed, False if not.
        """
        try:
            self.hooks[event].remove(hook)
        except ValueError:
            return False
        return True
|||
|
|||
|
|||
class Request(RequestHooksMixin):
    """A user-created :class:`Request <Request>` object.

    It holds the raw ingredients of a request and is turned into a
    :class:`PreparedRequest <PreparedRequest>`, which is sent to the server.

    :param method: HTTP method to use.
    :param url: URL to send.
    :param headers: dictionary of headers to send.
    :param files: dictionary of {filename: fileobject} files to multipart upload.
    :param data: the body to attach to the request. If a dictionary or
        list of tuples ``[(key, value)]`` is provided, form-encoding will
        take place.
    :param json: json for the body to attach to the request (if files or data is not specified).
    :param params: URL parameters to append to the URL. If a dictionary or
        list of tuples ``[(key, value)]`` is provided, form-encoding will
        take place.
    :param auth: Auth handler or (user, pass) tuple.
    :param cookies: dictionary or CookieJar of cookies to attach to this request.
    :param hooks: dictionary of callback hooks, for internal usage.

    Usage::

      >>> import requests
      >>> req = requests.Request('GET', 'https://httpbin.org/get')
      >>> req.prepare()
      <PreparedRequest [GET]>
    """

    def __init__(self,
                 method=None, url=None, headers=None, files=None, data=None,
                 params=None, auth=None, cookies=None, hooks=None, json=None):

        # Swap omitted containers for fresh empty ones: lists for data and
        # files, dicts for headers, params and hooks.
        if data is None:
            data = []
        if files is None:
            files = []
        if headers is None:
            headers = {}
        if params is None:
            params = {}
        if hooks is None:
            hooks = {}

        # Start from the default hook table and register the supplied hooks
        # through register_hook.
        self.hooks = default_hooks()
        for event, hook in list(hooks.items()):
            self.register_hook(event=event, hook=hook)

        self.method = method
        self.url = url
        self.headers = headers
        self.files = files
        self.data = data
        self.json = json
        self.params = params
        self.auth = auth
        self.cookies = cookies

    def __repr__(self):
        return '<Request [%s]>' % (self.method)

    def prepare(self):
        """Constructs a :class:`PreparedRequest <PreparedRequest>` for transmission and returns it."""
        prepared = PreparedRequest()
        prepared.prepare(
            method=self.method,
            url=self.url,
            params=self.params,
            headers=self.headers,
            cookies=self.cookies,
            data=self.data,
            files=self.files,
            json=self.json,
            auth=self.auth,
            hooks=self.hooks,
        )
        return prepared
|||
|
|||
|
|||
class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
    """The fully mutable :class:`PreparedRequest <PreparedRequest>` object,
    containing the exact bytes that will be sent to the server.

    Generated from either a :class:`Request <Request>` object or manually.

    Usage::

      >>> import requests
      >>> req = requests.Request('GET', 'https://httpbin.org/get')
      >>> r = req.prepare()
      >>> r
      <PreparedRequest [GET]>

      >>> s = requests.Session()
      >>> s.send(r)
      <Response [200]>
    """

    def __init__(self):
        #: HTTP verb to send to the server.
        self.method = None
        #: HTTP URL to send the request to.
        self.url = None
        #: dictionary of HTTP headers.
        self.headers = None
        # The `CookieJar` used to create the Cookie header will be stored here
        # after prepare_cookies is called
        self._cookies = None
        #: request body to send to the server.
        self.body = None
        #: dictionary of callback hooks, for internal usage.
        self.hooks = default_hooks()
        #: integer denoting starting position of a readable file-like body.
        self._body_position = None

    def prepare(self,
                method=None, url=None, headers=None, files=None, data=None,
                params=None, auth=None, cookies=None, hooks=None, json=None):
        """Prepares the entire request with the given parameters.

        The individual ``prepare_*`` steps run in a fixed order: method,
        URL, headers, cookies, body, auth and finally hooks.
        """

        self.prepare_method(method)
        self.prepare_url(url, params)
        self.prepare_headers(headers)
        self.prepare_cookies(cookies)
        self.prepare_body(data, files, json)
        self.prepare_auth(auth, url)

        # Note that prepare_auth must be last to enable authentication schemes
        # such as OAuth to work on a fully prepared request.

        # This MUST go after prepare_auth. Authenticators could add a hook
        self.prepare_hooks(hooks)

    def __repr__(self):
        return '<PreparedRequest [%s]>' % (self.method)

    def copy(self):
        """Return a new PreparedRequest carrying this request's state.

        Headers are copied via their ``copy()`` method and the cookie jar
        via ``_copy_cookie_jar``; ``body`` and ``hooks`` are assigned by
        reference, so they are shared with the original.
        """
        p = PreparedRequest()
        p.method = self.method
        p.url = self.url
        p.headers = self.headers.copy() if self.headers is not None else None
        p._cookies = _copy_cookie_jar(self._cookies)
        p.body = self.body
        p.hooks = self.hooks
        p._body_position = self._body_position
        return p

    def prepare_method(self, method):
        """Prepares the given HTTP method.

        A non-None method is upper-cased and converted to a native string.
        """
        self.method = method
        if self.method is not None:
            self.method = to_native_string(self.method.upper())

    @staticmethod
    def _get_idna_encoded_host(host):
        """IDNA-encode ``host`` (with ``uts46=True``) and return it as text.

        :raises UnicodeError: if the ``idna`` package rejects the host.
        """
        # Imported here rather than at module level.
        import idna

        try:
            host = idna.encode(host, uts46=True).decode('utf-8')
        except idna.IDNAError:
            raise UnicodeError
        return host

    def prepare_url(self, url, params):
        """Prepares the given HTTP URL.

        Stores the normalised URL, with ``params`` merged into its query
        string, on ``self.url``. URLs that contain ``':'`` but do not start
        with ``http`` are stored as given (minus leading whitespace).

        :raises InvalidURL: if the URL cannot be parsed, has no host, or has
            an invalid host label.
        :raises MissingSchema: if the URL has no scheme.
        """
        #: Accept objects that have string representations.
        #: We're unable to blindly call unicode/str functions
        #: as this will include the bytestring indicator (b'')
        #: on python 3.x.
        #: https://github.com/requests/requests/pull/2238
        if isinstance(url, bytes):
            url = url.decode('utf8')
        else:
            url = unicode(url) if is_py2 else str(url)

        # Remove leading whitespaces from url
        url = url.lstrip()

        # Don't do any URL preparation for non-HTTP schemes like `mailto`,
        # `data` etc to work around exceptions from `url_parse`, which
        # handles RFC 3986 only.
        if ':' in url and not url.lower().startswith('http'):
            self.url = url
            return

        # Support for unicode domain names and paths.
        try:
            scheme, auth, host, port, path, query, fragment = parse_url(url)
        except LocationParseError as e:
            raise InvalidURL(*e.args)

        if not scheme:
            error = ("Invalid URL {0!r}: No schema supplied. Perhaps you meant http://{0}?")
            error = error.format(to_native_string(url, 'utf8'))

            raise MissingSchema(error)

        if not host:
            raise InvalidURL("Invalid URL %r: No host supplied" % url)

        # In general, we want to try IDNA encoding the hostname if the string contains
        # non-ASCII characters. This allows users to automatically get the correct IDNA
        # behaviour. For strings containing only ASCII characters, we need to also verify
        # it doesn't start with a wildcard (*), before allowing the unencoded hostname.
        if not unicode_is_ascii(host):
            try:
                host = self._get_idna_encoded_host(host)
            except UnicodeError:
                raise InvalidURL('URL has an invalid label.')
        elif host.startswith(u'*'):
            raise InvalidURL('URL has an invalid label.')

        # Carefully reconstruct the network location
        netloc = auth or ''
        if netloc:
            netloc += '@'
        netloc += host
        if port:
            netloc += ':' + str(port)

        # Bare domains aren't valid URLs.
        if not path:
            path = '/'

        # On Python 2 the URL components are encoded to UTF-8 before the URL
        # is reassembled.
        if is_py2:
            if isinstance(scheme, str):
                scheme = scheme.encode('utf-8')
            if isinstance(netloc, str):
                netloc = netloc.encode('utf-8')
            if isinstance(path, str):
                path = path.encode('utf-8')
            if isinstance(query, str):
                query = query.encode('utf-8')
            if isinstance(fragment, str):
                fragment = fragment.encode('utf-8')

        if isinstance(params, (str, bytes)):
            params = to_native_string(params)

        # Append the encoded params to any query already present in the URL.
        enc_params = self._encode_params(params)
        if enc_params:
            if query:
                query = '%s&%s' % (query, enc_params)
            else:
                query = enc_params

        url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment]))
        self.url = url

    def prepare_headers(self, headers):
        """Prepares the given HTTP headers.

        ``self.headers`` is reset to a new ``CaseInsensitiveDict``; each
        supplied header is checked with ``check_header_validity`` before it
        is stored under its native-string name.
        """

        self.headers = CaseInsensitiveDict()
        if headers:
            for header in headers.items():
                # Raise exception on invalid header value.
                check_header_validity(header)
                name, value = header
                self.headers[to_native_string(name)] = value

    def prepare_body(self, data, files, json=None):
        """Prepares the given HTTP body data.

        ``json`` is only used when ``data`` is falsy. Iterable ``data`` that
        is not a string, list, tuple or mapping is treated as a stream and
        sent as-is; otherwise the body is multipart-encoded (when ``files``
        is given) or form-encoded.

        :raises NotImplementedError: if a streamed body is combined with
            ``files``.
        """

        # Check if file, fo, generator, iterator.
        # If not, run through normal process.

        # Nottin' on you.
        body = None
        content_type = None

        if not data and json is not None:
            # urllib3 requires a bytes-like body. Python 2's json.dumps
            # provides this natively, but Python 3 gives a Unicode string.
            content_type = 'application/json'
            body = complexjson.dumps(json)
            if not isinstance(body, bytes):
                body = body.encode('utf-8')

        is_stream = all([
            hasattr(data, '__iter__'),
            not isinstance(data, (basestring, list, tuple, Mapping))
        ])

        # The length may be impossible to determine; None records that.
        try:
            length = super_len(data)
        except (TypeError, AttributeError, UnsupportedOperation):
            length = None

        if is_stream:
            body = data

            if getattr(body, 'tell', None) is not None:
                # Record the current file position before reading.
                # This will allow us to rewind a file in the event
                # of a redirect.
                try:
                    self._body_position = body.tell()
                except (IOError, OSError):
                    # This differentiates from None, allowing us to catch
                    # a failed `tell()` later when trying to rewind the body
                    self._body_position = object()

            if files:
                raise NotImplementedError('Streamed bodies and files are mutually exclusive.')

            # A known, non-zero length is announced; otherwise the body is
            # sent with chunked transfer encoding.
            if length:
                self.headers['Content-Length'] = builtin_str(length)
            else:
                self.headers['Transfer-Encoding'] = 'chunked'
        else:
            # Multi-part file uploads.
            if files:
                (body, content_type) = self._encode_files(files, data)
            else:
                if data:
                    body = self._encode_params(data)
                    # Strings and file-like data get no default content
                    # type; everything else is form-encoded.
                    if isinstance(data, basestring) or hasattr(data, 'read'):
                        content_type = None
                    else:
                        content_type = 'application/x-www-form-urlencoded'

            self.prepare_content_length(body)

            # Add content-type if it wasn't explicitly provided.
            if content_type and ('content-type' not in self.headers):
                self.headers['Content-Type'] = content_type

        self.body = body

    def prepare_content_length(self, body):
        """Prepare Content-Length header based on request method and body"""
        if body is not None:
            length = super_len(body)
            if length:
                # If length exists, set it. Otherwise, we fallback
                # to Transfer-Encoding: chunked.
                self.headers['Content-Length'] = builtin_str(length)
        elif self.method not in ('GET', 'HEAD') and self.headers.get('Content-Length') is None:
            # Set Content-Length to 0 for methods that can have a body
            # but don't provide one. (i.e. not GET or HEAD)
            self.headers['Content-Length'] = '0'

    def prepare_auth(self, auth, url=''):
        """Prepares the given HTTP auth data.

        When ``auth`` is None, credentials are taken from ``self.url`` if it
        carries any. The ``url`` argument is not read by this method.
        """

        # If no Auth is explicitly provided, extract it from the URL first.
        if auth is None:
            url_auth = get_auth_from_url(self.url)
            auth = url_auth if any(url_auth) else None

        if auth:
            if isinstance(auth, tuple) and len(auth) == 2:
                # special-case basic HTTP auth
                auth = HTTPBasicAuth(*auth)

            # Allow auth to make its changes.
            r = auth(self)

            # Update self to reflect the auth changes.
            self.__dict__.update(r.__dict__)

            # Recompute Content-Length
            self.prepare_content_length(self.body)

    def prepare_cookies(self, cookies):
        """Prepares the given HTTP cookie data.

        This function eventually generates a ``Cookie`` header from the
        given cookies using cookielib. Due to cookielib's design, the header
        will not be regenerated if it already exists, meaning this function
        can only be called once for the life of the
        :class:`PreparedRequest <PreparedRequest>` object. Any subsequent calls
        to ``prepare_cookies`` will have no actual effect, unless the "Cookie"
        header is removed beforehand.
        """
        # A CookieJar is stored as-is; anything else is converted with
        # cookiejar_from_dict.
        if isinstance(cookies, cookielib.CookieJar):
            self._cookies = cookies
        else:
            self._cookies = cookiejar_from_dict(cookies)

        cookie_header = get_cookie_header(self._cookies, self)
        if cookie_header is not None:
            self.headers['Cookie'] = cookie_header

    def prepare_hooks(self, hooks):
        """Prepares the given hooks.

        Each event in ``hooks`` is registered through ``register_hook``.
        """
        # hooks can be passed as None to the prepare method and to this
        # method. To prevent iterating over None, simply use an empty list
        # if hooks is False-y
        hooks = hooks or []
        for event in hooks:
            self.register_hook(event, hooks[event])
|||
|
|||
|
|||
class Response(object): |
|||
"""The :class:`Response <Response>` object, which contains a |
|||
server's response to an HTTP request. |
|||
""" |
|||
|
|||
__attrs__ = [ |
|||
'_content', 'status_code', 'headers', 'url', 'history', |
|||
'encoding', 'reason', 'cookies', 'elapsed', 'request' |
|||
] |
|||
|
|||
def __init__(self): |
|||
self._content = False |
|||
self._content_consumed = False |
|||
self._next = None |
|||
|
|||
#: Integer Code of responded HTTP Status, e.g. 404 or 200. |
|||
self.status_code = None |
|||
|
|||
#: Case-insensitive Dictionary of Response Headers. |
|||
#: For example, ``headers['content-encoding']`` will return the |
|||
#: value of a ``'Content-Encoding'`` response header. |
|||
self.headers = CaseInsensitiveDict() |
|||
|
|||
#: File-like object representation of response (for advanced usage). |
|||
#: Use of ``raw`` requires that ``stream=True`` be set on the request. |
|||
# This requirement does not apply for use internally to Requests. |
|||
self.raw = None |
|||
|
|||
#: Final URL location of Response. |
|||
self.url = None |
|||
|
|||
#: Encoding to decode with when accessing r.text. |
|||
self.encoding = None |
|||
|
|||
#: A list of :class:`Response <Response>` objects from |
|||
#: the history of the Request. Any redirect responses will end |
|||
#: up here. The list is sorted from the oldest to the most recent request. |
|||
self.history = [] |
|||
|
|||
#: Textual reason of responded HTTP Status, e.g. "Not Found" or "OK". |
|||
self.reason = None |
|||
|
|||
#: A CookieJar of Cookies the server sent back. |
|||
self.cookies = cookiejar_from_dict({}) |
|||
|
|||
#: The amount of time elapsed between sending the request |
|||
#: and the arrival of the response (as a timedelta). |
|||
#: This property specifically measures the time taken between sending |
|||
#: the first byte of the request and finishing parsing the headers. It |
|||
#: is therefore unaffected by consuming the response content or the |
|||
#: value of the ``stream`` keyword argument. |
|||
self.elapsed = datetime.timedelta(0) |
|||
|
|||
#: The :class:`PreparedRequest <PreparedRequest>` object to which this |
|||
#: is a response. |
|||
self.request = None |
|||
|
|||
def __enter__(self): |
|||
return self |
|||
|
|||
def __exit__(self, *args): |
|||
self.close() |
|||
|
|||
def __getstate__(self): |
|||
# Consume everything; accessing the content attribute makes |
|||
# sure the content has been fully read. |
|||
if not self._content_consumed: |
|||
self.content |
|||
|
|||
return {attr: getattr(self, attr, None) for attr in self.__attrs__} |
|||
|
|||
def __setstate__(self, state): |
|||
for name, value in state.items(): |
|||
setattr(self, name, value) |
|||
|
|||
# pickled objects do not have .raw |
|||
setattr(self, '_content_consumed', True) |
|||
setattr(self, 'raw', None) |
|||
|
|||
def __repr__(self): |
|||
return '<Response [%s]>' % (self.status_code) |
|||
|
|||
def __bool__(self): |
|||
"""Returns True if :attr:`status_code` is less than 400. |
|||
|
|||
This attribute checks if the status code of the response is between |
|||
400 and 600 to see if there was a client error or a server error. If |
|||
the status code, is between 200 and 400, this will return True. This |
|||
is **not** a check to see if the response code is ``200 OK``. |
|||
""" |
|||
return self.ok |
|||
|
|||
def __nonzero__(self): |
|||
"""Returns True if :attr:`status_code` is less than 400. |
|||
|
|||
This attribute checks if the status code of the response is between |
|||
400 and 600 to see if there was a client error or a server error. If |
|||
the status code, is between 200 and 400, this will return True. This |
|||
is **not** a check to see if the response code is ``200 OK``. |
|||
""" |
|||
return self.ok |
|||
|
|||
def __iter__(self): |
|||
"""Allows you to use a response as an iterator.""" |
|||
return self.iter_content(128) |
|||
|
|||
@property
def ok(self):
    """True unless :meth:`raise_for_status` raises :class:`HTTPError`.

    In practice this is False for client-error and server-error status
    codes and True otherwise. This is **not** a check to see if the
    response code is ``200 OK``.
    """
    try:
        self.raise_for_status()
    except HTTPError:
        return False
    else:
        return True
|||
|
|||
@property
def is_redirect(self):
    """True if this Response is a well-formed HTTP redirect that could have
    been processed automatically (by :meth:`Session.resolve_redirects`).

    Both a ``location`` header and a status code listed in
    ``REDIRECT_STATI`` are required.
    """
    if 'location' not in self.headers:
        return False
    return self.status_code in REDIRECT_STATI
|||
|
|||
@property
def is_permanent_redirect(self):
    """True if this Response is one of the permanent versions of redirect.

    Requires a ``location`` header together with a status code of
    ``codes.moved_permanently`` or ``codes.permanent_redirect``.
    """
    if 'location' not in self.headers:
        return False
    permanent_codes = (codes.moved_permanently, codes.permanent_redirect)
    return self.status_code in permanent_codes
|||
|
|||
@property
def next(self):
    """The PreparedRequest for the next hop of a redirect chain, if any.

    This simply exposes the private ``_next`` attribute.
    """
    return self._next
|||
|
|||
@property
def apparent_encoding(self):
    """The apparent encoding, as guessed by the chardet library from the body."""
    detection = chardet.detect(self.content)
    return detection['encoding']
|||
|
|||
def iter_content(self, chunk_size=1, decode_unicode=False):
    """Iterates over the response data. When stream=True is set on the
    request, this avoids reading the content at once into memory for
    large responses. The chunk size is the number of bytes it should
    read into memory. This is not necessarily the length of each item
    returned as decoding can take place.

    chunk_size must be of type int or None. A value of None will
    function differently depending on the value of `stream`.
    stream=True will read data as it arrives in whatever size the
    chunks are received. If stream=False, data is returned as
    a single chunk.

    If decode_unicode is True, content will be decoded using the best
    available encoding based on the response.

    :raises StreamConsumedError: if the stream was already consumed
        without the body having been stored.
    :raises TypeError: if ``chunk_size`` is neither an int nor None.
    """
    # Reject unusable calls before any iterator is built.
    if self._content_consumed and isinstance(self._content, bool):
        raise StreamConsumedError()
    if chunk_size is not None and not isinstance(chunk_size, int):
        raise TypeError("chunk_size must be an int, it is instead a %s." % type(chunk_size))

    def _read_from_raw():
        if hasattr(self.raw, 'stream'):
            # urllib3-style raw object: translate its errors into ours.
            try:
                for piece in self.raw.stream(chunk_size, decode_content=True):
                    yield piece
            except ProtocolError as err:
                raise ChunkedEncodingError(err)
            except DecodeError as err:
                raise ContentDecodingError(err)
            except ReadTimeoutError as err:
                raise ConnectionError(err)
        else:
            # Plain file-like object: read until an empty chunk comes back.
            while True:
                piece = self.raw.read(chunk_size)
                if not piece:
                    break
                yield piece

        # Only reached once the stream has been read to the end.
        self._content_consumed = True

    # Both iterators are created up front; the already-stored body is
    # re-sliced to simulate reading small chunks when it was consumed.
    replayed = iter_slices(self._content, chunk_size)
    streamed = _read_from_raw()

    if self._content_consumed:
        chunks = replayed
    else:
        chunks = streamed

    if decode_unicode:
        chunks = stream_decode_response_unicode(chunks, self)

    return chunks
|||
|
|||
def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=False, delimiter=None):
    """Iterates over the response data, one line at a time. When
    stream=True is set on the request, this avoids reading the
    content at once into memory for large responses.

    Lines are split on ``delimiter`` when one is given, otherwise with
    ``splitlines()``.

    .. note:: This method is not reentrant safe.
    """
    # Trailing partial line carried over from the previous chunk.
    carry = None

    for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode):
        if carry is not None:
            chunk = carry + chunk

        lines = chunk.split(delimiter) if delimiter else chunk.splitlines()

        # If the last piece ends with the chunk's final character, the
        # chunk stopped mid-line: hold that piece back for the next round.
        ends_mid_line = lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]
        carry = lines.pop() if ends_mid_line else None

        for line in lines:
            yield line

    if carry is not None:
        yield carry
|||
|
|||
@property
def content(self):
    """Content of the response, in bytes.

    The body is read on first access and cached in ``_content``.

    :raises RuntimeError: if the stream was already consumed without
        the body having been stored.
    """
    not_read_yet = self._content is False
    if not_read_yet:
        if self._content_consumed:
            raise RuntimeError(
                'The content for this response was already consumed')

        nothing_to_read = self.status_code == 0 or self.raw is None
        if nothing_to_read:
            self._content = None
        else:
            self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''

    # No need to release the connection here; urllib3 handled that once
    # the data was exhausted.
    self._content_consumed = True
    return self._content
|||
|
|||
@property
def text(self):
    """Content of the response, in unicode.

    If Response.encoding is None, encoding will be guessed using
    ``chardet``.

    The encoding of the response content is determined based solely on HTTP
    headers, following RFC 2616 to the letter. If you can take advantage of
    non-HTTP knowledge to make a better guess at the encoding, you should
    set ``r.encoding`` appropriately before accessing this property.
    """
    body = self.content
    if not body:
        return str('')

    # Prefer the declared encoding; fall back to the auto-detected one.
    encoding = self.encoding
    if encoding is None:
        encoding = self.apparent_encoding

    try:
        return str(body, encoding, errors='replace')
    except (LookupError, TypeError):
        # LookupError: the codec name is unknown (misspelling or similar).
        # TypeError: the encoding is still None.
        # Either way, decode blindly with the default codec.
        return str(body, errors='replace')
|||
|
|||
def json(self, **kwargs):
    r"""Returns the json-encoded content of a response, if any.

    :param \*\*kwargs: Optional arguments that ``json.loads`` takes.
    :raises ValueError: If the response body does not contain valid json.
    """
    should_sniff = not self.encoding and self.content and len(self.content) > 3
    if should_sniff:
        # No encoding set. JSON RFC 4627 section 3 states we should expect
        # UTF-8, -16 or -32, so detect which one applies. If detection or
        # decoding fails, fall through to ``self.text`` (chardet's guess).
        detected = guess_json_utf(self.content)
        if detected is not None:
            try:
                decoded = self.content.decode(detected)
                return complexjson.loads(decoded, **kwargs)
            except UnicodeDecodeError:
                # Wrong UTF codec detected; usually the body is in some
                # other 8-bit codec. That is an RFC violation, and the
                # server did not say which codec was actually used.
                pass
    return complexjson.loads(self.text, **kwargs)
|||
|
|||
@property
def links(self):
    """Returns the parsed header links of the response, if any.

    The result maps each link's ``rel`` (or its ``url`` when ``rel`` is
    missing or empty) to the parsed link; it is empty when there is no
    ``link`` header.
    """
    parsed_links = {}

    link_header = self.headers.get('link')
    if not link_header:
        return parsed_links

    for entry in parse_header_links(link_header):
        parsed_links[entry.get('rel') or entry.get('url')] = entry

    return parsed_links
|||
|
|||
def raise_for_status(self):
    """Raises :class:`HTTPError` when the status code is 4xx or 5xx.

    Returns ``None`` for any other status code.
    """
    reason = self.reason
    if isinstance(reason, bytes):
        # Some servers localize their reason strings, so try utf-8 first
        # and fall back to iso-8859-1 for everything else. (See PR #3538)
        try:
            reason = reason.decode('utf-8')
        except UnicodeDecodeError:
            reason = reason.decode('iso-8859-1')

    status = self.status_code
    if 400 <= status < 500:
        message = u'%s Client Error: %s for url: %s' % (status, reason, self.url)
    elif 500 <= status < 600:
        message = u'%s Server Error: %s for url: %s' % (status, reason, self.url)
    else:
        return

    raise HTTPError(message, response=self)
|||
|
|||
def close(self):
    """Releases the connection back to the pool. Once this method has been
    called the underlying ``raw`` object must not be accessed again.

    A response with no ``raw`` object (for example one constructed by
    hand) has nothing to release, so closing it is a no-op rather than
    an ``AttributeError``.

    *Note: Should not normally need to be called explicitly.*
    """
    raw = self.raw
    if raw is None:
        return

    # An unread body means the stream is still open: close it first.
    if not self._content_consumed:
        raw.close()

    # ``release_conn`` is optional on the raw object; call it when present.
    release_conn = getattr(raw, 'release_conn', None)
    if release_conn is not None:
        release_conn()
@ -0,0 +1,14 @@ |
|||
import sys |
|||
|
|||
# This code exists for backwards compatibility reasons. |
|||
# I don't like it either. Just look the other way. :) |
|||
|
|||
for package in ('urllib3', 'idna', 'chardet'):
    # Expose the top-level package under its own name in this module.
    locals()[package] = __import__(package)
    # Register every already-imported module of that package under the
    # ``requests.packages.`` prefix as well, pointing at the very same module
    # object so identities are preserved
    # (requests.packages.urllib3.* is urllib3.*).
    for mod in list(sys.modules):
        # Appending '.' to both sides matches the package itself and its
        # submodules, but not unrelated names sharing the same prefix.
        if (mod + '.').startswith(package + '.'):
            sys.modules['requests.packages.' + mod] = sys.modules[mod]
|||
|
|||
# Kinda cool, though, right? |
@ -0,0 +1,770 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
requests.session |
|||
~~~~~~~~~~~~~~~~ |
|||
|
|||
This module provides a Session object to manage and persist settings across |
|||
requests (cookies, auth, proxies). |
|||
""" |
|||
import os |
|||
import sys |
|||
import time |
|||
from datetime import timedelta |
|||
|
|||
from .auth import _basic_auth_str |
|||
from .compat import cookielib, is_py3, OrderedDict, urljoin, urlparse, Mapping |
|||
from .cookies import ( |
|||
cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies) |
|||
from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT |
|||
from .hooks import default_hooks, dispatch_hook |
|||
from ._internal_utils import to_native_string |
|||
from .utils import to_key_val_list, default_headers, DEFAULT_PORTS |
|||
from .exceptions import ( |
|||
TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError) |
|||
|
|||
from .structures import CaseInsensitiveDict |
|||
from .adapters import HTTPAdapter |
|||
|
|||
from .utils import ( |
|||
requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies, |
|||
get_auth_from_url, rewind_body |
|||
) |
|||
|
|||
from .status_codes import codes |
|||
|
|||
# formerly defined here, reexposed here for backward compatibility |
|||
from .models import REDIRECT_STATI |
|||
|
|||
# Preferred clock, based on which one is more accurate on a given system:
# ``time.time`` everywhere except Windows, where ``time.perf_counter`` is
# used when it exists and ``time.clock`` otherwise.
if sys.platform != 'win32':
    preferred_clock = time.time
else:
    try:
        preferred_clock = time.perf_counter
    except AttributeError:
        # Interpreter without ``time.perf_counter``.
        preferred_clock = time.clock
|||
|
|||
|
|||
def merge_setting(request_setting, session_setting, dict_class=OrderedDict):
    """Pick the effective value of a setting for one request.

    The request's explicit value wins over the session's. When both are
    mappings they are merged into a new ``dict_class`` (request entries
    override session entries) and entries whose value is ``None`` are
    dropped from the result.

    :param request_setting: value given on the request, or ``None``.
    :param session_setting: value stored on the session, or ``None``.
    :param dict_class: mapping type used to hold a merged result.
    """
    if session_setting is None:
        return request_setting
    if request_setting is None:
        return session_setting

    # Anything that is not a pair of mappings (e.g. verify) is not merged.
    both_are_mappings = (
        isinstance(session_setting, Mapping) and
        isinstance(request_setting, Mapping)
    )
    if not both_are_mappings:
        return request_setting

    merged = dict_class(to_key_val_list(session_setting))
    merged.update(to_key_val_list(request_setting))

    # Collect the keys first so the mapping is not altered while it is
    # being iterated.
    for key in [k for (k, v) in merged.items() if v is None]:
        del merged[key]

    return merged
|||
|
|||
|
|||
def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict):
    """Properly merges both requests and session hooks.

    An empty ``'response'`` list on one side would otherwise wipe out the
    hooks of the other side (``request_hooks == {'response': []}`` breaks
    Session hooks entirely), so a side that is ``None`` or has no response
    hooks is ignored and the other side is returned unchanged.
    """
    def _has_no_hooks(hooks):
        return hooks is None or hooks.get('response') == []

    if _has_no_hooks(session_hooks):
        return request_hooks
    if _has_no_hooks(request_hooks):
        return session_hooks

    return merge_setting(request_hooks, session_hooks, dict_class)
|||
|
|||
|
|||
class SessionRedirectMixin(object):
    """Redirect-following helpers mixed into a session class.

    The methods rely on attributes and methods of the host class that are
    referenced here but defined elsewhere: ``max_redirects``, ``cookies``,
    ``trust_env`` and ``send``.
    """

    def get_redirect_target(self, resp):
        """Receives a Response. Returns a redirect URI or ``None``"""
        # Because of how redirects are processed, this method is called at
        # least once on the original response and at least twice on each
        # subsequent redirect response (if any). A custom mixin overriding
        # this logic may want to cache the redirect location on the
        # response object as a private attribute.
        if not resp.is_redirect:
            return None

        location = resp.headers['location']
        # On py3 the underlying http module decodes headers as latin1, but
        # in practice non-ASCII header values are far more likely to be
        # UTF-8. Re-encoding to latin1 recovers the original bytes so they
        # can be decoded as UTF-8 instead.
        if is_py3:
            location = location.encode('latin1')
        return to_native_string(location, 'utf8')

    def should_strip_auth(self, old_url, new_url):
        """Decide whether Authorization header should be removed when redirecting"""
        old = urlparse(old_url)
        new = urlparse(new_url)

        # A different host never gets the credentials.
        if old.hostname != new.hostname:
            return True

        # Special case: allow http -> https redirect when using the standard
        # ports. This isn't specified by RFC 7235, but is kept to avoid
        # breaking backwards compatibility with older versions of requests
        # that allowed any redirects on the same host.
        if (old.scheme == 'http' and old.port in (80, None)
                and new.scheme == 'https' and new.port in (443, None)):
            return False

        # Same scheme with both ports being either implicit or the scheme's
        # default port counts as the same origin.
        port_changed = old.port != new.port
        scheme_changed = old.scheme != new.scheme
        default_ports = (DEFAULT_PORTS.get(old.scheme, None), None)
        if (not scheme_changed and old.port in default_ports
                and new.port in default_ports):
            return False

        # Standard case: root URI must match.
        return port_changed or scheme_changed

    def resolve_redirects(self, resp, req, stream=False, timeout=None,
                          verify=True, cert=None, proxies=None, yield_requests=False, **adapter_kwargs):
        """Receives a Response. Returns a generator of Responses or Requests."""

        seen = []  # every response handled so far, oldest first

        url = self.get_redirect_target(resp)
        previous_fragment = urlparse(req.url).fragment
        while url:
            prepared_request = req.copy()

            # Update history and keep track of redirects.
            # resp.history must ignore the original request in this loop.
            seen.append(resp)
            resp.history = seen[1:]

            try:
                resp.content  # Consume socket so it can be released
            except (ChunkedEncodingError, ContentDecodingError, RuntimeError):
                resp.raw.read(decode_content=False)

            if len(resp.history) >= self.max_redirects:
                raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects, response=resp)

            # Release the connection back into the pool.
            resp.close()

            # Handle redirection without scheme (see: RFC 1808 Section 4)
            if url.startswith('//'):
                current = urlparse(resp.url)
                url = '%s:%s' % (to_native_string(current.scheme), url)

            # Normalize url case and attach previous fragment if needed (RFC 7231 7.1.2)
            parsed = urlparse(url)
            if parsed.fragment == '' and previous_fragment:
                parsed = parsed._replace(fragment=previous_fragment)
            elif parsed.fragment:
                previous_fragment = parsed.fragment
            url = parsed.geturl()

            # Facilitate relative 'location' headers, as allowed by RFC 7231.
            # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
            # Compliant with RFC3986, we percent encode the url.
            if parsed.netloc:
                url = requote_uri(url)
            else:
                url = urljoin(resp.url, requote_uri(url))

            prepared_request.url = to_native_string(url)

            self.rebuild_method(prepared_request, resp)

            # https://github.com/requests/requests/issues/1084
            if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect):
                # https://github.com/requests/requests/issues/3490
                for purged in ('Content-Length', 'Content-Type', 'Transfer-Encoding'):
                    prepared_request.headers.pop(purged, None)
                prepared_request.body = None

            headers = prepared_request.headers
            # Drop the stale cookie header; it is rebuilt just below.
            headers.pop('Cookie', None)

            # Extract any cookies sent on the response to the cookiejar
            # in the new request. Because we've mutated our copied prepared
            # request, use the old one that we haven't yet touched.
            extract_cookies_to_jar(prepared_request._cookies, req, resp.raw)
            merge_cookies(prepared_request._cookies, self.cookies)
            prepared_request.prepare_cookies(prepared_request._cookies)

            # Rebuild auth and proxy information.
            proxies = self.rebuild_proxies(prepared_request, proxies)
            self.rebuild_auth(prepared_request, resp)

            # A failed tell() sets `_body_position` to `object()`. This non-None
            # value ensures `rewindable` will be True, allowing us to raise an
            # UnrewindableBodyError, instead of hanging the connection.
            rewindable = (
                prepared_request._body_position is not None and
                ('Content-Length' in headers or 'Transfer-Encoding' in headers)
            )

            # Attempt to rewind consumed file-like object.
            if rewindable:
                rewind_body(prepared_request)

            # Override the original request.
            req = prepared_request

            if yield_requests:
                yield req
            else:
                resp = self.send(
                    req,
                    stream=stream,
                    timeout=timeout,
                    verify=verify,
                    cert=cert,
                    proxies=proxies,
                    allow_redirects=False,
                    **adapter_kwargs
                )

                extract_cookies_to_jar(self.cookies, prepared_request, resp.raw)

                # extract redirect url, if any, for the next loop
                url = self.get_redirect_target(resp)
                yield resp

    def rebuild_auth(self, prepared_request, response):
        """When being redirected we may want to strip authentication from the
        request to avoid leaking credentials. This method intelligently removes
        and reapplies authentication where possible to avoid credential loss.
        """
        headers = prepared_request.headers
        url = prepared_request.url

        # Strip the Authorization header whenever should_strip_auth says
        # the redirect target must not receive it.
        if 'Authorization' in headers and self.should_strip_auth(response.request.url, url):
            del headers['Authorization']

        # .netrc might have more auth for us on our new host.
        if self.trust_env:
            netrc_auth = get_netrc_auth(url)
            if netrc_auth is not None:
                prepared_request.prepare_auth(netrc_auth)

    def rebuild_proxies(self, prepared_request, proxies):
        """This method re-evaluates the proxy configuration by considering the
        environment variables. If we are redirected to a URL covered by
        NO_PROXY, we strip the proxy configuration. Otherwise, we set missing
        proxy keys for this URL (in case they were stripped by a previous
        redirect).

        This method also replaces the Proxy-Authorization header where
        necessary.

        :rtype: dict
        """
        if proxies is None:
            proxies = {}
        headers = prepared_request.headers
        url = prepared_request.url
        scheme = urlparse(url).scheme
        new_proxies = proxies.copy()
        no_proxy = proxies.get('no_proxy')

        bypass_proxy = should_bypass_proxies(url, no_proxy=no_proxy)
        if self.trust_env and not bypass_proxy:
            environ_proxies = get_environ_proxies(url, no_proxy=no_proxy)

            proxy = environ_proxies.get(scheme, environ_proxies.get('all'))

            if proxy:
                new_proxies.setdefault(scheme, proxy)

        # Any previous proxy credentials are dropped and recomputed from
        # the proxy URL chosen for this scheme.
        if 'Proxy-Authorization' in headers:
            del headers['Proxy-Authorization']

        try:
            username, password = get_auth_from_url(new_proxies[scheme])
        except KeyError:
            username, password = None, None

        if username and password:
            headers['Proxy-Authorization'] = _basic_auth_str(username, password)

        return new_proxies

    def rebuild_method(self, prepared_request, response):
        """When being redirected we may want to change the method of the request
        based on certain specs or browser behavior.
        """
        method = prepared_request.method
        status = response.status_code

        # https://tools.ietf.org/html/rfc7231#section-6.4.4
        if status == codes.see_other and method != 'HEAD':
            method = 'GET'

        # Do what the browsers do, despite standards...
        # First, turn 302s into GETs.
        if status == codes.found and method != 'HEAD':
            method = 'GET'

        # Second, if a POST is responded to with a 301, turn it into a GET.
        # This bizarre behaviour is explained in Issue 1704.
        if status == codes.moved and method == 'POST':
            method = 'GET'

        prepared_request.method = method
|||
|
|||
|
|||
class Session(SessionRedirectMixin): |
|||
"""A Requests session. |
|||
|
|||
Provides cookie persistence, connection-pooling, and configuration. |
|||
|
|||
Basic Usage:: |
|||
|
|||
>>> import requests |
|||
>>> s = requests.Session() |
|||
>>> s.get('https://httpbin.org/get') |
|||
<Response [200]> |
|||
|
|||
Or as a context manager:: |
|||
|
|||
>>> with requests.Session() as s: |
|||
>>> s.get('https://httpbin.org/get') |
|||
<Response [200]> |
|||
""" |
|||
|
|||
__attrs__ = [ |
|||
'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', |
|||
'cert', 'prefetch', 'adapters', 'stream', 'trust_env', |
|||
'max_redirects', |
|||
] |
|||
|
|||
def __init__(self):
    """Set every session-wide default and mount the default adapters."""

    #: Case-insensitive dictionary of headers sent with every
    #: :class:`Request <Request>` issued from this
    #: :class:`Session <Session>`.
    self.headers = default_headers()

    #: Default authentication tuple or object attached to each
    #: :class:`Request <Request>`.
    self.auth = None

    #: Dictionary mapping protocol, or protocol and host, to the URL of the
    #: proxy (e.g. {'http': 'foo.bar:3128', 'http://host.name': 'foo.bar:4012'})
    #: used for each :class:`Request <Request>`.
    self.proxies = {}

    #: Event-handling hooks.
    self.hooks = default_hooks()

    #: Dictionary of querystring data attached to each
    #: :class:`Request <Request>`. Values may be lists to represent
    #: multivalued query parameters.
    self.params = {}

    #: Default for streaming response content.
    self.stream = False

    #: Default for SSL verification.
    self.verify = True

    #: Default SSL client certificate: if String, path to ssl client
    #: cert file (.pem). If Tuple, ('cert', 'key') pair.
    self.cert = None

    #: Maximum number of redirects allowed. Exceeding this limit raises
    #: :class:`TooManyRedirects`. Defaults to
    #: requests.models.DEFAULT_REDIRECT_LIMIT, which is 30.
    self.max_redirects = DEFAULT_REDIRECT_LIMIT

    #: Whether to trust environment settings for proxy configuration,
    #: default authentication and similar.
    self.trust_env = True

    #: CookieJar holding all currently outstanding cookies set on this
    #: session. By default it is a
    #: :class:`RequestsCookieJar <requests.cookies.RequestsCookieJar>`, but
    #: may be any other ``cookielib.CookieJar`` compatible object.
    self.cookies = cookiejar_from_dict({})

    # Default connection adapters; ``adapters`` must exist before mounting.
    self.adapters = OrderedDict()
    self.mount('https://', HTTPAdapter())
    self.mount('http://', HTTPAdapter())
|||
|
|||
def __enter__(self): |
|||
return self |
|||
|
|||
def __exit__(self, *args): |
|||
self.close() |
|||
|
|||
def prepare_request(self, request):
    """Constructs a :class:`PreparedRequest <PreparedRequest>` for
    transmission and returns it. The :class:`PreparedRequest` has settings
    merged from the :class:`Request <Request>` instance and those of the
    :class:`Session`.

    :param request: :class:`Request` instance to prepare with this
        session's settings.
    :rtype: requests.PreparedRequest
    """
    # Bootstrap a CookieJar from whatever the request carries.
    request_cookies = request.cookies or {}
    if not isinstance(request_cookies, cookielib.CookieJar):
        request_cookies = cookiejar_from_dict(request_cookies)

    # Session cookies go into a fresh jar first, then the request's own
    # cookies are merged on top.
    session_jar = merge_cookies(RequestsCookieJar(), self.cookies)
    merged_cookies = merge_cookies(session_jar, request_cookies)

    # Fall back to the environment's basic authentication (netrc) only
    # when neither the request nor the session set any.
    auth = request.auth
    if self.trust_env and not auth and not self.auth:
        auth = get_netrc_auth(request.url)

    prepared = PreparedRequest()
    prepared.prepare(
        method=request.method.upper(),
        url=request.url,
        files=request.files,
        data=request.data,
        json=request.json,
        headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict),
        params=merge_setting(request.params, self.params),
        auth=merge_setting(auth, self.auth),
        cookies=merged_cookies,
        hooks=merge_hooks(request.hooks, self.hooks),
    )
    return prepared
|||
|
|||
def request(self, method, url,
            params=None, data=None, headers=None, cookies=None, files=None,
            auth=None, timeout=None, allow_redirects=True, proxies=None,
            hooks=None, stream=None, verify=None, cert=None, json=None):
    """Constructs a :class:`Request <Request>`, prepares it and sends it.
    Returns :class:`Response <Response>` object.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query
        string for the :class:`Request`.
    :param data: (optional) Dictionary, list of tuples, bytes, or file-like
        object to send in the body of the :class:`Request`.
    :param json: (optional) json to send in the body of the
        :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the
        :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the
        :class:`Request`.
    :param files: (optional) Dictionary of ``'filename': file-like-objects``
        for multipart encoding upload.
    :param auth: (optional) Auth tuple or callable to enable
        Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) How long to wait for the server to send
        data before giving up, as a float, or a :ref:`(connect timeout,
        read timeout) <timeouts>` tuple.
    :type timeout: float or tuple
    :param allow_redirects: (optional) Set to True by default.
    :type allow_redirects: bool
    :param proxies: (optional) Dictionary mapping protocol or protocol and
        hostname to the URL of the proxy.
    :param stream: (optional) whether to immediately download the response
        content. Defaults to ``False``.
    :param verify: (optional) Either a boolean, in which case it controls whether we verify
        the server's TLS certificate, or a string, in which case it must be a path
        to a CA bundle to use. Defaults to ``True``.
    :param cert: (optional) if String, path to ssl client cert file (.pem).
        If Tuple, ('cert', 'key') pair.
    :rtype: requests.Response
    """
    # Build the Request; falsy ``data`` and ``params`` become empty dicts.
    req = Request(
        method=method.upper(),
        url=url,
        headers=headers,
        files=files,
        data=data or {},
        json=json,
        params=params or {},
        auth=auth,
        cookies=cookies,
        hooks=hooks,
    )
    prep = self.prepare_request(req)

    proxies = proxies or {}

    # The merged environment settings are applied on top of the explicit
    # send arguments.
    send_kwargs = {
        'timeout': timeout,
        'allow_redirects': allow_redirects,
    }
    send_kwargs.update(
        self.merge_environment_settings(prep.url, proxies, stream, verify, cert)
    )

    return self.send(prep, **send_kwargs)
|||
|
|||
def get(self, url, **kwargs):
    r"""Sends a GET request. Returns :class:`Response` object.

    Redirects are followed unless ``allow_redirects`` is passed explicitly.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :rtype: requests.Response
    """
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = True
    return self.request('GET', url, **kwargs)
|||
|
|||
def options(self, url, **kwargs):
    r"""Sends a OPTIONS request. Returns :class:`Response` object.

    Redirects are followed unless ``allow_redirects`` is passed explicitly.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :rtype: requests.Response
    """
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = True
    return self.request('OPTIONS', url, **kwargs)
|||
|
|||
def head(self, url, **kwargs):
    r"""Issue a HEAD request by delegating to :meth:`request`.

    Unlike ``get`` and ``options``, ``allow_redirects`` defaults to
    ``False`` here unless the caller supplied a value for it.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :rtype: requests.Response
    """
    # Only fill in the default; an explicit caller choice wins.
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = False
    return self.request('HEAD', url, **kwargs)
|||
|
|||
def post(self, url, data=None, json=None, **kwargs):
    r"""Issue a POST request by delegating to :meth:`request`.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, list of tuples, bytes, or file-like
        object to send in the body of the :class:`Request`.
    :param json: (optional) json to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :rtype: requests.Response
    """
    # ``data`` and ``json`` are always forwarded, even when left as None.
    response = self.request('POST', url, data=data, json=json, **kwargs)
    return response
|||
|
|||
def put(self, url, data=None, **kwargs):
    r"""Issue a PUT request by delegating to :meth:`request`.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, list of tuples, bytes, or file-like
        object to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :rtype: requests.Response
    """
    # ``data`` is always forwarded, even when left as None.
    response = self.request('PUT', url, data=data, **kwargs)
    return response
|||
|
|||
def patch(self, url, data=None, **kwargs):
    r"""Issue a PATCH request by delegating to :meth:`request`.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, list of tuples, bytes, or file-like
        object to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :rtype: requests.Response
    """
    # ``data`` is always forwarded, even when left as None.
    response = self.request('PATCH', url, data=data, **kwargs)
    return response
|||
|
|||
def delete(self, url, **kwargs):
    r"""Issue a DELETE request by delegating to :meth:`request`.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :rtype: requests.Response
    """
    response = self.request('DELETE', url, **kwargs)
    return response
|||
|
|||
def send(self, request, **kwargs):
    """Send a given PreparedRequest.

    Session-level ``stream``, ``verify``, ``cert`` and ``proxies`` are used
    for any of those options missing from ``kwargs``. The request is handed
    to the adapter chosen by ``get_adapter``, response hooks are dispatched,
    cookies are stored on the session, and redirects are followed when
    ``allow_redirects`` (default ``True``) is truthy.

    :raises ValueError: if ``request`` is an unprepared ``Request``.
    :rtype: requests.Response
    """
    # Give hooks a complete set of parameters so they can reproduce the
    # previous request.
    for option in ('stream', 'verify', 'cert', 'proxies'):
        kwargs.setdefault(option, getattr(self, option))

    # Guard against the common mistake of passing an unprepared Request.
    if isinstance(request, Request):
        raise ValueError('You can only send PreparedRequests.')

    # Values needed for redirect resolution and hook dispatch.
    follow_redirects = kwargs.pop('allow_redirects', True)
    stream = kwargs.get('stream')
    hooks = request.hooks

    adapter = self.get_adapter(url=request.url)

    # Time the adapter call (approximately) and record it on the response.
    started_at = preferred_clock()
    response = adapter.send(request, **kwargs)
    response.elapsed = timedelta(seconds=preferred_clock() - started_at)

    # Response manipulation hooks.
    response = dispatch_hook('response', hooks, response, **kwargs)

    # Persist cookies, including those from any history the hooks created.
    if response.history:
        for earlier in response.history:
            extract_cookies_to_jar(self.cookies, earlier.request, earlier.raw)
    extract_cookies_to_jar(self.cookies, request, response.raw)

    # The redirect generator is always created but only consumed when
    # redirects are being followed.
    redirect_gen = self.resolve_redirects(response, request, **kwargs)
    history = list(redirect_gen) if follow_redirects else []

    if history:
        # Put the original response first, then return the final one with
        # everything before it attached as its history.
        history.insert(0, response)
        response = history.pop()
        response.history = history

    # When redirects are not followed, stash the next request (if any) on
    # the response for Response.next().
    if not follow_redirects:
        try:
            response._next = next(
                self.resolve_redirects(
                    response, request, yield_requests=True, **kwargs
                )
            )
        except StopIteration:
            pass

    # Accessing .content reads the body now unless streaming was requested.
    if not stream:
        response.content

    return response
|||
|
|||
def merge_environment_settings(self, url, proxies, stream, verify, cert):
    """
    Check the environment and merge it with some settings.

    When ``self.trust_env`` is true, proxies reported by
    ``get_environ_proxies`` are added for any key the caller did not set,
    and a ``verify`` of ``True``/``None`` is replaced by the value of
    ``REQUESTS_CA_BUNDLE`` or ``CURL_CA_BUNDLE`` (``None`` if neither is
    set). Each value is then merged with the session-level setting via
    ``merge_setting``.

    Note that a ``proxies`` dict passed by the caller is updated in place
    with the environment's proxies.

    :param url: URL used to look up environment proxies.
    :param proxies: per-request proxy mapping, or ``None``.
    :param stream: per-request ``stream`` setting.
    :param verify: per-request ``verify`` setting.
    :param cert: per-request ``cert`` setting.
    :rtype: dict
    """
    # Gather clues from the surrounding environment.
    if self.trust_env:
        # Set environment's proxies.
        no_proxy = proxies.get('no_proxy') if proxies is not None else None
        env_proxies = get_environ_proxies(url, no_proxy=no_proxy)
        # ``proxies`` may legitimately be None (the lookup above allows it),
        # so create a dict before merging environment entries into it.
        # Only done when there is something to add, so a None value still
        # reaches merge_setting unchanged otherwise.
        if env_proxies and proxies is None:
            proxies = {}
        for (k, v) in env_proxies.items():
            proxies.setdefault(k, v)

        # Look for requests environment configuration and be compatible
        # with cURL.
        if verify is True or verify is None:
            verify = (os.environ.get('REQUESTS_CA_BUNDLE') or
                      os.environ.get('CURL_CA_BUNDLE'))

    # Merge all the kwargs.
    proxies = merge_setting(proxies, self.proxies)
    stream = merge_setting(stream, self.stream)
    verify = merge_setting(verify, self.verify)
    cert = merge_setting(cert, self.cert)

    return {'verify': verify, 'proxies': proxies, 'stream': stream,
            'cert': cert}
|||
|
|||
def get_adapter(self, url):
    """
    Returns the appropriate connection adapter for the given URL.

    Adapters are tried in the iteration order of ``self.adapters``; the
    first whose prefix matches the start of ``url`` (case-insensitively)
    is returned.

    :raises InvalidSchema: if no mounted prefix matches ``url``.
    :rtype: requests.adapters.BaseAdapter
    """
    matching = (
        adapter
        for prefix, adapter in self.adapters.items()
        if url.lower().startswith(prefix.lower())
    )
    # The generator is lazy, so scanning stops at the first match.
    for adapter in matching:
        return adapter

    # Nothing matches :-/
    raise InvalidSchema("No connection adapters were found for '%s'" % url)
|||
|
|||
def close(self):
    """Close every mounted adapter, and with them the session."""
    for mounted_adapter in self.adapters.values():
        mounted_adapter.close()
|||
|
|||
def mount(self, prefix, adapter):
    """Registers a connection adapter to a prefix.

    Adapters are sorted in descending order by prefix length: after the
    new entry is stored, every key shorter than ``prefix`` is removed and
    re-added so that it comes after it in the mapping's order.
    """
    registry = self.adapters
    registry[prefix] = adapter

    # Snapshot the shorter keys first; the mapping is modified below.
    threshold = len(prefix)
    shorter_keys = [key for key in registry if len(key) < threshold]

    for key in shorter_keys:
        moved = registry.pop(key)
        registry[key] = moved
|||
|
|||
def __getstate__(self): |
|||
state = {attr: getattr(self, attr, None) for attr in self.__attrs__} |
|||
return state |
|||
|
|||
def __setstate__(self, state): |
|||
for attr, value in state.items(): |
|||
setattr(self, attr, value) |
|||
|
|||
|
|||
def session():
    """
    Create and return a new :class:`Session` for context-management.

    .. deprecated:: 1.0.0

        Deprecated since version 1.0.0 and retained only for backwards
        compatibility. New code should instantiate
        :class:`~requests.sessions.Session` directly. This function may be
        removed at a future date.

    :rtype: Session
    """
    new_session = Session()
    return new_session
@ -0,0 +1,120 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
r""" |
|||
The ``codes`` object defines a mapping from common names for HTTP statuses |
|||
to their numerical codes, accessible either as attributes or as dictionary |
|||
items. |
|||
|
|||
>>> requests.codes['temporary_redirect'] |
|||
307 |
|||
>>> requests.codes.teapot |
|||
418 |
|||
>>> requests.codes['\o/'] |
|||
200 |
|||
|
|||
Some codes have multiple names, and both upper- and lower-case versions of |
|||
the names are allowed. For example, ``codes.ok``, ``codes.OK``, and |
|||
``codes.okay`` all correspond to the HTTP status code 200. |
|||
""" |
|||
|
|||
from .structures import LookupDict |
|||
|
|||
_codes = { |
|||
|
|||
# Informational. |
|||
100: ('continue',), |
|||
101: ('switching_protocols',), |
|||
102: ('processing',), |
|||
103: ('checkpoint',), |
|||
122: ('uri_too_long', 'request_uri_too_long'), |
|||
200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'), |
|||
201: ('created',), |
|||
202: ('accepted',), |
|||
203: ('non_authoritative_info', 'non_authoritative_information'), |
|||
204: ('no_content',), |
|||
205: ('reset_content', 'reset'), |
|||
206: ('partial_content', 'partial'), |
|||
207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'), |
|||
208: ('already_reported',), |
|||
226: ('im_used',), |
|||
|
|||
# Redirection. |
|||
300: ('multiple_choices',), |
|||
301: ('moved_permanently', 'moved', '\\o-'), |
|||
302: ('found',), |
|||
303: ('see_other', 'other'), |
|||
304: ('not_modified',), |
|||
305: ('use_proxy',), |
|||
306: ('switch_proxy',), |
|||
307: ('temporary_redirect', 'temporary_moved', 'temporary'), |
|||
308: ('permanent_redirect', |
|||
'resume_incomplete', 'resume',), # These 2 to be removed in 3.0 |
|||
|
|||
# Client Error. |
|||
400: ('bad_request', 'bad'), |
|||
401: ('unauthorized',), |
|||
402: ('payment_required', 'payment'), |
|||
403: ('forbidden',), |
|||
404: ('not_found', '-o-'), |
|||
405: ('method_not_allowed', 'not_allowed'), |
|||
406: ('not_acceptable',), |
|||
407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'), |
|||
408: ('request_timeout', 'timeout'), |
|||
409: ('conflict',), |
|||
410: ('gone',), |
|||
411: ('length_required',), |
|||
412: ('precondition_failed', 'precondition'), |
|||
413: ('request_entity_too_large',), |
|||
414: ('request_uri_too_large',), |
|||
415: ('unsupported_media_type', 'unsupported_media', 'media_type'), |
|||
416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'), |
|||
417: ('expectation_failed',), |
|||
418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'), |
|||
421: ('misdirected_request',), |
|||
422: ('unprocessable_entity', 'unprocessable'), |
|||
423: ('locked',), |
|||
424: ('failed_dependency', 'dependency'), |
|||
425: ('unordered_collection', 'unordered'), |
|||
426: ('upgrade_required', 'upgrade'), |
|||
428: ('precondition_required', 'precondition'), |
|||
429: ('too_many_requests', 'too_many'), |
|||
431: ('header_fields_too_large', 'fields_too_large'), |
|||
444: ('no_response', 'none'), |
|||
449: ('retry_with', 'retry'), |
|||
450: ('blocked_by_windows_parental_controls', 'parental_controls'), |
|||
451: ('unavailable_for_legal_reasons', 'legal_reasons'), |
|||
499: ('client_closed_request',), |
|||
|
|||
# Server Error. |
|||
500: ('internal_server_error', 'server_error', '/o\\', '✗'), |
|||
501: ('not_implemented',), |
|||
502: ('bad_gateway',), |
|||
503: ('service_unavailable', 'unavailable'), |
|||
504: ('gateway_timeout',), |
|||
505: ('http_version_not_supported', 'http_version'), |
|||
506: ('variant_also_negotiates',), |
|||
507: ('insufficient_storage',), |
|||
509: ('bandwidth_limit_exceeded', 'bandwidth'), |
|||
510: ('not_extended',), |
|||
511: ('network_authentication_required', 'network_auth', 'network_authentication'), |
|||
} |
|||
|
|||
codes = LookupDict(name='status_codes') |
|||
|
|||
def _init(): |
|||
for code, titles in _codes.items(): |
|||
for title in titles: |
|||
setattr(codes, title, code) |
|||
if not title.startswith(('\\', '/')): |
|||
setattr(codes, title.upper(), code) |
|||
|
|||
def doc(code): |
|||
names = ', '.join('``%s``' % n for n in _codes[code]) |
|||
return '* %d: %s' % (code, names) |
|||
|
|||
global __doc__ |
|||
__doc__ = (__doc__ + '\n' + |
|||
'\n'.join(doc(code) for code in sorted(_codes)) |
|||
if __doc__ is not None else None) |
|||
|
|||
_init() |
Some files were not shown because too many files changed in this diff
Loading…
Reference in new issue