annotate venv/lib/python2.7/site-packages/requests/packages/chardet/charsetgroupprober.py @ 0:d67268158946 draft

planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author bcclaywell
date Mon, 12 Oct 2015 17:43:33 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
1 ######################## BEGIN LICENSE BLOCK ########################
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
2 # The Original Code is Mozilla Communicator client code.
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
3 #
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
4 # The Initial Developer of the Original Code is
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
5 # Netscape Communications Corporation.
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
6 # Portions created by the Initial Developer are Copyright (C) 1998
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
7 # the Initial Developer. All Rights Reserved.
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
8 #
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
9 # Contributor(s):
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
10 # Mark Pilgrim - port to Python
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
11 #
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
12 # This library is free software; you can redistribute it and/or
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
13 # modify it under the terms of the GNU Lesser General Public
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
14 # License as published by the Free Software Foundation; either
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
15 # version 2.1 of the License, or (at your option) any later version.
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
16 #
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
17 # This library is distributed in the hope that it will be useful,
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
20 # Lesser General Public License for more details.
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
21 #
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
22 # You should have received a copy of the GNU Lesser General Public
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
23 # License along with this library; if not, write to the Free Software
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
25 # 02110-1301 USA
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
26 ######################### END LICENSE BLOCK #########################
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
27
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
28 from . import constants
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
29 import sys
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
30 from .charsetprober import CharSetProber
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
31
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
32
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
33 class CharSetGroupProber(CharSetProber):
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
34 def __init__(self):
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
35 CharSetProber.__init__(self)
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
36 self._mActiveNum = 0
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
37 self._mProbers = []
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
38 self._mBestGuessProber = None
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
39
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
40 def reset(self):
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
41 CharSetProber.reset(self)
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
42 self._mActiveNum = 0
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
43 for prober in self._mProbers:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
44 if prober:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
45 prober.reset()
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
46 prober.active = True
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
47 self._mActiveNum += 1
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
48 self._mBestGuessProber = None
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
49
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
50 def get_charset_name(self):
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
51 if not self._mBestGuessProber:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
52 self.get_confidence()
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
53 if not self._mBestGuessProber:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
54 return None
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
55 # self._mBestGuessProber = self._mProbers[0]
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
56 return self._mBestGuessProber.get_charset_name()
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
57
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
58 def feed(self, aBuf):
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
59 for prober in self._mProbers:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
60 if not prober:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
61 continue
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
62 if not prober.active:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
63 continue
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
64 st = prober.feed(aBuf)
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
65 if not st:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
66 continue
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
67 if st == constants.eFoundIt:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
68 self._mBestGuessProber = prober
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
69 return self.get_state()
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
70 elif st == constants.eNotMe:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
71 prober.active = False
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
72 self._mActiveNum -= 1
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
73 if self._mActiveNum <= 0:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
74 self._mState = constants.eNotMe
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
75 return self.get_state()
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
76 return self.get_state()
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
77
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
78 def get_confidence(self):
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
79 st = self.get_state()
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
80 if st == constants.eFoundIt:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
81 return 0.99
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
82 elif st == constants.eNotMe:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
83 return 0.01
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
84 bestConf = 0.0
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
85 self._mBestGuessProber = None
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
86 for prober in self._mProbers:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
87 if not prober:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
88 continue
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
89 if not prober.active:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
90 if constants._debug:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
91 sys.stderr.write(prober.get_charset_name()
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
92 + ' not active\n')
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
93 continue
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
94 cf = prober.get_confidence()
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
95 if constants._debug:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
96 sys.stderr.write('%s confidence = %s\n' %
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
97 (prober.get_charset_name(), cf))
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
98 if bestConf < cf:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
99 bestConf = cf
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
100 self._mBestGuessProber = prober
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
101 if not self._mBestGuessProber:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
102 return 0.0
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
103 return bestConf
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
104 # else:
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
105 # self._mBestGuessProber = self._mProbers[0]
d67268158946 planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff changeset
106 # return self._mBestGuessProber.get_confidence()