1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 """Nodeprep and resourceprep stringprep profiles.
21
22 Normative reference:
23 - `RFC 3920 <http://www.ietf.org/rfc/rfc3920.txt>`__
24 """
25
26 __revision__="$Id: xmppstringprep.py,v 1.16 2004/10/07 22:28:04 jajcus Exp $"
27 __docformat__="restructuredtext en"
28
29 import stringprep
30 import unicodedata
31 from pyxmpp.exceptions import StringprepError
32
class LookupFunction:
    """Adapter that lets a plain function act as an RFC 3454 lookup table.

    :Ivariables:
        - `lookup`: the wrapped function, called as ``lookup(c)``.
    """

    def __init__(self, function):
        """Store `function` as this object's `lookup` callable.

        :Parameters:
            - `function`: callable taking the character to look up and
              returning a `bool` value or the mapped value for it.
        """
        self.lookup = function
45
class LookupTable:
    """Class for looking up RFC 3454 tables using a dictionary and/or
    a list of ranges."""

    def __init__(self, singles, ranges):
        """Initialize `LookupTable` object.

        :Parameters:
            - `singles`: dictionary mapping Unicode characters into the
              values returned for them.
            - `ranges`: sequence of ``((start,end),value)`` tuples mapping
              character codes in the inclusive range (start,end) to the
              value.  `lookup` stops scanning at the first range whose
              start is above the looked-up code, so the ranges are expected
              in ascending order.
        """
        self.singles = singles
        self.ranges = ranges

    def lookup(self, c):
        """Do Unicode character lookup.

        :Parameters:
            - `c`: Unicode character to look up.

        :return: the value from `singles` if `c` is a key there, otherwise
            the value of the first range containing ``ord(c)``, otherwise
            `None`.
        """
        # `in` instead of the deprecated dict.has_key(), which no longer
        # exists on Python 3.
        if c in self.singles:
            return self.singles[c]
        code = ord(c)
        for (start, end), value in self.ranges:
            if code < start:
                # Every later range starts even higher: nothing can match.
                return None
            if code <= end:
                return value
        return None
74
# RFC 3454 table A.1, wrapped so it can be listed in a profile's
# `unassigned` tables.
A_1 = LookupFunction(stringprep.in_table_a1)
76
def b1_mapping(uc):
    """Map a character through RFC 3454 table B.1.

    :Parameters:
        - `uc`: Unicode character to map.

    :returns: u"" when `uc` is listed in table B.1, `None` otherwise.
    """
    if not stringprep.in_table_b1(uc):
        return None
    return u""
88
# B.* mapping tables.
B_1 = LookupFunction(b1_mapping)
B_2 = LookupFunction(stringprep.map_table_b2)
B_3 = LookupFunction(stringprep.map_table_b3)

# C.* membership tables; the profiles list them as `prohibited` entries.
C_1_1 = LookupFunction(stringprep.in_table_c11)
C_1_2 = LookupFunction(stringprep.in_table_c12)
C_2_1 = LookupFunction(stringprep.in_table_c21)
C_2_2 = LookupFunction(stringprep.in_table_c22)
C_3 = LookupFunction(stringprep.in_table_c3)
C_4 = LookupFunction(stringprep.in_table_c4)
C_5 = LookupFunction(stringprep.in_table_c5)
C_6 = LookupFunction(stringprep.in_table_c6)
C_7 = LookupFunction(stringprep.in_table_c7)
C_8 = LookupFunction(stringprep.in_table_c8)
C_9 = LookupFunction(stringprep.in_table_c9)

# D.* membership tables; consulted by the bidirectional check.
D_1 = LookupFunction(stringprep.in_table_d1)
D_2 = LookupFunction(stringprep.in_table_d2)
105
def nfkc(data):
    """Do NFKC normalization of Unicode data.

    :Parameters:
        - `data`: list (or tuple) of Unicode characters, or a Unicode
          string.

    :return: normalized Unicode string.
    """
    # isinstance() also accepts list subclasses; tuples are joined too, so
    # any sequence of characters produced by a mapping step is handled.
    if isinstance(data, (list, tuple)):
        data = u"".join(data)
    return unicodedata.normalize("NFKC", data)
116
class Profile:
    """Base class for stringprep profiles."""

    # Class-level list: one object shared by every Profile instance.
    cache_items = []

    def __init__(self, unassigned, mapping, normalization, prohibited, bidi=1):
        """Initialize Profile object.

        :Parameters:
            - `unassigned`: the lookup tables with unassigned codes
            - `mapping`: the lookup tables with character mappings
            - `normalization`: the normalization function (a false value
              disables normalization)
            - `prohibited`: the lookup tables with prohibited characters
            - `bidi`: if true then bidirectional checks should be done
        """
        # Each instance starts with its own empty cache dictionary.
        self.cache = {}
        self.bidi = bidi
        self.prohibited = prohibited
        self.normalization = normalization
        self.mapping = mapping
        self.unassigned = unassigned
136
171
173 """Complete string preparation procedure for 'query' strings.
174 (without checks for unassigned codes)
175
176 :Parameters:
177 - `s`: Unicode string to prepare.
178
179 :return: prepared string
180
181 :raise StringprepError: if the preparation fails
182 """
183
184 s=self.map(s)
185 if self.normalization:
186 s=self.normalization(s)
187 s=self.prohibit(s)
188 if self.bidi:
189 s=self.check_bidi(s)
190 if type(s) is list:
191 s=u"".string.join(s)
192 return s
193
195 """Mapping part of string preparation."""
196 r=[]
197 for c in s:
198 rc=None
199 for t in self.mapping:
200 rc=t.lookup(c)
201 if rc is not None:
202 break
203 if rc is not None:
204 r.append(rc)
205 else:
206 r.append(c)
207 return r
208
210 """Checks for prohibited characters."""
211 for c in s:
212 for t in self.prohibited:
213 if t.lookup(c):
214 raise StringprepError,"Prohibited character: %r" % (c,)
215 return s
216
218 """Checks for unassigned character codes."""
219 for c in s:
220 for t in self.unassigned:
221 if t.lookup(c):
222 raise StringprepError,"Unassigned character: %r" % (c,)
223 return s
224
226 """Checks if sting is valid for bidirectional printing."""
227 has_l=0
228 has_ral=0
229 for c in s:
230 if D_1.lookup(c):
231 has_l=1
232 elif D_2.lookup(c):
233 has_l=1
234 if has_l and has_ral:
235 raise StringprepError,"Both RandALCat and LCat characters present"
236 if has_l and (D_1.lookup(s[0]) is None or D_1.lookup(s[-1]) is None):
237 raise StringprepError,"The first and the last character must be RandALCat"
238 return s
239
# Profile for the node part: maps through B.1 and B.2, normalizes with
# NFKC, and prohibits the C.* tables plus eight extra ASCII characters.
nodeprep = Profile(
    unassigned=(A_1,),
    mapping=(B_1, B_2),
    normalization=nfkc,
    prohibited=(
        C_1_1, C_1_2, C_2_1, C_2_2, C_3, C_4, C_5, C_6, C_7, C_8, C_9,
        LookupTable(
            {
                u'"': True,
                u'&': True,
                u"'": True,
                u"/": True,
                u":": True,
                u"<": True,
                u">": True,
                u"@": True,
            },
            (),
        ),
    ),
    bidi=1,
)

# Profile for the resource part: maps through B.1 only, and prohibits the
# C.* tables except C.1.1.
resourceprep = Profile(
    unassigned=(A_1,),
    mapping=(B_1,),
    normalization=nfkc,
    prohibited=(C_1_2, C_2_1, C_2_2, C_3, C_4, C_5, C_6, C_7, C_8, C_9),
    bidi=1,
)

# NOTE(review): presumably the size limit for the profiles' result cache;
# the code consuming it is not visible here -- confirm.
stringprep_cache_size = 1000
272
273
274