1
16
17 import org.as2lib.regexp.AsciiUtil;
18 import org.as2lib.regexp.Pattern;
19 import org.as2lib.regexp.node.*;
20
21 import org.as2lib.util.StringUtil;
22 import org.as2lib.data.holder.map.HashMap;
23
31
32
/**
 * PosixPattern extends Pattern with the lookup of named character classes
 * used by the "p{...}" style constructs: Unicode block names (prefixed with
 * "In"), Unicode general categories (optionally prefixed with "Is") and
 * POSIX-like ASCII classes such as "Alnum" or "XDigit".
 *
 * The names and the matching nodes are kept in parallel arrays
 * (familyNames/familyNodes and categoryNames/categoryNodes); entry i of a
 * name array belongs to entry i of the corresponding node array. The arrays
 * are turned into hash maps lazily on first lookup.
 */
class org.as2lib.regexp.PosixPattern extends Pattern {

    /** Lazily built map: block name -> node (see getFamilyNode). */
    private static var families:HashMap = null;

    /** Lazily built map: category / class name -> node (see getCategoryNode). */
    private static var categories:HashMap = null;

    /**
     * Unicode block names, index-aligned with familyNodes.
     * Names are written without spaces, e.g. "LatinExtended-B".
     */
    private static var familyNames:Array = [
        "BasicLatin",
        "Latin-1Supplement",
        "LatinExtended-A",
        "LatinExtended-B",
        "IPAExtensions",
        "SpacingModifierLetters",
        "CombiningDiacriticalMarks",
        "Greek",
        "Cyrillic",
        "Armenian",
        "Hebrew",
        "Arabic",
        "Syriac",
        "Thaana",
        "Devanagari",
        "Bengali",
        "Gurmukhi",
        "Gujarati",
        "Oriya",
        "Tamil",
        "Telugu",
        "Kannada",
        "Malayalam",
        "Sinhala",
        "Thai",
        "Lao",
        "Tibetan",
        "Myanmar",
        "Georgian",
        "HangulJamo",
        "Ethiopic",
        "Cherokee",
        "UnifiedCanadianAboriginalSyllabics",
        "Ogham",
        "Runic",
        "Khmer",
        "Mongolian",
        "LatinExtendedAdditional",
        "GreekExtended",
        "GeneralPunctuation",
        "SuperscriptsandSubscripts",
        "CurrencySymbols",
        "CombiningMarksforSymbols",
        "LetterlikeSymbols",
        "NumberForms",
        "Arrows",
        "MathematicalOperators",
        "MiscellaneousTechnical",
        "ControlPictures",
        "OpticalCharacterRecognition",
        "EnclosedAlphanumerics",
        "BoxDrawing",
        "BlockElements",
        "GeometricShapes",
        "MiscellaneousSymbols",
        "Dingbats",
        "BraillePatterns",
        "CJKRadicalsSupplement",
        "KangxiRadicals",
        "IdeographicDescriptionCharacters",
        "CJKSymbolsandPunctuation",
        "Hiragana",
        "Katakana",
        "Bopomofo",
        "HangulCompatibilityJamo",
        "Kanbun",
        "BopomofoExtended",
        "EnclosedCJKLettersandMonths",
        "CJKCompatibility",
        "CJKUnifiedIdeographsExtensionA",
        "CJKUnifiedIdeographs",
        "YiSyllables",
        "YiRadicals",
        "HangulSyllables",
        "HighSurrogates",
        "HighPrivateUseSurrogates",
        "LowSurrogates",
        "PrivateUse",
        "CJKCompatibilityIdeographs",
        "AlphabeticPresentationForms",
        "ArabicPresentationForms-A",
        "CombiningHalfMarks",
        "CJKCompatibilityForms",
        "SmallFormVariants",
        "ArabicPresentationForms-B",
        "Specials",
        "HalfwidthandFullwidthForms"
    ];

    /**
     * Category and character class names, index-aligned with categoryNodes:
     * two-letter general categories, one-letter category groups, the
     * "LD"/"L1" shortcuts, "all"/"ASCII" and the POSIX-like class names.
     */
    private static var categoryNames:Array = [
        // Two-letter general categories.
        "Cn",
        "Lu",
        "Ll",
        "Lt",
        "Lm",
        "Lo",
        "Mn",
        "Me",
        "Mc",
        "Nd",
        "Nl",
        "No",
        "Zs",
        "Zl",
        "Zp",
        "Cc",
        "Cf",
        "Co",
        "Cs",
        "Pd",
        "Ps",
        "Pe",
        "Pc",
        "Po",
        "Sm",
        "Sc",
        "Sk",
        "So",

        // One-letter groups of the categories above.
        "L",
        "M",
        "N",
        "Z",
        "C",
        "P",
        "S",

        // Shortcuts: "LD" pairs with the L mask plus the Nd bit,
        // "L1" pairs with the range 0x00-0xFF.
        "LD",
        "L1",

        "all",
        "ASCII",

        // POSIX-like classes.
        "Alnum",
        "Alpha",
        "Blank",
        "Cntrl",
        "Digit",
        "Graph",

        "Lower",
        "Print",
        "Punct",

        "Space",
        "Upper",
        "XDigit"
    ];

    /**
     * Nodes for the Unicode blocks, index-aligned with familyNames.
     * Each Range argument is a single number; judging by the explicit
     * (lower<<16)|upper expressions used in categoryNodes, the high 16 bits
     * hold the first code unit of the block and the low 16 bits the last one
     * (Range itself is defined elsewhere -- confirm there).
     */
    private static var familyNodes:Array = [
        new Range(0x0000007F),
        new Range(0x008000FF),
        new Range(0x0100017F),
        new Range(0x0180024F),
        new Range(0x025002AF),
        new Range(0x02B002FF),
        new Range(0x0300036F),
        new Range(0x037003FF),
        new Range(0x040004FF),
        new Range(0x0530058F),
        new Range(0x059005FF),
        new Range(0x060006FF),
        new Range(0x0700074F),
        new Range(0x078007BF),
        new Range(0x0900097F),
        new Range(0x098009FF),
        new Range(0x0A000A7F),
        new Range(0x0A800AFF),
        new Range(0x0B000B7F),
        new Range(0x0B800BFF),
        new Range(0x0C000C7F),
        new Range(0x0C800CFF),
        new Range(0x0D000D7F),
        new Range(0x0D800DFF),
        new Range(0x0E000E7F),
        new Range(0x0E800EFF),
        new Range(0x0F000FFF),
        new Range(0x1000109F),
        new Range(0x10A010FF),
        new Range(0x110011FF),
        new Range(0x1200137F),
        new Range(0x13A013FF),
        new Range(0x1400167F),
        new Range(0x1680169F),
        new Range(0x16A016FF),
        new Range(0x178017FF),
        new Range(0x180018AF),
        new Range(0x1E001EFF),
        new Range(0x1F001FFF),
        new Range(0x2000206F),
        new Range(0x2070209F),
        new Range(0x20A020CF),
        new Range(0x20D020FF),
        new Range(0x2100214F),
        new Range(0x2150218F),
        new Range(0x219021FF),
        new Range(0x220022FF),
        new Range(0x230023FF),
        new Range(0x2400243F),
        new Range(0x2440245F),
        new Range(0x246024FF),
        new Range(0x2500257F),
        new Range(0x2580259F),
        new Range(0x25A025FF),
        new Range(0x260026FF),
        new Range(0x270027BF),
        new Range(0x280028FF),
        new Range(0x2E802EFF),
        new Range(0x2F002FDF),
        new Range(0x2FF02FFF),
        new Range(0x3000303F),
        new Range(0x3040309F),
        new Range(0x30A030FF),
        new Range(0x3100312F),
        new Range(0x3130318F),
        new Range(0x3190319F),
        new Range(0x31A031BF),
        new Range(0x320032FF),
        new Range(0x330033FF),
        new Range(0x34004DB5),
        new Range(0x4E009FFF),
        new Range(0xA000A48F),
        new Range(0xA490A4CF),
        new Range(0xAC00D7A3),
        new Range(0xD800DB7F),
        new Range(0xDB80DBFF),
        new Range(0xDC00DFFF),
        new Range(0xE000F8FF),
        new Range(0xF900FAFF),
        new Range(0xFB00FB4F),
        new Range(0xFB50FDFF),
        new Range(0xFE20FE2F),
        new Range(0xFE30FE4F),
        new Range(0xFE50FE6F),
        new Range(0xFE70FEFE),
        new Specials(),
        new Range(0xFF00FFEF)
    ];

    /**
     * Nodes for the categories and character classes, index-aligned with
     * categoryNames. The two-letter categories each get a Category built
     * from a single bit; the one-letter groups use masks that OR the bits
     * of their members together (e.g. 0x3E = bits 1..5 = Lu, Ll, Lt, Lm, Lo).
     */
    private static var categoryNodes:Array = [
        new Category(1<<0),
        new Category(1<<1),
        new Category(1<<2),
        new Category(1<<3),
        new Category(1<<4),
        new Category(1<<5),
        new Category(1<<6),
        new Category(1<<7),
        new Category(1<<8),
        new Category(1<<9),
        new Category(1<<10),
        new Category(1<<11),
        new Category(1<<12),
        new Category(1<<13),
        new Category(1<<14),
        new Category(1<<15),
        new Category(1<<16),
        // Bit 17 is intentionally skipped: "Co" is bit 18, "Cs" is bit 19,
        // and the group masks below (e.g. C = 0x000D8000) rely on that layout.
        new Category(1<<18),
        new Category(1<<19),
        new Category(1<<20),
        new Category(1<<21),
        new Category(1<<22),
        new Category(1<<23),
        new Category(1<<24),
        new Category(1<<25),
        new Category(1<<26),
        new Category(1<<27),
        new Category(1<<28),

        new Category(0x0000003E),   // L: bits 1-5
        new Category(0x000001C0),   // M: bits 6-8
        new Category(0x00000E00),   // N: bits 9-11
        new Category(0x00007000),   // Z: bits 12-14
        new Category(0x000D8000),   // C: bits 15, 16, 18, 19
        new Category(0x01F00000),   // P: bits 20-24
        new Category(0x1E000000),   // S: bits 25-28

        new Category(0x0000023E),   // LD: the L mask plus bit 9 (Nd)
        new Range(0x000000FF),      // L1: 0x00-0xFF

        new All(),
        new Range(0x0000007F),      // ASCII: 0x00-0x7F

        new Posix(AsciiUtil.ALNUM),
        new Posix(AsciiUtil.ALPHA),
        new Posix(AsciiUtil.BLANK),
        new Posix(AsciiUtil.CNTRL),
        new Range((0x30<<16)|0x39), // Digit: '0'-'9'
        new Posix(AsciiUtil.GRAPH),

        new Range((0x61<<16)|0x7A), // Lower: 'a'-'z'
        new Range(0x0020007E),      // Print: 0x20-0x7E
        new Posix(AsciiUtil.PUNCT),

        new Posix(AsciiUtil.SPACE),
        new Range((0x41<<16)|0x5A), // Upper: 'A'-'Z'
        new Posix(AsciiUtil.XDIGIT)
    ];


    /**
     * Parses a family / category name at the current cursor position and
     * returns a copy of the matching node.
     *
     * The cursor helpers used here (nextChar, readChar, markChar, temp,
     * cursor, patternLength, throwError, fromCharCodeArray) are not defined
     * in this class; they are presumably inherited from Pattern.
     *
     * @param flag passed through to dup() of the node that was found
     * @param singleLetter true if the name is the single character at the
     *        cursor, false if it is a run of characters terminated by "}"
     * @return the result of dup(flag) on the family or category node
     */
    private function parseFamily(flag:Boolean, singleLetter:Boolean):Node {
        nextChar();
        var name:String;

        if (singleLetter) {
            name = chr(temp[cursor]);
            readChar();
        } else {
            var startPos:Number = cursor;
            // Place a "}" marker before scanning and reset it afterwards;
            // this looks like a sentinel that guarantees the loop below
            // terminates even when the pattern lacks the closing brace.
            markChar(0x7D);
            while (readChar() != 0x7D) {
                // just advance until the closing brace has been read
            }
            markChar(0);
            var endPos:Number = cursor;
            if (endPos > patternLength) {
                throwError("Unclosed character family", arguments);
            }
            if (startPos + 1 >= endPos) {
                throwError("Empty character family", arguments);
            }
            // endPos is one past the "}" that was read, so the name itself
            // occupies [startPos, endPos - 1).
            name = fromCharCodeArray(temp.slice(startPos, endPos - 1));
        }

        // "In<Block>" selects a Unicode block.
        if (StringUtil.startsWith(name, "In")) {
            name = name.substring(2, name.length);
            return getFamilyNode(name).dup(flag);
        }
        // "Is<Category>" is an optional prefix for categories / classes.
        if (StringUtil.startsWith(name, "Is")) {
            name = name.substring(2, name.length);
        }
        return getCategoryNode(name).dup(flag);
    }

    /**
     * Returns the shared node registered for the given Unicode block name,
     * building the name -> node map on first use.
     *
     * @param name block name without the "In" prefix
     * @return the node for the block; reports "Unknown character family"
     *         through throwFamilyError if the name is not registered
     */
    private function getFamilyNode(name:String):Node {
        if (families == null) {
            var total:Number = familyNodes.length;
            families = new HashMap();
            for (var idx:Number = 0; idx < total; idx++) {
                families.put(familyNames[idx], familyNodes[idx]);
            }
            // Earlier versions of this table spelled the two "-B" blocks as
            // "-Bound". Keep those spellings as aliases so existing patterns
            // continue to work.
            families.put("LatinExtended-Bound", families.get("LatinExtended-B"));
            families.put("ArabicPresentationForms-Bound", families.get("ArabicPresentationForms-B"));
        }
        var found:Node = Node(families.get(name));
        if (found != null) {
            return found;
        }

        throwFamilyError(name, "Unknown character family", arguments);
        // Only reached if throwError does not interrupt execution.
        return null;
    }

    /**
     * Returns the shared node registered for the given category or
     * character class name, building the name -> node map on first use.
     *
     * @param name category / class name without the optional "Is" prefix
     * @return the node for the category; reports "Unknown character
     *         category" through throwFamilyError if the name is not registered
     */
    private function getCategoryNode(name:String):Node {
        if (categories == null) {
            var total:Number = categoryNodes.length;
            categories = new HashMap();
            for (var idx:Number = 0; idx < total; idx++) {
                categories.put(categoryNames[idx], categoryNodes[idx]);
            }
        }
        var found:Node = Node(categories.get(name));
        if (found != null) {
            return found;
        }

        throwFamilyError(name, "Unknown character category", arguments);
        // Only reached if throwError does not interrupt execution.
        return null;
    }

    /**
     * Reports an unknown family / category name via throwError. The message
     * has the form: type + " {" + name + "}" (the braces are produced with
     * chr(0x7B) and chr(0x7D)).
     *
     * @param name the name that could not be resolved
     * @param type leading text of the error message
     * @param args arguments object of the calling function, forwarded to
     *        throwError
     */
    private function throwFamilyError(name:String, type:String, args:FunctionArguments):Void {
        throwError(type + " " + chr(0x7B) + name + chr(0x7D), args);
    }

    /**
     * Creates a new PosixPattern; both arguments are handed unchanged to
     * the Pattern constructor.
     *
     * @param newPattern the pattern source string
     * @param newFlags the flags value for the pattern
     */
    public function PosixPattern(newPattern:String, newFlags:Number) {
        super(newPattern, newFlags);
    }

}