1
16
17 import org.as2lib.regexp.AsciiUtil;
18 import org.as2lib.regexp.PatternSyntaxException;
19 import org.as2lib.regexp.Matcher;
20 import org.as2lib.regexp.node.*;
21 import org.as2lib.core.BasicClass;
22
23
31
32 class org.as2lib.regexp.Pattern extends BasicClass
33 {
34
35
38 public static var UNIX_LINES:Number = 0x01;
39
40
43 public static var CASE_INSENSITIVE:Number = 0x02;
44
45
48 public static var COMMENTS:Number = 0x04;
49
50
53 public static var MULTILINE:Number = 0x08;
54
55
58 public static var DOTALL:Number = 0x20;
59
60
63 public static var UNICODE_CASE:Number = 0x40;
64
65
66
69 private var pattern:String;
70
71
74 private var flags:Number;
75
76
79 private var root:Node;
80
81
86 public var matchRoot:Node;
87
88
91 private var buffer:Array;
92
93
96 private var groupNodes:Array;
97
98
101 private var temp:Array;
102
103
107 private var groupCount:Number;
108
109
113 private var localCount:Number;
114
115
119 private var cursor:Number;
120
121
124 private var patternLength:Number;
125
126
127 public function Pattern(newPattern:String, newFlags:Number) {
128 super();
129
130 pattern = newPattern;
131 flags = (newFlags != null) ? newFlags : 0;
132
133 cursor = 0;
134
135
136 groupCount = 1;
137 localCount = 0;
138
139 if (pattern.length > 0) {
140 compile();
141 } else {
142 root = new Start(LASTACCEPT);
143 matchRoot = LASTACCEPT;
144 }
145 }
146
147 public function getPattern(Void):String {
148 return pattern;
149 }
150
151 public function getMatcher(input:String):Matcher {
152 var m:Matcher = new Matcher(this, input);
153 return m;
154 }
155
156 public function getFlags(Void):Number {
157 return flags;
158 }
159
160 public static function matches(pattern:String, input:String):Boolean {
161 var p:Pattern = new Pattern(pattern);
162 var m:Matcher = p.getMatcher(input);
163 return m.matches();
164 }
165
166 public function split(input:String, limit:Number):Array {
167 if (limit == null) limit = 0;
168
169 var index:Number = 0;
170 var matchLimited:Boolean = limit > 0;
171 var matchList:Array = new Array();
172 var m:Matcher = getMatcher(input);
173
174
175 while (m.find()) {
176 if (!matchLimited || matchList.length < limit - 1) {
177 var match:String = input.substring(index, m.getStartIndex());
178 matchList.push(match);
179 index = m.getEndIndex();
180 } else if (matchList.length == limit - 1) {
181 var match:String = input.substring(index, input.length);
182 matchList.push(match);
183 index = m.getEndIndex();
184 }
185 }
186
187
188 if (index == 0) return [input.toString()];
189
190
191 if (!matchLimited || matchList.length < limit) {
192 matchList.push(input.substring(index, input.length));
193 }
194
195
196 var resultSize:Number = matchList.length;
197 if (limit == 0) {
198 while (resultSize > 0 && matchList[resultSize-1].equals("")) {
199 resultSize--;
200 }
201 }
202 return matchList.slice(0, resultSize);
203 }
204
205 private function compile(Void):Void {
206
207 patternLength = pattern.length;
208 temp = new Array(patternLength + 2);
209
210
211 temp = toCharCodeArray(pattern);
212 temp[patternLength] = 0;
213 temp[patternLength + 1] = 0;
214
215
216 buffer = new Array(32);
217 groupNodes = new Array(10);
218
219
220 matchRoot = parseExpression(LASTACCEPT);
221
222
223 if (patternLength != cursor) {
224 if (peekChar() == ord(")")) {
225 throwError("Unmatched closing ')'", arguments);
226 } else {
227 throwError("Unexpected internal error", arguments);
228 }
229 }
230
231
232 if (matchRoot instanceof Slice) {
233 root = BnM.optimize(matchRoot);
234 if (root == matchRoot) {
235 root = new Start(matchRoot);
236 }
237 } else if (matchRoot instanceof Begin || matchRoot instanceof First) {
238 root = matchRoot;
239 } else {
240 root = new Start(matchRoot);
241 }
242
243
244 temp = null;
245 buffer = null;
246 groupNodes = null;
247 patternLength = 0;
248 }
249
250 private static function printObjectTree(node:Node):Void {
251 while (node != null) {
252 if (node instanceof Prolog) {
253 trace(node);
254 printObjectTree((Prolog(node)).getLoop());
255 trace("**** end contents prolog loop");
256 } else if (node instanceof Loop) {
257 trace(node);
258 printObjectTree((Loop(node)).getBody());
259 trace("**** end contents Loop body");
260 } else if (node instanceof Curly) {
261 trace(node);
262 printObjectTree((Curly(node)).getAtom());
263 trace("**** end contents Curly body");
264 } else if (node instanceof GroupTail) {
265 trace(node);
266 trace("Tail next is "+node.getNext());
267 return;
268 } else {
269 trace(node);
270 }
271 node = node.getNext();
272 if (node != null) {
273 trace("->next:");
274 }
275 if (node == Pattern.ACCEPT) {
276 trace("Accept Node");
277 node = null;
278 }
279 }
280 }
281
282 private function hasFlag(f:Number):Boolean {
283 return (flags & f) > 0;
284 }
285
286 private function acceptChar(ch:Number, s:String):Void {
287 var testChar:Number = temp[cursor++];
288 if (hasFlag(COMMENTS)) {
289 testChar = parsePastWhitespace(testChar);
290 }
291 if (ch != testChar) {
292 throwError(s, arguments);
293 }
294 }
295
296 private function markChar(c:Number):Void {
297 temp[patternLength] = c;
298 }
299
300 private function peekChar(Void):Number {
301 var ch:Number = temp[cursor];
302 if (hasFlag(COMMENTS)) {
303 ch = peekPastWhitespace(ch);
304 }
305 return ch;
306 }
307
308 private function readChar(Void):Number {
309 var ch:Number = temp[cursor++];
310 if (hasFlag(COMMENTS)) {
311 ch = parsePastWhitespace(ch);
312 }
313 return ch;
314 }
315
316 private function readEscapedChar(Void):Number {
317 var ch:Number = temp[cursor++];
318 return ch;
319 }
320
321 private function nextChar(Void):Number {
322 var ch:Number = temp[++cursor];
323 if (hasFlag(COMMENTS)) {
324 ch = peekPastWhitespace(ch);
325 }
326 return ch;
327 }
328
329 private function nextEscapedChar(Void):Number {
330 var ch:Number = temp[++cursor];
331 return ch;
332 }
333
334 private function peekPastWhitespace(ch:Number):Number {
335 while (AsciiUtil.isSpace(ch) || ch == ord("#")) {
336 while (AsciiUtil.isSpace(ch)) {
337 ch = temp[++cursor];
338 }
339 if (ch == ord("#")) {
340 ch = peekPastLine();
341 }
342 }
343 return ch;
344 }
345
346 private function parsePastWhitespace(ch:Number):Number {
347 while (AsciiUtil.isSpace(ch) || ch == ord("#")) {
348 while (AsciiUtil.isSpace(ch)) {
349 ch = temp[cursor++];
350 }
351 if (ch == ord("#")) {
352 ch = parsePastLine();
353 }
354 }
355 return ch;
356 }
357
358 private function parsePastLine(Void):Number {
359 var ch:Number = temp[cursor++];
360 while (ch != 0 && !isLineSeparator(ch)) {
361 ch = temp[cursor++];
362 }
363 return ch;
364 }
365
366 private function peekPastLine(Void):Number {
367 var ch:Number = temp[++cursor];
368 while (ch != 0 && !isLineSeparator(ch)) {
369 ch = temp[++cursor];
370 }
371 return ch;
372 }
373
374 private function isLineSeparator(ch:Number):Boolean {
375 if (hasFlag(UNIX_LINES)) {
376 return ch == ord("\n");
377 } else {
378 return (ch == ord("\n") ||
379 ch == ord("r") ||
380 (ch|1) == ord("u2029") ||
381 ch == ord("u0085"));
382 }
383 }
384
385 private function skipChar(Void):Number {
386 var i:Number = cursor;
387 var ch:Number = temp[i+1];
388 cursor = i + 2;
389 return ch;
390 }
391
392 private function unreadChar(Void):Void {
393 cursor--;
394 }
395
396 private function throwError(desc:String, args:FunctionArguments):Void {
397 throw new PatternSyntaxException(desc, this, args);
398 }
399
400 private function parseExpression(end:Node):Node {
401 var prev:Node = null;
402 while (true) {
403 var node:Node = parseSequence(end);
404 if (prev == null) {
405 prev = node;
406 } else {
407 prev = new Branch(prev, node);
408 }
409 if (peekChar() != ord("|")) {
410 return prev;
411 }
412 nextChar();
413 }
414 }
415
416 private function parseSequence(end:Node):Node {
417 var head:Node = null;
418 var tail:Node = null;
419 var node:Node = null;
420 var i, j:Number;
421 var ch :Number;
422 while (true) {
423 ch = peekChar();
424 if (ch == ord("(")) {
425
426
427 node = parseGroup();
428
429
430 if (node == null) continue;
431
432 if (head == null) {
433 head = node;
434 } else {
435 tail.setNext(node);
436 }
437
438
439 tail = root;
440 continue;
441 } else if (ch == ord("[")) {
442 node = parseClass(true);
443 } else if (ch == ord("\\")) {
444 ch = nextEscapedChar();
445 if (ch == ord("p") || ch == ord("P")) {
446 var comp:Boolean = (ch == ord("P"));
447 var oneLetter:Boolean = true;
448 ch = nextChar();
449
450 if (ch != 0x7B) {
451 unreadChar();
452 } else {
453 oneLetter = false;
454 }
455 node = parseFamily(comp, oneLetter);
456 } else {
457 unreadChar();
458 node = parseAtom();
459 }
460 }
461 else if (ch == ord("^")) {
462 nextChar();
463 if (hasFlag(MULTILINE)) {
464 if (hasFlag(UNIX_LINES)) {
465 node = new UnixCaret();
466 } else {
467 node = new Caret();
468 }
469 } else {
470 node = new Begin();
471 }
472 }
473 else if (ch == ord("$")) {
474 nextChar();
475 if (hasFlag(UNIX_LINES)) {
476 node = new UnixDollar(hasFlag(MULTILINE));
477 } else {
478 node = new Dollar(hasFlag(MULTILINE));
479 }
480 }
481 else if (ch == ord(".")) {
482 nextChar();
483 if (hasFlag(DOTALL)) {
484 node = new All();
485 } else {
486 if (hasFlag(UNIX_LINES)) {
487 node = new UnixDot();
488 } else {
489 node = new Dot();
490 }
491 }
492 }
493 else if (ch == ord("|") || ch == ord(")")) {
494 break;
495 }
496
497 else if (ch == ord("]") || ch == 0x7D) {
498 node = parseAtom();
499 }
500 else if (ch == ord("?") || ch == ord("*") || ch == ord("+")) {
501 nextChar();
502 throwError("Dangling meta character '" + chr(ch) + "'", arguments);
503 }
504 else {
505 if (ch == 0) {
506 if (cursor >= patternLength) {
507 break;
508 }
509 }
510 node = parseAtom();
511 }
512
513 node = parseClosure(node);
514
515 if (head == null) {
516 head = tail = node;
517 } else {
518 tail.setNext(node);
519 tail = node;
520 }
521 }
522 if (head == null) {
523 return end;
524 }
525 tail.setNext(end);
526 return head;
527 }
528
529 private function parseAtom(Void):Node {
530 var first:Number = 0;
531 var prev:Number = -1;
532 var ch:Number = peekChar();
533 while (true) {
534
535
536 if (ch == 0x2A ||
537 ch == 0x2B ||
538 ch == 0x3F ||
539 ch == 0x7B )
540 {
541 if (first > 1) {
542
543 cursor = prev;
544 first--;
545 }
546
547 } else if (ch == 0x24 ||
548 ch == 0x2E ||
549 ch == 0x5E ||
550 ch == 0x28 ||
551 ch == 0x5B ||
552 ch == 0x7C ||
553 ch == 0x29 )
554 {
555 break;
556
557 } else if (ch == 0x5C) {
558 ch = nextEscapedChar();
559
560 if (ch == 0x70 || ch == 0x50) {
561
562 if (first > 0) {
563
564 unreadChar();
565 } else {
566
567 if (ch == 0x70 || ch == 0x50) {
568 var comp:Boolean = (ch == 0x50);
569 var oneLetter:Boolean = true;
570 ch = nextChar();
571
572 if (ch != 0x7B) {
573 unreadChar();
574 } else {
575 oneLetter = false;
576 }
577 return parseFamily(comp, oneLetter);
578 }
579 }
580 }
581 else {
582 unreadChar();
583 prev = cursor;
584 ch = parseEscape(false, first == 0);
585 if (ch != null) {
586 appendChar(ch, first);
587 first++;
588 ch = peekChar();
589 continue;
590 } else if (first == 0) {
591 return root;
592 }
593
594 cursor = prev;
595 }
596 } else {
597 if (ch == 0) {
598 if (cursor >= patternLength) {
599 break;
600 }
601 }
602 prev = cursor;
603 appendChar(ch, first);
604 first++;
605 ch = nextChar();
606 continue;
607 }
608 break;
609 }
610 if (first == 1) {
611 return ceateSingle(buffer[0]);
612 } else {
613 return createSlice(buffer, first);
614 }
615 }
616
617 private function appendChar(ch:Number, len:Number):Void {
618 buffer[len] = ch;
619 }
620
621 private function parseBackRef(refNum:Number):Node {
622 var done:Boolean = false;
623 while (!done) {
624 var ch:Number = peekChar();
625 switch(ch) {
626 case ord("0"):
627 case ord("1"):
628 case ord("2"):
629 case ord("3"):
630 case ord("4"):
631 case ord("5"):
632 case ord("6"):
633 case ord("7"):
634 case ord("8"):
635 case ord("9"):
636 var newRefNum:Number = (refNum * 10) + (ch - ord("0"));
637
638
639 if (groupCount - 1 < newRefNum) {
640 done = true;
641 break;
642 }
643 refNum = newRefNum;
644 readChar();
645 break;
646 default:
647 done = true;
648 break;
649 }
650 }
651 if (hasFlag(CASE_INSENSITIVE)) {
652 return new BackRefA(refNum);
653 } else {
654 return new BackRef(refNum);
655 }
656 }
657
658 private function parseEscape(inclass:Boolean, create:Boolean):Number {
659 var ch:Number = skipChar();
660 switch (ch) {
661 case ord("0"):
662 return parseOctal();
663 case ord("1"):
664 case ord("2"):
665 case ord("3"):
666 case ord("4"):
667 case ord("5"):
668 case ord("6"):
669 case ord("7"):
670 case ord("8"):
671 case ord("9"):
672 if (inclass) break;
673 if (groupCount < (ch - ord("0"))) {
674 throwError("No such group yet exists at this point in the pattern", arguments);
675 }
676 if (create) {
677 root = parseBackRef(ch - ord("0"));
678 }
679 return null;
680 case ord("A"):
681 if (inclass) break;
682 if (create) root = new Begin();
683 return null;
684 case ord("B"):
685 if (inclass) break;
686 if (create) root = new Bound(Bound.NONE);
687 return null;
688 case ord("C"):
689 break;
690 case ord("D"):
691 if (create) root = new NotPosix(AsciiUtil.DIGIT);
692 return null;
693 case ord("E"):
694 case ord("F"):
695 break;
696 case ord("G"):
697 if (inclass) break;
698 if (create) root = new LastMatch();
699 return null;
700 case ord("H"):
701 case ord("I"):
702 case ord("J"):
703 case ord("K"):
704 case ord("L"):
705 case ord("M"):
706 case ord("N"):
707 case ord("O"):
708 case ord("P"):
709 break;
710 case ord("Q"):
711 if (create) {
712
713
714 var i:Number = cursor;
715 var c:Number;
716 while ((c = readEscapedChar()) != 0) {
717 if (c == ord("\\")) {
718 c = readEscapedChar();
719 if (c == ord("E") || c == 0) break;
720 }
721 }
722 var j:Number = cursor-1;
723 if (c == ord("E")) {
724 j--;
725 } else {
726 unreadChar();
727 }
728 for (var x = i; x < j; x++) {
729 appendChar(temp[x], x-i);
730 }
731 root = createSlice(buffer, j-i);
732 }
733 return null;
734 case ord("R"):
735 break;
736 case ord("S"):
737 if (create) root = new NotPosix(AsciiUtil.SPACE);
738 return null;
739 case ord("T"):
740 case ord("U"):
741 case ord("V"):
742 break;
743 case ord("W"):
744 if (create) root = new NotPosix(AsciiUtil.WORD);
745 return null;
746 case ord("X"):
747 case ord("Y"):
748 break;
749 case ord("Z"):
750 if (inclass) break;
751 if (create) {
752 if (hasFlag(UNIX_LINES)) {
753 root = new UnixDollar(false);
754 } else {
755 root = new Dollar(false);
756 }
757 }
758 return null;
759 case ord("a"):
760 return Number(ord("007"));
761 case ord("b"):
762 if (inclass) break;
763 if (create) root = new Bound(Bound.BOTH);
764 return null;
765 case ord("c"):
766 return parseControl();
767 case ord("d"):
768 if (create) root = new Posix(AsciiUtil.DIGIT);
769 return null;
770 case ord("e"):
771 return Number(ord("033"));
772 case ord("f"):
773 return Number(ord("f"));
774 case ord("g"):
775 case ord("h"):
776 case ord("i"):
777 case ord("j"):
778 case ord("k"):
779 case ord("l"):
780 case ord("m"):
781 break;
782 case ord("n"):
783 return Number(ord("\n"));
784 case ord("o"):
785 case ord("p"):
786 case ord("q"):
787 break;
788 case ord("r"):
789 return Number(ord("r"));
790 case ord("s"):
791 if (create) root = new Posix(AsciiUtil.SPACE);
792 return null;
793 case ord("t"):
794 return Number(ord("\t"));
795 case ord("u"):
796 return parseUnicode();
797 case ord("v"):
798 return Number(ord("013"));
799 case ord("w"):
800 if (create) root = new Posix(AsciiUtil.WORD);
801 return null;
802 case ord("x"):
803 return parseHexal();
804 case ord("y"):
805 break;
806 case ord("z"):
807 if (inclass) break;
808 if (create) root = new End();
809 return null;
810 default:
811 return ch;
812 }
813 throwError("Illegal/unsupported escape squence", arguments);
814 return null;
815 }
816
817 private function parseClass(consume:Boolean):Node {
818 var prev:Node = null;
819 var node:Node = null;
820 var bits:BitClass = new BitClass(false);
821 var include:Boolean = true;
822 var firstInClass:Boolean = true;
823 var ch:Number = nextChar();
824 while (true) {
825 switch (ch) {
826 case ord("^"):
827
828 if (firstInClass) {
829 if (temp[cursor-1] != ord("[")) break;
830 ch = nextChar();
831 include = !include;
832 continue;
833 } else {
834
835 break;
836 }
837 case ord("["):
838 firstInClass = false;
839 node = parseClass(true);
840 if (prev == null) {
841 prev = node;
842 } else {
843 prev = new Add(prev, node);
844 }
845 ch = peekChar();
846 continue;
847 case ord("&"):
848 firstInClass = false;
849 ch = nextChar();
850 if (ch == ord("&")) {
851 ch = nextChar();
852 var rightNode:Node = null;
853 while (ch != ord("]") && ch != ord("&")) {
854 if (ch == ord("[")) {
855 if (rightNode == null) {
856 rightNode = parseClass(true);
857 } else {
858 rightNode = new Add(rightNode, parseClass(true));
859 }
860 } else {
861 unreadChar();
862 rightNode = parseClass(false);
863 }
864 ch = peekChar();
865 }
866 if (rightNode != null) node = rightNode;
867 if (prev == null) {
868 if (rightNode == null) {
869 throwError("Bad class syntax", arguments);
870 } else {
871 prev = rightNode;
872 }
873 } else {
874 prev = new Both(prev, node);
875 }
876 } else {
877
878 unreadChar();
879 break;
880 }
881 continue;
882 case 0:
883 firstInClass = false;
884 if (cursor >= patternLength) {
885 throwError("Unclosed character class", arguments);
886 }
887 break;
888 case ord("]"):
889 firstInClass = false;
890 if (prev != null) {
891 if (consume) nextChar();
892 return prev;
893 }
894 break;
895 default:
896 firstInClass = false;
897 break;
898 }
899 node = parseRange(bits);
900 if (include) {
901 if (prev == null) {
902 prev = node;
903 } else {
904 if (prev != node)
905 {
906 prev = new Add(prev, node);
907 }
908 }
909 } else {
910 if (prev == null) {
911 prev = node.dup(true);
912 } else {
913 if (prev != node) {
914 prev = new Sub(prev, node);
915 }
916 }
917 }
918 ch = peekChar();
919 }
920 }
921
922 private function parseRange(bits:BitClass):Node {
923 var ch:Number = peekChar();
924 if (ch == ord("\\")) {
925 ch = nextEscapedChar();
926 if (ch == ord("p") || ch == ord("P")) {
927 var comp:Boolean = (ch == ord("P"));
928 var oneLetter:Boolean = true;
929
930 ch = nextChar();
931 if (ch == 0x7B) {
932 unreadChar();
933 } else {
934 oneLetter = false;
935 }
936 return parseFamily(comp, oneLetter);
937 } else {
938 unreadChar();
939 ch = parseEscape(true, true);
940 if (ch == null) return root;
941 }
942 } else {
943 ch = parseSingle();
944 }
945 if (ch != null) {
946 if (peekChar() == ord("-")) {
947 var endRange:Number = temp[cursor+1];
948 if (endRange == ord("[")) {
949 if (ch < 256) {
950 return bits.addChar(ch, getFlags());
951 }
952 return ceateSingle(ch);
953 }
954 if (endRange != ord("]")) {
955 nextChar();
956 var m:Number = parseSingle();
957 if (m < ch) {
958 throwError("Illegal character range", arguments);
959 }
960 if (hasFlag(CASE_INSENSITIVE)) {
961 return new RangeA((ch<<16)+m);
962 } else {
963 return new Range((ch<<16)+m);
964 }
965 }
966 }
967 if (ch < 256) {
968 return bits.addChar(ch, getFlags());
969 }
970 return ceateSingle(ch);
971 }
972 throwError("Unexpected character '"+chr(ch)+"'", arguments);
973 }
974
975 private function parseSingle(Void):Number {
976 var ch:Number = peekChar();
977 switch (ch) {
978 case ord("\\"):
979 return parseEscape(true, false);
980 default:
981 nextChar();
982 return ch;
983 }
984 }
985
986 private function parseFamily(flag:Boolean, singleLetter:Boolean):Node {
987 throwError("Families dosn't supported in the current Pattern's implementation", arguments);
988 return null;
989 }
990
991 private function parseGroup(Void):Node {
992 var head:Node = null;
993 var tail:Node = null;
994 var save:Number = flags;
995 root = null;
996 var ch:Number = nextChar();
997 if (ch == ord("?")) {
998 ch = skipChar();
999 switch (ch) {
1000 case ord(":"):
1001 head = createGroup(true);
1002 tail = root;
1003 head.setNext(parseExpression(tail));
1004 break;
1005 case ord("="):
1006 case ord("!"):
1007 head = createGroup(true);
1008 tail = root;
1009 head.setNext(parseExpression(tail));
1010 if (ch == ord("=")) {
1011 head = tail = new Pos(head);
1012 } else {
1013 head = tail = new Neg(head);
1014 }
1015 break;
1016 case ord(">"):
1017 head = createGroup(true);
1018 tail = root;
1019 head.setNext(parseExpression(tail));
1020 head = tail = new Ques(head, INDEPENDENT);
1021 break;
1022 case ord("<"):
1023 ch = readChar();
1024 head = createGroup(true);
1025 tail = root;
1026 head.setNext(parseExpression(tail));
1027 var info:TreeInfo = new TreeInfo();
1028 head.study(info);
1029 if (info.maxValid == false) {
1030 throwError("Look-behind group does not have "
1031 + "an obvious maximum length", arguments);
1032 }
1033 if (ch == ord("=")) {
1034 head = tail = new Behind(head, info.maxLength, info.minLength);
1035 } else if (ch == ord("!")) {
1036 head = tail = new NotBehind(head, info.maxLength, info.minLength);
1037 } else {
1038 throwError("Unknown look-behind group", arguments);
1039 }
1040 break;
1041 case ord("1"):case ord("2"):case ord("3"):case ord("4"):case ord("5"):
1042 case ord("6"):case ord("7"):case ord("8"):case ord("9"):
1043 if (groupNodes[ch - ord("0")] != null) {
1044 head = tail = new GroupRef(groupNodes[ch - ord("0")]);
1045 break;
1046 }
1047 throwError("Unknown group reference", arguments);
1048 case ord("$"):
1049 case ord("@"):
1050 throwError("Unknown group type", arguments);
1051 default:
1052 unreadChar();
1053 addFlag();
1054 ch = readChar();
1055 if (ch == ord(")")) {
1056 return null;
1057 }
1058 if (ch != ord(":")) {
1059 throwError("Unknown inline modifier", arguments);
1060 }
1061 head = createGroup(true);
1062 tail = root;
1063 head.setNext(parseExpression(tail));
1064 break;
1065 }
1066 } else {
1067 head = createGroup(false);
1068 tail = root;
1069 head.setNext(parseExpression(tail));
1070 }
1071
1072 acceptChar(Number(ord(")")), "Unclosed group");
1073 flags = save;
1074
1075
1076 var node:Node = parseClosure(head);
1077 if (node == head) {
1078 root = tail;
1079 return node;
1080 }
1081 if (head == tail) {
1082 root = node;
1083 return node;
1084 }
1085
1086 if (node instanceof Ques) {
1087 var ques:Ques = Ques(node);
1088 if (ques.getType() == POSSESSIVE) {
1089 root = node;
1090 return node;
1091 }
1092
1093 tail.setNext(new Dummy());
1094 tail = tail.getNext();
1095 if (ques.getType() == GREEDY) {
1096 head = new Branch(head, tail);
1097 } else {
1098
1099 head = new Branch(tail, head);
1100 }
1101 root = tail;
1102 return head;
1103 } else if (node instanceof Curly) {
1104 var curly:Curly = Curly(node);
1105 if (curly.getType() == POSSESSIVE) {
1106 root = node;
1107 return node;
1108 }
1109
1110 var info:TreeInfo = new TreeInfo();
1111 if (head.study(info)) {
1112
1113 var temp:GroupTail = GroupTail(tail);
1114 head = root = new GroupCurly(head.getNext(), curly.getCmin(),
1115 curly.getCmax(), curly.getType(),
1116 (GroupTail(tail)).getLocalIndex(),
1117 (GroupTail(tail)).getGroupIndex());
1118 return head;
1119 } else {
1120
1121 var tmp:Number = (GroupHead(head)).getLocalIndex();
1122 var loop:Loop;
1123 if (curly.getType() == GREEDY) {
1124 loop = new Loop(this.localCount, tmp);
1125 } else {
1126
1127 loop = new LazyLoop(this.localCount, tmp);
1128 }
1129 var prolog:Prolog = new Prolog(loop);
1130 this.localCount += 1;
1131 loop.setCmin(curly.getCmin());
1132 loop.setCmax(curly.getCmax());
1133 loop.setBody(head);
1134 tail.setNext(loop);
1135 root = loop;
1136 return prolog;
1137 }
1138 } else if (node instanceof First) {
1139 root = node;
1140 return node;
1141 }
1142 throwError("Internal logic error", arguments);
1143 }
1144
1145 private function createGroup(anonymous:Boolean):Node {
1146 var localIndex:Number = localCount++;
1147 var groupIndex:Number = 0;
1148 if (!anonymous) {
1149 groupIndex = groupCount++;
1150 }
1151 var head:GroupHead = new GroupHead(localIndex);
1152 root = new GroupTail(localIndex, groupIndex);
1153 if (!anonymous && groupIndex < 10) {
1154 groupNodes[groupIndex] = head;
1155 }
1156 return head;
1157 }
1158
1159 private function addFlag(Void):Void {
1160 var ch:Number = peekChar();
1161 while (true) {
1162 switch (ch) {
1163 case ord("i"):
1164 flags |= CASE_INSENSITIVE;
1165 break;
1166 case ord("m"):
1167 flags |= MULTILINE;
1168 break;
1169 case ord("s"):
1170 flags |= DOTALL;
1171 break;
1172 case ord("d"):
1173 flags |= UNIX_LINES;
1174 break;
1175 case ord("u"):
1176 flags |= UNICODE_CASE;
1177 break;
1178 case ord("x"):
1179 flags |= COMMENTS;
1180 break;
1181 case ord("-"):
1182 ch = nextChar();
1183 subFlag();
1184 default:
1185 return;
1186 }
1187 ch = nextChar();
1188 }
1189 }
1190
1191 private function subFlag(Void):Void {
1192 var ch:Number = peekChar();
1193 while (true) {
1194 switch (ch) {
1195 case ord("i"):
1196 flags &= ~CASE_INSENSITIVE;
1197 break;
1198 case ord("m"):
1199 flags &= ~MULTILINE;
1200 break;
1201 case ord("s"):
1202 flags &= ~DOTALL;
1203 break;
1204 case ord("d"):
1205 flags &= ~UNIX_LINES;
1206 break;
1207 case ord("u"):
1208 flags &= ~UNICODE_CASE;
1209 break;
1210 case ord("x"):
1211 flags &= ~COMMENTS;
1212 break;
1213 default:
1214 return;
1215 }
1216 ch = nextChar();
1217 }
1218 }
1219
1220 private static var MAX_REPS:Number = 0x7FFFFFFF;
1221
1222 public static var GREEDY:Number = 0;
1223 public static var LAZY:Number = 1;
1224 public static var POSSESSIVE:Number = 2;
1225 public static var INDEPENDENT:Number = 3;
1226
1227
1228 private function parseClosure(prev:Node):Node {
1229 var atom:Node;
1230 var ch:Number = peekChar();
1231 switch (ch) {
1232 case ord("?"):
1233 ch = nextChar();
1234 if (ch == ord("?")) {
1235 nextChar();
1236 return new Ques(prev, LAZY);
1237 } else if (ch == ord("+")) {
1238 nextChar();
1239 return new Ques(prev, POSSESSIVE);
1240 }
1241 return new Ques(prev, GREEDY);
1242 case ord("*"):
1243 ch = nextChar();
1244 if (ch == ord("?")) {
1245 nextChar();
1246 return new Curly(prev, 0, MAX_REPS, LAZY);
1247 } else if (ch == ord("+")) {
1248 nextChar();
1249 return new Curly(prev, 0, MAX_REPS, POSSESSIVE);
1250 }
1251 return new Curly(prev, 0, MAX_REPS, GREEDY);
1252 case ord("+"):
1253 ch = nextChar();
1254 if (ch == ord("?")) {
1255 nextChar();
1256 return new Curly(prev, 1, MAX_REPS, LAZY);
1257 } else if (ch == ord("+")) {
1258 nextChar();
1259 return new Curly(prev, 1, MAX_REPS, POSSESSIVE);
1260 }
1261 return new Curly(prev, 1, MAX_REPS, GREEDY);
1262 case 0x7B:
1263 ch = temp[cursor+1];
1264 if (AsciiUtil.isDigit(ch)) {
1265 skipChar();
1266 var cmin:Number = 0;
1267 do {
1268 cmin = cmin * 10 + (ch - ord("0"));
1269 } while (AsciiUtil.isDigit((ch = readChar())));
1270 var cmax:Number = cmin;
1271 if (ch == ord(",")) {
1272 ch = readChar();
1273 cmax = MAX_REPS;
1274 if (ch != 0x7D) {
1275 cmax = 0;
1276 while (AsciiUtil.isDigit(ch)) {
1277 cmax = cmax * 10 + (ch - ord("0"));
1278 ch = readChar();
1279 }
1280 }
1281 }
1282 if (ch != 0x7D) {
1283 throwError("Unclosed counted closure", arguments);
1284 }
1285 if (((cmin) | (cmax) | (cmax - cmin)) < 0) {
1286 throwError("Illegal repetition range", arguments);
1287 }
1288 var curly:Curly;
1289 ch = peekChar();
1290 if (ch == ord("?")) {
1291 nextChar();
1292 curly = new Curly(prev, cmin, cmax, LAZY);
1293 } else if (ch == ord("+")) {
1294 nextChar();
1295 curly = new Curly(prev, cmin, cmax, POSSESSIVE);
1296 } else {
1297 curly = new Curly(prev, cmin, cmax, GREEDY);
1298 }
1299 return curly;
1300 } else {
1301 throwError("Illegal repetition", arguments);
1302 }
1303 return prev;
1304 default:
1305 return prev;
1306 }
1307 }
1308
1309 private function parseControl(Void):Number {
1310 if (cursor < patternLength) {
1311 return (readChar() ^ 64);
1312 }
1313 throwError("Illegal control escape sequence", arguments);
1314 return null;
1315 }
1316
1317
1320 private function parseOctal(Void):Number {
1321 var n:Number = readChar();
1322 if (((n - ord("0")) | (ord("7") - n)) >= 0) {
1323 var m:Number = readChar();
1324 if (((m - ord("0")) | (ord("7") - m)) >= 0) {
1325 var o:Number = readChar();
1326 if ((((o - ord("0")) | (ord("7") - o)) >= 0) && (((n - ord("0")) | (ord("3") - n)) >= 0)) {
1327 return (n - ord("0")) * 64 + (m - ord("0")) * 8 + (o - ord("0"));
1328 }
1329 unreadChar();
1330 return (n - ord("0")) * 8 + (m - ord("0"));
1331 }
1332 unreadChar();
1333 return (n - ord("0"));
1334 }
1335 throwError("Illegal octal escape sequence", arguments);
1336 return null;
1337 }
1338
1339 private function parseHexal(Void):Number {
1340 var n:Number = readChar();
1341 if (AsciiUtil.isHexDigit(n)) {
1342 var m:Number = readChar();
1343 if (AsciiUtil.isHexDigit(m)) {
1344 return AsciiUtil.toDigit(n) * 16 + AsciiUtil.toDigit(m);
1345 }
1346 }
1347 throwError("Illegal hexadecimal escape sequence", arguments);
1348 return null;
1349 }
1350
1351 private function parseUnicode(Void):Number {
1352 var n:Number = 0;
1353 for (var i = 0; i < 4; i++) {
1354 var ch:Number = readChar();
1355 if (!AsciiUtil.isHexDigit(ch)) {
1356 throwError("Illegal Unicode escape sequence", arguments);
1357 }
1358 n = n * 16 + AsciiUtil.toDigit(ch);
1359 }
1360 return n;
1361 }
1362
1363 private function ceateSingle(ch:Number):Node {
1364 var f:Number = flags;
1365 if ((f & CASE_INSENSITIVE) == 0) {
1366 return new Single(ch);
1367 }
1368 if ((f & UNICODE_CASE) == 0) {
1369 return new SingleA(ch);
1370 }
1371 return new SingleU(ch);
1372 }
1373
1374 private function createSlice(buf:Array, count:Number, hasSupplementary:Boolean):Node {
1375 var tmp:Array = new Array(count);
1376 var i:Number = flags;
1377 if ((i & CASE_INSENSITIVE) == 0) {
1378 for (i = 0; i < count; i++) {
1379 tmp[i] = buf[i];
1380 }
1381 return new Slice(tmp);
1382 } else if ((i & UNICODE_CASE) == 0) {
1383 for (i = 0; i < count; i++) {
1384 tmp[i] = AsciiUtil.toLower(buf[i]);
1385 }
1386 return new SliceA(tmp);
1387 } else {
1388 for (i = 0; i < count; i++) {
1389 var c:Number = buf[i];
1390 c = AsciiUtil.toLower(AsciiUtil.toUpper(c));
1391 tmp[i] = c;
1392 }
1393 return new SliceU(tmp);
1394 }
1395 }
1396
1397 static var ACCEPT:Node = new Node();
1398 static var LASTACCEPT:Node = new LastNode();
1399
1400
1401 private function toCharCodeArray(source:String):Array {
1402 var charCodeArray:Array = new Array(source.length);
1403 for (var i = 0; i < source.length; i++) {
1404 charCodeArray[i] = source.charCodeAt(i);
1405 }
1406 return charCodeArray;
1407 }
1408
1409 private function fromCharCodeArray(source:Array):String {
1410 var str:String = new String();
1411 for (var i = 0; i < source.length; i++) {
1412 str += String.fromCharCode(source[i]);
1413 }
1414 return str;
1415 }
1416
1417 }
1418