1  /*
     2   * Copyright the original author or authors.
     3   * 
     4   * Licensed under the MOZILLA PUBLIC LICENSE, Version 1.1 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   * 
     8   *      http://www.mozilla.org/MPL/MPL-1.1.html
     9   * 
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  import org.as2lib.regexp.AsciiUtil;
    18  import org.as2lib.regexp.PatternSyntaxException;
    19  import org.as2lib.regexp.Matcher;
    20  import org.as2lib.regexp.node.*;
    21  import org.as2lib.core.BasicClass;
    22  
    23  /**
    24   * {@code Pattern} provides implementations of the parsing engine for 
    25   * basic RegExp constructs.
    26   * 
    27   * @author Igor Sadovskiy
    28   * @see org.as2lib.regexp.Matcher
    29   * @see org.as2lib.regexp.PosixPattern
    30   */
    31  
    32  class org.as2lib.regexp.Pattern extends BasicClass
    33  {
    34  
    35      /**
    36       * Enables Unix lines mode.
    37       */
    38      public static var UNIX_LINES:Number = 0x01;
    39  
    40      /**
    41       * Enables case-insensitive matching.
    42       */
    43      public static var CASE_INSENSITIVE:Number = 0x02;
    44  
    45      /**
    46       * Permits whitespace and comments in pattern.
    47       */
    48      public static var COMMENTS:Number = 0x04;
    49  
    50      /**
    51       * Enables multiline mode.
    52       */
    53      public static var MULTILINE:Number = 0x08;
    54  
    55      /**
    56       * Enables dotall mode.
    57       */
    58      public static var DOTALL:Number = 0x20;
    59  
    60      /**
    61       * Enables Unicode-aware case folding.
    62       */
    63      public static var UNICODE_CASE:Number = 0x40;
    64  
    65  
    66      /**
    67       * The original regular-expression pattern string.
    68       */
    69      private var pattern:String;
    70  
    71      /**
    72       * The original pattern flags.
    73       */
    74      private var flags:Number;
    75  
    76      /**
    77       * The starting point of state machine for the find operation.
    78       */
    79      private var root:Node;
    80  
    81      /**
    82       * The root of object tree for a match operation.  The pattern is matched
    83       * at the beginning.  This may include a find that uses BnM or a First
    84       * node.
    85       */
    86      public var matchRoot:Node;
    87  
    88      /**
    89       * Temporary storage used by parsing pattern slice.
    90       */
    91      private var buffer:Array;
    92  
    93      /**
    94       * Temporary storage used while parsing group references.
    95       */
    96      private var groupNodes:Array;
    97  
    98      /**
    99       * Temporary null terminating char array used by pattern compiling.
   100       */
   101      private var temp:Array;
   102  
   103      /**
   104       * The group count of this Pattern. Used by matchers to allocate storage
   105       * needed to perform a match.
   106       */
   107      private var groupCount:Number;
   108  
   109      /**
   110       * The local variable count used by parsing tree. Used by matchers to
   111       * allocate storage needed to perform a match.
   112       */
   113      private var localCount:Number;
   114  
   115      /**
   116       * Index into the pattern string that keeps track of how much has been
   117       * parsed.
   118       */
   119      private var cursor:Number;
   120  
   121      /**
   122       * Holds the length of the pattern string.
   123       */
   124      private var patternLength:Number;
   125  
   126  
   127      public function Pattern(newPattern:String, newFlags:Number) {
   128      	super();
   129      	
   130          pattern = newPattern;
   131          flags = (newFlags != null) ? newFlags : 0;
   132          
   133  		cursor = 0;        
   134  
   135          // reset group index and count
   136          groupCount = 1;
   137          localCount = 0;
   138  
   139          if (pattern.length > 0) {
   140              compile();
   141          } else {
   142              root = new Start(LASTACCEPT);
   143              matchRoot = LASTACCEPT;
   144          }
   145      }
   146  
   147      public function getPattern(Void):String {
   148          return pattern;
   149      }
   150  
   151      public function getMatcher(input:String):Matcher {
   152          var m:Matcher = new Matcher(this, input);
   153          return m;
   154      }
   155  
   156      public function getFlags(Void):Number {
   157          return flags;
   158      }
   159  
   160      public static function matches(pattern:String, input:String):Boolean {
   161          var p:Pattern = new Pattern(pattern);
   162          var m:Matcher = p.getMatcher(input);
   163          return m.matches();
   164      }
   165  
   166      public function split(input:String, limit:Number):Array { 
   167      	if (limit == null) limit = 0;
   168      
   169          var index:Number = 0;
   170          var matchLimited:Boolean = limit > 0;
   171          var matchList:Array = new Array();
   172          var m:Matcher = getMatcher(input);
   173  
   174          // Add segments before each match found
   175          while (m.find()) {
   176              if (!matchLimited || matchList.length < limit - 1) {
   177                  var match:String = input.substring(index, m.getStartIndex());
   178                  matchList.push(match);
   179                  index = m.getEndIndex();
   180              } else if (matchList.length == limit - 1) { // last one
   181                  var match:String = input.substring(index, input.length);
   182                  matchList.push(match);
   183                  index = m.getEndIndex();
   184              }
   185          }
   186  
   187          // If no match was found, return this
   188          if (index == 0) return [input.toString()];
   189  
   190          // Add remaining segment
   191          if (!matchLimited || matchList.length < limit) {
   192              matchList.push(input.substring(index, input.length));
   193          }
   194  
   195          // Construct result
   196          var resultSize:Number = matchList.length;
   197          if (limit == 0) {
   198              while (resultSize > 0 && matchList[resultSize-1].equals("")) {
   199                  resultSize--;
   200              }
   201          }
   202          return matchList.slice(0, resultSize);
   203      }
   204  
   205      private function compile(Void):Void {
   206          // Copy pattern to char array for convenience
   207          patternLength = pattern.length;
   208          temp = new Array(patternLength + 2);
   209  
   210          // Use double null characters to terminate pattern
   211          temp = toCharCodeArray(pattern);
   212          temp[patternLength] = 0;
   213          temp[patternLength + 1] = 0;
   214  		
   215          // Allocate all temporary objects here.
   216          buffer = new Array(32);
   217          groupNodes = new Array(10);
   218          
   219          // Start recursive decedent parsing
   220          matchRoot = parseExpression(LASTACCEPT);
   221  
   222          // Check extra pattern characters
   223          if (patternLength != cursor) {
   224              if (peekChar() == ord(")")) {
   225                  throwError("Unmatched closing ')'", arguments);
   226              } else {
   227                  throwError("Unexpected internal error", arguments);
   228              }
   229          }
   230  
   231          // Peephole optimization
   232          if (matchRoot instanceof Slice) {
   233              root = BnM.optimize(matchRoot);
   234              if (root == matchRoot) {
   235                  root = new Start(matchRoot);
   236              }
   237          } else if (matchRoot instanceof Begin || matchRoot instanceof First) {
   238              root = matchRoot;
   239          } else {
   240              root = new Start(matchRoot);
   241          }
   242  
   243          // Release temporary storage
   244          temp = null;
   245          buffer = null;
   246          groupNodes = null;
   247          patternLength = 0;
   248      }
   249  
   250      private static function printObjectTree(node:Node):Void {
   251          while (node != null) {
   252              if (node instanceof Prolog) {
   253  	            trace(node);
   254                  printObjectTree((Prolog(node)).getLoop());
   255                  trace("**** end contents prolog loop");
   256              } else if (node instanceof Loop) {
   257                  trace(node);
   258                  printObjectTree((Loop(node)).getBody());
   259                  trace("**** end contents Loop body");
   260              } else if (node instanceof Curly) {
   261                  trace(node);
   262                  printObjectTree((Curly(node)).getAtom());
   263                  trace("**** end contents Curly body");
   264              } else if (node instanceof GroupTail) {
   265                  trace(node);
   266                  trace("Tail next is "+node.getNext());
   267                  return;
   268              } else {
   269                  trace(node);
   270              }      
   271              node = node.getNext();
   272              if (node != null) {
   273                  trace("->next:");
   274              }
   275              if (node == Pattern.ACCEPT) {
   276                  trace("Accept Node");
   277                  node = null;
   278              }
   279      	}
   280      }
   281  
   282      private function hasFlag(f:Number):Boolean {
   283          return (flags & f) > 0;
   284      }
   285  
   286      private function acceptChar(ch:Number, s:String):Void {
   287          var testChar:Number = temp[cursor++];
   288          if (hasFlag(COMMENTS)) {
   289              testChar = parsePastWhitespace(testChar);
   290      	}
   291          if (ch != testChar) {
   292             throwError(s, arguments);
   293          }
   294      }
   295  
   296      private function markChar(c:Number):Void {
   297          temp[patternLength] = c;
   298      }
   299  
   300      private function peekChar(Void):Number {
   301          var ch:Number = temp[cursor];
   302          if (hasFlag(COMMENTS)) {
   303              ch = peekPastWhitespace(ch);
   304          }
   305          return ch;
   306      }
   307  
   308      private function readChar(Void):Number {
   309          var ch:Number = temp[cursor++];
   310          if (hasFlag(COMMENTS)) {
   311              ch = parsePastWhitespace(ch);
   312          }
   313          return ch;
   314      }
   315  
   316      private function readEscapedChar(Void):Number {
   317          var ch:Number = temp[cursor++];
   318          return ch;
   319      }
   320  
   321      private function nextChar(Void):Number {
   322          var ch:Number = temp[++cursor];
   323          if (hasFlag(COMMENTS)) {
   324              ch = peekPastWhitespace(ch);
   325          }
   326          return ch;
   327      }
   328  
   329      private function nextEscapedChar(Void):Number {
   330          var ch:Number = temp[++cursor];
   331          return ch;
   332      }
   333  
   334      private function peekPastWhitespace(ch:Number):Number {
   335          while (AsciiUtil.isSpace(ch) || ch == ord("#")) {
   336              while (AsciiUtil.isSpace(ch)) {
   337                  ch = temp[++cursor];
   338              }
   339              if (ch == ord("#")) {
   340                  ch = peekPastLine();
   341              }
   342          }
   343          return ch;
   344      }
   345  
   346      private function parsePastWhitespace(ch:Number):Number {
   347          while (AsciiUtil.isSpace(ch) || ch == ord("#")) {
   348              while (AsciiUtil.isSpace(ch)) {
   349                  ch = temp[cursor++];
   350              }
   351              if (ch == ord("#")) {
   352                  ch = parsePastLine();
   353              }
   354          }
   355          return ch;
   356      }
   357  
   358      private function parsePastLine(Void):Number {
   359          var ch:Number = temp[cursor++];
   360          while (ch != 0 && !isLineSeparator(ch)) {
   361              ch = temp[cursor++];
   362          }
   363          return ch;
   364      }
   365  
   366      private function peekPastLine(Void):Number {
   367          var ch:Number = temp[++cursor];
   368          while (ch != 0 && !isLineSeparator(ch)) {
   369              ch = temp[++cursor];
   370          }
   371          return ch;
   372      }
   373  
   374      private function isLineSeparator(ch:Number):Boolean {
   375          if (hasFlag(UNIX_LINES)) {
   376              return ch == ord("\n");
   377          } else {
   378              return (ch == ord("\n") ||
   379                      ch == ord("r") ||
   380                      (ch|1) == ord("u2029") ||
   381                      ch == ord("u0085"));
   382          }
   383      }
   384  
   385      private function skipChar(Void):Number {
   386          var i:Number = cursor;
   387          var ch:Number = temp[i+1];
   388          cursor = i + 2;
   389          return ch;
   390      }
   391  
   392      private function unreadChar(Void):Void {
   393          cursor--;
   394      }
   395  
   396      private function throwError(desc:String, args:FunctionArguments):Void {
   397  		throw new PatternSyntaxException(desc, this, args);
   398      }
   399  
   400      private function parseExpression(end:Node):Node {
   401          var prev:Node = null;
   402          while (true) {
   403              var node:Node = parseSequence(end);
   404              if (prev == null) {
   405                  prev = node;
   406              } else {
   407                  prev = new Branch(prev, node);
   408              }
   409              if (peekChar() != ord("|")) {
   410                  return prev;
   411              }
   412              nextChar();
   413          }
   414      }
   415  
   416      private function parseSequence(end:Node):Node {
   417          var head:Node = null;
   418          var tail:Node = null;
   419          var node:Node = null;
   420          var i, j:Number;
   421          var ch :Number;
   422          while (true) {
   423              ch = peekChar();
   424              if (ch == ord("(")) {
   425                  // Because group handles its own closure,
   426                  // we need to treat it differently
   427                  node = parseGroup();
   428                  
   429                  // Check for comment or flag group
   430                  if (node == null) continue;
   431                  
   432                  if (head == null) {
   433                      head = node;
   434                  } else {
   435                      tail.setNext(node);
   436                  }
   437                  
   438                  // Double return:Tail was returned in root
   439                  tail = root;
   440                  continue;
   441              } else if (ch == ord("[")) {
   442                  node = parseClass(true);
   443              } else if (ch == ord("\\")) {
   444                  ch = nextEscapedChar();
   445                  if (ch == ord("p") || ch == ord("P")) {
   446                      var comp:Boolean = (ch == ord("P"));
   447                      var oneLetter:Boolean = true;
   448                      ch = nextChar(); 
   449                      // Consume figure bracket if present
   450                      if (ch != 0x7B) {
   451                          unreadChar();
   452                      } else {
   453                          oneLetter = false;
   454                      }
   455                      node = parseFamily(comp, oneLetter);
   456                  } else {
   457                      unreadChar();
   458                      node = parseAtom();
   459                  }
   460              }
   461              else if (ch == ord("^")) {
   462                  nextChar();
   463                  if (hasFlag(MULTILINE)) {
   464                      if (hasFlag(UNIX_LINES)) {
   465                          node = new UnixCaret();
   466                      } else {
   467                          node = new Caret();
   468                      }
   469                  } else {
   470                      node = new Begin();
   471                  }
   472              }
   473              else if (ch == ord("$")) {
   474                  nextChar();
   475                  if (hasFlag(UNIX_LINES)) {
   476                      node = new UnixDollar(hasFlag(MULTILINE));
   477                  } else {
   478                      node = new Dollar(hasFlag(MULTILINE));
   479                  }
   480              }
   481              else if (ch == ord(".")) {
   482                  nextChar();
   483                  if (hasFlag(DOTALL)) {
   484                      node = new All();
   485                  } else {
   486                      if (hasFlag(UNIX_LINES)) {
   487                          node = new UnixDot();
   488                      } else {
   489                          node = new Dot();
   490                      }
   491                  }
   492              }
   493              else if (ch == ord("|") || ch == ord(")")) {
   494                  break;
   495              }
   496              // Now interpreting dangling square and figure brackets as literals
   497              else if (ch == ord("]") || ch == 0x7D) {
   498                  node = parseAtom();
   499              }
   500              else if (ch == ord("?") || ch == ord("*") || ch == ord("+")) {
   501                  nextChar();
   502                  throwError("Dangling meta character '" + chr(ch) + "'", arguments);
   503              }
   504              else { 
   505  	            if (ch == 0) {
   506  	                if (cursor >= patternLength) {
   507  	                    break;
   508  	                }
   509  	            }
   510                  node = parseAtom();
   511              }
   512  
   513              node = parseClosure(node);
   514  
   515              if (head == null) {
   516                  head = tail = node;
   517              } else {
   518                  tail.setNext(node);
   519                  tail = node;
   520              }
   521          }
   522          if (head == null) {
   523              return end;
   524          }
   525          tail.setNext(end);
   526          return head;
   527      }
   528  
   529      private function parseAtom(Void):Node {
   530          var first:Number = 0;
   531          var prev:Number = -1;
   532          var ch:Number = peekChar();
   533          while (true) {
   534          	
   535          	// check for "*", "+", "?", open figure bracker 
   536          	if (ch == 0x2A ||
   537          		ch == 0x2B ||
   538          		ch == 0x3F ||
   539          		ch == 0x7B ) 
   540      		{
   541                  if (first > 1) {
   542                  	// Unwind one character
   543                      cursor = prev;    
   544                      first--;
   545                  }
   546              // check for "$", ".", "^", "(", "[", "|", ")"
   547          	} else if (ch == 0x24 ||
   548          			 ch == 0x2E ||
   549          			 ch == 0x5E ||
   550          			 ch == 0x28 ||
   551          			 ch == 0x5B ||
   552          			 ch == 0x7C ||
   553          			 ch == 0x29	)
   554          	{
   555                  break;
   556              // check for "\"
   557          	} else if (ch == 0x5C) { 
   558                  ch = nextEscapedChar();
   559                  // "P" or "p"
   560                  if (ch == 0x70 || ch == 0x50) { 
   561                  	// Property
   562                      if (first > 0) { 
   563                      	// Slice is waiting; handle it first
   564                          unreadChar();
   565                      } else { // No slice; just return the family node
   566  	                    // "P" or "p"
   567                          if (ch == 0x70 || ch == 0x50) {
   568                              var comp:Boolean = (ch == 0x50);
   569                              var oneLetter:Boolean = true;
   570                              ch = nextChar(); 
   571                              // Consume open figure bracket if present
   572                              if (ch != 0x7B) {
   573                                  unreadChar();
   574                              } else {
   575                                  oneLetter = false;
   576                              }
   577                              return parseFamily(comp, oneLetter);
   578                          }
   579                      }
   580                  }
   581                  else {
   582  	                unreadChar();
   583  	                prev = cursor;
   584  	                ch = parseEscape(false, first == 0);
   585  	                if (ch != null) {
   586  	                    appendChar(ch, first);
   587  	                    first++;
   588  	                    ch = peekChar();
   589  	                    continue;
   590  	                } else if (first == 0) {
   591  	                    return root;
   592  	                }
   593  	                // Unwind meta escape sequence
   594  	                cursor = prev;
   595                  }
   596          	} else {
   597          		if (ch == 0) {
   598  	                if (cursor >= patternLength) {
   599  	                    break;
   600  	                }
   601          		}
   602                  prev = cursor;
   603                  appendChar(ch, first);
   604                  first++;
   605                  ch = nextChar();
   606                  continue;
   607              }
   608              break;
   609          }
   610          if (first == 1) {
   611              return ceateSingle(buffer[0]);
   612          } else {
   613              return createSlice(buffer, first);
   614          }
   615      }
   616  
   617      private function appendChar(ch:Number, len:Number):Void {
   618          buffer[len] = ch;
   619      }
   620  
   621     private function parseBackRef(refNum:Number):Node {
   622          var done:Boolean = false;
   623          while (!done) {
   624              var ch:Number = peekChar();
   625              switch(ch) {
   626                  case ord("0"):
   627                  case ord("1"):
   628                  case ord("2"):
   629                  case ord("3"):
   630                  case ord("4"):
   631                  case ord("5"):
   632                  case ord("6"):
   633                  case ord("7"):
   634                  case ord("8"):
   635                  case ord("9"):
   636                      var newRefNum:Number = (refNum * 10) + (ch - ord("0"));
   637                      // Add another number if it doesn't make a group
   638                      // that doesn't exist
   639                      if (groupCount - 1 < newRefNum) {
   640                          done = true;
   641                          break;
   642                      }
   643                      refNum = newRefNum;
   644                      readChar();
   645                      break;
   646                  default:
   647                      done = true;
   648                      break;
   649              }
   650          }
   651          if (hasFlag(CASE_INSENSITIVE)) {
   652              return new BackRefA(refNum);
   653          } else {
   654              return new BackRef(refNum);
   655          }
   656      }
   657  
   658      private function parseEscape(inclass:Boolean, create:Boolean):Number {
   659          var ch:Number = skipChar();
   660          switch (ch) {
   661              case ord("0"):
   662                  return parseOctal();
   663              case ord("1"):
   664              case ord("2"):
   665              case ord("3"):
   666              case ord("4"):
   667              case ord("5"):
   668              case ord("6"):
   669              case ord("7"):
   670              case ord("8"):
   671              case ord("9"):
   672                  if (inclass) break;
   673                  if (groupCount < (ch - ord("0"))) {
   674                      throwError("No such group yet exists at this point in the pattern", arguments);
   675                  }
   676                  if (create) {
   677                      root = parseBackRef(ch - ord("0"));
   678                  }
   679                  return null;
   680              case ord("A"):
   681                  if (inclass) break;
   682                  if (create) root = new Begin();
   683                  return null;
   684              case ord("B"):
   685                  if (inclass) break;
   686                  if (create) root = new Bound(Bound.NONE);
   687                  return null;
   688              case ord("C"):
   689                  break;
   690              case ord("D"):
   691                  if (create) root = new NotPosix(AsciiUtil.DIGIT);
   692                  return null;
   693              case ord("E"):
   694              case ord("F"):
   695                  break;
   696              case ord("G"):
   697                  if (inclass) break;
   698                  if (create) root = new LastMatch();
   699                  return null;
   700              case ord("H"):
   701              case ord("I"):
   702              case ord("J"):
   703              case ord("K"):
   704              case ord("L"):
   705              case ord("M"):
   706              case ord("N"):
   707              case ord("O"):
   708              case ord("P"):
   709                  break;
   710              case ord("Q"):
   711                  if (create) {
   712                      // Disable metacharacters. We will return a slice
   713                      // up to the next \E
   714                      var i:Number = cursor;
   715                      var c:Number;
   716                      while ((c = readEscapedChar()) != 0) {
   717                          if (c == ord("\\")) {
   718                              c = readEscapedChar();
   719                              if (c == ord("E") || c == 0) break;
   720                          }
   721                      }
   722                      var j:Number = cursor-1;
   723                      if (c == ord("E")) {
   724                          j--;
   725                      } else {
   726                          unreadChar();
   727                      }
   728                      for (var x = i; x < j; x++) {
   729                          appendChar(temp[x], x-i);
   730                      }
   731                      root = createSlice(buffer, j-i);
   732                  }
   733                  return null;
   734              case ord("R"):
   735                  break;
   736              case ord("S"):
   737                  if (create) root = new NotPosix(AsciiUtil.SPACE);
   738                  return null;
   739              case ord("T"):
   740              case ord("U"):
   741              case ord("V"):
   742                  break;
   743              case ord("W"):
   744                  if (create) root = new NotPosix(AsciiUtil.WORD);
   745                  return null;
   746              case ord("X"):
   747              case ord("Y"):
   748                  break;
   749              case ord("Z"):
   750                  if (inclass) break;
   751                  if (create) {
   752                      if (hasFlag(UNIX_LINES)) {
   753                          root = new UnixDollar(false);
   754                      } else {
   755                          root = new Dollar(false);
   756                  	}
   757                  }
   758                  return null;
   759              case ord("a"):
   760                  return Number(ord("007"));
   761              case ord("b"):
   762                  if (inclass) break;
   763                  if (create) root = new Bound(Bound.BOTH);
   764                  return null;
   765              case ord("c"):
   766                  return parseControl();
   767              case ord("d"):
   768                  if (create) root = new Posix(AsciiUtil.DIGIT);
   769                  return null;
   770              case ord("e"):
   771                  return Number(ord("033"));
   772              case ord("f"):
   773                  return Number(ord("f"));
   774              case ord("g"):
   775              case ord("h"):
   776              case ord("i"):
   777              case ord("j"):
   778              case ord("k"):
   779              case ord("l"):
   780              case ord("m"):
   781                  break;
   782              case ord("n"):
   783                  return Number(ord("\n"));
   784              case ord("o"):
   785              case ord("p"):
   786              case ord("q"):
   787                  break;
   788              case ord("r"):
   789                  return Number(ord("r"));
   790              case ord("s"):
   791                  if (create) root = new Posix(AsciiUtil.SPACE);
   792                  return null;
   793              case ord("t"):
   794                  return Number(ord("\t"));
   795              case ord("u"):
   796                  return parseUnicode();
   797              case ord("v"):
   798                  return Number(ord("013"));
   799              case ord("w"):
   800                  if (create) root = new Posix(AsciiUtil.WORD);
   801                  return null;
   802              case ord("x"):
   803                  return parseHexal();
   804              case ord("y"):
   805                  break;
   806              case ord("z"):
   807                  if (inclass) break;
   808                  if (create) root = new End();
   809                  return null;
   810              default:
   811                  return ch;
   812          }
   813          throwError("Illegal/unsupported escape squence", arguments);
   814          return null;
   815      }
   816  
   817     private function parseClass(consume:Boolean):Node {
   818          var prev:Node = null;
   819          var node:Node = null;
   820          var bits:BitClass = new BitClass(false);
   821          var include:Boolean = true;
   822          var firstInClass:Boolean = true;
   823          var ch:Number = nextChar();
   824          while (true) {
   825              switch (ch) {
   826                  case ord("^"):
   827                      // Negates if first char in a class, otherwise literal
   828                      if (firstInClass) {
   829                          if (temp[cursor-1] != ord("[")) break;
   830                          ch = nextChar();
   831                          include = !include;
   832                          continue;
   833                      } else {
   834                          // ^ not first in class, treat as literal
   835                          break;
   836                      }
   837                  case ord("["):
   838                      firstInClass = false;
   839                      node = parseClass(true);
   840                      if (prev == null) {
   841                          prev = node;
   842                      } else {
   843                          prev = new Add(prev, node);
   844                      }
   845                      ch = peekChar();
   846                      continue;
   847                  case ord("&"):
   848                      firstInClass = false;
   849                      ch = nextChar();
   850                      if (ch == ord("&")) {
   851                          ch = nextChar();
   852                          var rightNode:Node = null;
   853                          while (ch != ord("]") && ch != ord("&")) {
   854                              if (ch == ord("[")) {
   855                                  if (rightNode == null) {
   856                                      rightNode = parseClass(true);
   857                                  } else {
   858                                      rightNode = new Add(rightNode, parseClass(true));
   859                                  }
   860                              } else { // abc&&def
   861                                  unreadChar();
   862                                  rightNode = parseClass(false);
   863                              }
   864                              ch = peekChar();
   865                          }
   866                          if (rightNode != null) node = rightNode;
   867                          if (prev == null) {
   868                              if (rightNode == null) {
   869                                  throwError("Bad class syntax", arguments);
   870                              } else {
   871                                  prev = rightNode;
   872                              }
   873                          } else {
   874                              prev = new Both(prev, node);
   875                          }
   876                      } else {
   877                          // treat as a literal &
   878                          unreadChar();
   879                          break;
   880                      }
   881                      continue;
   882                  case 0:
   883                      firstInClass = false;
   884                      if (cursor >= patternLength) {
   885                          throwError("Unclosed character class", arguments);
   886                      }
   887                      break;
   888                  case ord("]"):
   889                      firstInClass = false;
   890                      if (prev != null) {
   891                          if (consume) nextChar();
   892                          return prev;
   893                      }
   894                      break;
   895                  default:
   896                      firstInClass = false;
   897                      break;
   898              }
   899              node = parseRange(bits);
   900              if (include) {
   901                  if (prev == null) {
   902                      prev = node;
   903                  } else {
   904                      if (prev != node)
   905                      {
   906                          prev = new Add(prev, node);
   907                      }
   908                  }
   909              } else {
   910                  if (prev == null) {
   911                      prev = node.dup(true);  // Complement
   912                  } else {
   913                      if (prev != node) {
   914                          prev = new Sub(prev, node);
   915                      }
   916                  }
   917              }
   918              ch = peekChar();
   919          }
   920      }
   921  
   922      private function parseRange(bits:BitClass):Node {
   923          var ch:Number = peekChar();
   924          if (ch == ord("\\")) {
   925              ch = nextEscapedChar();
   926              if (ch == ord("p") || ch == ord("P")) { // A property
   927                  var comp:Boolean = (ch == ord("P"));
   928                  var oneLetter:Boolean = true;
   929                  // Consume fugure bracket if present
   930                  ch = nextChar();
   931                  if (ch == 0x7B) {
   932                      unreadChar();
   933                  } else {
   934                      oneLetter = false;
   935                  }
   936                  return parseFamily(comp, oneLetter);
   937              } else { // ordinary escape
   938                  unreadChar();
   939                  ch = parseEscape(true, true);
   940                  if (ch == null) return root;
   941              }
   942          } else {
   943              ch = parseSingle();
   944          }
   945          if (ch != null) {
   946              if (peekChar() == ord("-")) {
   947                  var endRange:Number = temp[cursor+1];
   948                  if (endRange == ord("[")) {
   949                      if (ch < 256) {
   950                          return bits.addChar(ch, getFlags());
   951                      }
   952                      return ceateSingle(ch);
   953                  }
   954                  if (endRange != ord("]")) {
   955                      nextChar();
   956                      var m:Number = parseSingle();
   957                      if (m < ch) {
   958                          throwError("Illegal character range", arguments);
   959                      }
   960                      if (hasFlag(CASE_INSENSITIVE)) {
   961                          return new RangeA((ch<<16)+m);
   962                      } else {
   963                          return new Range((ch<<16)+m);
   964                      }
   965                  }
   966              }
   967              if (ch < 256) {
   968                  return bits.addChar(ch, getFlags());
   969              }
   970              return ceateSingle(ch);
   971          }
   972          throwError("Unexpected character '"+chr(ch)+"'", arguments);
   973      }
   974  
   975      private function parseSingle(Void):Number {
   976          var ch:Number = peekChar();
   977          switch (ch) {
   978  	        case ord("\\"):
   979  	            return parseEscape(true, false);
   980  	        default:
   981  	            nextChar();
   982  	            return ch;
   983          }
   984      }
   985  
   986      private function parseFamily(flag:Boolean, singleLetter:Boolean):Node {
   987      	throwError("Families dosn't supported in the current Pattern's implementation", arguments);
   988      	return null;
   989      }
   990  
   991      private function parseGroup(Void):Node {
   992          var head:Node = null;
   993          var tail:Node = null;
   994          var save:Number = flags;
   995          root = null;
   996          var ch:Number = nextChar();
   997          if (ch == ord("?")) {
   998              ch = skipChar();
   999              switch (ch) {
  1000              case ord(":"):  //  (?:xxx) pure group
  1001                  head = createGroup(true);
  1002                  tail = root;
  1003                  head.setNext(parseExpression(tail));
  1004                  break;
  1005              case ord("="):  // (?=xxx) and (?!xxx) lookahead
  1006              case ord("!"):
  1007                  head = createGroup(true);
  1008                  tail = root;
  1009                  head.setNext(parseExpression(tail));
  1010                  if (ch == ord("=")) {
  1011                      head = tail = new Pos(head);
  1012                  } else {
  1013                      head = tail = new Neg(head);
  1014                  }
  1015                  break;
  1016              case ord(">"):  // (?>xxx)  independent group
  1017                  head = createGroup(true);
  1018                  tail = root;
  1019                  head.setNext(parseExpression(tail));
  1020                  head = tail = new Ques(head, INDEPENDENT);
  1021                  break;
  1022              case ord("<"):  // (?<xxx)  look behind
  1023                  ch = readChar();
  1024                  head = createGroup(true);
  1025                  tail = root;
  1026                  head.setNext(parseExpression(tail));
  1027                  var info:TreeInfo = new TreeInfo();
  1028                  head.study(info);
  1029                  if (info.maxValid == false) {
  1030                      throwError("Look-behind group does not have "
  1031                                   + "an obvious maximum length", arguments);
  1032                  }
  1033                  if (ch == ord("=")) {
  1034                      head = tail = new Behind(head, info.maxLength, info.minLength);
  1035                  } else if (ch == ord("!")) {
  1036                      head = tail = new NotBehind(head, info.maxLength, info.minLength);
  1037                  } else {
  1038                      throwError("Unknown look-behind group", arguments);
  1039                  }
  1040                  break;
  1041              case ord("1"):case ord("2"):case ord("3"):case ord("4"):case ord("5"):
  1042              case ord("6"):case ord("7"):case ord("8"):case ord("9"):
  1043                  if (groupNodes[ch - ord("0")] != null) {
  1044                      head = tail = new GroupRef(groupNodes[ch - ord("0")]);
  1045                      break;
  1046                  }
  1047                  throwError("Unknown group reference", arguments);
  1048              case ord("$"):
  1049              case ord("@"):
  1050  				throwError("Unknown group type", arguments);
  1051              default:   // (?xxx:) inlined match flags
  1052                  unreadChar();
  1053                  addFlag();
  1054                  ch = readChar();
  1055                  if (ch == ord(")")) {
  1056                      return null;    // Inline modifier only
  1057                  }
  1058                  if (ch != ord(":")) {
  1059                      throwError("Unknown inline modifier", arguments);
  1060                  }
  1061                  head = createGroup(true);
  1062                  tail = root;
  1063                  head.setNext(parseExpression(tail));
  1064                  break;
  1065              }
  1066          } else { // (xxx) a regular group
  1067              head = createGroup(false);
  1068              tail = root;
  1069              head.setNext(parseExpression(tail));
  1070          }
  1071  
  1072          acceptChar(Number(ord(")")), "Unclosed group");
  1073          flags = save;
  1074  
  1075          // Check for quantifiers
  1076          var node:Node = parseClosure(head);
  1077          if (node == head) { // No closure
  1078              root = tail;
  1079              return node;    // Dual return
  1080          }
  1081          if (head == tail) { // Zero length assertion
  1082              root = node;
  1083              return node;    // Dual return
  1084          }
  1085  
  1086          if (node instanceof Ques) {
  1087              var ques:Ques = Ques(node);
  1088              if (ques.getType() == POSSESSIVE) {
  1089                  root = node;
  1090                  return node;
  1091              }
  1092              // Dummy node to connect branch
  1093              tail.setNext(new Dummy());
  1094              tail = tail.getNext();
  1095              if (ques.getType() == GREEDY) {
  1096                  head = new Branch(head, tail);
  1097              } else { 
  1098              	// Reluctant quantifier
  1099                  head = new Branch(tail, head);
  1100              }
  1101              root = tail;
  1102              return head;
  1103          } else if (node instanceof Curly) {
  1104              var curly:Curly = Curly(node);
  1105              if (curly.getType() == POSSESSIVE) {
  1106                  root = node;
  1107                  return node;
  1108              }
  1109              // Discover if the group is deterministic
  1110              var info:TreeInfo = new TreeInfo();
  1111              if (head.study(info)) { 
  1112              	// Deterministic
  1113                  var temp:GroupTail = GroupTail(tail);
  1114                  head = root = new GroupCurly(head.getNext(), curly.getCmin(),
  1115                                     curly.getCmax(), curly.getType(),
  1116                                     (GroupTail(tail)).getLocalIndex(),
  1117                                     (GroupTail(tail)).getGroupIndex());
  1118                  return head;
  1119              } else { 
  1120              	// Non-deterministic
  1121                  var tmp:Number = (GroupHead(head)).getLocalIndex();
  1122                  var loop:Loop;
  1123                  if (curly.getType() == GREEDY) {
  1124                      loop = new Loop(this.localCount, tmp);
  1125                  } else { 
  1126                  	// Reluctant Curly
  1127                      loop = new LazyLoop(this.localCount, tmp);
  1128                  }
  1129                  var prolog:Prolog = new Prolog(loop);
  1130                  this.localCount += 1;
  1131                  loop.setCmin(curly.getCmin());
  1132                  loop.setCmax(curly.getCmax());
  1133                  loop.setBody(head);
  1134                  tail.setNext(loop);
  1135                  root = loop;
  1136                  return prolog; // Dual return
  1137              }
  1138          } else if (node instanceof First) {
  1139              root = node;
  1140              return node;
  1141          }
  1142          throwError("Internal logic error", arguments);
  1143      }
  1144  
  1145      private function createGroup(anonymous:Boolean):Node {
  1146          var localIndex:Number = localCount++;
  1147          var groupIndex:Number = 0;
  1148          if (!anonymous) {
  1149              groupIndex = groupCount++;
  1150          }
  1151          var head:GroupHead = new GroupHead(localIndex);
  1152          root = new GroupTail(localIndex, groupIndex);
  1153          if (!anonymous && groupIndex < 10) {
  1154              groupNodes[groupIndex] = head;
  1155          }
  1156          return head;
  1157      }
  1158  
  1159      private function addFlag(Void):Void {
  1160          var ch:Number = peekChar();
  1161          while (true) {
  1162              switch (ch) {
  1163              case ord("i"):
  1164                  flags |= CASE_INSENSITIVE;
  1165                  break;
  1166              case ord("m"):
  1167                  flags |= MULTILINE;
  1168                  break;
  1169              case ord("s"):
  1170                  flags |= DOTALL;
  1171                  break;
  1172              case ord("d"):
  1173                  flags |= UNIX_LINES;
  1174                  break;
  1175              case ord("u"):
  1176                  flags |= UNICODE_CASE;
  1177                  break;
  1178              case ord("x"):
  1179                  flags |= COMMENTS;
  1180                  break;
  1181              case ord("-"): // subFlag then fall through
  1182                  ch = nextChar();
  1183                  subFlag();
  1184              default:
  1185                  return;
  1186              }
  1187              ch = nextChar();
  1188          }
  1189      }
  1190  
  1191      private function subFlag(Void):Void {
  1192          var ch:Number = peekChar();
  1193          while (true) {
  1194              switch (ch) {
  1195              case ord("i"):
  1196                  flags &= ~CASE_INSENSITIVE;
  1197                  break;
  1198              case ord("m"):
  1199                  flags &= ~MULTILINE;
  1200                  break;
  1201              case ord("s"):
  1202                  flags &= ~DOTALL;
  1203                  break;
  1204              case ord("d"):
  1205                  flags &= ~UNIX_LINES;
  1206                  break;
  1207              case ord("u"):
  1208                  flags &= ~UNICODE_CASE;
  1209                  break;
  1210              case ord("x"):
  1211                  flags &= ~COMMENTS;
  1212                  break;
  1213              default:
  1214                  return;
  1215              }
  1216              ch = nextChar();
  1217          }
  1218      }
  1219  
  1220      private static var MAX_REPS:Number	= 0x7FFFFFFF;
  1221  
  1222      public static var GREEDY:Number			= 0;
  1223      public static var LAZY:Number			= 1;
  1224      public static var POSSESSIVE:Number 	= 2;
  1225      public static var INDEPENDENT:Number 	= 3;
  1226  
  1227  
  1228      private function parseClosure(prev:Node):Node {
  1229          var atom:Node;
  1230          var ch:Number = peekChar();
  1231          switch (ch) {
  1232          case ord("?"):
  1233              ch = nextChar();
  1234              if (ch == ord("?")) {
  1235                  nextChar();
  1236                  return new Ques(prev, LAZY);
  1237              } else if (ch == ord("+")) {
  1238                  nextChar();
  1239                  return new Ques(prev, POSSESSIVE);
  1240              }
  1241              return new Ques(prev, GREEDY);
  1242          case ord("*"):
  1243              ch = nextChar();
  1244              if (ch == ord("?")) {
  1245                  nextChar();
  1246                  return new Curly(prev, 0, MAX_REPS, LAZY);
  1247              } else if (ch == ord("+")) {
  1248                  nextChar();
  1249                  return new Curly(prev, 0, MAX_REPS, POSSESSIVE);
  1250              }
  1251              return new Curly(prev, 0, MAX_REPS, GREEDY);
  1252          case ord("+"):
  1253              ch = nextChar();
  1254              if (ch == ord("?")) {
  1255                  nextChar();
  1256                  return new Curly(prev, 1, MAX_REPS, LAZY);
  1257              } else if (ch == ord("+")) {
  1258                  nextChar();
  1259                  return new Curly(prev, 1, MAX_REPS, POSSESSIVE);
  1260              }
  1261              return new Curly(prev, 1, MAX_REPS, GREEDY);
  1262          case 0x7B:
  1263              ch = temp[cursor+1];
  1264              if (AsciiUtil.isDigit(ch)) {
  1265                  skipChar();
  1266                  var cmin:Number = 0;
  1267                  do {
  1268                      cmin = cmin * 10 + (ch - ord("0"));
  1269                  } while (AsciiUtil.isDigit((ch = readChar())));
  1270                  var cmax:Number = cmin;
  1271                  if (ch == ord(",")) {
  1272                      ch = readChar();
  1273                      cmax = MAX_REPS;
  1274                      if (ch != 0x7D) {
  1275                          cmax = 0;
  1276                          while (AsciiUtil.isDigit(ch)) {
  1277                              cmax = cmax * 10 + (ch - ord("0"));
  1278                              ch = readChar();
  1279                          }
  1280                      }
  1281                  }
  1282                  if (ch != 0x7D) {
  1283                      throwError("Unclosed counted closure", arguments);
  1284                  }
  1285                  if (((cmin) | (cmax) | (cmax - cmin)) < 0) {
  1286                      throwError("Illegal repetition range", arguments);
  1287                  }
  1288                  var curly:Curly;
  1289                  ch = peekChar();
  1290                  if (ch == ord("?")) {
  1291                      nextChar();
  1292                      curly = new Curly(prev, cmin, cmax, LAZY);
  1293                  } else if (ch == ord("+")) {
  1294                      nextChar();
  1295                      curly = new Curly(prev, cmin, cmax, POSSESSIVE);
  1296                  } else {
  1297                      curly = new Curly(prev, cmin, cmax, GREEDY);
  1298                  }
  1299                  return curly;
  1300              } else {
  1301                  throwError("Illegal repetition", arguments);
  1302              }
  1303              return prev;
  1304          default:
  1305              return prev;
  1306          }
  1307      }
  1308  
  1309      private function parseControl(Void):Number {
  1310          if (cursor < patternLength) {
  1311              return (readChar() ^ 64);
  1312          }
  1313          throwError("Illegal control escape sequence", arguments);
  1314          return null;
  1315      }
  1316  
  1317      /**
  1318       *  Utility method for parsing octal escape sequences.
  1319       */
  1320      private function parseOctal(Void):Number {
  1321          var n:Number = readChar();
  1322          if (((n - ord("0")) | (ord("7") - n)) >= 0) {
  1323              var m:Number = readChar();
  1324              if (((m - ord("0")) | (ord("7") - m)) >= 0) {
  1325                  var o:Number = readChar();
  1326                  if ((((o - ord("0")) | (ord("7") - o)) >= 0) && (((n - ord("0")) | (ord("3") - n)) >= 0)) {
  1327                      return (n - ord("0")) * 64 + (m - ord("0")) * 8 + (o - ord("0"));
  1328                  }
  1329                  unreadChar();
  1330                  return (n - ord("0")) * 8 + (m - ord("0"));
  1331              }
  1332              unreadChar();
  1333              return (n - ord("0"));
  1334          }
  1335          throwError("Illegal octal escape sequence", arguments);
  1336          return null;
  1337      }
  1338  
  1339      private function parseHexal(Void):Number {
  1340          var n:Number = readChar();
  1341          if (AsciiUtil.isHexDigit(n)) {
  1342              var m:Number = readChar();
  1343              if (AsciiUtil.isHexDigit(m)) {
  1344                  return AsciiUtil.toDigit(n) * 16 + AsciiUtil.toDigit(m);
  1345              }
  1346          }
  1347          throwError("Illegal hexadecimal escape sequence", arguments);
  1348          return null;
  1349      }
  1350  
  1351      private function parseUnicode(Void):Number {
  1352          var n:Number = 0;
  1353          for (var i = 0; i < 4; i++) {
  1354              var ch:Number = readChar();
  1355              if (!AsciiUtil.isHexDigit(ch)) {
  1356                  throwError("Illegal Unicode escape sequence", arguments);
  1357              }
  1358              n = n * 16 + AsciiUtil.toDigit(ch);
  1359          }
  1360          return n;
  1361      }
  1362  
  1363      private function ceateSingle(ch:Number):Node {
  1364          var f:Number = flags;
  1365          if ((f & CASE_INSENSITIVE) == 0) {
  1366              return new Single(ch);
  1367          }
  1368          if ((f & UNICODE_CASE) == 0) {
  1369              return new SingleA(ch);
  1370          }
  1371          return new SingleU(ch);
  1372      }
  1373  
  1374      private function createSlice(buf:Array, count:Number, hasSupplementary:Boolean):Node {
  1375          var tmp:Array = new Array(count); 
  1376          var i:Number = flags;
  1377          if ((i & CASE_INSENSITIVE) == 0) {
  1378              for (i = 0; i < count; i++) {
  1379                  tmp[i] = buf[i];
  1380              }
  1381              return new Slice(tmp);
  1382          } else if ((i & UNICODE_CASE) == 0) {
  1383              for (i = 0; i < count; i++) {
  1384                  tmp[i] = AsciiUtil.toLower(buf[i]);
  1385              }
  1386              return new SliceA(tmp);
  1387          } else {
  1388              for (i = 0; i < count; i++) {
  1389                  var c:Number = buf[i];
  1390                  c = AsciiUtil.toLower(AsciiUtil.toUpper(c));
  1391                  tmp[i] = c;
  1392              }
  1393              return new SliceU(tmp);
  1394          }
  1395      }
  1396  
  1397      static var ACCEPT:Node 		= new Node();
  1398      static var LASTACCEPT:Node 	= new LastNode();
  1399  
  1400  
  1401  	private function toCharCodeArray(source:String):Array {
  1402  		var charCodeArray:Array = new Array(source.length);
  1403  		for (var i = 0; i < source.length; i++) {
  1404  			charCodeArray[i] = source.charCodeAt(i);
  1405  		}	
  1406  		return charCodeArray;
  1407  	}
  1408  	
  1409  	private function fromCharCodeArray(source:Array):String {
  1410  		var str:String = new String();
  1411  		for (var i = 0; i < source.length; i++) {
  1412  			str += String.fromCharCode(source[i]);
  1413  		}	
  1414  		return str;
  1415  	}
  1416  
  1417  }
  1418