1  /*
     2   * Copyright the original author or authors.
     3   * 
     4   * Licensed under the MOZILLA PUBLIC LICENSE, Version 1.1 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   * 
     8   *      http://www.mozilla.org/MPL/MPL-1.1.html
     9   * 
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  import org.as2lib.regexp.Pattern;
    18  import org.as2lib.core.BasicClass;
    19  import org.as2lib.env.except.Exception;
    20  
    21  /**
    22   * {@code Matcher} provides implementations of the match, search and 
    23   * replace RegExp routines.
    24   * 
    25   * @author Igor Sadovskiy
    26   * @see org.as2lib.regexp.Pattern
    27   * @see org.as2lib.regexp.PosixPattern
    28   */
    29  
    30  class org.as2lib.regexp.Matcher extends BasicClass {
    31  	
    32      /**
    33       * The Pattern object that created this Matcher.
    34       */
    35      private var parentPattern:Pattern;
    36  
    37      /**
    38       * The storage used by groups. They may contain invalid values if
    39       * a group was skipped during the matching.
    40       */
    41      private var groups:Array;
    42  
    43      /**
    44       * The range within the string that is to be matched.
    45       */
    46      private var from, to:Number;
    47  
    48      /**
    49       * The original string being matched.
    50       */
    51      private var text:String;
    52  
    53      /**
    54       * Matcher state used by the last node. NOANCHOR is used when a
    55       * match does not have to consume all of the input. ENDANCHOR is
    56       * the mode used for matching all the input.
    57       */
    58      public static var ENDANCHOR:Number = 1;
    59      public static var NOANCHOR:Number  = 0;
    60      
    61      private var acceptMode:Number = NOANCHOR;
    62  
    63      /**
    64       * The range of string that last matched the pattern.
    65       */
    66      private var first, last:Number;
    67  
    68      /**
    69       * The end index of what matched in the last match operation.
    70       */
    71      private var oldLast:Number;
    72  
    73      /**
    74       * The index of the last position appended in a substitution.
    75       */
    76      private var lastAppendPosition:Number;
    77  
    78      /**
    79       * Storage used by nodes to tell what repetition they are on in
    80       * a pattern, and where groups begin. The nodes themselves are stateless,
    81       * so they rely on this field to hold state during a match.
    82       */
    83      private var locals:Array;
    84  
    85  
    86      public function Matcher(newParent:Pattern, newText:String) {
    87      	acceptMode = NOANCHOR;
    88      	first = -1;
    89      	last = -1;
    90      	oldLast = -1;
    91      	lastAppendPosition = 0;
    92      	
    93          parentPattern = newParent;
    94          text = newText;
    95  
    96          // Allocate state storage
    97          var parentGroupCount:Number = Math.max(newParent["groupCount"], 10);
    98          groups = new Array(parentGroupCount * 2);
    99          locals = new Array(newParent["localCount"]);
   100  
   101          // Put fields into initial states
   102          reset();
   103      }
   104  
   105      public function getPattern(Void):Pattern {
   106          return parentPattern;
   107      }
   108  
   109      public function reset(input:String):Matcher {
   110      	if (input != null) text = input;
   111      	
   112          first = -1;
   113          last = -1;
   114          oldLast = -1;
   115          for (var i = 0; i < groups.length; i++) {
   116              groups[i] = -1;
   117          }
   118          for (var i = 0; i < locals.length; i++) {
   119              locals[i] = -1;
   120          }
   121          lastAppendPosition = 0;
   122  		return this;
   123      }
   124  
   125      public function getStartIndex(group:Number):Number {
   126      	if (first < 0) {
   127              throw new Exception("No match available", this, arguments);
   128      	}
   129      	if (group != null) {
   130  	        if (group > getGroupCount()) {
   131  	            throw new Exception("No group " + group, this, arguments);
   132  	        }
   133  	        return groups[group * 2];
   134      	} else return first;
   135      }
   136  
   137      public function getEndIndex(group:Number):Number {
   138          if (first < 0) {
   139              throw new Exception("No match available", this, arguments);
   140  		}
   141          if (group != null) {
   142  	        if (group > getGroupCount()) {
   143  	            throw new Exception("No group " + group, this, arguments);
   144  	        }
   145  	        return groups[group * 2 + 1];
   146          } else return last;
   147      }
   148  
   149      public function getGroup(group:Number):String {
   150          if (first < 0) {
   151              throw new Exception("No match found", this, arguments);
   152          }
   153          if (group == null) group  = 0;
   154          if (group < 0 || group > getGroupCount()) {
   155              throw new Exception("No group " + group, this, arguments);
   156          }
   157          if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) {
   158              return null;
   159          }
   160          return getSubSequence(groups[group * 2], groups[group * 2 + 1]);
   161      }
   162  
   163      public function getGroupCount(Void):Number {
   164          return parentPattern["groupCount"] - 1;
   165      }
   166  
   167      public function matches(Void):Boolean {
   168          reset();
   169          return match(0, getTextLength(), ENDANCHOR);
   170      }
   171  
   172      public function find(newFrom:Number, newTo:Number):Boolean {
   173      	if (newFrom == null && newTo == null) {
   174  	        if (last == first) {
   175  	           last++;
   176  	        }
   177  	        if (last > to) {
   178  	            for (var i = 0; i < groups.length; i++) {
   179  	                groups[i] = -1;
   180  	            }
   181  	            return false;
   182  	        }
   183  			newFrom = last;
   184  			newTo = getTextLength();
   185      	} else if (from != null && to == null) {
   186  	        newTo = getTextLength();
   187  	        reset();
   188      	}
   189      	
   190          from   	= newFrom < 0 ? 0 : newFrom;
   191          to     	= newTo;
   192          first  	= from;
   193          last   	= -1;
   194          oldLast = oldLast < 0 ? from : oldLast;
   195          for (var i = 0; i < groups.length; i++) {
   196          	groups[i] = -1;
   197          }
   198          acceptMode = NOANCHOR;
   199  
   200          var result:Boolean = parentPattern["root"].match(this, from, text);
   201          if (!result) first = -1;
   202          oldLast = last;
   203          return result;
   204      }
   205  
   206      public function lookingAt(Void):Boolean {
   207          reset();
   208          return match(0, getTextLength(), NOANCHOR);
   209      }
   210  
   211      public function appendReplacement(source:String, replacement:String):String {
   212  
   213          // If no match, return error
   214          if (first < 0) {
   215              throw new Exception("No match available", this, arguments);
   216          }
   217  
   218          // Process substitution string to replace group references with groups
   219          var cursor:Number = 0;
   220          var s:String = replacement;
   221          var result:String = new String();
   222  
   223          while (cursor < replacement.length) {
   224              var nextChar:Number = replacement.charCodeAt(cursor);
   225              if (nextChar == 0x5C) { // check for "\"
   226                  cursor++;
   227                  nextChar = replacement.charCodeAt(cursor);
   228                  result += chr(nextChar);
   229                  cursor++;
   230              } else if (nextChar == 0x24) { // check for "$"
   231                  // Skip past $
   232                  cursor++;
   233  
   234                  // The first number is always a group
   235                  var refNum:Number = replacement.charCodeAt(cursor) - 0x30;
   236                  if ((refNum < 0)||(refNum > 9)) {
   237                      throw new Exception("Illegal group reference", this, arguments);
   238                  }
   239                  cursor++;
   240  
   241                  // Capture the largest legal group string
   242                  var done:Boolean = false;
   243                  while (!done) {
   244                      if (cursor >= replacement.length) {
   245                          break;
   246                      }
   247                      var nextDigit:Number = replacement.charCodeAt(cursor) - 0x30;
   248                      if ((nextDigit < 0) || (nextDigit > 9)) { // not a number
   249                          break;
   250                      }
   251                      var newRefNum:Number = (refNum * 10) + nextDigit;
   252                      if (getGroupCount() < newRefNum) {
   253                          done = true;
   254                      } else {
   255                          refNum = newRefNum;
   256                          cursor++;
   257                      }
   258                  }
   259  
   260                  // Append group
   261                  if (getGroup(refNum) != null) {
   262                      result += String(getGroup(refNum));
   263                  }
   264              } else {
   265                  result += chr(nextChar);
   266                  cursor++;
   267              }
   268          }
   269  
   270          // Append the intervening text
   271          source += getSubSequence(lastAppendPosition, first);
   272          // Append the match substitution
   273          source += result;
   274  
   275          lastAppendPosition = last;
   276  		return source;
   277      }
   278  
   279      public function appendTail(source:String):String {
   280          return (source + getSubSequence(lastAppendPosition, getTextLength()));
   281      }
   282  
   283      public function replaceAll(replacement:String):String {
   284          reset();
   285          var result:Boolean = find();
   286          if (result) {
   287              var temp:String = new String();
   288              do {
   289                  appendReplacement(temp, replacement);
   290                  result = find();
   291              } while (result);
   292              appendTail(temp);
   293              return temp;
   294          }
   295          return text;
   296      }
   297  
   298      public function replaceFirst(replacement:String):String {
   299          var temp:String = new String();
   300          reset();
   301          if (find()) appendReplacement(temp, replacement);
   302          appendTail(temp);
   303          return temp;
   304      }
   305  
   306      private function match(newFrom:Number, newTo:Number, anchor:Number):Boolean {
   307          from 	= newFrom < 0 ? 0 : newFrom;
   308          to 		= newTo;
   309          first 	= from;
   310          last 	= -1;
   311          oldLast = oldLast < 0 ? from : oldLast;
   312          for (var i = 0; i < groups.length; i++) {
   313              groups[i] = -1;
   314          }
   315          acceptMode = anchor;
   316  
   317          var result:Boolean = parentPattern["matchRoot"].match(this, from, text);
   318          if (!result) first = -1;
   319          oldLast = last;
   320          return result;
   321      }
   322  
   323      private function getTextLength(Void):Number {
   324          return text.length;
   325      }
   326  
   327      private function getSubSequence(beginIndex:Number, endIndex:Number):String {
   328          return text.substring(beginIndex, endIndex);
   329      }
   330  
   331  }
   332