libjava/classpath/java/text/CollationElementIterator.java

   1 /* CollationElementIterator.java -- Walks through collation elements
   2    Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004  Free Software Foundation
   3
   4 This file is part of GNU Classpath.
   5
   6 GNU Classpath is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Classpath is distributed in the hope that it will be useful, but
  12 WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Classpath; see the file COPYING.  If not, write to the
  18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301 USA.
  20
  21 Linking this library statically or dynamically with other modules is
  22 making a combined work based on this library.  Thus, the terms and
  23 conditions of the GNU General Public License cover the whole
  24 combination.
  25
  26 As a special exception, the copyright holders of this library give you
  27 permission to link this library with independent modules to produce an
  28 executable, regardless of the license terms of these independent
  29 modules, and to copy and distribute the resulting executable under
  30 terms of your choice, provided that you also meet, for each linked
  31 independent module, the terms and conditions of the license of that
  32 module.  An independent module is a module which is not derived from
  33 or based on this library.  If you modify this library, you may extend
  34 this exception to your version of the library, but you are not
  35 obligated to do so.  If you do not wish to do so, delete this
  36 exception statement from your version. */
  37
  38
  39 package java.text;
  40
  41 import gnu.java.lang.CPStringBuilder;
  42
  43 import java.util.ArrayList;
  44
  45 /* Written using "Java Class Libraries", 2nd edition, plus online
  46  * API docs for JDK 1.2 from http://www.javasoft.com.
  47  * Status: Believed complete and correct to JDK 1.1.
  48  */
  49
  50 /**
  51  * This class walks through the character collation elements of a
  52  * <code>String</code> as defined by the collation rules in an instance of
  53  * <code>RuleBasedCollator</code>.  There is no public constructor for
  54  * this class.  An instance is created by calling the
  55  * <code>getCollationElementIterator</code> method on
  56  * <code>RuleBasedCollator</code>.
  57  *
  58  * @author Aaron M. Renn (arenn@urbanophile.com)
  59  * @author Tom Tromey (tromey@cygnus.com)
  60  * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
  61  */
  62 public final class CollationElementIterator
  63 {
  64   /**
  65    * This is a constant value that is returned to indicate that the end of
  66    * the string was encountered.
  67    */
  68   public static final int NULLORDER = -1;
  69
  70   /**
  71    * This is the RuleBasedCollator this object was created from.
  72    */
  73   RuleBasedCollator collator;
  74
  75   /**
  76    * This is the String that is being iterated over.
  77    */
  78   CharacterIterator text;
  79
  80   /**
  81    * This is the index into the collation decomposition where we are currently scanning.
  82    */
  83   int index;
  84
  85   /**
  86    * This is the index into the String where we are currently scanning.
  87    */
  88   int textIndex;
  89
  90   /**
  91    * Array containing the collation decomposition of the
  92    * text given to the constructor.
  93    */
  94   private RuleBasedCollator.CollationElement[] text_decomposition;
  95
  96   /**
  97    * Array containing the index of the specified block.
  98    */
  99   private int[] text_indexes;
 100
 101   /**
 102    * This method initializes a new instance of <code>CollationElementIterator</code>
 103    * to iterate over the specified <code>String</code> using the rules in the
 104    * specified <code>RuleBasedCollator</code>.
 105    *
 106    * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
 107    * @param text The <code>String</code> to iterate over.
 108    */
 109   CollationElementIterator(RuleBasedCollator collator, String text)
 110   {
 111     this.collator = collator;
 112
 113     setText (text);
 114   }
 115
 116   /**
 117    * This method initializes a new instance of <code>CollationElementIterator</code>
 118    * to iterate over the specified <code>String</code> using the rules in the
 119    * specified <code>RuleBasedCollator</code>.
 120    *
 121    * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
 122    * @param text The character iterator to iterate over.
 123    */
 124   CollationElementIterator(RuleBasedCollator collator, CharacterIterator text)
 125   {
 126     this.collator = collator;
 127
 128     setText (text);
 129   }
 130
 131   RuleBasedCollator.CollationElement nextBlock()
 132   {
 133     if (index >= text_decomposition.length)
 134       return null;
 135
 136     RuleBasedCollator.CollationElement e = text_decomposition[index];
 137
 138     textIndex = text_indexes[index+1];
 139
 140     index++;
 141
 142     return e;
 143   }
 144
 145   RuleBasedCollator.CollationElement previousBlock()
 146   {
 147     if (index == 0)
 148       return null;
 149
 150     index--;
 151     RuleBasedCollator.CollationElement e = text_decomposition[index];
 152
 153     textIndex = text_indexes[index+1];
 154
 155     return e;
 156   }
 157
 158   /**
 159    * This method returns the collation ordering value of the next character sequence
 160    * in the string (it may be an extended character following collation rules).
 161    * This method will return <code>NULLORDER</code> if the
 162    * end of the string was reached.
 163    *
 164    * @return The collation ordering value.
 165    */
 166   public int next()
 167   {
 168     RuleBasedCollator.CollationElement e = nextBlock();
 169
 170     if (e == null)
 171       return NULLORDER;
 172
 173     return e.getValue();
 174   }
 175
 176   /**
 177    * This method returns the collation ordering value of the previous character
 178    * in the string.  This method will return <code>NULLORDER</code> if the
 179    * beginning of the string was reached.
 180    *
 181    * @return The collation ordering value.
 182    */
 183   public int previous()
 184   {
 185     RuleBasedCollator.CollationElement e = previousBlock();
 186
 187     if (e == null)
 188       return NULLORDER;
 189
 190     return e.getValue();
 191   }
 192
 193   /**
 194    * This method returns the primary order value for the given collation
 195    * value.
 196    *
 197    * @param order The collation value returned from <code>next()</code> or
 198    *              <code>previous()</code>.
 199    *
 200    * @return The primary order value of the specified collation value.  This is
 201    *         the high 16 bits.
 202    */
 203   public static int primaryOrder(int order)
 204   {
 205     // From the JDK 1.2 spec.
 206     return order >>> 16;
 207   }
 208
 209   /**
 210    * This method resets the internal position pointer to read from the
 211    * beginning of the <code>String</code> again.
 212    */
 213   public void reset()
 214   {
 215     index = 0;
 216     textIndex = 0;
 217   }
 218
 219   /**
 220    * This method returns the secondary order value for the given collation
 221    * value.
 222    *
 223    * @param order The collation value returned from <code>next()</code> or
 224    *              <code>previous()</code>.
 225    *
 226    * @return The secondary order value of the specified collation value.  This
 227    *         is the bits 8-15.
 228    */
 229   public static short secondaryOrder(int order)
 230   {
 231     // From the JDK 1.2 spec.
 232     return (short) ((order >>> 8) & 255);
 233   }
 234
 235   /**
 236    * This method returns the tertiary order value for the given collation
 237    * value.
 238    *
 239    * @param order The collation value returned from <code>next()</code> or
 240    *              <code>previous()</code>.
 241    *
 242    * @return The tertiary order value of the specified collation value.  This
 243    *         is the low eight bits.
 244    */
 245   public static short tertiaryOrder(int order)
 246   {
 247     // From the JDK 1.2 spec.
 248     return (short) (order & 255);
 249   }
 250
 251   /**
 252    * This method sets the <code>String</code> that it is iterating over
 253    * to the specified <code>String</code>.
 254    *
 255    * @param text The new <code>String</code> to iterate over.
 256    *
 257    * @since 1.2
 258    */
 259   public void setText(String text)
 260   {
 261     int idx = 0;
 262     int idx_idx = 0;
 263     int alreadyExpanded = 0;
 264     int idxToMove = 0;
 265
 266     this.text = new StringCharacterIterator(text);
 267     this.index = 0;
 268
 269     String work_text = text.intern();
 270
 271     ArrayList a_element = new ArrayList();
 272     ArrayList a_idx = new ArrayList();
 273
 274     // Build element collection ordered as they come in "text".
 275     while (idx < work_text.length())
 276       {
 277         String key, key_old;
 278
 279         Object object = null;
 280         int p = 1;
 281
 282         // IMPROVE: use a TreeMap with a prefix-ordering rule.
 283         key_old = key = null;
 284         do
 285           {
 286             if (object != null)
 287               key_old = key;
 288             key = work_text.substring (idx, idx+p);
 289             object = collator.prefix_tree.get (key);
 290             if (object != null && idx < alreadyExpanded)
 291               {
 292                 RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
 293                 if (prefix.expansion != null &&
 294                     prefix.expansion.startsWith(work_text.substring(0, idx)))
 295                 {
 296                   object = null;
 297                   key = key_old;
 298                 }
 299               }
 300             p++;
 301           }
 302         while (idx+p <= work_text.length());
 303
 304         if (object == null)
 305           key = key_old;
 306
 307         RuleBasedCollator.CollationElement prefix =
 308           (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
 309
 310         /*
 311          * First case: There is no such sequence in the database.
 312          * We will have to build one from the context.
 313          */
 314         if (prefix == null)
 315           {
 316             /*
 317              * We are dealing with sequences in an expansion. They
 318              * are treated as accented characters (tertiary order).
 319              */
 320             if (alreadyExpanded > 0)
 321               {
 322                 RuleBasedCollator.CollationElement e =
 323                   collator.getDefaultAccentedElement (work_text.charAt (idx));
 324
 325                 a_element.add (e);
 326                 a_idx.add (new Integer(idx_idx));
 327                 idx++;
 328                 alreadyExpanded--;
 329                 if (alreadyExpanded == 0)
 330                   {
 331                     /* There is not any characters left in the expansion set.
 332                      * We can increase the pointer in the source string.
 333                      */
 334                     idx_idx += idxToMove;
 335                     idxToMove = 0;
 336                   }
 337                 else
 338                   idx_idx++;
 339               }
 340             else
 341               {
 342                 /* This is a normal character. */
 343                 RuleBasedCollator.CollationElement e =
 344                   collator.getDefaultElement (work_text.charAt (idx));
 345                 Integer i_ref = new Integer(idx_idx);
 346
 347                 /* Don't forget to mark it as a special sequence so the
 348                  * string can be ordered.
 349                  */
 350                 a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
 351                 a_idx.add (i_ref);
 352                 a_element.add (e);
 353                 a_idx.add (i_ref);
 354                 idx_idx++;
 355                 idx++;
 356               }
 357             continue;
 358           }
 359
 360         /*
 361          * Second case: Here we have found a matching sequence.
 362          * Here we have an expansion string prepend it to the "work text" and
 363          * add the corresponding sorting element. We must also mark
 364          */
 365         if (prefix.expansion != null)
 366           {
 367             work_text = prefix.expansion
 368               + work_text.substring (idx+prefix.key.length());
 369             idx = 0;
 370             a_element.add (prefix);
 371             a_idx.add (new Integer(idx_idx));
 372             if (alreadyExpanded == 0)
 373               idxToMove = prefix.key.length();
 374             alreadyExpanded += prefix.expansion.length()-prefix.key.length();
 375           }
 376         else
 377           {
 378             /* Third case: the simplest. We have got the prefix and it
 379              * has not to be expanded.
 380              */
 381             a_element.add (prefix);
 382             a_idx.add (new Integer(idx_idx));
 383             idx += prefix.key.length();
 384             /* If the sequence is in an expansion, we must decrease the
 385              * counter.
 386              */
 387             if (alreadyExpanded > 0)
 388               {
 389                 alreadyExpanded -= prefix.key.length();
 390                 if (alreadyExpanded == 0)
 391                   {
 392                     idx_idx += idxToMove;
 393                     idxToMove = 0;
 394                   }
 395               }
 396             else
 397               idx_idx += prefix.key.length();
 398           }
 399       }
 400
 401     text_decomposition = (RuleBasedCollator.CollationElement[])
 402            a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]);
 403     text_indexes = new int[a_idx.size()+1];
 404     for (int i = 0; i < a_idx.size(); i++)
 405       {
 406         text_indexes[i] = ((Integer)a_idx.get(i)).intValue();
 407       }
 408     text_indexes[a_idx.size()] = text.length();
 409   }
 410
 411   /**
 412    * This method sets the <code>String</code> that it is iterating over
 413    * to the <code>String</code> represented by the specified
 414    * <code>CharacterIterator</code>.
 415    *
 416    * @param source The <code>CharacterIterator</code> containing the new
 417    * <code>String</code> to iterate over.
 418    */
 419   public void setText(CharacterIterator source)
 420   {
 421     CPStringBuilder expand = new CPStringBuilder();
 422
 423     // For now assume we read from the beginning of the string.
 424     for (char c = source.first();
 425          c != CharacterIterator.DONE;
 426          c = source.next())
 427       expand.append(c);
 428
 429     setText(expand.toString());
 430   }
 431
 432   /**
 433    * This method returns the current offset into the <code>String</code>
 434    * that is being iterated over.
 435    *
 436    * @return The iteration index position.
 437    *
 438    * @since 1.2
 439    */
 440   public int getOffset()
 441   {
 442     return textIndex;
 443   }
 444
 445   /**
 446    * This method sets the iteration index position into the current
 447    * <code>String</code> to the specified value.  This value must not
 448    * be negative and must not be greater than the last index position
 449    * in the <code>String</code>.
 450    *
 451    * @param offset The new iteration index position.
 452    *
 453    * @exception IllegalArgumentException If the new offset is not valid.
 454    */
 455   public void setOffset(int offset)
 456   {
 457     if (offset < 0)
 458       throw new IllegalArgumentException("Negative offset: " + offset);
 459
 460     if (offset > (text.getEndIndex() - 1))
 461       throw new IllegalArgumentException("Offset too large: " + offset);
 462
 463     for (index = 0; index < text_decomposition.length; index++)
 464       {
 465         if (offset <= text_indexes[index])
 466           break;
 467       }
 468     /*
 469      * As text_indexes[0] == 0, we should not have to take care whether index is
 470      * greater than 0. It is always.
 471      */
 472     if (text_indexes[index] == offset)
 473       textIndex = offset;
 474     else
 475       textIndex = text_indexes[index-1];
 476   }
 477
 478   /**
 479    * This method returns the maximum length of any expansion sequence that
 480    * ends with the specified collation order value.  (Whatever that means).
 481    *
 482    * @param value The collation order value
 483    *
 484    * @return The maximum length of an expansion sequence.
 485    */
 486   public int getMaxExpansion(int value)
 487   {
 488     return 1;
 489   }
 490 }