libjava/classpath/java/text/RuleBasedCollator.java

   1 /* RuleBasedCollator.java -- Concrete Collator Class
   2    Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005  Free Software Foundation, Inc.
   3
   4 This file is part of GNU Classpath.
   5
   6 GNU Classpath is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Classpath is distributed in the hope that it will be useful, but
  12 WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Classpath; see the file COPYING.  If not, write to the
  18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301 USA.
  20
  21 Linking this library statically or dynamically with other modules is
  22 making a combined work based on this library.  Thus, the terms and
  23 conditions of the GNU General Public License cover the whole
  24 combination.
  25
  26 As a special exception, the copyright holders of this library give you
  27 permission to link this library with independent modules to produce an
  28 executable, regardless of the license terms of these independent
  29 modules, and to copy and distribute the resulting executable under
  30 terms of your choice, provided that you also meet, for each linked
  31 independent module, the terms and conditions of the license of that
  32 module.  An independent module is a module which is not derived from
  33 or based on this library.  If you modify this library, you may extend
  34 this exception to your version of the library, but you are not
  35 obligated to do so.  If you do not wish to do so, delete this
  36 exception statement from your version. */
  37
  38
  39 package java.text;
  40
  41 import gnu.classpath.NotImplementedException;
  42
  43 import java.util.ArrayList;
  44 import java.util.HashMap;
  45
  46 /* Written using "Java Class Libraries", 2nd edition, plus online
  47  * API docs for JDK 1.2 from http://www.javasoft.com.
  48  * Status: Believed complete and correct
  49  */
  50
  51 /**
  52  * This class is a concrete subclass of <code>Collator</code> suitable
  53  * for string collation in a wide variety of languages.  An instance of
  54  * this class is normally returned by the <code>getInstance</code> method
  55  * of <code>Collator</code> with rules predefined for the requested
  56  * locale.  However, an instance of this class can be created manually
  57  * with any desired rules.
  58  * <p>
  59  * Rules take the form of a <code>String</code> with the following syntax
  60  * <ul>
  61  * <li> Modifier: '@'</li>
  62  * <li> Relation: '&lt;' | ';' | ',' | '=' : &lt;text&gt;</li>
  63  * <li> Reset: '&amp;' : &lt;text&gt;</li>
  64  * </ul>
  65  * The modifier character indicates that accents sort backward as is the
  66  * case with French.  The modifier applies to all rules <b>after</b>
  67  * the modifier but before the next primary sequence. If placed at the end
  68  * of the sequence if applies to all unknown accented character.
  69  * The relational operators specify how the text
  70  * argument relates to the previous term.  The relation characters have
  71  * the following meanings:
  72  * <ul>
  73  * <li>'&lt;' - The text argument is greater than the prior term at the primary
  74  * difference level.</li>
  75  * <li>';' - The text argument is greater than the prior term at the secondary
  76  * difference level.</li>
  77  * <li>',' - The text argument is greater than the prior term at the tertiary
  78  * difference level.</li>
  79  * <li>'=' - The text argument is equal to the prior term</li>
  80  * </ul>
  81  * <p>
  82  * As for the text argument itself, this is any sequence of Unicode
  83  * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
  84  * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are
  85  * desired, they must be enclosed in single quotes.  If any whitespace is
  86  * encountered, it is ignored.  (For example, "a b" is equal to "ab").
  87  * <p>
  88  * The reset operation inserts the following rule at the point where the
  89  * text argument to it exists in the previously declared rule string.  This
  90  * makes it easy to add new rules to an existing string by simply including
  91  * them in a reset sequence at the end.  Note that the text argument, or
  92  * at least the first character of it, must be present somewhere in the
  93  * previously declared rules in order to be inserted properly.  If this
  94  * is not satisfied, a <code>ParseException</code> will be thrown.
  95  * <p>
  96  * This system of configuring <code>RuleBasedCollator</code> is needlessly
  97  * complex and the people at Taligent who developed it (along with the folks
  98  * at Sun who accepted it into the Java standard library) deserve a slow
  99  * and agonizing death.
 100  * <p>
 101  * Here are a couple of example of rule strings:
 102  * <p>
 103  * "&lt; a &lt; b &lt; c" - This string says that a is greater than b which is
 104  * greater than c, with all differences being primary differences.
 105  * <p>
 106  * "&lt; a,A &lt; b,B &lt; c,C" - This string says that 'A' is greater than 'a' with
 107  * a tertiary strength comparison.  Both 'b' and 'B' are greater than 'a' and
 108  * 'A' during a primary strength comparison.  But 'B' is greater than 'b'
 109  * under a tertiary strength comparison.
 110  * <p>
 111  * "&lt; a &lt; c &amp; a &lt; b " - This sequence is identical in function to the
 112  * "&lt; a &lt; b &lt; c" rule string above.  The '&amp;' reset symbol indicates that
 113  * the rule "&lt; b" is to be inserted after the text argument "a" in the
 114  * previous rule string segment.
 115  * <p>
 116  * "&lt; a &lt; b &amp; y &lt; z" - This is an error.  The character 'y' does not appear
 117  * anywhere in the previous rule string segment so the rule following the
 118  * reset rule cannot be inserted.
 119  * <p>
 120  * "&lt; a &amp; A @ &lt; e &amp; E &lt; f&amp; F" - This sequence is equivalent to the following
 121  * "&lt; a &amp; A &lt; E &amp; e &lt; f &amp; F".
 122  * <p>
 123  * For a description of the various comparison strength types, see the
 124  * documentation for the <code>Collator</code> class.
 125  * <p>
 126  * As an additional complication to this already overly complex rule scheme,
 127  * if any characters precede the first rule, these characters are considered
 128  * ignorable.  They will be treated as if they did not exist during
 129  * comparisons.  For example, "- &lt; a &lt; b ..." would make '-' an ignorable
 130  * character such that the strings "high-tech" and "hightech" would
 131  * be considered identical.
 132  * <p>
 133  * A <code>ParseException</code> will be thrown for any of the following
 134  * conditions:
 135  * <ul>
 136  * <li>Unquoted punctuation characters in a text argument.</li>
 137  * <li>A relational or reset operator not followed by a text argument</li>
 138  * <li>A reset operator where the text argument is not present in
 139  * the previous rule string section.</li>
 140  * </ul>
 141  *
 142  * @author Aaron M. Renn (arenn@urbanophile.com)
 143  * @author Tom Tromey (tromey@cygnus.com)
 144  * @author Guilhem Lavaux (guilhem@kaffe.org)
 145  */
 146 public class RuleBasedCollator extends Collator
 147 {
 148   /**
 149    * This class describes what rank has a character (or a sequence of characters)
 150    * in the lexicographic order. Each element in a rule has a collation element.
 151    */
 152   static final class CollationElement
 153   {
 154     String key;
 155     int primary;
 156     short secondary;
 157     short tertiary;
 158     short equality;
 159     boolean ignore;
 160     String expansion;
 161
 162     CollationElement(String key, int primary, short secondary, short tertiary,
 163                      short equality, String expansion, boolean ignore)
 164     {
 165       this.key = key;
 166       this.primary = primary;
 167       this.secondary = secondary;
 168       this.tertiary = tertiary;
 169       this.equality = equality;
 170       this.ignore = ignore;
 171       this.expansion = expansion;
 172     }
 173
 174     int getValue()
 175     {
 176       return (primary << 16) + (secondary << 8) + tertiary;
 177     }
 178   }
 179
 180   /**
 181    * Basic collation instruction (internal format) to build the series of
 182    * collation elements. It contains an instruction which specifies the new
 183    * state of the generator. The sequence of instruction should not contain
 184    * RESET (it is used by
 185    * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
 186    * as a temporary state while merging two sets of instructions.
 187    */
 188   static final class CollationSorter
 189   {
 190     static final int GREATERP = 0;
 191     static final int GREATERS = 1;
 192     static final int GREATERT = 2;
 193     static final int EQUAL = 3;
 194     static final int RESET = 4;
 195     static final int INVERSE_SECONDARY = 5;
 196
 197     int comparisonType;
 198     String textElement;
 199     int hashText;
 200     int offset;
 201     boolean ignore;
 202
 203     String expansionOrdering;
 204   }
 205
 206   /**
 207    * This the the original rule string.
 208    */
 209   private String rules;
 210
 211   /**
 212    * This is the table of collation element values
 213    */
 214   private Object[] ce_table;
 215
 216   /**
 217    * Quick-prefix finder.
 218    */
 219   HashMap prefix_tree;
 220
 221   /**
 222    * This is the value of the last sequence entered into
 223    * <code>ce_table</code>. It is used to compute the
 224    * ordering value of unspecified character.
 225    */
 226   private int last_primary_value;
 227
 228   /**
 229    * This is the value of the last secondary sequence of the
 230    * primary 0, entered into
 231    * <code>ce_table</code>. It is used to compute the
 232    * ordering value of an unspecified accented character.
 233    */
 234   private int last_tertiary_value;
 235
 236   /**
 237    * This variable is true if accents need to be sorted
 238    * in the other direction.
 239    */
 240   private boolean inverseAccentComparison;
 241
 242   /**
 243    * This collation element is special to unknown sequence.
 244    * The JDK uses it to mark and sort the characters which has
 245    * no collation rules.
 246    */
 247   static final CollationElement SPECIAL_UNKNOWN_SEQ =
 248     new CollationElement("", (short) 32767, (short) 0, (short) 0,
 249                          (short) 0, null, false);
 250
 251   /**
 252    * This method initializes a new instance of <code>RuleBasedCollator</code>
 253    * with the specified collation rules.  Note that an application normally
 254    * obtains an instance of <code>RuleBasedCollator</code> by calling the
 255    * <code>getInstance</code> method of <code>Collator</code>.  That method
 256    * automatically loads the proper set of rules for the desired locale.
 257    *
 258    * @param rules The collation rule string.
 259    *
 260    * @exception ParseException If the rule string contains syntax errors.
 261    */
 262   public RuleBasedCollator(String rules) throws ParseException
 263   {
 264     if (rules.equals(""))
 265       throw new ParseException("empty rule set", 0);
 266
 267     this.rules = rules;
 268
 269     buildCollationVector(parseString(rules));
 270     buildPrefixAccess();
 271   }
 272
 273   /**
 274    * This method returns the number of common characters at the beginning
 275    * of the string of the two parameters.
 276    *
 277    * @param prefix A string considered as a prefix to test against
 278    * the other string.
 279    * @param s A string to test the prefix against.
 280    * @return The number of common characters.
 281    */
 282   static int findPrefixLength(String prefix, String s)
 283   {
 284     int index;
 285     int len = prefix.length();
 286
 287     for (index = 0; index < len && index < s.length(); ++index)
 288       {
 289         if (prefix.charAt(index) != s.charAt(index))
 290           return index;
 291       }
 292
 293
 294     return index;
 295   }
 296
 297   /**
 298    * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
 299    * checks whether it is possible to find an anchor point for the rules to be merged and
 300    * then insert them at that precise point.
 301    *
 302    * @param offset Offset in the string containing rules of the beginning of the rules
 303    * being merged in.
 304    * @param starter Text of the rules being merged.
 305    * @param main Repository of all already parsed rules.
 306    * @param patch Rules to be merged into the repository.
 307    * @throws ParseException if it is impossible to find an anchor point for the new rules.
 308    */
 309   private void mergeRules(int offset, String starter, ArrayList main, ArrayList patch)
 310     throws ParseException
 311   {
 312     int insertion_point = -1;
 313     int max_length = 0;
 314
 315     /* We must check that no rules conflict with another already present. If it
 316      * is the case delete the old rule.
 317      */
 318
 319     /* For the moment good old O(N^2) algorithm.
 320      */
 321     for (int i = 0; i < patch.size(); i++)
 322       {
 323         int j = 0;
 324
 325         while (j < main.size())
 326           {
 327             CollationSorter rule1 = (CollationSorter) patch.get(i);
 328             CollationSorter rule2 = (CollationSorter) main.get(j);
 329
 330             if (rule1.textElement.equals(rule2.textElement))
 331               main.remove(j);
 332             else
 333               j++;
 334           }
 335       }
 336
 337     // Find the insertion point... O(N)
 338     for (int i = 0; i < main.size(); i++)
 339       {
 340         CollationSorter sorter = (CollationSorter) main.get(i);
 341         int length = findPrefixLength(starter, sorter.textElement);
 342
 343         if (length > max_length)
 344           {
 345             max_length = length;
 346             insertion_point = i+1;
 347           }
 348       }
 349
 350     if (insertion_point < 0)
 351       throw new ParseException("no insertion point found for " + starter, offset);
 352
 353     if (max_length < starter.length())
 354       {
 355         /*
 356          * We need to expand the first entry. It must be sorted
 357          * like if it was the reference key itself (like the spec
 358          * said. So the first entry is special: the element is
 359          * replaced by the specified text element for the sorting.
 360          * This text replace the old one for comparisons. However
 361          * to preserve the behaviour we replace the first key (corresponding
 362          * to the found prefix) by a new code rightly ordered in the
 363          * sequence. The rest of the subsequence must be appended
 364          * to the end of the sequence.
 365          */
 366         CollationSorter sorter = (CollationSorter) patch.get(0);
 367         CollationSorter expansionPrefix =
 368           (CollationSorter) main.get(insertion_point-1);
 369
 370         sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
 371
 372         main.add(insertion_point, sorter);
 373
 374         /*
 375          * This is a new set of rules. Append to the list.
 376          */
 377         patch.remove(0);
 378         insertion_point++;
 379       }
 380
 381     // Now insert all elements of patch at the insertion point.
 382     for (int i = 0; i < patch.size(); i++)
 383       main.add(i+insertion_point, patch.get(i));
 384   }
 385
 386   /**
 387    * This method parses a string and build a set of sorting instructions. The parsing
 388    * may only be partial on the case the rules are to be merged sometime later.
 389    *
 390    * @param stop_on_reset If this parameter is true then the parser stops when it
 391    * encounters a reset instruction. In the other case, it tries to parse the subrules
 392    * and merged it in the same repository.
 393    * @param v Output vector for the set of instructions.
 394    * @param base_offset Offset in the string to begin parsing.
 395    * @param rules Rules to be parsed.
 396    * @return -1 if the parser reached the end of the string, an integer representing the
 397    * offset in the string at which it stopped parsing.
 398    * @throws ParseException if something turned wrong during the parsing. To get details
 399    * decode the message.
 400    */
 401   private int subParseString(boolean stop_on_reset, ArrayList v,
 402                              int base_offset, String rules)
 403     throws ParseException
 404   {
 405     boolean ignoreChars = (base_offset == 0);
 406     int operator = -1;
 407     StringBuffer sb = new StringBuffer();
 408     boolean doubleQuote = false;
 409     boolean eatingChars = false;
 410     boolean nextIsModifier = false;
 411     boolean isModifier = false;
 412     int i;
 413
 414 main_parse_loop:
 415     for (i = 0; i < rules.length(); i++)
 416       {
 417         char c = rules.charAt(i);
 418         int type = -1;
 419
 420         if (!eatingChars &&
 421             ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
 422               continue;
 423
 424         isModifier = nextIsModifier;
 425         nextIsModifier = false;
 426
 427         if (eatingChars && c != '\'')
 428           {
 429             doubleQuote = false;
 430             sb.append(c);
 431             continue;
 432           }
 433         if (doubleQuote && eatingChars)
 434           {
 435             sb.append(c);
 436             doubleQuote = false;
 437             continue;
 438           }
 439
 440         switch (c)
 441           {
 442           case '!':
 443             throw new ParseException
 444               ("Modifier '!' is not yet supported by Classpath", i + base_offset);
 445           case '<':
 446             type = CollationSorter.GREATERP;
 447             break;
 448           case ';':
 449             type = CollationSorter.GREATERS;
 450             break;
 451           case ',':
 452             type = CollationSorter.GREATERT;
 453             break;
 454           case '=':
 455             type = CollationSorter.EQUAL;
 456             break;
 457           case '\'':
 458             eatingChars = !eatingChars;
 459             doubleQuote = true;
 460             break;
 461           case '@':
 462             if (ignoreChars)
 463               throw new ParseException
 464                 ("comparison list has not yet been started. You may only use"
 465                  + "(<,;=&)", i + base_offset);
 466             // Inverse the order of secondaries from now on.
 467             nextIsModifier = true;
 468             type = CollationSorter.INVERSE_SECONDARY;
 469             break;
 470           case '&':
 471             type = CollationSorter.RESET;
 472             if (stop_on_reset)
 473               break main_parse_loop;
 474             break;
 475           default:
 476             if (operator < 0)
 477               throw new ParseException
 478                 ("operator missing at " + (i + base_offset), i + base_offset);
 479             if (! eatingChars
 480                 && ((c >= 0x21 && c <= 0x2F)
 481                     || (c >= 0x3A && c <= 0x40)
 482                     || (c >= 0x5B && c <= 0x60)
 483                     || (c >= 0x7B && c <= 0x7E)))
 484               throw new ParseException
 485                 ("unquoted punctuation character '" + c + "'", i + base_offset);
 486
 487             //type = ignoreChars ? CollationSorter.IGNORE : -1;
 488             sb.append(c);
 489             break;
 490           }
 491
 492         if (type  < 0)
 493           continue;
 494
 495         if (operator < 0)
 496           {
 497             operator = type;
 498             continue;
 499           }
 500
 501         if (sb.length() == 0 && !isModifier)
 502           throw new ParseException
 503             ("text element empty at " + (i+base_offset), i+base_offset);
 504
 505         if (operator == CollationSorter.RESET)
 506           {
 507             /* Reposition in the sorting list at the position
 508              * indicated by the text element.
 509              */
 510             String subrules = rules.substring(i);
 511             ArrayList sorted_rules = new ArrayList();
 512             int idx;
 513
 514             // Parse the subrules but do not iterate through all
 515             // sublist. This is the privilege of the first call.
 516             idx = subParseString(true, sorted_rules, base_offset+i, subrules);
 517
 518             // Merge new parsed rules into the list.
 519             mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
 520             sb.setLength(0);
 521
 522             // Reset state to none.
 523             operator = -1;
 524             type = -1;
 525             // We have found a new subrule at 'idx' but it has not been parsed.
 526             if (idx >= 0)
 527               {
 528                 i += idx-1;
 529                 continue main_parse_loop;
 530               }
 531             else
 532                 // No more rules.
 533                 break main_parse_loop;
 534           }
 535
 536         CollationSorter sorter = new CollationSorter();
 537
 538         if (operator == CollationSorter.GREATERP)
 539           ignoreChars = false;
 540
 541         sorter.comparisonType = operator;
 542         sorter.textElement = sb.toString();
 543         sorter.hashText = sorter.textElement.hashCode();
 544         sorter.offset = base_offset+rules.length();
 545         sorter.ignore = ignoreChars;
 546         sb.setLength(0);
 547
 548         v.add(sorter);
 549         operator = type;
 550       }
 551
 552     if (operator >= 0)
 553       {
 554         CollationSorter sorter = new CollationSorter();
 555         int pos = rules.length() + base_offset;
 556
 557         if ((sb.length() != 0 && nextIsModifier)
 558             || (sb.length() == 0 && !nextIsModifier && !eatingChars))
 559           throw new ParseException("text element empty at " + pos, pos);
 560
 561         if (operator == CollationSorter.GREATERP)
 562           ignoreChars = false;
 563
 564         sorter.comparisonType = operator;
 565         sorter.textElement = sb.toString();
 566         sorter.hashText = sorter.textElement.hashCode();
 567         sorter.offset = base_offset+pos;
 568         sorter.ignore = ignoreChars;
 569         v.add(sorter);
 570       }
 571
 572     if (i == rules.length())
 573       return -1;
 574     else
 575       return i;
 576   }
 577
 578   /**
 579    * This method creates a copy of this object.
 580    *
 581    * @return A copy of this object.
 582    */
 583   public Object clone()
 584   {
 585     return super.clone();
 586   }
 587
 588   /**
 589    * This method completely parses a string 'rules' containing sorting rules.
 590    *
 591    * @param rules String containing the rules to be parsed.
 592    * @return A set of sorting instructions stored in a Vector.
 593    * @throws ParseException if something turned wrong during the parsing. To get details
 594    * decode the message.
 595    */
 596   private ArrayList parseString(String rules)
 597     throws ParseException
 598   {
 599     ArrayList v = new ArrayList();
 600
 601     // result of the first subParseString is not absolute (may be -1 or a
 602     // positive integer). But we do not care.
 603     subParseString(false, v, 0, rules);
 604
 605     return v;
 606   }
 607
 608   /**
 609    * This method uses the sorting instructions built by {@link #parseString}
 610    * to build collation elements which can be directly used to sort strings.
 611    *
 612    * @param parsedElements Parsed instructions stored in a ArrayList.
 613    * @throws ParseException if the order of the instructions are not valid.
 614    */
 615   private void buildCollationVector(ArrayList parsedElements)
 616     throws ParseException
 617   {
 618     int primary_seq = 0;
 619     int last_tertiary_seq = 0;
 620     short secondary_seq = 0;
 621     short tertiary_seq = 0;
 622     short equality_seq = 0;
 623     boolean inverseComparisons = false;
 624     final boolean DECREASING = false;
 625     final boolean INCREASING = true;
 626     boolean secondaryType = INCREASING;
 627     ArrayList v = new ArrayList();
 628
 629     // elts is completely sorted.
 630 element_loop:
 631     for (int i = 0; i < parsedElements.size(); i++)
 632       {
 633         CollationSorter elt = (CollationSorter) parsedElements.get(i);
 634         boolean ignoreChar = false;
 635
 636         switch (elt.comparisonType)
 637           {
 638           case CollationSorter.GREATERP:
 639             primary_seq++;
 640             if (inverseComparisons)
 641               {
 642                 secondary_seq = Short.MAX_VALUE;
 643                 secondaryType = DECREASING;
 644               }
 645             else
 646               {
 647                 secondary_seq = 0;
 648                 secondaryType = INCREASING;
 649               }
 650             tertiary_seq = 0;
 651             equality_seq = 0;
 652             inverseComparisons = false;
 653             break;
 654           case CollationSorter.GREATERS:
 655             if (secondaryType == DECREASING)
 656               secondary_seq--;
 657             else
 658               secondary_seq++;
 659             tertiary_seq = 0;
 660             equality_seq = 0;
 661             break;
 662           case CollationSorter.INVERSE_SECONDARY:
 663             inverseComparisons = true;
 664             continue element_loop;
 665           case CollationSorter.GREATERT:
 666             tertiary_seq++;
 667             if (primary_seq == 0)
 668               last_tertiary_seq = tertiary_seq;
 669             equality_seq = 0;
 670             break;
 671           case CollationSorter.EQUAL:
 672             equality_seq++;
 673             break;
 674           case CollationSorter.RESET:
 675             throw new ParseException
 676               ("Invalid reached state 'RESET'. Internal error", elt.offset);
 677           default:
 678             throw new ParseException
 679               ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
 680           }
 681
 682         v.add(new CollationElement(elt.textElement, primary_seq,
 683                                    secondary_seq, tertiary_seq,
 684                                    equality_seq, elt.expansionOrdering, elt.ignore));
 685       }
 686
 687     this.inverseAccentComparison = inverseComparisons;
 688
 689     ce_table = v.toArray();
 690
 691     last_primary_value = primary_seq+1;
 692     last_tertiary_value = last_tertiary_seq+1;
 693   }
 694
 695   /**
 696    * Build a tree where all keys are the texts of collation elements and data is
 697    * the collation element itself. The tree is used when extracting all prefix
 698    * for a given text.
 699    */
 700   private void buildPrefixAccess()
 701   {
 702     prefix_tree = new HashMap();
 703
 704     for (int i = 0; i < ce_table.length; i++)
 705       {
 706         CollationElement e = (CollationElement) ce_table[i];
 707
 708         prefix_tree.put(e.key, e);
 709       }
 710   }
 711
 712   /**
 713    * This method returns an integer which indicates whether the first
 714    * specified <code>String</code> is less than, greater than, or equal to
 715    * the second.  The value depends not only on the collation rules in
 716    * effect, but also the strength and decomposition settings of this object.
 717    *
 718    * @param source The first <code>String</code> to compare.
 719    * @param target A second <code>String</code> to compare to the first.
 720    *
 721    * @return A negative integer if source &lt; target, a positive integer
 722    * if source &gt; target, or 0 if source == target.
 723    */
 724   public int compare(String source, String target)
 725   {
 726     CollationElementIterator cs, ct;
 727     CollationElement ord1block = null;
 728     CollationElement ord2block = null;
 729     boolean advance_block_1 = true;
 730     boolean advance_block_2 = true;
 731
 732     cs = getCollationElementIterator(source);
 733     ct = getCollationElementIterator(target);
 734
 735     for(;;)
 736       {
 737         int ord1;
 738         int ord2;
 739
 740         /*
 741          * We have to check whether the characters are ignorable.
 742          * If it is the case then forget them.
 743          */
 744         if (advance_block_1)
 745           {
 746             ord1block = cs.nextBlock();
 747             if (ord1block != null && ord1block.ignore)
 748               continue;
 749           }
 750
 751         if (advance_block_2)
 752           {
 753             ord2block = ct.nextBlock();
 754             if (ord2block != null && ord2block.ignore)
 755               {
 756                 advance_block_1 = false;
 757                 continue;
 758               }
 759          }
 760         else
 761           advance_block_2 = true;
 762
 763         if (!advance_block_1)
 764           advance_block_1 = true;
 765
 766         if (ord1block != null)
 767           ord1 = ord1block.getValue();
 768         else
 769           {
 770             if (ord2block == null)
 771               return 0;
 772             return -1;
 773           }
 774
 775         if (ord2block == null)
 776           return 1;
 777
 778         ord2 = ord2block.getValue();
 779
 780         // We know chars are totally equal, so skip
 781         if (ord1 == ord2)
 782           {
 783             if (getStrength() == IDENTICAL)
 784               if (!ord1block.key.equals(ord2block.key))
 785                 return ord1block.key.compareTo(ord2block.key);
 786             continue;
 787           }
 788
 789         // Check for primary strength differences
 790         int prim1 = CollationElementIterator.primaryOrder(ord1);
 791         int prim2 = CollationElementIterator.primaryOrder(ord2);
 792
 793         if (prim1 == 0 && getStrength() < TERTIARY)
 794           {
 795             advance_block_2 = false;
 796             continue;
 797           }
 798         else if (prim2 == 0 && getStrength() < TERTIARY)
 799           {
 800             advance_block_1 = false;
 801             continue;
 802           }
 803
 804         if (prim1 < prim2)
 805           return -1;
 806         else if (prim1 > prim2)
 807           return 1;
 808         else if (getStrength() == PRIMARY)
 809           continue;
 810
 811         // Check for secondary strength differences
 812         int sec1 = CollationElementIterator.secondaryOrder(ord1);
 813         int sec2 = CollationElementIterator.secondaryOrder(ord2);
 814
 815         if (sec1 < sec2)
 816           return -1;
 817         else if (sec1 > sec2)
 818           return 1;
 819         else if (getStrength() == SECONDARY)
 820           continue;
 821
 822         // Check for tertiary differences
 823         int tert1 = CollationElementIterator.tertiaryOrder(ord1);
 824         int tert2 = CollationElementIterator.tertiaryOrder(ord2);
 825
 826         if (tert1 < tert2)
 827           return -1;
 828         else if (tert1 > tert2)
 829           return 1;
 830         else if (getStrength() == TERTIARY)
 831           continue;
 832
 833         // Apparently JDK does this (at least for my test case).
 834         return ord1block.key.compareTo(ord2block.key);
 835       }
 836   }
 837
 838   /**
 839    * This method tests this object for equality against the specified
 840    * object.  This will be true if and only if the specified object is
 841    * another reference to this object.
 842    *
 843    * @param obj The <code>Object</code> to compare against this object.
 844    *
 845    * @return <code>true</code> if the specified object is equal to this object,
 846    * <code>false</code> otherwise.
 847    */
 848   public boolean equals(Object obj)
 849   {
 850     if (obj == this)
 851       return true;
 852     else
 853       return false;
 854   }
 855
 856   /**
 857    * This method builds a default collation element without invoking
 858    * the database created from the rules passed to the constructor.
 859    *
 860    * @param c Character which needs a collation element.
 861    * @return A valid brand new CollationElement instance.
 862    */
 863   CollationElement getDefaultElement(char c)
 864   {
 865     int v;
 866
 867     // Preliminary support for generic accent sorting inversion (I don't know if all
 868     // characters in the range should be sorted backward). This is the place
 869     // to fix this if needed.
 870     if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
 871       v = 0x0361 - ((int) c - 0x02B9);
 872     else
 873       v = (short) c;
 874     return new CollationElement("" + c, last_primary_value + v,
 875                                 (short) 0, (short) 0, (short) 0, null, false);
 876   }
 877
 878   /**
 879    * This method builds a default collation element for an accented character
 880    * without invoking the database created from the rules passed to the constructor.
 881    *
 882    * @param c Character which needs a collation element.
 883    * @return A valid brand new CollationElement instance.
 884    */
 885   CollationElement getDefaultAccentedElement(char c)
 886   {
 887     int v;
 888
 889     // Preliminary support for generic accent sorting inversion (I don't know if all
 890     // characters in the range should be sorted backward). This is the place
 891     // to fix this if needed.
 892     if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
 893       v = 0x0361 - ((int) c - 0x02B9);
 894     else
 895       v = (short) c;
 896     return new CollationElement("" + c, (short) 0,
 897                                 (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
 898   }
 899
 900   /**
 901    * This method returns an instance for <code>CollationElementIterator</code>
 902    * for the specified <code>String</code> under the collation rules for this
 903    * object.
 904    *
 905    * @param source The <code>String</code> to return the
 906    * <code>CollationElementIterator</code> instance for.
 907    *
 908    * @return A <code>CollationElementIterator</code> for the specified
 909    * <code>String</code>.
 910    */
 911   public CollationElementIterator getCollationElementIterator(String source)
 912   {
 913     return new CollationElementIterator(this, source);
 914   }
 915
 916   /**
 917    * This method returns an instance of <code>CollationElementIterator</code>
 918    * for the <code>String</code> represented by the specified
 919    * <code>CharacterIterator</code>.
 920    *
 921    * @param source The <code>CharacterIterator</code> with the desired <code>String</code>.
 922    *
 923    * @return A <code>CollationElementIterator</code> for the specified <code>String</code>.
 924    */
 925   public CollationElementIterator getCollationElementIterator(CharacterIterator source)
 926   {
 927     return new CollationElementIterator(this, source);
 928   }
 929
 930   /**
 931    * This method returns an instance of <code>CollationKey</code> for the
 932    * specified <code>String</code>.  The object returned will have a
 933    * more efficient mechanism for its comparison function that could
 934    * provide speed benefits if multiple comparisons are performed, such
 935    * as during a sort.
 936    *
 937    * @param source The <code>String</code> to create a <code>CollationKey</code> for.
 938    *
 939    * @return A <code>CollationKey</code> for the specified <code>String</code>.
 940    */
 941   public CollationKey getCollationKey(String source)
 942   {
 943     CollationElementIterator cei = getCollationElementIterator(source);
 944     ArrayList vect = new ArrayList();
 945
 946     int ord = cei.next();
 947     cei.reset(); //set to start of string
 948
 949     while (ord != CollationElementIterator.NULLORDER)
 950       {
 951         // If the primary order is null, it means this is an ignorable
 952         // character.
 953         if (CollationElementIterator.primaryOrder(ord) == 0)
 954           {
 955             ord = cei.next();
 956             continue;
 957           }
 958         switch (getStrength())
 959           {
 960             case PRIMARY:
 961               ord = CollationElementIterator.primaryOrder(ord);
 962               break;
 963
 964             case SECONDARY:
 965               ord = CollationElementIterator.primaryOrder(ord) << 8;
 966               ord |= CollationElementIterator.secondaryOrder(ord);
 967
 968             default:
 969                break;
 970           }
 971
 972         vect.add(new Integer(ord));
 973         ord = cei.next(); //increment to next key
 974       }
 975
 976     Object[] objarr = vect.toArray();
 977     byte[] key = new byte[objarr.length * 4];
 978
 979     for (int i = 0; i < objarr.length; i++)
 980       {
 981         int j = ((Integer) objarr[i]).intValue();
 982         key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
 983         key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
 984         key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
 985         key [i * 4 + 3] = (byte) (j & 0x000000FF);
 986       }
 987
 988     return new CollationKey(this, source, key);
 989   }
 990
 991   /**
 992    * This method returns a <code>String</code> containing the collation rules
 993    * for this object.
 994    *
 995    * @return The collation rules for this object.
 996    */
 997   public String getRules()
 998   {
 999     return rules;
1000   }
1001
1002   /**
1003    * This method returns a hash value for this object.
1004    *
1005    * @return A hash value for this object.
1006    */
1007   public int hashCode()
1008   {
1009     return System.identityHashCode(this);
1010   }
1011 }