--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>\r
+<classpath>\r
+ <!-- Eclipse build path for this project: the src folder, the default JRE container, the library jars listed below, and bin as the output folder. -->\r
+ <!-- NOTE(review): every "lib" entry is an absolute path on drive D:, and the h2 jar sits under D:/Nevernote-prod while the other NeverNote jars sit under D:/NeverNote. These paths presumably only resolve on a machine laid out the same way; confirm whether project-relative paths were intended. -->\r
+ <classpathentry kind="src" path="src"/>\r
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>\r
+ <classpathentry kind="lib" path="D:/NeverNote/lib/PDFRenderer.jar"/>\r
+ <classpathentry kind="lib" path="D:/Nevernote-prod/lib/h2-1.2.136.jar"/>\r
+ <classpathentry kind="lib" path="D:/NeverNote/lib/evernote-api-1.15.jar"/>\r
+ <classpathentry kind="lib" path="D:/NeverNote/lib/libthrift.jar"/>\r
+ <classpathentry kind="lib" path="D:/NeverNote/lib/log4j-1.2.14.jar"/>\r
+ <classpathentry kind="lib" path="D:/qtjambi-win32-lgpl-4.5.0_01/qtjambi-4.5.0_01.jar"/>\r
+ <classpathentry kind="lib" path="D:/qtjambi-win32-lgpl-4.5.0_01/qtjambi-win32-msvc2005-4.5.0_01.jar"/>\r
+ <classpathentry kind="output" path="bin"/>\r
+</classpath>\r
--- /dev/null
+bin
\ No newline at end of file
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>\r
+<projectDescription>\r
+ <name>NeverNote</name>\r
+ <comment></comment>\r
+ <projects>\r
+ </projects>\r
+ <buildSpec>\r
+ <buildCommand>\r
+ <name>com.trolltech.qtjambi.juicBuilder</name>\r
+ <arguments>\r
+ </arguments>\r
+ </buildCommand>\r
+ <buildCommand>\r
+ <name>org.eclipse.jdt.core.javabuilder</name>\r
+ <arguments>\r
+ </arguments>\r
+ </buildCommand>\r
+ </buildSpec>\r
+ <natures>\r
+ <nature>org.eclipse.jdt.core.javanature</nature>\r
+ </natures>\r
+</projectDescription>\r
--- /dev/null
+#Mon Feb 01 13:37:03 EST 2010\r
+eclipse.preferences.version=1\r
+editor_save_participant_org.eclipse.jdt.ui.postsavelistener.cleanup=true\r
+sp_cleanup.add_default_serial_version_id=true\r
+sp_cleanup.add_generated_serial_version_id=false\r
+sp_cleanup.add_missing_annotations=true\r
+sp_cleanup.add_missing_deprecated_annotations=true\r
+sp_cleanup.add_missing_methods=false\r
+sp_cleanup.add_missing_nls_tags=false\r
+sp_cleanup.add_missing_override_annotations=true\r
+sp_cleanup.add_serial_version_id=false\r
+sp_cleanup.always_use_blocks=true\r
+sp_cleanup.always_use_parentheses_in_expressions=false\r
+sp_cleanup.always_use_this_for_non_static_field_access=false\r
+sp_cleanup.always_use_this_for_non_static_method_access=false\r
+sp_cleanup.convert_to_enhanced_for_loop=true\r
+sp_cleanup.correct_indentation=false\r
+sp_cleanup.format_source_code=false\r
+sp_cleanup.format_source_code_changes_only=false\r
+sp_cleanup.make_local_variable_final=false\r
+sp_cleanup.make_parameters_final=false\r
+sp_cleanup.make_private_fields_final=true\r
+sp_cleanup.make_type_abstract_if_missing_method=false\r
+sp_cleanup.make_variable_declarations_final=true\r
+sp_cleanup.never_use_blocks=false\r
+sp_cleanup.never_use_parentheses_in_expressions=true\r
+sp_cleanup.on_save_use_additional_actions=true\r
+sp_cleanup.organize_imports=true\r
+sp_cleanup.qualify_static_field_accesses_with_declaring_class=false\r
+sp_cleanup.qualify_static_member_accesses_through_instances_with_declaring_class=true\r
+sp_cleanup.qualify_static_member_accesses_through_subtypes_with_declaring_class=true\r
+sp_cleanup.qualify_static_member_accesses_with_declaring_class=false\r
+sp_cleanup.qualify_static_method_accesses_with_declaring_class=false\r
+sp_cleanup.remove_private_constructors=true\r
+sp_cleanup.remove_trailing_whitespaces=false\r
+sp_cleanup.remove_trailing_whitespaces_all=true\r
+sp_cleanup.remove_trailing_whitespaces_ignore_empty=false\r
+sp_cleanup.remove_unnecessary_casts=true\r
+sp_cleanup.remove_unnecessary_nls_tags=false\r
+sp_cleanup.remove_unused_imports=false\r
+sp_cleanup.remove_unused_local_variables=false\r
+sp_cleanup.remove_unused_private_fields=true\r
+sp_cleanup.remove_unused_private_members=false\r
+sp_cleanup.remove_unused_private_methods=true\r
+sp_cleanup.remove_unused_private_types=true\r
+sp_cleanup.sort_members=false\r
+sp_cleanup.sort_members_all=false\r
+sp_cleanup.use_blocks=false\r
+sp_cleanup.use_blocks_only_for_return_and_throw=false\r
+sp_cleanup.use_parentheses_in_expressions=false\r
+sp_cleanup.use_this_for_non_static_field_access=false\r
+sp_cleanup.use_this_for_non_static_field_access_only_if_necessary=true\r
+sp_cleanup.use_this_for_non_static_method_access=false\r
+sp_cleanup.use_this_for_non_static_method_access_only_if_necessary=true\r
--- /dev/null
+#Thu Aug 06 07:27:30 EDT 2009\r
+eclipse.preferences.version=1\r
+org.eclipse.ltk.core.refactoring.enable.project.refactoring.history=false\r
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>\r
+<classpath>\r
+ <!-- Eclipse build path: the two Qt Jambi jars (each with a javadoc_location attribute), the src folder, the default JRE container, the JAMBI_LOCATION variable entry, three library jars, and bin as the output folder. -->\r
+ <!-- NOTE(review): the "lib" entries and the javadoc locations are absolute paths on drive D:, so they presumably only resolve on a machine laid out the same way; confirm whether project-relative paths were intended. -->\r
+ <classpathentry kind="lib" path="D:/qtjambi-win32-lgpl-4.5.0_01/qtjambi-4.5.0_01.jar">\r
+ <attributes>\r
+ <attribute name="javadoc_location" value="file:D:/qtjambi-win32-lgpl-4.5.0_01/doc/html"/>\r
+ </attributes>\r
+ </classpathentry>\r
+ <classpathentry kind="lib" path="D:/qtjambi-win32-lgpl-4.5.0_01/qtjambi-win32-msvc2005-4.5.0_01.jar">\r
+ <attributes>\r
+ <attribute name="javadoc_location" value="file:D:/qtjambi-win32-lgpl-4.5.0_01/doc/html"/>\r
+ </attributes>\r
+ </classpathentry>\r
+ <classpathentry kind="src" path="src"/>\r
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>\r
+ <classpathentry kind="var" path="JAMBI_LOCATION"/>\r
+ <classpathentry kind="lib" path="D:/NeverNote/lib/log4j-1.2.14.jar"/>\r
+ <classpathentry kind="lib" path="D:/NeverNote/lib/libthrift.jar"/>\r
+ <classpathentry kind="lib" path="D:/NeverNote/lib/evernote-api-1.15.jar"/>\r
+ <classpathentry kind="output" path="bin"/>\r
+</classpath>\r
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>\r
+<projectDescription>\r
+ <name>NeverNote 0.80</name>\r
+ <comment></comment>\r
+ <projects>\r
+ </projects>\r
+ <buildSpec>\r
+ <buildCommand>\r
+ <name>com.trolltech.qtjambi.juicBuilder</name>\r
+ <arguments>\r
+ </arguments>\r
+ </buildCommand>\r
+ <buildCommand>\r
+ <name>org.eclipse.jdt.core.javabuilder</name>\r
+ <arguments>\r
+ </arguments>\r
+ </buildCommand>\r
+ </buildSpec>\r
+ <natures>\r
+ <nature>org.eclipse.jdt.core.javanature</nature>\r
+ </natures>\r
+</projectDescription>\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.security.AccessControlException;\r
+\r
+\r
+/**\r
+ * The various settings used to control how a spell checker works are read from here.\r
+ * Includes the COST_* constants that decide how to figure the cost of converting one word to\r
+ * another in the EditDistance class.\r
+ * <p>\r
+ * Also includes SPELL_* constants that control how misspellings are detected, for example, how to handle\r
+ * mixed-case words, etc.\r
+ * <p>\r
+ * Each constant is the key that is passed to the typed getters and setters declared below;\r
+ * subclasses provide the storage behind them.\r
+ *\r
+ * @author aim4min\r
+ * @see EditDistance\r
+ */\r
+public abstract class Configuration {\r
+\r
+    // NOTE(review): the text of several keys below differs from the constant name (EDIT_DEL1,\r
+    // EDIT_DEL2, SPELL_IGNOREINTERNETADDRESS, SPELL_IGNORESENTENCECAPTILIZATION). The values are\r
+    // left untouched because they are the keys used at runtime - presumably stored configuration\r
+    // uses exactly these spellings; confirm before changing any of them.\r
+\r
+    /** used by EditDistance: the cost of having to remove a character <br/>(integer greater than 0) */\r
+    public static final String COST_REMOVE_CHAR = "EDIT_DEL1";\r
+\r
+    /** used by EditDistance: the cost of having to insert a character <br/>(integer greater than 0) */\r
+    public static final String COST_INSERT_CHAR = "EDIT_DEL2";\r
+\r
+    /**\r
+     * used by EditDistance: the cost of having to swap two adjoining characters.\r
+     * For the swap value to ever be used, it should be smaller than the COST_REMOVE_CHAR or COST_INSERT_CHAR values\r
+     * <br/>(integer greater than 0)\r
+     */\r
+    public static final String COST_SWAP_CHARS = "EDIT_SWAP";\r
+\r
+    /**\r
+     * used by EditDistance: the cost of having to change case, for example, from i to I.\r
+     * <br/>(integer greater than 0)\r
+     */\r
+    public static final String COST_CHANGE_CASE = "EDIT_CASE";\r
+\r
+    /**\r
+     * used by EditDistance: the cost of having to substitute one character for another.\r
+     * For the sub value to ever be used, it should be smaller than the COST_REMOVE_CHAR or COST_INSERT_CHAR values\r
+     * <br/>(integer greater than 0)\r
+     */\r
+    public static final String COST_SUBST_CHARS = "EDIT_SUB";\r
+\r
+//    public static final String EDIT_SIMILAR = "EDIT_SIMILAR"; //DMV: these do not seem to be used at all\r
+//    public static final String EDIT_MIN = "EDIT_MIN";\r
+//    public static final String EDIT_MAX = "EDIT_MAX";\r
+\r
+    /**\r
+     * the maximum cost of suggested spelling. Any suggestions that cost more are thrown away\r
+     * <br/>(integer greater than 1)\r
+     */\r
+    public static final String SPELL_THRESHOLD = "SPELL_THRESHOLD";\r
+\r
+    /** words that are all upper case are not spell checked, example: "CIA" <br/>(boolean) */\r
+    public static final String SPELL_IGNOREUPPERCASE = "SPELL_IGNOREUPPERCASE";\r
+    /** words that have mixed case are not spell checked, example: "SpellChecker" <br/>(boolean) */\r
+    public static final String SPELL_IGNOREMIXEDCASE = "SPELL_IGNOREMIXEDCASE";\r
+    /** words that look like an Internet address are not spell checked, example: "http://www.google.com" <br/>(boolean) */\r
+    public static final String SPELL_IGNOREINTERNETADDRESSES = "SPELL_IGNOREINTERNETADDRESS";\r
+    /** words that have digits in them are not spell checked, example: "mach5" <br/>(boolean) */\r
+    public static final String SPELL_IGNOREDIGITWORDS = "SPELL_IGNOREDIGITWORDS";\r
+    /** I don't know what this does. It doesn't seem to be used <br/>(boolean) */\r
+    public static final String SPELL_IGNOREMULTIPLEWORDS = "SPELL_IGNOREMULTIPLEWORDS";\r
+    /** the first word of a sentence is expected to start with an upper case letter <br/>(boolean) */\r
+    public static final String SPELL_IGNORESENTENCECAPITALIZATION = "SPELL_IGNORESENTENCECAPTILIZATION";\r
+\r
+    /**\r
+     * Gets one of the integer constants\r
+     * @param key one of the integer constants defined in this class\r
+     * @return int value of the setting\r
+     */\r
+    public abstract int getInteger(String key);\r
+\r
+    /**\r
+     * Gets one of the boolean constants\r
+     * @param key one of the boolean constants defined in this class\r
+     * @return boolean value of the setting\r
+     */\r
+    public abstract boolean getBoolean(String key);\r
+\r
+    /**\r
+     * Sets one of the integer constants\r
+     * @param key one of the integer constants defined in this class\r
+     * @param value new integer value of the constant\r
+     */\r
+    public abstract void setInteger(String key, int value);\r
+\r
+    /**\r
+     * Sets one of the boolean constants\r
+     * @param key one of the boolean constants defined in this class\r
+     * @param value new boolean value of this setting\r
+     */\r
+    public abstract void setBoolean(String key, boolean value);\r
+\r
+    /**\r
+     * Gets a new default Configuration.\r
+     * <p>\r
+     * When the system property <code>jazzy.config</code> holds a non-empty value, that value is\r
+     * treated as a class name and handed to {@link #getConfiguration(String)}. If reading the\r
+     * property is denied, the stack trace is printed and the default is returned instead.\r
+     *\r
+     * @return Configuration\r
+     */\r
+    public static final Configuration getConfiguration() {\r
+        try {\r
+            String config = System.getProperty("jazzy.config"); // added by bd\r
+            if (config != null && config.length() > 0)\r
+                return getConfiguration(config);\r
+        } catch (AccessControlException e) {\r
+            // Best effort: not being allowed to read the property must not prevent spell checking.\r
+            e.printStackTrace();\r
+        }\r
+        return getConfiguration(null);\r
+    }\r
+\r
+    /**\r
+     * Returns a new instance of a Configuration class.\r
+     * <p>\r
+     * A new PropertyConfiguration is returned when the name is null or empty, when the class\r
+     * cannot be found, instantiated or accessed, or when the class is not a Configuration.\r
+     *\r
+     * @param className - the class to return, must be based on Configuration\r
+     * @return Configuration\r
+     */\r
+    public static final Configuration getConfiguration(String className) {\r
+\r
+        if (className != null && className.length() > 0) {\r
+            try {\r
+                Object candidate = Class.forName(className).newInstance();\r
+                // Check the type instead of casting blindly: a class that is not a Configuration\r
+                // must lead to the default, like every other failure to load the named class,\r
+                // rather than to a ClassCastException.\r
+                if (candidate instanceof Configuration) {\r
+                    return (Configuration) candidate;\r
+                }\r
+            } catch (InstantiationException e) {\r
+                // fall back to the default below\r
+            } catch (IllegalAccessException e) {\r
+                // fall back to the default below\r
+            } catch (ClassNotFoundException e) {\r
+                // fall back to the default below\r
+            }\r
+        }\r
+        return new PropertyConfiguration();\r
+    }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+\r
+/**\r
+ * A phonetic encoding algorithm that takes an English word and computes a phonetic version of it. This\r
+ * allows for phonetic matches in a spell checker. This class is a port of the C++ DoubleMetaphone() class,\r
+ * which was intended to return two possible phonetic translations for certain words, although the Java version\r
+ * only seems to be concerned with one, making the "double" part erroneous.\r
+ * <br>\r
+ * source code for the original C++ can be found\r
+ * here: <a href="http://aspell.sourceforge.net/metaphone/">http://aspell.sourceforge.net/metaphone/</a>\r
+ * DoubleMetaphone does some processing, such as uppercasing, on the input string first to normalize it. Then, to\r
+ * create the key, the function traverses the input string in a while loop, sending successive characters into a giant\r
+ * switch statement. Before determining the appropriate pronunciation, the algorithm considers the context\r
+ * surrounding each character within the input string.\r
+ * <p>\r
+ * Things that were changed:\r
+ * <br/>The alternate flag could be set to true but was never checked so why bother with it. REMOVED\r
+ * <br/>Why was this class serializable?\r
+ * <br/>The primary, in, length and last variables could be initialized and local to the\r
+ * process method and references passed around the appropriate methods. As such there are\r
+ * no class variables and this class becomes firstly threadsafe and secondly could be static final.\r
+ * <br/>The SlavoGermanic function was called repeatedly in the process function; it is now called only once.\r
+ *\r
+ */\r
+public class DoubleMeta implements Transformator {\r
+\r
+ /**\r
+ * The replace list is used in the getSuggestions method.\r
+ * All of the letters in the misspelled word are replaced with the characters from\r
+ * this list to try and generate more suggestions, which implies l*n tries,\r
+ * if l is the size of the string, and n is the size of this list.\r
+ *\r
+ * In addition to that, each of these letters is added to the misspelled word.\r
+ *\r
+ * NOTE(review): getSuggestions is not part of the code shown here - presumably it belongs to the\r
+ * caller that consumes this list; confirm.\r
+ * NOTE(review): the last entry is the digit '0', not the letter 'O' - presumably the phonetic\r
+ * code this encoder emits for "TH"; confirm.\r
+ * The array is not declared final and its elements are mutable.\r
+ */\r
+ private static char[] replaceList = {'A', 'B', 'X', 'S', 'K', 'J', 'T', 'F', 'H', 'L', 'M', 'N', 'P', 'R', '0'};\r
+\r
+\r
+ // Lookup tables for transform(): in the part of that method visible here, each array is handed to\r
+ // stringAt(), which reports whether the characters at a given position of the word equal one of\r
+ // the entries.\r
+ // Every table ends with an empty string. stringAt() walks the array by its length, so the empty\r
+ // entry is not needed as an end marker - presumably it was carried over from the C++ original;\r
+ // TODO confirm before removing it.\r
+ // NOTE(review): several tables have identical contents (for example list10, list46 and list85, or\r
+ // list33 and list34); the tables appear to be numbered per call site in transform().\r
+ private static final String[] myList = {"GN", "KN", "PN", "WR", "PS", ""};\r
+ private static final String[] list1 = {"ACH", ""};\r
+ private static final String[] list2 = {"BACHER", "MACHER", ""};\r
+ private static final String[] list3 = {"CAESAR", ""};\r
+ private static final String[] list4 = {"CHIA", ""};\r
+ private static final String[] list5 = {"CH", ""};\r
+ private static final String[] list6 = {"CHAE", ""};\r
+ private static final String[] list7 = {"HARAC", "HARIS", ""};\r
+ private static final String[] list8 = {"HOR", "HYM", "HIA", "HEM", ""};\r
+ private static final String[] list9 = {"CHORE", ""};\r
+ private static final String[] list10 = {"VAN ", "VON ", ""};\r
+ private static final String[] list11 = {"SCH", ""};\r
+ private static final String[] list12 = {"ORCHES", "ARCHIT", "ORCHID", ""};\r
+ private static final String[] list13 = {"T", "S", ""};\r
+ private static final String[] list14 = {"A", "O", "U", "E", ""};\r
+ private static final String[] list15 = {"L", "R", "N", "M", "B", "H", "F", "V", "W", " ", ""};\r
+ private static final String[] list16 = {"MC", ""};\r
+ private static final String[] list17 = {"CZ", ""};\r
+ private static final String[] list18 = {"WICZ", ""};\r
+ private static final String[] list19 = {"CIA", ""};\r
+ private static final String[] list20 = {"CC", ""};\r
+ private static final String[] list21 = {"I", "E", "H", ""};\r
+ private static final String[] list22 = {"HU", ""};\r
+ private static final String[] list23 = {"UCCEE", "UCCES", ""};\r
+ private static final String[] list24 = {"CK", "CG", "CQ", ""};\r
+ private static final String[] list25 = {"CI", "CE", "CY", ""};\r
+// DMV: used by the original code, which returned two phonetic codes, but not by the current code\r
+// private static final String[] list26 = {\r
+// "CIO", "CIE", "CIA", ""\r
+// };\r
+ private static final String[] list27 = {" C", " Q", " G", ""};\r
+ private static final String[] list28 = {"C", "K", "Q", ""};\r
+ private static final String[] list29 = {"CE", "CI", ""};\r
+ private static final String[] list30 = {"DG", ""};\r
+ private static final String[] list31 = {"I", "E", "Y", ""};\r
+ private static final String[] list32 = {"DT", "DD", ""};\r
+ private static final String[] list33 = {"B", "H", "D", ""};\r
+ private static final String[] list34 = {"B", "H", "D", ""};\r
+ private static final String[] list35 = {"B", "H", ""};\r
+ private static final String[] list36 = {"C", "G", "L", "R", "T", ""};\r
+ private static final String[] list37 = {"EY", ""};\r
+ private static final String[] list38 = {"LI", ""};\r
+ private static final String[] list39 = {"ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER", ""};\r
+ private static final String[] list40 = {"ER", ""};\r
+ private static final String[] list41 = {"DANGER", "RANGER", "MANGER", ""};\r
+ private static final String[] list42 = {"E", "I", ""};\r
+ private static final String[] list43 = {"RGY", "OGY", ""};\r
+ private static final String[] list44 = {"E", "I", "Y", ""};\r
+ private static final String[] list45 = {"AGGI", "OGGI", ""};\r
+ private static final String[] list46 = {"VAN ", "VON ", ""};\r
+ private static final String[] list47 = {"SCH", ""};\r
+ private static final String[] list48 = {"ET", ""};\r
+\r
+// DMV: used by the original code, which returned two phonetic codes, but not by the current code\r
+// private static final String[] list49 = {\r
+// "IER ", ""\r
+// };\r
+ private static final String[] list50 = {"JOSE", ""};\r
+ private static final String[] list51 = {"SAN ", ""};\r
+ private static final String[] list52 = {"SAN ", ""};\r
+ private static final String[] list53 = {"JOSE", ""};\r
+ private static final String[] list54 = {"L", "T", "K", "S", "N", "M", "B", "Z", ""};\r
+ private static final String[] list55 = {"S", "K", "L", ""};\r
+ private static final String[] list56 = {"ILLO", "ILLA", "ALLE", ""};\r
+ private static final String[] list57 = {"AS", "OS", ""};\r
+ private static final String[] list58 = {"A", "O", ""};\r
+ private static final String[] list59 = {"ALLE", ""};\r
+ private static final String[] list60 = {"UMB", ""};\r
+ private static final String[] list61 = {"ER", ""};\r
+ private static final String[] list62 = {"P", "B", ""};\r
+ private static final String[] list63 = {"IE", ""};\r
+ private static final String[] list64 = {"ME", "MA", ""};\r
+ private static final String[] list65 = {"ISL", "YSL", ""};\r
+ private static final String[] list66 = {"SUGAR", ""};\r
+ private static final String[] list67 = {"SH", ""};\r
+ private static final String[] list68 = {"HEIM", "HOEK", "HOLM", "HOLZ", ""};\r
+ private static final String[] list69 = {"SIO", "SIA", ""};\r
+ private static final String[] list70 = {"SIAN", ""};\r
+ private static final String[] list71 = {"M", "N", "L", "W", ""};\r
+ private static final String[] list72 = {"Z", ""};\r
+ private static final String[] list73 = {"Z", ""};\r
+ private static final String[] list74 = {"SC", ""};\r
+ private static final String[] list75 = {"OO", "ER", "EN", "UY", "ED", "EM", ""};\r
+ private static final String[] list76 = {"ER", "EN", ""};\r
+ private static final String[] list77 = {"I", "E", "Y", ""};\r
+ private static final String[] list78 = {"AI", "OI", ""};\r
+ private static final String[] list79 = {"S", "Z", ""};\r
+ private static final String[] list80 = {"TION", ""};\r
+ private static final String[] list81 = {"TIA", "TCH", ""};\r
+ private static final String[] list82 = {"TH", ""};\r
+ private static final String[] list83 = {"TTH", ""};\r
+ private static final String[] list84 = {"OM", "AM", ""};\r
+ private static final String[] list85 = {"VAN ", "VON ", ""};\r
+ private static final String[] list86 = {"SCH", ""};\r
+ private static final String[] list87 = {"T", "D", ""};\r
+ private static final String[] list88 = {"WR", ""};\r
+ private static final String[] list89 = {"WH", ""};\r
+ private static final String[] list90 = {"EWSKI", "EWSKY", "OWSKI", "OWSKY", ""};\r
+ private static final String[] list91 = {"SCH", ""};\r
+ private static final String[] list92 = {"WICZ", "WITZ", ""};\r
+ private static final String[] list93 = {"IAU", "EAU", ""};\r
+ private static final String[] list94 = {"AU", "OU", ""};\r
+ private static final String[] list95 = {"C", "X", ""};\r
+\r
+// DMV: used by the original code, which returned two phonetic codes, but not by the current code\r
+// private static final String[] list96 = {\r
+// "ZO", "ZI", "ZA", ""\r
+// };\r
+\r
+ /**\r
+  * Reports whether the text contains any of the letter groups "W", "K", "CZ" or "WITZ".\r
+  * The search is case-sensitive and looks at the whole text.\r
+  *\r
+  * @param in the text to inspect\r
+  * @return true if at least one of the groups occurs anywhere in the text\r
+  */\r
+ private final static boolean SlavoGermanic(String in) {\r
+     final String[] markers = {"W", "K", "CZ", "WITZ"};\r
+     for (int m = 0; m < markers.length; m++) {\r
+         if (in.indexOf(markers[m]) >= 0) {\r
+             return true;\r
+         }\r
+     }\r
+     return false;\r
+ }\r
+\r
+ /**\r
+  * Appends a fragment of phonetic code to the buffer; a null fragment is ignored.\r
+  *\r
+  * @param primary the buffer that receives the fragment\r
+  * @param main the fragment to append, may be null\r
+  */\r
+ private final static void MetaphAdd(StringBuffer primary, String main) {\r
+     if (main == null) {\r
+         return;\r
+     }\r
+     primary.append(main);\r
+ }\r
+\r
+ /**\r
+  * Appends a single character of phonetic code to the buffer.\r
+  *\r
+  * @param primary the buffer that receives the character\r
+  * @param main the character to append\r
+  */\r
+ private final static void MetaphAdd(StringBuffer primary, char main) {\r
+     primary.append(main);\r
+ }\r
+\r
+ /**\r
+ * put your documentation comment here\r
+ * @param at\r
+ * @return\r
+ */\r
+ private final static boolean isVowel(String in, int at, int length) {\r
+ if ((at < 0) || (at >= length))\r
+ return false;\r
+ char it = in.charAt(at);\r
+ if ((it == 'A') || (it == 'E') || (it == 'I') || (it == 'O') || (it == 'U') || (it == 'Y'))\r
+ return true;\r
+ return false;\r
+ }\r
+\r
+ /**\r
+  * Tests whether the <code>length</code> characters of <code>string</code> that begin at\r
+  * <code>start</code> are equal to one of the entries of <code>list</code>.\r
+  * <p>\r
+  * The answer is false when the window does not fit inside the text: a start below zero or at or\r
+  * past the end, a negative length, or a window that would run past the end of the text. A window\r
+  * that is cut short could not equal an entry of the requested length anyway, so a probe near\r
+  * the end of a word yields false rather than a StringIndexOutOfBoundsException.\r
+  *\r
+  * @param string the text to look into\r
+  * @param start the position of the first character of the window\r
+  * @param length the number of characters in the window\r
+  * @param list the candidate strings to compare the window with\r
+  * @return true if the window equals at least one entry of the list\r
+  */\r
+ private final static boolean stringAt(String string, int start, int length, String[] list) {\r
+     if ((start < 0) || (start >= string.length()) || list.length == 0)\r
+         return false;\r
+     // Written as a subtraction so that a very large length cannot overflow the comparison.\r
+     if ((length < 0) || (length > string.length() - start))\r
+         return false;\r
+     String substr = string.substring(start, start + length);\r
+     for (int i = 0; i < list.length; i++) {\r
+         if (list[i].equals(substr))\r
+             return true;\r
+     }\r
+     return false;\r
+ }\r
+\r
+ /**\r
+ * Takes the given word and returns the best phonetic hash for it.\r
+ * Vowels are minimized as much as possible, and consonants\r
+ * that have similar sounds are converted to the same consonant;\r
+ * for example, 'v' and 'f' are both converted to 'f'.\r
+ * @param word the text to transform\r
+ * @return the result of the phonetic transformation\r
+ */\r
+ public final String transform(String word) {\r
+ StringBuffer primary = new StringBuffer(word.length() + 5);\r
+ String in = word.toUpperCase() + " ";\r
+ int current = 0;\r
+ int length = in.length();\r
+ if (length < 1)\r
+ return "";\r
+ int last = length - 1;\r
+ boolean isSlavoGermaic = SlavoGermanic(in);\r
+ if (stringAt(in, 0, 2, myList))\r
+ current += 1;\r
+ if (in.charAt(0) == 'X') {\r
+ MetaphAdd(primary, 'S');\r
+ current += 1;\r
+ }\r
+ while (current < length) {\r
+ switch (in.charAt(current)) {\r
+ case 'A':\r
+ case 'E':\r
+ case 'I':\r
+ case 'O':\r
+ case 'U':\r
+ case 'Y':\r
+ if (current == 0)\r
+ MetaphAdd(primary, 'A');\r
+ current += 1;\r
+ break;\r
+ case 'B':\r
+ MetaphAdd(primary, 'P');\r
+ if (in.charAt(current + 1) == 'B')\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ break;\r
+ case '\u00C7':\r
+ MetaphAdd(primary, 'S');\r
+ current += 1;\r
+ break;\r
+ case 'C':\r
+ if ((current > 1) && !isVowel(in, current - 2, length) && stringAt(in, (current - 1), 3, list1) && (in.charAt(current + 2) != 'I') && (in.charAt(current + 2) != 'E') || stringAt(in, (current - 2), 6, list2)) {\r
+ MetaphAdd(primary, 'K');\r
+ current += 2;\r
+ break;\r
+ }\r
+ if ((current == 0) && stringAt(in, current, 6, list3)) {\r
+ MetaphAdd(primary, 'S');\r
+ current += 2;\r
+ break;\r
+ }\r
+ if (stringAt(in, current, 4, list4)) {\r
+ MetaphAdd(primary, 'K');\r
+ current += 2;\r
+ break;\r
+ }\r
+ if (stringAt(in, current, 2, list5)) {\r
+ if ((current > 0) && stringAt(in, current, 4, list6)) {\r
+ MetaphAdd(primary, 'K');\r
+ current += 2;\r
+ break;\r
+ }\r
+ if ((current == 0) && stringAt(in, (current + 1), 5, list7) || stringAt(in, current + 1, 3, list8) && !stringAt(in, 0, 5, list9)) {\r
+ MetaphAdd(primary, 'K');\r
+ current += 2;\r
+ break;\r
+ }\r
+ if (stringAt(in, 0, 4, list10) || stringAt(in, 0, 3, list11) || stringAt(in, current - 2, 6, list12) || stringAt(in, current + 2, 1, list13) || (stringAt(in, current - 1, 1, list14) || (current == 0)) && stringAt(in, current + 2, 1, list15)) {\r
+ MetaphAdd(primary, 'K');\r
+ } else {\r
+ if (current > 0) {\r
+ if (stringAt(in, 0, 2, list16))\r
+ MetaphAdd(primary, 'K');\r
+ else\r
+ MetaphAdd(primary, 'X');\r
+ } else {\r
+ MetaphAdd(primary, 'X');\r
+ }\r
+ }\r
+ current += 2;\r
+ break;\r
+ }\r
+ if (stringAt(in, current, 2, list17) && !stringAt(in, current, 4, list18)) {\r
+ MetaphAdd(primary, 'S');\r
+ current += 2;\r
+ break;\r
+ }\r
+ if (stringAt(in, current, 2, list19)) {\r
+ MetaphAdd(primary, 'X');\r
+ current += 2;\r
+ break;\r
+ }\r
+ if (stringAt(in, current, 2, list20) && !((current == 1) && in.charAt(0) == 'M')) {\r
+ if (stringAt(in, current + 2, 1, list21) && !stringAt(in, current + 2, 2, list22)) {\r
+ if (((current == 1) && (in.charAt(current - 1) == 'A')) || stringAt(in, (current - 1), 5, list23))\r
+ MetaphAdd(primary, "KS");\r
+ else\r
+ MetaphAdd(primary, 'X');\r
+ current += 3;\r
+ break;\r
+ } else {\r
+ MetaphAdd(primary, 'K');\r
+ current += 2;\r
+ break;\r
+ }\r
+ }\r
+ if (stringAt(in, current, 2, list24)) {\r
+ MetaphAdd(primary, 'K');\r
+ current += 2;\r
+ break;\r
+ } else if (stringAt(in, current, 2, list25)) {\r
+ MetaphAdd(primary, 'S');\r
+ current += 2;\r
+ break;\r
+ }\r
+\r
+ MetaphAdd(primary, 'K');\r
+ if (stringAt(in, current + 1, 2, list27))\r
+ current += 3;\r
+ else if (stringAt(in, current + 1, 1, list28) && !stringAt(in, current + 1, 2, list29))\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ break;\r
+ case 'D':\r
+ if (stringAt(in, current, 2, list30)) {\r
+ if (stringAt(in, current + 2, 1, list31)) {\r
+ MetaphAdd(primary, 'J');\r
+ current += 3;\r
+ break;\r
+ } else {\r
+ MetaphAdd(primary, "TK");\r
+ current += 2;\r
+ break;\r
+ }\r
+ }\r
+ MetaphAdd(primary, 'T');\r
+ if (stringAt(in, current, 2, list32)) {\r
+ current += 2;\r
+ } else {\r
+ current += 1;\r
+ }\r
+ break;\r
+ case 'F':\r
+ if (in.charAt(current + 1) == 'F')\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ MetaphAdd(primary, 'F');\r
+ break;\r
+ case 'G':\r
+ if (in.charAt(current + 1) == 'H') {\r
+ if ((current > 0) && !isVowel(in, current - 1, length)) {\r
+ MetaphAdd(primary, 'K');\r
+ current += 2;\r
+ break;\r
+ }\r
+ if (current < 3) {\r
+ if (current == 0) {\r
+ if (in.charAt(current + 2) == 'I')\r
+ MetaphAdd(primary, 'J');\r
+ else\r
+ MetaphAdd(primary, 'K');\r
+ current += 2;\r
+ break;\r
+ }\r
+ }\r
+ if ((current > 1) && stringAt(in, current - 2, 1, list33) || ((current > 2) && stringAt(in, current - 3, 1, list34)) || ((current > 3) && stringAt(in, current - 4, 1, list35))) {\r
+ current += 2;\r
+ break;\r
+ } else {\r
+ if ((current > 2) && (in.charAt(current - 1) == 'U') && stringAt(in, current - 3, 1, list36)) {\r
+ MetaphAdd(primary, 'F');\r
+ } else {\r
+ if ((current > 0) && (in.charAt(current - 1) != 'I'))\r
+ MetaphAdd(primary, 'K');\r
+ }\r
+ current += 2;\r
+ break;\r
+ }\r
+ }\r
+ if (in.charAt(current + 1) == 'N') {\r
+ if ((current == 1) && isVowel(in, 0, length) && !isSlavoGermaic) {\r
+ MetaphAdd(primary, "KN");\r
+ } else {\r
+ if (!stringAt(in, current + 2, 2, list37) && (in.charAt(current + 1) != 'Y') && !isSlavoGermaic) {\r
+ MetaphAdd(primary, "N");\r
+ } else {\r
+ MetaphAdd(primary, "KN");\r
+ }\r
+ }\r
+ current += 2;\r
+ break;\r
+ }\r
+ if (stringAt(in, current + 1, 2, list38) && !isSlavoGermaic) {\r
+ MetaphAdd(primary, "KL");\r
+ current += 2;\r
+ break;\r
+ }\r
+ if ((current == 0) && ((in.charAt(current + 1) == 'Y') || stringAt(in, current + 1, 2, list39))) {\r
+ MetaphAdd(primary, 'K');\r
+ current += 2;\r
+ break;\r
+ }\r
+ if ((stringAt(in, current + 1, 2, list40) || (in.charAt(current + 1) == 'Y')) && !stringAt(in, 0, 6, list41) && !stringAt(in, current - 1, 1, list42) && !stringAt(in, current - 1, 3, list43)) {\r
+ MetaphAdd(primary, 'K');\r
+ current += 2;\r
+ break;\r
+ }\r
+ if (stringAt(in, current + 1, 1, list44) || stringAt(in, current - 1, 4, list45)) {\r
+ if (stringAt(in, 0, 4, list46) || stringAt(in, 0, 3, list47) || stringAt(in, current + 1, 2, list48)) {\r
+ MetaphAdd(primary, 'K');\r
+ } else {\r
+ MetaphAdd(primary, 'J');\r
+ }\r
+ current += 2;\r
+ break;\r
+ }\r
+ if (in.charAt(current + 1) == 'G')\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ MetaphAdd(primary, 'K');\r
+ break;\r
+ case 'H':\r
+ if (((current == 0) || isVowel(in, current - 1, length)) && isVowel(in, current + 1, length)) {\r
+ MetaphAdd(primary, 'H');\r
+ current += 2;\r
+ } else {\r
+ current += 1;\r
+ }\r
+ break;\r
+ case 'J':\r
+ if (stringAt(in, current, 4, list50) || stringAt(in, 0, 4, list51)) {\r
+ if ((current == 0) && (in.charAt(current + 4) == ' ') || stringAt(in, 0, 4, list52)) {\r
+ MetaphAdd(primary, 'H');\r
+ } else {\r
+ MetaphAdd(primary, 'J');\r
+ }\r
+ current += 1;\r
+ break;\r
+ }\r
+ if ((current == 0) && !stringAt(in, current, 4, list53)) {\r
+ MetaphAdd(primary, 'J');\r
+ } else {\r
+ if (isVowel(in, current - 1, length) && !isSlavoGermaic && ((in.charAt(current + 1) == 'A') || in.charAt(current + 1) == 'O')) {\r
+ MetaphAdd(primary, 'J');\r
+ } else {\r
+ if (current == last) {\r
+ MetaphAdd(primary, 'J');\r
+ } else {\r
+ if (!stringAt(in, current + 1, 1, list54) && !stringAt(in, current - 1, 1, list55)) {\r
+ MetaphAdd(primary, 'J');\r
+ }\r
+ }\r
+ }\r
+ }\r
+ if (in.charAt(current + 1) == 'J')\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ break;\r
+ case 'K':\r
+ if (in.charAt(current + 1) == 'K')\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ MetaphAdd(primary, 'K');\r
+ break;\r
+ case 'L':\r
+ if (in.charAt(current + 1) == 'L') {\r
+ if (((current == (length - 3)) && stringAt(in, current - 1, 4, list56)) || ((stringAt(in, last - 1, 2, list57) || stringAt(in, last, 1, list58)) && stringAt(in, current - 1, 4, list59))) {\r
+ MetaphAdd(primary, 'L');\r
+ current += 2;\r
+ break;\r
+ }\r
+ current += 2;\r
+ } else\r
+ current += 1;\r
+ MetaphAdd(primary, 'L');\r
+ break;\r
+ case 'M':\r
+ if ((stringAt(in, current - 1, 3, list60) && (((current + 1) == last) || stringAt(in, current + 2, 2, list61))) || (in.charAt(current + 1) == 'M'))\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ MetaphAdd(primary, 'M');\r
+ break;\r
+ case 'N':\r
+ if (in.charAt(current + 1) == 'N')\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ MetaphAdd(primary, 'N');\r
+ break;\r
+ case '\u00D1':\r
+ current += 1;\r
+ MetaphAdd(primary, 'N');\r
+ break;\r
+ case 'P':\r
+ if (in.charAt(current + 1) == 'N') {\r
+ MetaphAdd(primary, 'F');\r
+ current += 2;\r
+ break;\r
+ }\r
+ if (stringAt(in, current + 1, 1, list62))\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ MetaphAdd(primary, 'P');\r
+ break;\r
+ case 'Q':\r
+ if (in.charAt(current + 1) == 'Q')\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ MetaphAdd(primary, 'K');\r
+ break;\r
+ case 'R':\r
+ if ((current == last) && !isSlavoGermaic && stringAt(in, current - 2, 2, list63) && !stringAt(in, current - 4, 2, list64)) {\r
+// MetaphAdd(primary, "");\r
+ } else\r
+ MetaphAdd(primary, 'R');\r
+ if (in.charAt(current + 1) == 'R')\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ break;\r
+ case 'S':\r
+ if (stringAt(in, current - 1, 3, list65)) {\r
+ current += 1;\r
+ break;\r
+ }\r
+ if ((current == 0) && stringAt(in, current, 5, list66)) {\r
+ MetaphAdd(primary, 'X');\r
+ current += 1;\r
+ break;\r
+ }\r
+ if (stringAt(in, current, 2, list67)) {\r
+ if (stringAt(in, current + 1, 4, list68))\r
+ MetaphAdd(primary, 'S');\r
+ else\r
+ MetaphAdd(primary, 'X');\r
+ current += 2;\r
+ break;\r
+ }\r
+ if (stringAt(in, current, 3, list69) || stringAt(in, current, 4, list70)) {\r
+ MetaphAdd(primary, 'S');\r
+ current += 3;\r
+ break;\r
+ }\r
+ if (((current == 0) && stringAt(in, current + 1, 1, list71)) || stringAt(in, current + 1, 1, list72)) {\r
+ MetaphAdd(primary, 'S');\r
+ if (stringAt(in, current + 1, 1, list73))\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ break;\r
+ }\r
+ if (stringAt(in, current, 2, list74)) {\r
+ if (in.charAt(current + 2) == 'H')\r
+ if (stringAt(in, current + 3, 2, list75)) {\r
+ if (stringAt(in, current + 3, 2, list76)) {\r
+ MetaphAdd(primary, "X");\r
+ } else {\r
+ MetaphAdd(primary, "SK");\r
+ }\r
+ current += 3;\r
+ break;\r
+ } else {\r
+ MetaphAdd(primary, 'X');\r
+ current += 3;\r
+ break;\r
+ }\r
+ if (stringAt(in, current + 2, 1, list77)) {\r
+ MetaphAdd(primary, 'S');\r
+ current += 3;\r
+ break;\r
+ }\r
+ MetaphAdd(primary, "SK");\r
+ current += 3;\r
+ break;\r
+ }\r
+ if ((current == last) && stringAt(in, current - 2, 2, list78)) {\r
+ //MetaphAdd(primary, "");\r
+ } else\r
+ MetaphAdd(primary, 'S');\r
+ if (stringAt(in, current + 1, 1, list79))\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ break;\r
+ case 'T':\r
+ if (stringAt(in, current, 4, list80)) {\r
+ MetaphAdd(primary, 'X');\r
+ current += 3;\r
+ break;\r
+ }\r
+ if (stringAt(in, current, 3, list81)) {\r
+ MetaphAdd(primary, 'X');\r
+ current += 3;\r
+ break;\r
+ }\r
+ if (stringAt(in, current, 2, list82) || stringAt(in, current, 3, list83)) {\r
+ if (stringAt(in, (current + 2), 2, list84) || stringAt(in, 0, 4, list85) || stringAt(in, 0, 3, list86)) {\r
+ MetaphAdd(primary, 'T');\r
+ } else {\r
+ MetaphAdd(primary, '0');\r
+ }\r
+ current += 2;\r
+ break;\r
+ }\r
+ if (stringAt(in, current + 1, 1, list87)) {\r
+ current += 2;\r
+ } else\r
+ current += 1;\r
+ MetaphAdd(primary, 'T');\r
+ break;\r
+ case 'V':\r
+ if (in.charAt(current + 1) == 'V')\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ MetaphAdd(primary, 'F');\r
+ break;\r
+ case 'W':\r
+ if (stringAt(in, current, 2, list88)) {\r
+ MetaphAdd(primary, 'R');\r
+ current += 2;\r
+ break;\r
+ }\r
+ if ((current == 0) && (isVowel(in, current + 1, length) || stringAt(in, current, 2, list89))) {\r
+ MetaphAdd(primary, 'A');\r
+ }\r
+ if (((current == last) && isVowel(in, current - 1, length)) || stringAt(in, current - 1, 5, list90) || stringAt(in, 0, 3, list91)) {\r
+ MetaphAdd(primary, 'F');\r
+ current += 1;\r
+ break;\r
+ }\r
+ if (stringAt(in, current, 4, list92)) {\r
+ MetaphAdd(primary, "TS");\r
+ current += 4;\r
+ break;\r
+ }\r
+ current += 1;\r
+ break;\r
+ case 'X':\r
+ if (!((current == last) && (stringAt(in, current - 3, 3, list93) || stringAt(in, current - 2, 2, list94))))\r
+ MetaphAdd(primary, "KS");\r
+ if (stringAt(in, current + 1, 1, list95))\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ break;\r
+ case 'Z':\r
+ if (in.charAt(current + 1) == 'H') {\r
+ MetaphAdd(primary, 'J');\r
+ current += 2;\r
+ break;\r
+ } else {\r
+ MetaphAdd(primary, 'S');\r
+ }\r
+ if (in.charAt(current + 1) == 'Z')\r
+ current += 2;\r
+ else\r
+ current += 1;\r
+ break;\r
+ default:\r
+ current += 1;\r
+ }\r
+ }\r
+ return primary.toString();\r
+ }\r
+\r
+ /**\r
+ * Returns the replace list held by this transformator.\r
+ *\r
+ * @return the <code>replaceList</code> array reference itself (no copy is made)\r
+ * @see com.swabunga.spell.engine.Transformator#getReplaceList()\r
+ */\r
+ public char[] getReplaceList() {\r
+ return replaceList;\r
+ }\r
+}\r
+\r
+\r
+\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.BufferedReader;\r
+import java.io.InputStreamReader;\r
+\r
+/**\r
+ * This class is based on Levenshtein Distance algorithms, and it calculates how similar two words are.\r
+ * If the words are identical, then the distance is 0. The more that the words have in common, the lower the distance value.\r
+ * The distance value is based on how many operations it takes to get from one word to the other. Possible operations are\r
+ * swapping characters, adding a character, deleting a character, and substituting a character.\r
+ * The resulting distance is the sum of these operations weighted by their cost, which can be set in the Configuration object.\r
+ * When there are multiple ways to convert one word into the other, the lowest cost distance is returned.\r
+ * <br/>\r
+ * Another way to think about this: what are the cheapest operations that would have to be done on the "original" word to end up\r
+ * with the "similar" word? Each operation has a cost, and these are added up to get the distance.\r
+ * <br/>\r
+ *\r
+ * @see com.swabunga.spell.engine.Configuration#COST_REMOVE_CHAR\r
+ * @see com.swabunga.spell.engine.Configuration#COST_INSERT_CHAR\r
+ * @see com.swabunga.spell.engine.Configuration#COST_SUBST_CHARS\r
+ * @see com.swabunga.spell.engine.Configuration#COST_SWAP_CHARS\r
+ *\r
+ */\r
+\r
+public class EditDistance {\r
+\r
+ /**\r
+ * The spell engine configuration from which the operation costs below are read.\r
+ */\r
+ public static Configuration config = Configuration.getConfiguration();\r
+\r
+ /**\r
+ * Weights for each possible edit operation. Each one is read from {@link #config}\r
+ * in its static initializer and is final afterwards.\r
+ */\r
+ static final int costOfDeletingSourceCharacter = config.getInteger(Configuration.COST_REMOVE_CHAR);\r
+ static final int costOfInsertingSourceCharacter = config.getInteger(Configuration.COST_INSERT_CHAR);\r
+ static final int costOfSubstitutingLetters = config.getInteger(Configuration.COST_SUBST_CHARS);\r
+ static final int costOfSwappingLetters = config.getInteger(Configuration.COST_SWAP_CHARS);\r
+ static final int costOfChangingCase = config.getInteger(Configuration.COST_CHANGE_CASE); \r
+\r
+ /**\r
+ * Evaluates the distance between two words.\r
+ * Delegates to {@link #getDistance(String, String, int[][])} with a <code>null</code>\r
+ * matrix, so a new work matrix is allocated for the call.\r
+ * \r
+ * @param word One word to evaluate\r
+ * @param similar The other word to evaluate\r
+ * @return a number representing how easy or complex it is to transform one\r
+ * word into a similar one.\r
+ */\r
+ public static final int getDistance(String word, String similar) {\r
+ return getDistance(word,similar,null);\r
+ } \r
+ \r
+ /**\r
+ * Evaluates the weighted edit distance between two words.\r
+ * <p>\r
+ * Cell <code>[r][c]</code> of the work matrix holds the cost of turning the first\r
+ * <code>r</code> characters of <code>word</code> into the first <code>c</code>\r
+ * characters of <code>similar</code>. The cheapest of substitution, swap of two\r
+ * adjacent characters, deletion, insertion and case change is kept for each cell.\r
+ *\r
+ * @param word One word to evaluate\r
+ * @param similar The other word to evaluate\r
+ * @param matrix an optional scratch matrix to reuse; when it is <code>null</code> or\r
+ * smaller than <code>(word.length() + 1) x (similar.length() + 1)</code>\r
+ * a new one is allocated locally\r
+ * @return a number representing how easy or complex it is to transform one\r
+ * word into a similar one.\r
+ */\r
+ public static final int getDistance(String word, String similar, int[][] matrix) {\r
+     final int rowCount = word.length() + 1;\r
+     final int colCount = similar.length() + 1;\r
+\r
+     // Reuse the caller's scratch matrix only when it is big enough in both dimensions.\r
+     final boolean tooSmall = matrix == null || matrix.length < rowCount || matrix[0].length < colCount;\r
+     if (tooSmall) {\r
+         matrix = new int[rowCount][colCount];\r
+     }\r
+\r
+     // Seed the first column and the first row with the cumulative single-operation costs.\r
+     matrix[0][0] = 0;\r
+     for (int row = 1; row < rowCount; row++) {\r
+         matrix[row][0] = matrix[row - 1][0] + costOfInsertingSourceCharacter;\r
+     }\r
+     for (int col = 1; col < colCount; col++) {\r
+         matrix[0][col] = matrix[0][col - 1] + costOfDeletingSourceCharacter;\r
+     }\r
+\r
+     for (int row = 1; row < rowCount; row++) {\r
+         final char fromWord = word.charAt(row - 1);\r
+         for (int col = 1; col < colCount; col++) {\r
+             final char fromSimilar = similar.charAt(col - 1);\r
+             final int diagonal = matrix[row - 1][col - 1];\r
+\r
+             if (fromWord == fromSimilar) {\r
+                 // Identical characters: carry the cost over unchanged.\r
+                 matrix[row][col] = diagonal;\r
+                 continue;\r
+             }\r
+\r
+             // A swap only applies when the two previous characters are crossed over.\r
+             final boolean crossed = row > 1 && col > 1\r
+                     && fromWord == similar.charAt(col - 2)\r
+                     && word.charAt(row - 2) == fromSimilar;\r
+\r
+             final int substitute = costOfSubstitutingLetters + diagonal;\r
+             final int swap = crossed ? costOfSwappingLetters + matrix[row - 2][col - 2] : Integer.MAX_VALUE;\r
+             final int delete = costOfDeletingSourceCharacter + matrix[row][col - 1];\r
+             final int insert = costOfInsertingSourceCharacter + matrix[row - 1][col];\r
+             final int changeCase = equalIgnoreCase(fromWord, fromSimilar) ? costOfChangingCase + diagonal : Integer.MAX_VALUE;\r
+\r
+             matrix[row][col] = minimum(substitute, swap, delete, insert, changeCase);\r
+         }\r
+     }\r
+\r
+     return matrix[rowCount - 1][colCount - 1];\r
+ }\r
+\r
+ /**\r
+ * Tells whether two characters are the same once case is ignored.\r
+ * Both characters are compared as-is first and then by their lower-case forms.\r
+ * @param ch1 the first character\r
+ * @param ch2 the second character\r
+ * @return <code>true</code> if the characters are equal or differ only by case\r
+ */\r
+ private static boolean equalIgnoreCase(char ch1, char ch2) {\r
+     return ch1 == ch2 || Character.toLowerCase(ch1) == Character.toLowerCase(ch2);\r
+ }\r
+ \r
+ /**\r
+ * For debugging, this creates a string that represents the matrix. To read the matrix, look at any square. That is the cost to get from\r
+ * the partial letters along the top to the partial letters along the side.\r
+ * @param src - the source string that the matrix columns are based on\r
+ * @param dest - the dest string that the matrix rows are based on\r
+ * @param matrix - a two dimensional array of costs (distances)\r
+ * @return String\r
+ */\r
+ @SuppressWarnings("unused")\r
+static private String dumpMatrix(String src, String dest, int matrix[][]) {\r
+ StringBuffer s = new StringBuffer("");\r
+\r
+ int cols = matrix.length -1;\r
+ int rows = matrix[0].length -1;\r
+\r
+ for (int i = 0; i < cols + 1; i++) {\r
+ for (int j = 0; j < rows + 1; j++) {\r
+ if (i == 0 && j == 0) {\r
+ s.append("\n ");\r
+ continue;\r
+\r
+ }\r
+ if (i == 0) {\r
+ s.append("| ");\r
+ s.append(dest.charAt(j - 1));\r
+ continue;\r
+ }\r
+ if (j == 0) {\r
+ s.append(src.charAt(i - 1));\r
+ continue;\r
+ }\r
+ String num = Integer.toString(matrix[i - 1][j - 1]);\r
+ int padding = 4 - num.length();\r
+ s.append("|");\r
+ for (int k = 0; k < padding; k++)\r
+ s.append(' ');\r
+ s.append(num);\r
+ }\r
+ s.append('\n');\r
+ }\r
+ return s.toString();\r
+\r
+ }\r
+\r
+\r
+ /**\r
+ * Returns the smallest of the five given values.\r
+ */\r
+ static private int minimum(int a, int b, int c, int d, int e) {\r
+     int lowestOfFour = Math.min(Math.min(a, b), Math.min(c, d));\r
+     return Math.min(lowestOfFour, e);\r
+ }\r
+\r
+ /**\r
+ * For testing edit distances interactively. Reads pairs of lines from standard input and\r
+ * prints the distance between the two lines of each pair. Stops at end of input or at the\r
+ * first empty line, then prints "done".\r
+ * @param args not used\r
+ * @throws java.lang.Exception when a problem occurs while reading standard input.\r
+ */\r
+ public static void main(String[] args) throws Exception {\r
+     BufferedReader console = new BufferedReader(new InputStreamReader(System.in));\r
+     int[][] scratch = new int[0][0];\r
+\r
+     for (;;) {\r
+         String first = console.readLine();\r
+         if (first == null || first.length() == 0) {\r
+             break;\r
+         }\r
+         String second = console.readLine();\r
+         if (second == null || second.length() == 0) {\r
+             break;\r
+         }\r
+         System.out.println(EditDistance.getDistance(first, second, scratch));\r
+     }\r
+\r
+     System.out.println("done");\r
+ }\r
+}\r
+\r
+\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.BufferedReader;\r
+import java.io.File;\r
+import java.io.FileNotFoundException;\r
+import java.io.FileReader;\r
+import java.io.FileWriter;\r
+import java.io.IOException;\r
+import java.util.HashMap;\r
+import java.util.LinkedList;\r
+import java.util.List;\r
+import java.util.Vector;\r
+\r
+/**\r
+ * The SpellDictionary class holds the instance of the dictionary.\r
+ * <p>\r
+ * This class is thread safe. Derived classes should ensure that this is preserved.\r
+ * </p>\r
+ * <p>\r
+ * There are many open source dictionary files. For just a few see:\r
+ * http://wordlist.sourceforge.net/\r
+ * </p>\r
+ * <p>\r
+ * This dictionary class reads words one per line. Make sure that your word list\r
+ * is formatted in this way (most are).\r
+ * </p>\r
+ */\r
+public class GenericSpellDictionary extends SpellDictionaryASpell {\r
+\r
+//tech_monkey: the alphabet / replace list stuff has been moved into the Transformator classes,\r
+//since they are so closely tied to how the phonetic transformations are done.\r
+// /**\r
+// * This replace list is used if no phonetic file is supplied or it doesn't\r
+// * contain the alphabet.\r
+// */\r
+// protected static final char[] englishAlphabet =\r
+\r
+\r
+ /** The initial capacity passed to the main dictionary hash map: 16 * 1024.\r
+ * This is the capacity argument of the HashMap constructor, not a size in bytes.\r
+ * Interested to see what the performance of a smaller initial capacity is like.\r
+ */\r
+ private final static int INITIAL_CAPACITY = 16 * 1024;\r
+\r
+ /**\r
+ * The hashmap that contains the word dictionary. The map is keyed on the phonetic code\r
+ * returned by getCode(word). Each map entry is a LinkedList of the words that share\r
+ * that code.\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+protected HashMap mainDictionary = new HashMap(INITIAL_CAPACITY);\r
+\r
+ /** The word list file that addWord appends to; nothing is written when it is null. */\r
+ private File dictFile = null;\r
+\r
+\r
+ /**\r
+ * Dictionary constructor that uses the DoubleMeta class with the\r
+ * English alphabet. Delegates to\r
+ * {@link #GenericSpellDictionary(File, File)} with a <code>null</code> phonetic file.\r
+ * @param wordList The file containing the dictionary as a word list.\r
+ * @throws java.io.FileNotFoundException when the word list file could not \r
+ * be located on the system.\r
+ * @throws java.io.IOException when a problem occurs while reading the word \r
+ * list file\r
+ */\r
+ public GenericSpellDictionary(File wordList) throws FileNotFoundException, IOException {\r
+ this(wordList, (File) null);\r
+ }\r
+\r
+ /**\r
+ * Dictionary constructor that uses an aspell phonetic file to\r
+ * build the transformation table.\r
+ * If phonetic is null, then DoubleMeta is used with the English alphabet.\r
+ * The word list file is remembered so that {@link #addWord(String)} can append to it,\r
+ * and the reader opened on it is closed before the constructor returns.\r
+ * @param wordList The file containing the dictionary as a word list.\r
+ * @param phonetic The file containing the phonetic transformation \r
+ * information.\r
+ * @throws java.io.FileNotFoundException when the word list or phonetic \r
+ * file could not be located on the system\r
+ * @throws java.io.IOException when a problem occurs while reading the \r
+ * word list or phonetic file\r
+ */\r
+ public GenericSpellDictionary(File wordList, File phonetic) throws FileNotFoundException, IOException {\r
+\r
+     super(phonetic);\r
+     dictFile = wordList;\r
+     BufferedReader reader = new BufferedReader(new FileReader(wordList));\r
+     try {\r
+         createDictionary(reader);\r
+     } finally {\r
+         // Always release the file handle, even when loading fails part-way.\r
+         reader.close();\r
+     }\r
+ }\r
+\r
+\r
+ /**\r
+ * Add a word permanently to the dictionary (and the dictionary file).\r
+ * The word is first added to the in-memory dictionary; when a dictionary file is known\r
+ * it is then appended to that file on a line of its own. An I/O failure is reported on\r
+ * standard output and otherwise ignored, so the in-memory addition is kept.\r
+ * <p>This needs to be made thread safe (synchronized)</p>\r
+ * @param word The word to add to the dictionary\r
+ */\r
+ public void addWord(String word) {\r
+     putWord(word);\r
+     if (dictFile == null)\r
+         return;\r
+     try {\r
+         // Open with append.\r
+         FileWriter w = new FileWriter(dictFile.toString(), true);\r
+         try {\r
+             w.write(word);\r
+             w.write("\n");\r
+         } finally {\r
+             // Close even when a write fails so the file handle is not leaked.\r
+             w.close();\r
+         }\r
+     } catch (IOException ex) {\r
+         System.out.println("Error writing to dictionary file");\r
+     }\r
+ }\r
+\r
+ /**\r
+ * Fills the dictionary from a word list.\r
+ * <p>\r
+ * Every line delivered by the reader is treated as one word and stored through\r
+ * putWord. Reading stops at end of input; the reader is not closed here.\r
+ * @param in the reader supplying one word per line\r
+ * @throws IOException when reading from <code>in</code> fails\r
+ */\r
+ protected void createDictionary(BufferedReader in) throws IOException {\r
+     for (String entry = in.readLine(); entry != null; entry = in.readLine()) {\r
+         // Store a fresh copy of the characters rather than the string handed out by the reader.\r
+         putWord(new String(entry.toCharArray()));\r
+     }\r
+ }\r
+\r
+ /**\r
+ * Stores a word in the dictionary under its phonetic code.\r
+ * Words sharing a code are kept together in one list, in insertion order.\r
+ * @param word the word to store\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+protected void putWord(String word) {\r
+     String code = getCode(word);\r
+     LinkedList bucket = (LinkedList) mainDictionary.get(code);\r
+     if (bucket == null) {\r
+         // First word for this code: create and register its list.\r
+         bucket = new LinkedList();\r
+         mainDictionary.put(code, bucket);\r
+     }\r
+     bucket.add(word);\r
+ }\r
+\r
+ /**\r
+ * Looks up the words stored under a phonetic code.\r
+ * @param code The phonetic code we want to find words for\r
+ * @return the list held by the main dictionary for that code, or a new empty\r
+ * Vector when no word has that code\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+@Override\r
+public List getWords(String code) {\r
+     List bucket = (List) mainDictionary.get(code);\r
+     return (bucket != null) ? bucket : new Vector();\r
+ }\r
+\r
+ /**\r
+ * Returns true if the word is correctly spelled against the current word list.\r
+ * @param word The word to checked in the dictionary\r
+ * @return indication if the word is in the dictionary\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+@Override\r
+public boolean isCorrect(String word) {\r
+ List possible = getWords(getCode(word));\r
+ if (possible.contains(word))\r
+ return true;\r
+ //JMH should we always try the lowercase version. If I dont then capitalised\r
+ //words are always returned as incorrect.\r
+ else if (possible.contains(word.toLowerCase()))\r
+ return true;\r
+ return false;\r
+ }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.BufferedReader;\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileReader;\r
+import java.io.IOException;\r
+import java.io.InputStreamReader;\r
+import java.io.Reader;\r
+import java.util.HashMap;\r
+import java.util.Vector;\r
+\r
+import com.swabunga.util.StringUtility;\r
+\r
+/**\r
+ * A Generic implementation of a transformator takes an \r
+ * <a href="http://aspell.net/man-html/Phonetic-Code.html">\r
+ * aspell phonetics file</a> and constructs some sort of transformation \r
+ * table using the inner class TransformationRule.\r
+ * <p>\r
+ * Basically, each transformation rule represent a line in the phonetic file.\r
+ * One line contains two groups of characters separated by white space(s).\r
+ * The first group is the <em>match expression</em>. \r
+ * The <em>match expression</em> describe letters to associate with a syllable.\r
+ * The second group is the <em>replacement expression</em> giving the phonetic \r
+ * equivalent of the <em>match expression</em>.\r
+ *\r
+ * @see SpellDictionaryASpell SpellDictionaryASpell for information on getting\r
+ * phonetic files for aspell.\r
+ *\r
+ * @author Robert Gustavsson (robert@lindesign.se)\r
+ */\r
+public class GenericTransformator implements Transformator {\r
+\r
+\r
+ /**\r
+ * This replace list is used if no phonetic file is supplied or it doesn't\r
+ * contain the alphabet.\r
+ */\r
+ private static final char[] defaultEnglishAlphabet = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'};\r
+\r
+ /**\r
+ * The alphabet start marker.\r
+ * @see GenericTransformator#KEYWORD_ALPHBET KEYWORD_ALPHBET\r
+ */\r
+ public static final char ALPHABET_START = '[';\r
+ /**\r
+ * The alphabet end marker.\r
+ * @see GenericTransformator#KEYWORD_ALPHBET KEYWORD_ALPHBET\r
+ */\r
+ public static final char ALPHABET_END = ']';\r
+ /**\r
+ * Phonetic file keyword indicating that a different alphabet is used \r
+ * for this language. The keyword must be followed by an\r
+ * {@link GenericTransformator#ALPHABET_START ALPHABET_START} marker, \r
+ * a list of characters defining the alphabet and a\r
+ * {@link GenericTransformator#ALPHABET_END ALPHABET_END} marker.\r
+ */\r
+ public static final String KEYWORD_ALPHBET = "alphabet";\r
+ /**\r
+ * Phonetic file lines starting with the keywords are skipped. \r
+ * The key words are: version, followup, collapse_result.\r
+ * Comments, starting with '#', are also skipped to the end of line.\r
+ */\r
+ public static final String[] IGNORED_KEYWORDS = {"version", "followup", "collapse_result"};\r
+\r
+ /**\r
+ * Start a group of characters which can be appended to the match expression\r
+ * of the phonetic file.\r
+ */\r
+ public static final char STARTMULTI = '(';\r
+ /**\r
+ * End a group of characters which can be appended to the match expression\r
+ * of the phonetic file.\r
+ */\r
+ public static final char ENDMULTI = ')';\r
+ /**\r
+ * During phonetic transformation of a word each numeric character is\r
+ * replaced by this DIGITCODE.\r
+ */\r
+ public static final String DIGITCODE = "0";\r
+ /**\r
+ * Phonetic file character code indicating that the replace expression\r
+ * is empty.\r
+ */\r
+ public static final String REPLACEVOID = "_";\r
+\r
+ private Object[] ruleArray = null;\r
+ private char[] alphabetString = defaultEnglishAlphabet;\r
+\r
+ /**\r
+ * Construct a transformation table from the phonetic file.\r
+ * The file is read with a FileReader (platform default encoding), and the reader is\r
+ * closed before the constructor returns, whether or not reading succeeded.\r
+ * @param phonetic the phonetic file as specified in aspell\r
+ * @throws java.io.IOException indicates a problem while reading\r
+ * the phonetic file\r
+ */\r
+ public GenericTransformator(File phonetic) throws IOException {\r
+     BufferedReader reader = new BufferedReader(new FileReader(phonetic));\r
+     try {\r
+         buildRules(reader);\r
+     } finally {\r
+         // Release the file handle even when a rule line cannot be read.\r
+         reader.close();\r
+     }\r
+     alphabetString = washAlphabetIntoReplaceList(getReplaceList());\r
+\r
+ }\r
+\r
+ /**\r
+ * Construct a transformation table from the phonetic file, decoding it with the\r
+ * given character set. The underlying stream is closed before the constructor\r
+ * returns, whether or not reading succeeded.\r
+ * @param phonetic the phonetic file as specified in aspell\r
+ * @param encoding the character set required\r
+ * @throws java.io.IOException indicates a problem while reading\r
+ * the phonetic file\r
+ */\r
+ public GenericTransformator(File phonetic, String encoding) throws IOException {\r
+     FileInputStream stream = new FileInputStream(phonetic);\r
+     try {\r
+         buildRules(new BufferedReader(new InputStreamReader(stream, encoding)));\r
+     } finally {\r
+         // Closing the stream also covers the case where the encoding name is rejected.\r
+         stream.close();\r
+     }\r
+     alphabetString = washAlphabetIntoReplaceList(getReplaceList());\r
+ }\r
+\r
+ /**\r
+ * Construct a transformation table from phonetic information supplied as a reader.\r
+ * The reader is wrapped in a BufferedReader and read to the end; it is not closed\r
+ * by this constructor.\r
+ * @param phonetic the phonetic file as specified in aspell. The file is\r
+ * supplied as a reader.\r
+ * @throws java.io.IOException indicates a problem while reading\r
+ * the phonetic information\r
+ */\r
+ public GenericTransformator(Reader phonetic) throws IOException {\r
+ buildRules(new BufferedReader(phonetic));\r
+ alphabetString = washAlphabetIntoReplaceList(getReplaceList());\r
+ }\r
+\r
+ /**\r
+ * Goes through an alphabet and makes sure that only one of those letters\r
+ * that are coded equally will be in the replace list.\r
+ * In other words, it removes any letters in the alphabet\r
+ * that are redundant phonetically: for every distinct result of\r
+ * {@link #transform(String)} only the first letter producing it is kept.\r
+ *\r
+ * This is done to improve speed in the getSuggestion method.\r
+ * The order of the returned letters is the iteration order of a HashMap\r
+ * and therefore unspecified.\r
+ *\r
+ * @param alphabet The complete alphabet to wash.\r
+ * @return The washed alphabet to be used as replace list.\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+private char[] washAlphabetIntoReplaceList(char[] alphabet) {\r
+\r
+     // phonetic code -> first letter of the alphabet that produced that code\r
+     HashMap letters = new HashMap(alphabet.length);\r
+\r
+     for (char element : alphabet) {\r
+         String code = transform(String.valueOf(element));\r
+         if (!letters.containsKey(code)) {\r
+             // Character.valueOf replaces the deprecated Character(char) constructor.\r
+             letters.put(code, Character.valueOf(element));\r
+         }\r
+     }\r
+\r
+     Object[] tmpCharacters = letters.values().toArray();\r
+     char[] washedArray = new char[tmpCharacters.length];\r
+\r
+     for (int i = 0; i < tmpCharacters.length; i++) {\r
+         washedArray[i] = ((Character) tmpCharacters[i]).charValue();\r
+     }\r
+\r
+     return washedArray;\r
+ }\r
+\r
+\r
+ /**\r
+ * Takes out all single character replacements and put them in a char array.\r
+ * This array can later be used for adding or changing letters in getSuggestion().\r
+ * @return char[] An array of chars with replacements characters\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+public char[] getCodeReplaceList() {\r
+ char[] replacements;\r
+ TransformationRule rule;\r
+ Vector tmp = new Vector();\r
+\r
+ if (ruleArray == null)\r
+ return null;\r
+ for (Object element : ruleArray) {\r
+ rule = (TransformationRule) element;\r
+ if (rule.getReplaceExp().length() == 1)\r
+ tmp.addElement(rule.getReplaceExp());\r
+ }\r
+ replacements = new char[tmp.size()];\r
+ for (int i = 0; i < tmp.size(); i++) {\r
+ replacements[i] = ((String) tmp.elementAt(i)).charAt(0);\r
+ }\r
+ return replacements;\r
+ }\r
+\r
+ /**\r
+ * Returns the alphabet currently held in <code>alphabetString</code>.\r
+ * The field starts out as the default English alphabet, is replaced while the rules are\r
+ * built when the phonetic file has an alphabet line, and is replaced by its washed form\r
+ * at the end of each constructor.\r
+ * @return char[] the <code>alphabetString</code> array itself (no copy is made)\r
+ */\r
+ public char[] getReplaceList() {\r
+ return alphabetString;\r
+ }\r
+\r
+ /**\r
+ * Builds the phonetic code of the word.\r
+ * The word is upper-cased and then rewritten in place from left to right: a digit is\r
+ * replaced by DIGITCODE, and at any other position the first matching rule, in rule\r
+ * order, replaces part of the text with its replace expression.\r
+ * @param word the word to transform\r
+ * @return the phonetic transformation of the word, or <code>null</code> when no\r
+ * rules have been built\r
+ */\r
+ public String transform(String word) {\r
+\r
+ if (ruleArray == null)\r
+ return null;\r
+\r
+ TransformationRule rule;\r
+ StringBuffer str = new StringBuffer(word.toUpperCase());\r
+ // strLength is tracked by hand and adjusted after every replacement below.\r
+ int strLength = str.length();\r
+ // add is how far startPos moves after handling the current position.\r
+ int startPos = 0, add = 1;\r
+\r
+ while (startPos < strLength) {\r
+\r
+ // Default: step over one character when no rule matches here.\r
+ add = 1;\r
+ if (Character.isDigit(str.charAt(startPos))) {\r
+ StringUtility.replace(str, startPos, startPos + DIGITCODE.length(), DIGITCODE);\r
+ startPos += add;\r
+ continue;\r
+ }\r
+\r
+ for (Object element : ruleArray) {\r
+ //System.out.println("Testing rule#:"+i);\r
+ rule = (TransformationRule) element;\r
+ // Rules anchored at the start of the word only apply at position 0.\r
+ if (rule.startsWithExp() && startPos > 0)\r
+ continue;\r
+ // Skip rules whose match would run past the end of the text.\r
+ if (startPos + rule.lengthOfMatch() > strLength) {\r
+ continue;\r
+ }\r
+ if (rule.isMatching(str, startPos)) {\r
+ String replaceExp = rule.getReplaceExp();\r
+\r
+ // Continue after the inserted replacement; an empty replacement keeps startPos in place.\r
+ // NOTE(review): presumably getTakeOut() is positive whenever the replacement is empty,\r
+ // otherwise this loop would not advance - confirm against the rule files in use.\r
+ add = replaceExp.length();\r
+ StringUtility.replace(str, startPos, startPos + rule.getTakeOut(), replaceExp);\r
+ strLength -= rule.getTakeOut();\r
+ strLength += add;\r
+ //System.out.println("Replacing with rule#:"+i+" add="+add);\r
+ // Only the first matching rule is applied at a given position.\r
+ break;\r
+ }\r
+ }\r
+ startPos += add;\r
+ }\r
+ //System.out.println(word);\r
+ //System.out.println(str.toString());\r
+ return str.toString();\r
+ }\r
+\r
+ // Reads the phonetic data line by line, turns every line into at most one rule\r
+ // and stores the collected rules in ruleArray. The reader is not closed here.\r
+ @SuppressWarnings("unchecked")\r
+private void buildRules(BufferedReader in) throws IOException {\r
+     Vector collected = new Vector();\r
+     for (String row = in.readLine(); row != null; row = in.readLine()) {\r
+         buildRule(realTrimmer(row), collected);\r
+     }\r
+     TransformationRule[] rules = new TransformationRule[collected.size()];\r
+     collected.copyInto(rules);\r
+     ruleArray = rules;\r
+ }\r
+\r
+ // Here is where the real work of reading the phonetics file is done.\r
+ // Handles one line of the file (already stripped of comments and trimmed by the caller):\r
+ // - empty lines and lines starting with one of IGNORED_KEYWORDS are skipped;\r
+ // - a line starting with KEYWORD_ALPHBET replaces alphabetString and adds no rule;\r
+ // - any other line is parsed into a TransformationRule that is appended to ruleList.\r
+ @SuppressWarnings("unchecked")\r
+private void buildRule(String str, Vector ruleList) {\r
+ if (str.length() < 1)\r
+ return;\r
+ for (String element : IGNORED_KEYWORDS) {\r
+ if (str.startsWith(element))\r
+ return;\r
+ }\r
+\r
+ // A different alphabet is used for this language, will be read into\r
+ // the alphabetString variable. The alphabet is the text between the first\r
+ // ALPHABET_START and the last ALPHABET_END; without both markers the line is ignored.\r
+ if (str.startsWith(KEYWORD_ALPHBET)) {\r
+ int start = str.indexOf(ALPHABET_START);\r
+ int end = str.lastIndexOf(ALPHABET_END);\r
+ if (end != -1 && start != -1) {\r
+ alphabetString = str.substring(++start, end).toCharArray();\r
+ }\r
+ return;\r
+ }\r
+\r
+ // str contains two groups of characters separated by white space(s).\r
+ // The first group is the "match expression". The second group is the \r
+ // "replacement expression" giving the phonetic equivalent of the \r
+ // "match expression".\r
+ TransformationRule rule = null;\r
+ StringBuffer matchExp = new StringBuffer();\r
+ StringBuffer replaceExp = new StringBuffer();\r
+ // start / end: set when the match expression contains '^' / '$'.\r
+ boolean start = false,\r
+ end = false;\r
+ // takeOutPart: number of chars to replace; matchLength: length of the match,\r
+ // counting a parenthesised group as one.\r
+ int takeOutPart = 0,\r
+ matchLength = 0;\r
+ // match: still reading the match expression; inMulti: inside a STARTMULTI..ENDMULTI group.\r
+ boolean match = true,\r
+ inMulti = false;\r
+ for (int i = 0; i < str.length(); i++) {\r
+ if (Character.isWhitespace(str.charAt(i))) {\r
+ // The first whitespace ends the match expression; whitespace itself is never stored.\r
+ match = false;\r
+ } else {\r
+ if (match) {\r
+ if (!isReservedChar(str.charAt(i))) {\r
+ matchExp.append(str.charAt(i));\r
+ // The opening parenthesis is counted (inMulti is still false here); the group's\r
+ // content and its closing parenthesis are not.\r
+ if (!inMulti) {\r
+ takeOutPart++;\r
+ matchLength++;\r
+ }\r
+ if (str.charAt(i) == STARTMULTI || str.charAt(i) == ENDMULTI)\r
+ inMulti = !inMulti;\r
+ }\r
+ // Each '-' reduces the number of characters taken out by one.\r
+ if (str.charAt(i) == '-')\r
+ takeOutPart--;\r
+ if (str.charAt(i) == '^')\r
+ start = true;\r
+ if (str.charAt(i) == '$')\r
+ end = true;\r
+ } else {\r
+ replaceExp.append(str.charAt(i));\r
+ }\r
+ }\r
+ }\r
+ // REPLACEVOID stands for an empty replacement.\r
+ if (replaceExp.toString().equals(REPLACEVOID)) {\r
+ replaceExp = new StringBuffer("");\r
+ //System.out.println("Changing _ to \"\" for "+matchExp.toString());\r
+ }\r
+ rule = new TransformationRule(matchExp.toString(), replaceExp.toString(), takeOutPart, matchLength, start, end);\r
+ //System.out.println(rule.toString());\r
+ ruleList.addElement(rule);\r
+ }\r
+\r
+ // Tells whether ch has a special meaning to aspell: one of '<', '>', '^',\r
+ // '$', '-' or any digit. Not every one of them is implemented here.\r
+ private boolean isReservedChar(char ch) {\r
+     switch (ch) {\r
+         case '<':\r
+         case '>':\r
+         case '^':\r
+         case '$':\r
+         case '-':\r
+             return true;\r
+         default:\r
+             return Character.isDigit(ch);\r
+     }\r
+ }\r
+\r
+ // Trims off everything we don't care about.\r
+ private String realTrimmer(String row) {\r
+ int pos = row.indexOf('#');\r
+ if (pos != -1) {\r
+ row = row.substring(0, pos);\r
+ }\r
+ return row.trim();\r
+ }\r
+\r
+ // Inner Classes\r
+ /*\r
+ * Holds the match string and the replace string and all the rule attributes.\r
+ * Is responsible for indicating matches.\r
+ */\r
+ private class TransformationRule {\r
+\r
+ private final String replace;\r
+ private final char[] match;\r
+ // takeOut=number of chars to replace;\r
+ // matchLength=length of matching string counting multies as one.\r
+ private final int takeOut, matchLength;\r
+ private final boolean start, end;\r
+\r
+ // Construktor\r
+ public TransformationRule(String match, String replace, int takeout, int matchLength, boolean start, boolean end) {\r
+ this.match = match.toCharArray();\r
+ this.replace = replace;\r
+ this.takeOut = takeout;\r
+ this.matchLength = matchLength;\r
+ this.start = start;\r
+ this.end = end;\r
+ }\r
+\r
+ /*\r
+ * Returns true if word from pos and forward matches the match string.\r
+ * Precondition: wordPos+matchLength<word.length()\r
+ */\r
+ public boolean isMatching(StringBuffer word, int wordPos) {\r
+ boolean matching = true, inMulti = false, multiMatch = false;\r
+ char matchCh;\r
+\r
+ for (char element : match) {\r
+ matchCh = element;\r
+ if (matchCh == STARTMULTI || matchCh == ENDMULTI) {\r
+ inMulti = !inMulti;\r
+ if (!inMulti)\r
+ matching = matching & multiMatch;\r
+ else\r
+ multiMatch = false;\r
+ } else {\r
+ if (matchCh != word.charAt(wordPos)) {\r
+ if (inMulti)\r
+ multiMatch = multiMatch | false;\r
+ else\r
+ matching = false;\r
+ } else {\r
+ if (inMulti)\r
+ multiMatch = multiMatch | true;\r
+ else\r
+ matching = true;\r
+ }\r
+ if (!inMulti)\r
+ wordPos++;\r
+ if (!matching)\r
+ break;\r
+ }\r
+ }\r
+ if (end && wordPos != word.length())\r
+ matching = false;\r
+ return matching;\r
+ }\r
+\r
+ public String getReplaceExp() {\r
+ return replace;\r
+ }\r
+\r
+ public int getTakeOut() {\r
+ return takeOut;\r
+ }\r
+\r
+ public boolean startsWithExp() {\r
+ return start;\r
+ }\r
+\r
+ public int lengthOfMatch() {\r
+ return matchLength;\r
+ }\r
+\r
+ // Just for debugging purposes.\r
+ @Override\r
+ public String toString() {\r
+ return "Match:" + String.valueOf(match) + " Replace:" + replace + " TakeOut:" + takeOut + " MatchLength:" + matchLength + " Start:" + start + " End:" + end;\r
+ }\r
+\r
+ }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.*;\r
+import java.net.URL;\r
+import java.util.Properties;\r
+\r
+\r
+/**\r
+ * Implementation class to read the properties controlling the spell engine. \r
+ * The properties are read from the <code>configuration.properties</code> file.\r
+ *\r
+ * @author aim4min\r
+ */\r
+public class PropertyConfiguration extends Configuration {\r
+\r
+ /**\r
+ * The persistent set of properties supported by the spell engine\r
+ */\r
+ public Properties prop;\r
+ /**\r
+ * The name of the file containing spell engine properties\r
+ */\r
+ public URL filename;\r
+\r
+ /**\r
+ * Constructs and loads spell engine properties configuration.\r
+ */\r
+ public PropertyConfiguration() {\r
+ prop = new Properties();\r
+ try {\r
+ filename = getClass().getClassLoader().getResource("com/swabunga/spell/engine/configuration.properties");\r
+ InputStream in = filename.openStream();\r
+ prop.load(in);\r
+ } catch (Exception e) {\r
+ System.out.println("Could not load Properties file :\n" + e);\r
+ }\r
+ }\r
+\r
+ /**\r
+ * @see com.swabunga.spell.engine.Configuration#getBoolean(String)\r
+ */\r
+ public boolean getBoolean(String key) {\r
+ return new Boolean(prop.getProperty(key)).booleanValue();\r
+ }\r
+\r
+ /**\r
+ * @see com.swabunga.spell.engine.Configuration#getInteger(String)\r
+ */\r
+ public int getInteger(String key) {\r
+ return new Integer(prop.getProperty(key)).intValue();\r
+ }\r
+\r
+ /**\r
+ * @see com.swabunga.spell.engine.Configuration#setBoolean(String, boolean)\r
+ */\r
+ public void setBoolean(String key, boolean value) {\r
+ String string = null;\r
+ if (value)\r
+ string = "true";\r
+ else\r
+ string = "false";\r
+\r
+ prop.setProperty(key, string);\r
+ save();\r
+ }\r
+\r
+ /**\r
+ * @see com.swabunga.spell.engine.Configuration#setInteger(String, int)\r
+ */\r
+ public void setInteger(String key, int value) {\r
+ prop.setProperty(key, Integer.toString(value));\r
+ save();\r
+ }\r
+\r
+ /**\r
+ * Writes the property list (key and element pairs) in the \r
+ * PropertyConfiguration file.\r
+ */\r
+ public void save() {\r
+ try {\r
+ File file = new File(filename.getFile());\r
+ FileOutputStream fout = new FileOutputStream(file);\r
+ prop.store(fout, "HEADER");\r
+ } catch (FileNotFoundException e) {\r
+ } catch (IOException e) {\r
+ }\r
+ }\r
+\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.util.List;\r
+\r
+/**\r
+ * An interface for all dictionary implementations. It defines the most basic\r
+ * operations on a dictionary: adding words, checking if a word is correct, and getting a list\r
+ * of suggestions for misspelled words.\r
+ */\r
+public interface SpellDictionary {\r
+\r
+ /**\r
+ * Add a word permanently to the dictionary.\r
+ * @param word The word to add to the dictionary\r
+ */\r
+ public void addWord(String word);\r
+\r
+ /**\r
+ * Evaluates if the word is correctly spelled against the dictionary.\r
+ * @param word The word whose spelling is to be verified.\r
+ * @return Indicates if the word is present in the dictionary.\r
+ */\r
+ public boolean isCorrect(String word);\r
+\r
+ /**\r
+ * Returns a list of Word objects that are the suggestions to any word.\r
+ * If the word is correctly spelled, then this method\r
+ * could return just that one word, or it could still return a list\r
+ * of words with similar spellings.\r
+ * <br/>\r
+ * Each suggested word has a score, which is an integer\r
+ * that represents how different the suggested word is from the sourceWord.\r
+ * If the words are exactly the same, then the score is 0.\r
+ * You can get the dictionary to only return the most similar words by setting\r
+ * an appropriately low threshold value.\r
+ * If you set the threshold value too low, you may get no suggestions for a given word.\r
+ * <p>\r
+ * This method is only needed to provide backward compatibility. \r
+ * \r
+ * @param sourceWord the string that we want to get a list of spelling suggestions for\r
+ * @param scoreThreshold Any words that have a score less than this number are returned.\r
+ * @return a List of suggested words\r
+ * @see #getSuggestions(String, int, int[][])\r
+ * @see com.swabunga.spell.engine.Word\r
+ * \r
+ */\r
+ @SuppressWarnings("unchecked")\r
+public List getSuggestions(String sourceWord, int scoreThreshold);\r
+\r
+ /**\r
+ * Returns a list of Word objects that are the suggestions to any word.\r
+ * If the word is correctly spelled, then this method\r
+ * could return just that one word, or it could still return a list\r
+ * of words with similar spellings.\r
+ * <br/>\r
+ * Each suggested word has a score, which is an integer\r
+ * that represents how different the suggested word is from the sourceWord.\r
+ * If the words are exactly the same, then the score is 0.\r
+ * You can get the dictionary to only return the most similar words by setting\r
+ * an appropriately low threshold value.\r
+ * If you set the threshold value too low, you may get no suggestions for a given word.\r
+ * <p>\r
+ * @param sourceWord the string that we want to get a list of spelling suggestions for\r
+ * @param scoreThreshold Any words that have a score less than this number are returned.\r
+ * @param matrix Two dimensional int array used to calculate edit distance. Allocating \r
+ * this memory outside of the function will greatly improve efficiency. \r
+ * @return a List of suggested words\r
+ * @see com.swabunga.spell.engine.Word\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+public List getSuggestions(String sourceWord, int scoreThreshold , int[][] matrix);\r
+\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+/* Created by bgalbs on Jan 30, 2003 at 11:45:25 PM */\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.File;\r
+import java.io.IOException;\r
+import java.io.Reader;\r
+import java.security.InvalidParameterException;\r
+import java.util.Collections;\r
+import java.util.Enumeration;\r
+import java.util.Hashtable;\r
+import java.util.Iterator;\r
+import java.util.LinkedList;\r
+import java.util.List;\r
+import java.util.Vector;\r
+\r
+/**\r
+ * Container for various methods that any <code>SpellDictionary</code> will use.\r
+ * This class is based on the original Jazzy aspell port.\r
+ * <p/>\r
+ * Derived classes will need words list files as spell checking reference. \r
+ * Words list file is a dictionary with one word per line. There are many \r
+ * open source dictionary files, see: \r
+ * <a href="http://wordlist.sourceforge.net/">\r
+ * http://wordlist.sourceforge.net/</a>\r
+ * <p/>\r
+ * You can choose word lists from the dictionaries that \r
+ * <a href="http://aspell.net/">aspell</a> provides for many different languages. To grab some, install \r
+ * <code>aspell</code> and the dictionaries you require. Then run aspell \r
+ * specifying the name of the dictionary and the words list file to dump it \r
+ * into, for example:\r
+ * <pre>\r
+ * aspell --master=fr-40 dump master > fr-40.txt\r
+ * </pre>\r
+ * Note: the number following the language is the size indicator. A bigger\r
+ * number gives a more extensive language coverage. Size 40 is more than \r
+ * adequate for many usages.\r
+ * <p/>\r
+ * For some languages, Aspell can also supply you with the phonetic file. \r
+ * On Windows, go into aspell <code>data</code> directory and copy the \r
+ * phonetic file corresponding to your language, for example the \r
+ * <code>fr_phonet.dat</code> for the <code>fr</code> language. The phonetic\r
+ * file should be in directory <code>/usr/share/aspell</code> on Unix.\r
+ *\r
+ * @see GenericTransformator GenericTransformator for information on \r
+ * phonetic files.\r
+ */\r
+public abstract class SpellDictionaryASpell implements SpellDictionary {\r
+\r
+\r
+    /** The reference to a Transformator, used to transform a word into its phonetic code. */\r
+    protected Transformator tf;\r
+\r
+    /**\r
+     * Constructs a new SpellDictionaryASpell\r
+     * @param phonetic The file to use for phonetic transformation of the\r
+     * words list. If <code>phonetic</code> is null, then the transformation\r
+     * uses {@link DoubleMeta} transformation.\r
+     * @throws java.io.IOException indicates problems reading the phonetic\r
+     * information\r
+     */\r
+    public SpellDictionaryASpell(File phonetic) throws IOException {\r
+        if (phonetic == null)\r
+            tf = new DoubleMeta();\r
+        else\r
+            tf = new GenericTransformator(phonetic);\r
+    }\r
+\r
+    /**\r
+     * Constructs a new SpellDictionaryASpell\r
+     * @param phonetic The file to use for phonetic transformation of the\r
+     * words list. If <code>phonetic</code> is null, then the transformation\r
+     * uses {@link DoubleMeta} transformation.\r
+     * @param encoding Uses the character set encoding specified\r
+     * @throws java.io.IOException indicates problems reading the phonetic\r
+     * information\r
+     */\r
+    public SpellDictionaryASpell(File phonetic, String encoding) throws IOException {\r
+        if (phonetic == null)\r
+            tf = new DoubleMeta();\r
+        else\r
+            tf = new GenericTransformator(phonetic, encoding);\r
+    }\r
+\r
+    /**\r
+     * Constructs a new SpellDictionaryASpell\r
+     * @param phonetic The Reader to use for phonetic transformation of the\r
+     * words list. If <code>phonetic</code> is null, then the transformation\r
+     * uses {@link DoubleMeta} transformation.\r
+     * @throws java.io.IOException indicates problems reading the phonetic\r
+     * information\r
+     */\r
+    public SpellDictionaryASpell(Reader phonetic) throws IOException {\r
+        if (phonetic == null)\r
+            tf = new DoubleMeta();\r
+        else\r
+            tf = new GenericTransformator(phonetic);\r
+    }\r
+\r
+    /**\r
+     * Returns a list of Word objects that are the suggestions to an\r
+     * incorrect word.\r
+     * <p>\r
+     * This method is only needed to provide backward compatibility; it\r
+     * delegates to the three argument variant with a <code>null</code> matrix.\r
+     * @see #getSuggestions(String, int, int[][])\r
+     * @param word Suggestions for given misspelt word\r
+     * @param threshold The lower boundary of similarity to misspelt word\r
+     * @return a List of suggestions\r
+     */\r
+    @SuppressWarnings("unchecked")\r
+    public List getSuggestions(String word, int threshold) {\r
+        return getSuggestions(word, threshold, null);\r
+    }\r
+\r
+    /**\r
+     * Returns a list of Word objects that are the suggestions to an\r
+     * incorrect word.\r
+     * <p>\r
+     * Candidates are the dictionary words sharing the phonetic code of the\r
+     * word itself, followed by the words sharing the code of a "near miss":\r
+     * the word with two neighbouring characters swapped, one character\r
+     * changed, one character inserted or one character deleted. Each of the\r
+     * two groups is sorted separately and the phonetic matches come first.\r
+     * @param word Suggestions for given misspelt word\r
+     * @param threshold The lower boundary of similarity to misspelt word.\r
+     * This implementation does not read it; candidates are filtered with the\r
+     * configured {@link Configuration#SPELL_THRESHOLD}.\r
+     * @param matrix Two dimensional int array used to calculate\r
+     * edit distance. Allocating this memory outside of the function will greatly improve efficiency.\r
+     * May be <code>null</code>.\r
+     * @return a List of suggestions\r
+     */\r
+    @SuppressWarnings("unchecked")\r
+    public List getSuggestions(String word, int threshold, int[][] matrix) {\r
+\r
+        int i;\r
+        int j;\r
+\r
+        if (matrix == null)\r
+            matrix = new int[0][0];\r
+\r
+        Hashtable nearmisscodes = new Hashtable();\r
+        String code = getCode(word);\r
+\r
+        // add all words that have the same phonetics\r
+        nearmisscodes.put(code, code);\r
+        Vector phoneticList = getWordsFromCode(word, nearmisscodes);\r
+\r
+        // do some transformations to pick up more results\r
+        //interchange\r
+        nearmisscodes = new Hashtable();\r
+        char[] charArray = word.toCharArray();\r
+        char a;\r
+        char b;\r
+\r
+        for (i = 0; i < word.length() - 1; i++) {\r
+            a = charArray[i];\r
+            b = charArray[i + 1];\r
+            charArray[i] = b;\r
+            charArray[i + 1] = a;\r
+            String s = getCode(new String(charArray));\r
+            nearmisscodes.put(s, s);\r
+            charArray[i] = a;\r
+            charArray[i + 1] = b;\r
+        }\r
+\r
+        char[] replacelist = tf.getReplaceList();\r
+\r
+        //change\r
+        charArray = word.toCharArray();\r
+        char original;\r
+        for (i = 0; i < word.length(); i++) {\r
+            original = charArray[i];\r
+            for (j = 0; j < replacelist.length; j++) {\r
+                charArray[i] = replacelist[j];\r
+                String s = getCode(new String(charArray));\r
+                nearmisscodes.put(s, s);\r
+            }\r
+            charArray[i] = original;\r
+        }\r
+\r
+        //add\r
+        // A placeholder is appended and then walked towards the front, trying\r
+        // every replacement character at each insertion point.\r
+        charArray = (word += " ").toCharArray();\r
+        int iy = charArray.length - 1;\r
+        while (true) {\r
+            for (j = 0; j < replacelist.length; j++) {\r
+                charArray[iy] = replacelist[j];\r
+                String s = getCode(new String(charArray));\r
+                nearmisscodes.put(s, s);\r
+            }\r
+            if (iy == 0)\r
+                break;\r
+            charArray[iy] = charArray[iy - 1];\r
+            --iy;\r
+        }\r
+\r
+        //delete\r
+        word = word.trim();\r
+        charArray = word.toCharArray();\r
+        // Deleting needs at least two characters: an empty word has nothing to\r
+        // delete and a one character word would leave an empty string.\r
+        if (charArray.length > 1) {\r
+            // charArray2 starts as the word without its last character; each\r
+            // round puts the held back character in and takes the previous one\r
+            // out, so every single character deletion is visited once.\r
+            char[] charArray2 = new char[charArray.length - 1];\r
+            System.arraycopy(charArray, 0, charArray2, 0, charArray2.length);\r
+\r
+            a = charArray[charArray.length - 1];\r
+            int ii = charArray2.length;\r
+            while (true) {\r
+                // encode the shortened word (charArray2), not the full word\r
+                String s = getCode(new String(charArray2));\r
+                nearmisscodes.put(s, s);\r
+                if (ii == 0)\r
+                    break;\r
+                b = a;\r
+                a = charArray2[ii - 1];\r
+                charArray2[ii - 1] = b;\r
+                --ii;\r
+            }\r
+        }\r
+\r
+        nearmisscodes.remove(code); //already accounted for in phoneticList\r
+\r
+        Vector wordlist = getWordsFromCode(word, nearmisscodes);\r
+\r
+        if (wordlist.size() == 0 && phoneticList.size() == 0)\r
+            addBestGuess(word, phoneticList, matrix);\r
+\r
+\r
+        // We sort a Vector at the end instead of maintaining a\r
+        // continuously sorted TreeSet because every time you add a collection\r
+        // to a treeset it has to be resorted. It's better to do this operation\r
+        // once at the end.\r
+\r
+        Collections.sort(phoneticList, new Word()); //always sort phonetic matches along the top\r
+        Collections.sort(wordlist, new Word()); //the non-phonetic matches can be listed below\r
+\r
+        phoneticList.addAll(wordlist);\r
+        return phoneticList;\r
+    }\r
+\r
+    /**\r
+     * When we don't come up with any suggestions (probably because the threshold was too strict),\r
+     * then pick the best guesses from those words that have the same phonetic code.\r
+     * <p>\r
+     * This method is only needed to provide backward compatibility.\r
+     * @see #addBestGuess(String, Vector, int[][])\r
+     * @param word - the word we are trying to spell correct\r
+     * @param wordList - the list that will get the best guess\r
+     */\r
+    @SuppressWarnings({ "unused", "unchecked" })\r
+    private void addBestGuess(String word, Vector wordList) {\r
+        addBestGuess(word, wordList, null);\r
+    }\r
+\r
+    /**\r
+     * When we don't come up with any suggestions (probably because the threshold was too strict),\r
+     * then pick the best guesses from those words that have the same phonetic code.\r
+     * @param word - the word we are trying to spell correct\r
+     * @param wordList - the list that will get the best guess; must be empty\r
+     * @param matrix Two dimensional array of int used to calculate\r
+     * edit distance. Allocating this memory outside of the function will greatly improve efficiency.\r
+     * May be <code>null</code>.\r
+     * @throws InvalidParameterException if wordList is not empty\r
+     */\r
+    @SuppressWarnings("unchecked")\r
+    private void addBestGuess(String word, Vector wordList, int[][] matrix) {\r
+        if (matrix == null)\r
+            matrix = new int[0][0];\r
+\r
+        if (wordList.size() != 0)\r
+            throw new InvalidParameterException("the wordList vector must be empty");\r
+\r
+        int bestScore = Integer.MAX_VALUE;\r
+\r
+        String code = getCode(word);\r
+        List simwordlist = getWords(code);\r
+\r
+        LinkedList candidates = new LinkedList();\r
+\r
+        // keep every word that is at least as close as the best one seen so far\r
+        for (Iterator j = simwordlist.iterator(); j.hasNext();) {\r
+            String similar = (String) j.next();\r
+            int distance = EditDistance.getDistance(word, similar, matrix);\r
+            if (distance <= bestScore) {\r
+                bestScore = distance;\r
+                Word goodGuess = new Word(similar, distance);\r
+                candidates.add(goodGuess);\r
+            }\r
+        }\r
+\r
+        //now, only pull out the guesses that had the best score\r
+        for (Iterator iter = candidates.iterator(); iter.hasNext();) {\r
+            Word candidate = (Word) iter.next();\r
+            if (candidate.getCost() == bestScore)\r
+                wordList.add(candidate);\r
+        }\r
+\r
+    }\r
+\r
+    /**\r
+     * Collects, for every phonetic code in <code>codes</code>, the dictionary\r
+     * words whose edit distance to <code>word</code> is below the configured\r
+     * {@link Configuration#SPELL_THRESHOLD}.\r
+     * @param word the word the candidates are compared with\r
+     * @param codes the phonetic codes to look up (only the keys are read)\r
+     * @return a Vector of Word objects, in no particular order\r
+     */\r
+    @SuppressWarnings("unchecked")\r
+    private Vector getWordsFromCode(String word, Hashtable codes) {\r
+        Configuration config = Configuration.getConfiguration();\r
+        Vector result = new Vector();\r
+        int[][] matrix = new int[0][0];\r
+        final int configDistance = config.getInteger(Configuration.SPELL_THRESHOLD);\r
+\r
+        for (Enumeration i = codes.keys(); i.hasMoreElements();) {\r
+            String code = (String) i.nextElement();\r
+\r
+            List simwordlist = getWords(code);\r
+            for (Iterator iter = simwordlist.iterator(); iter.hasNext();) {\r
+                String similar = (String) iter.next();\r
+                int distance = EditDistance.getDistance(word, similar, matrix);\r
+                if (distance < configDistance) {\r
+                    Word w = new Word(similar, distance);\r
+                    result.addElement(w);\r
+                }\r
+            }\r
+        }\r
+        return result;\r
+    }\r
+\r
+    /**\r
+     * Returns the phonetic code representing the word.\r
+     * @param word The word we want the phonetic code.\r
+     * @return The value of the phonetic code for the word.\r
+     */\r
+    public String getCode(String word) {\r
+        return tf.transform(word);\r
+    }\r
+\r
+    /**\r
+     * Returns a list of words that have the same phonetic code.\r
+     * @param phoneticCode The phonetic code common to the list of words\r
+     * @return A list of words having the same phonetic code\r
+     */\r
+    @SuppressWarnings("unchecked")\r
+    protected abstract List getWords(String phoneticCode);\r
+\r
+    /**\r
+     * Returns true if the word is correctly spelled against the current word list.\r
+     * The word is looked up as given and, failing that, in lower case, so that\r
+     * capitalised words are not reported as incorrect.\r
+     */\r
+    @SuppressWarnings("unchecked")\r
+    public boolean isCorrect(String word) {\r
+        List possible = getWords(getCode(word));\r
+        return possible.contains(word) || possible.contains(word.toLowerCase());\r
+    }\r
+}\r
--- /dev/null
+/*\rJazzy - a Java library for Spell Checking\rCopyright (C) 2001 Mindaugas Idzelis\rFull text of license can be found in LICENSE.txt\r\rThis library is free software; you can redistribute it and/or\rmodify it under the terms of the GNU Lesser General Public\rLicense as published by the Free Software Foundation; either\rversion 2.1 of the License, or (at your option) any later version.\r\rThis library is distributed in the hope that it will be useful,\rbut WITHOUT ANY WARRANTY; without even the implied warranty of\rMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\rLesser General Public License for more details.\r\rYou should have received a copy of the GNU Lesser General Public\rLicense along with this library; if not, write to the Free Software\rFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r*/\rpackage com.swabunga.spell.engine;\r\rimport java.io.File;\rimport java.io.FileInputStream;\rimport java.io.FileNotFoundException;\rimport java.io.FileOutputStream;\rimport java.io.IOException;\rimport java.io.ObjectInputStream;\rimport java.io.ObjectOutputStream;\rimport java.io.Serializable;\rimport java.util.HashMap;\rimport java.util.Iterator;\rimport java.util.List;\r\r/**\r * Yet another <code>SpellDictionary</code> this one is based on Damien Guillaume's\r * Diskbased dictionary but adds a cache to try to improve abit on performance.\r *\r * @author Robert Gustavsson\r * @version 0.01\r */\r\rpublic class SpellDictionaryCachedDichoDisk extends SpellDictionaryDichoDisk {\r \r // Only used for testing to measure the effectiveness of the cache.\r static public int hits=0;\r static public int codes=0;\r\r public static final String JAZZY_DIR=".jazzy";\r public static final String PRE_CACHE_FILE_EXT=".pre";\r\r private static int MAX_CACHED=10000;\r\r @SuppressWarnings("unchecked")\r private final HashMap suggestionCache=new HashMap(MAX_CACHED);\r private String preCacheFileName;\r private String preCacheDir;\r\r /**\r * 
Dictionary Convienence Constructor.\r */\r public SpellDictionaryCachedDichoDisk(File wordList)\r throws FileNotFoundException, IOException {\r super(wordList);\r loadPreCache(wordList);\r }\r \r /**\r * Dictionary Convienence Constructor.\r */\r public SpellDictionaryCachedDichoDisk(File wordList, String encoding)\r throws FileNotFoundException, IOException {\r super(wordList, encoding);\r loadPreCache(wordList);\r }\r\r /**\r * Dictionary constructor that uses an aspell phonetic file to\r * build the transformation table.\r */\r\r public SpellDictionaryCachedDichoDisk(File wordList, File phonetic)\r throws FileNotFoundException, IOException {\r super(wordList, phonetic);\r loadPreCache(wordList);\r }\r\r /**\r * Dictionary constructor that uses an aspell phonetic file to\r * build the transformation table.\r */\r public SpellDictionaryCachedDichoDisk(File wordList, File phonetic, String encoding)\r throws FileNotFoundException, IOException {\r super(wordList, phonetic, encoding);\r loadPreCache(wordList);\r }\r\r /**\r * Add a word permanantly to the dictionary (and the dictionary file).\r * <i>not implemented !</i>\r */\r @Override\r public void addWord(String word) {\r System.err.println("error: addWord is not implemented for SpellDictionaryCachedDichoDisk");\r }\r\r /**\r * Clears the cache.\r */\r public void clearCache(){\r suggestionCache.clear();\r }\r\r /**\r * Returns a list of strings (words) for the code.\r */\r @Override\r @SuppressWarnings("unchecked")\r public List getWords(String code) {\r List list;\r codes++;\r if(suggestionCache.containsKey(code)){\r hits++;\r list=getCachedList(code);\r return list;\r }\r list=super.getWords(code);\r addToCache(code,list);\r \r return list;\r }\r /**\r * This method returns the cached suggestionlist and also moves the code to\r * the top of the codeRefQueue to indicate this code has resentlly been\r * referenced.\r */\r @SuppressWarnings("unchecked")\r private List getCachedList(String code){\r CacheObject 
obj=(CacheObject)suggestionCache.get(code);\r obj.setRefTime();\r return obj.getSuggestionList();\r }\r\r /**\r * Adds a code and it's suggestion list to the cache.\r */\r @SuppressWarnings("unchecked")\r private void addToCache(String code, List l){\r String c=null;\r String lowestCode=null;\r long lowestTime=Long.MAX_VALUE;\r Iterator it;\r CacheObject obj;\r\r if(suggestionCache.size()>=MAX_CACHED){\r it=suggestionCache.keySet().iterator();\r while(it.hasNext()){\r c=(String)it.next();\r obj=(CacheObject)suggestionCache.get(c);\r if(obj.getRefTime()==0){\r lowestCode=c;\r break;\r }\r if(lowestTime>obj.getRefTime()){\r lowestCode=c;\r lowestTime=obj.getRefTime();\r }\r }\r suggestionCache.remove(lowestCode);\r } \r suggestionCache.put(code,new CacheObject(l));\r }\r\r /**\r * Load the cache from file. The cach file has the same name as the \r * dico file with the .pre extension added.\r */\r @SuppressWarnings("unchecked")\r private void loadPreCache(File dicoFile)throws IOException{\r String code;\r List suggestions;\r long size,\r time;\r File preFile;\r ObjectInputStream in;\r\r preCacheDir=System.getProperty("user.home")+"/"+JAZZY_DIR;\r preCacheFileName=preCacheDir+"/"+dicoFile.getName()+PRE_CACHE_FILE_EXT;\r //System.out.println(preCacheFileName);\r preFile=new File(preCacheFileName);\r if(!preFile.exists()){\r System.err.println("No precache file");\r return;\r }\r //System.out.println("Precaching...");\r in=new ObjectInputStream(new FileInputStream(preFile));\r try{\r size=in.readLong();\r for(int i=0;i<size;i++){\r code=(String)in.readObject();\r time=in.readLong();\r suggestions=(List)in.readObject();\r suggestionCache.put(code,new CacheObject(suggestions,time));\r }\r }catch(ClassNotFoundException ex){\r System.out.println(ex.getMessage());\r }\r in.close();\r }\r\r /**\r * Saves the current cache to file.\r */\r @SuppressWarnings("unchecked")\r public void saveCache() throws IOException{\r String code;\r CacheObject obj;\r File preFile,\r preDir;\r 
ObjectOutputStream out;\r Iterator it;\r\r if(preCacheFileName==null || preCacheDir==null){\r System.err.println("Precache filename has not been set.");\r return;\r }\r //System.out.println("Saving cache to precache file...");\r preDir=new File(preCacheDir);\r if(!preDir.exists())\r preDir.mkdir();\r preFile=new File(preCacheFileName);\r out=new ObjectOutputStream(new FileOutputStream(preFile));\r it=suggestionCache.keySet().iterator();\r out.writeLong(suggestionCache.size());\r while(it.hasNext()){\r code=(String)it.next();\r obj=(CacheObject)suggestionCache.get(code);\r out.writeObject(code);\r out.writeLong(obj.getRefTime());\r out.writeObject(obj.getSuggestionList());\r }\r out.close();\r }\r\r // INNER CLASSES\r // ------------------------------------------------------------------------\r @SuppressWarnings("serial")\r private class CacheObject implements Serializable{\r \r @SuppressWarnings("unchecked")\r private List suggestions=null;\r private long refTime=0;\r\r @SuppressWarnings("unchecked")\r public CacheObject(List list){\r this.suggestions=list;\r }\r\r @SuppressWarnings("unchecked")\r public CacheObject(List list, long time){\r this.suggestions=list;\r this.refTime=time;\r }\r \r @SuppressWarnings("unchecked")\r public List getSuggestionList(){\r return suggestions;\r }\r\r public void setRefTime(){\r refTime=System.currentTimeMillis();\r }\r\r public long getRefTime(){\r return refTime;\r }\r }\r}\r
\ No newline at end of file
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.EOFException;\r
+import java.io.File;\r
+import java.io.FileNotFoundException;\r
+import java.io.IOException;\r
+import java.io.RandomAccessFile;\r
+import java.util.LinkedList;\r
+import java.util.List;\r
+\r
+/**\r
+ * Another implementation of <code>SpellDictionary</code> that doesn't cache any words in memory. Avoids the huge\r
+ * footprint of <code>SpellDictionaryHashMap</code> at the cost of relatively minor latency. A future version\r
+ * of this class that implements some caching strategies might be a good idea in the future, if there's any\r
+ * demand for it.\r
+ *\r
+ * This implementation requires a special dictionary file, with "code*word" lines sorted by code.\r
+ * It uses a dichotomy (binary search) algorithm to look up words in the dictionary file.\r
+ *\r
+ * @author Damien Guillaume\r
+ * @version 0.1\r
+ */\r
+public class SpellDictionaryDichoDisk extends SpellDictionaryASpell {\r
+\r
+ /** Holds the dictionary file for reading*/\r
+ private RandomAccessFile dictFile = null;\r
+\r
+ /** dictionary and phonetic file encoding */\r
+ private String encoding = null;\r
+\r
+ /**\r
+  * Dictionary convenience constructor: no phonetic file is handed to the\r
+  * superclass and no explicit character encoding is used for the word list.\r
+  * @param wordList The file containing the words list for the dictionary\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * words list file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * file\r
+  */\r
+ public SpellDictionaryDichoDisk(File wordList)\r
+     throws FileNotFoundException, IOException {\r
+     // Same steps as the (File, String) constructor with a null encoding.\r
+     this(wordList, (String) null);\r
+ }\r
+\r
+ /**\r
+  * Dictionary convenience constructor with an explicit encoding for the\r
+  * word list; no phonetic file is handed to the superclass.\r
+  * @param wordList The file containing the words list for the dictionary\r
+  * @param encoding The character set used to decode lines of the word list\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * words list file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * file\r
+  */\r
+ public SpellDictionaryDichoDisk(File wordList, String encoding)\r
+     throws FileNotFoundException, IOException {\r
+     super((File) null);\r
+     // The word list is only ever read, so it is opened read-only.\r
+     this.dictFile = new RandomAccessFile(wordList, "r");\r
+     this.encoding = encoding;\r
+ }\r
+\r
+ /**\r
+  * Dictionary constructor that passes an aspell phonetic file to the\r
+  * superclass and reads the word list without an explicit encoding.\r
+  * @param wordList The file containing the words list for the dictionary\r
+  * @param phonetic The file to use for phonetic transformation of the\r
+  * wordlist.\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * file\r
+  */\r
+ public SpellDictionaryDichoDisk(File wordList, File phonetic)\r
+     throws FileNotFoundException, IOException {\r
+     super(phonetic);\r
+     // Read-only handle used by the dichotomy search.\r
+     this.dictFile = new RandomAccessFile(wordList, "r");\r
+ }\r
+ \r
+ /**\r
+  * Dictionary constructor that passes an aspell phonetic file and an\r
+  * encoding to the superclass, and uses the same encoding to decode the\r
+  * word list.\r
+  * @param wordList The file containing the words list for the dictionary\r
+  * @param phonetic The file to use for phonetic transformation of the\r
+  * wordlist.\r
+  * @param encoding The character set encoding to use\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * file\r
+  */\r
+ public SpellDictionaryDichoDisk(File wordList, File phonetic, String encoding)\r
+     throws FileNotFoundException, IOException {\r
+     super(phonetic, encoding);\r
+     // Read-only handle used by the dichotomy search.\r
+     this.dictFile = new RandomAccessFile(wordList, "r");\r
+     this.encoding = encoding;\r
+ }\r
+ \r
+ /**\r
+ * Add a word permanently to the dictionary (and the dictionary file).\r
+ * <i>not implemented !</i>\r
+ * @param word The word to add.\r
+ */\r
+ public void addWord(String word) {\r
+ System.err.println("error: addWord is not implemented for SpellDictionaryDichoDisk");\r
+ }\r
+\r
+ /**\r
+ * Search the dictionary file for the words corresponding to the code\r
+ * within positions p1 - p2\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+private LinkedList dichoFind(String code, long p1, long p2) throws IOException {\r
+ //System.out.println("dichoFind("+code+","+p1+","+p2+")");\r
+ long pm = (p1 + p2) / 2;\r
+ dictFile.seek(pm);\r
+ String l;\r
+ if (encoding == null)\r
+ l = dictFile.readLine();\r
+ else\r
+ l = dictReadLine();\r
+ pm = dictFile.getFilePointer();\r
+ if (encoding == null)\r
+ l = dictFile.readLine();\r
+ else\r
+ l = dictReadLine();\r
+ long pm2 = dictFile.getFilePointer();\r
+ if (pm2 >= p2)\r
+ return(seqFind(code, p1, p2));\r
+ int istar = l.indexOf('*');\r
+ if (istar == -1)\r
+ throw new IOException("bad format: no * !");\r
+ String testcode = l.substring(0, istar);\r
+ int comp = code.compareTo(testcode);\r
+ if (comp < 0)\r
+ return(dichoFind(code, p1, pm-1));\r
+ else if (comp > 0)\r
+ return(dichoFind(code, pm2, p2));\r
+ else {\r
+ LinkedList l1 = dichoFind(code, p1, pm-1);\r
+ LinkedList l2 = dichoFind(code, pm2, p2);\r
+ String word = l.substring(istar+1);\r
+ l1.add(word);\r
+ l1.addAll(l2);\r
+ return(l1);\r
+ }\r
+ }\r
+ \r
+ @SuppressWarnings("unchecked")\r
+private LinkedList seqFind(String code, long p1, long p2) throws IOException {\r
+ //System.out.println("seqFind("+code+","+p1+","+p2+")");\r
+ LinkedList list = new LinkedList();\r
+ dictFile.seek(p1);\r
+ while (dictFile.getFilePointer() < p2) {\r
+ String l;\r
+ if (encoding == null)\r
+ l = dictFile.readLine();\r
+ else\r
+ l = dictReadLine();\r
+ int istar = l.indexOf('*');\r
+ if (istar == -1)\r
+ throw new IOException("bad format: no * !");\r
+ String testcode = l.substring(0, istar);\r
+ if (testcode.equals(code)) {\r
+ String word = l.substring(istar+1);\r
+ list.add(word);\r
+ }\r
+ }\r
+ return(list);\r
+ }\r
+ \r
+ /**\r
+  * Reads one line of dictFile, starting at the current file pointer, and\r
+  * decodes it with the configured <code>encoding</code>.\r
+  * <p>\r
+  * A line ends at "\n", "\r" or "\r\n" (the terminator is consumed but not\r
+  * returned) or at end of file. Unlike the previous fixed 255-byte buffer\r
+  * version, this implementation keeps the last byte of a final line that\r
+  * has no terminator, does not split long lines, and treats "\r\n" as a\r
+  * single terminator instead of reporting an extra empty line.\r
+  * <p>\r
+  * NOTE(review): line terminators are detected byte by byte, so this is\r
+  * only suitable for encodings in which the bytes 0x0A and 0x0D always mean\r
+  * line feed and carriage return (e.g. ISO-8859-x, UTF-8) - confirm for any\r
+  * other encoding in use.\r
+  * @return the decoded line, or an empty string at end of file\r
+  * @throws IOException on read errors or if the encoding is not supported\r
+  */\r
+ private String dictReadLine() throws IOException {\r
+     // Fully qualified so that no new import is needed in this file.\r
+     java.io.ByteArrayOutputStream line = new java.io.ByteArrayOutputStream(64);\r
+     while (true) {\r
+         int b = dictFile.read();\r
+         if (b == -1 || b == '\n') {\r
+             break;\r
+         }\r
+         if (b == '\r') {\r
+             // Swallow the '\n' of a CRLF pair; otherwise step back so the\r
+             // byte is read as the start of the next line.\r
+             long afterCr = dictFile.getFilePointer();\r
+             if (dictFile.read() != '\n') {\r
+                 dictFile.seek(afterCr);\r
+             }\r
+             break;\r
+         }\r
+         line.write(b);\r
+     }\r
+     return new String(line.toByteArray(), encoding);\r
+ }\r
+ \r
+ /**\r
+  * Returns a list of strings (words) for the code, looked up in the whole\r
+  * dictionary file. An I/O problem is reported on <code>System.err</code>\r
+  * and results in an empty list.\r
+  * @param code The phonetic code common to the list of words\r
+  * @return A list of words having the same phonetic code\r
+  */\r
+ @Override\r
+ @SuppressWarnings("unchecked")\r
+ public List getWords(String code) {\r
+     try {\r
+         // Search range: first byte to last byte of the file.\r
+         return dichoFind(code, 0, dictFile.length() - 1);\r
+     } catch (IOException ex) {\r
+         System.err.println("IOException: " + ex.getMessage());\r
+         return new LinkedList();\r
+     }\r
+ }\r
+\r
+}\r
+\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+/* Created by bgalbs on Jan 30, 2003 at 11:38:39 PM */\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.BufferedOutputStream;\r
+import java.io.BufferedReader;\r
+import java.io.BufferedWriter;\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
+import java.io.FileOutputStream;\r
+import java.io.FileReader;\r
+import java.io.FileWriter;\r
+import java.io.IOException;\r
+import java.io.InputStream;\r
+import java.util.ArrayList;\r
+import java.util.Collections;\r
+import java.util.HashMap;\r
+import java.util.List;\r
+import java.util.Map;\r
+import java.util.StringTokenizer;\r
+import java.util.Vector;\r
+\r
+/**\r
+ * An implementation of <code>SpellDictionary</code> that doesn't cache any words in memory. Avoids the huge\r
+ * footprint of <code>SpellDictionaryHashMap</code> at the cost of relatively minor latency. A future version\r
+ * of this class that implements some caching strategies might be a good idea in the future, if there's any\r
+ * demand for it.\r
+ * <p>\r
+ * This class makes use of the "classic" Java IO library (java.io). However, it could probably benefit from\r
+ * the new IO APIs (java.nio) and it is anticipated that a future version of this class, probably called\r
+ * <code>SpellDictionaryDiskNIO</code> will appear at some point.\r
+ *\r
+ * @author Ben Galbraith (ben@galbraiths.org)\r
+ * @version 0.1\r
+ * @since 0.5\r
+ */\r
+public class SpellDictionaryDisk extends SpellDictionaryASpell {\r
+ private final static String DIRECTORY_WORDS = "words";\r
+ private final static String DIRECTORY_DB = "db";\r
+ private final static String FILE_CONTENTS = "contents";\r
+ private final static String FILE_DB = "words.db";\r
+ private final static String FILE_INDEX = "words.idx";\r
+\r
+ /* maximum number of words an index entry can represent */\r
+ private final static int INDEX_SIZE_MAX = 200;\r
+\r
+ private final File base;\r
+ private final File words;\r
+ private final File db;\r
+ @SuppressWarnings("unchecked")\r
+private Map index;\r
+ /**\r
+ * The flag indicating if the initial preparation or loading of the on \r
+ * disk dictionary is complete.\r
+ */\r
+ protected boolean ready;\r
+\r
+ /* used at time of creation of index to speed up determining the number of words per index entry */\r
+ @SuppressWarnings("unchecked")\r
+private List indexCodeCache = null;\r
+\r
+ /**\r
+ * Construct a spell dictionary on disk. \r
+ * The spell dictionary is created from words list(s) contained in file(s).\r
+ * A words list file is a file with one word per line. Words list files are\r
+ * located in a <code>base/words</code> dictionary where <code>base</code> \r
+ * is the path to <code>words</code> dictionary. The on disk spell \r
+ * dictionary is created in <code>base/db</code> dictionary and contains \r
+ * files:\r
+ * <ul>\r
+ * <li><code>contents</code> list the words files used for spelling.</li>\r
+ * <li><code>words.db</code> the content of words files organized as\r
+ * a <em>database</em> of words.</li>\r
+ * <li><code>words.idx</code> an index file to the <code>words.db</code>\r
+ * file content.</li>\r
+ * </ul>\r
+ * The <code>contents</code> file has a list of \r
+ * <code>filename, size</code> indicating the name and length of each files\r
+ * in the <code>base/words</code> dictionary. If one of theses files was \r
+ * changed, added or deleted before the call to the constructor, the process \r
+ * of producing new or updated <code>words.db</code> and \r
+ * <code>words.idx</code> files is started again.\r
+ * <p/>\r
+ * The spellchecking process is then worked upon the <code>words.db</code>\r
+ * and <code>words.idx</code> files.\r
+ * <p/>\r
+ * \r
+ * NOTE: Do *not* create two instances of this class pointing to the same <code>base</code> unless\r
+ * you are sure that a new dictionary does not have to be created. In the future, some sort of\r
+ * external locking mechanism may be created that handles this scenario gracefully.\r
+ * \r
+ * @param base the base directory in which <code>SpellDictionaryDisk</code> can expect to find\r
+ * its necessary files.\r
+ * @param phonetic the phonetic file used by the spellchecker.\r
+ * @param block if a new word db needs to be created, there can be a considerable delay before\r
+ * the constructor returns. If block is true, this method will block while the db is created\r
+ * and return when done. If block is false, this method will create a thread to create the new\r
+ * dictionary and return immediately.\r
+ * @throws java.io.FileNotFoundException indicates problems locating the\r
+ * files on the system\r
+ * @throws java.io.IOException indicates problems reading the files\r
+ */\r
+ public SpellDictionaryDisk(File base, File phonetic, boolean block) throws FileNotFoundException, IOException {\r
+ super(phonetic);\r
+ this.ready = false;\r
+\r
+ this.base = base;\r
+ this.words = new File(base, DIRECTORY_WORDS);\r
+ this.db = new File(base, DIRECTORY_DB);\r
+\r
+ if (!this.base.exists()) throw new FileNotFoundException("Couldn't find required path '" + this.base + "'");\r
+ if (!this.words.exists()) throw new FileNotFoundException("Couldn't find required path '" + this.words + "'");\r
+ if (!this.db.exists()) db.mkdirs();\r
+\r
+ if (newDictionaryFiles()) {\r
+ if (block) {\r
+ buildNewDictionaryDatabase();\r
+ loadIndex();\r
+ ready = true;\r
+ } else {\r
+ Thread t = new Thread() {\r
+ @Override\r
+ public void run() {\r
+ try {\r
+ buildNewDictionaryDatabase();\r
+ loadIndex();\r
+ ready = true;\r
+ } catch (Exception e) {\r
+ e.printStackTrace();\r
+ }\r
+ }\r
+ };\r
+ t.start();\r
+ }\r
+ } else {\r
+ loadIndex();\r
+ ready = true;\r
+ }\r
+ }\r
+\r
+ /**\r
+  * Builds the words database file, its index file and the contents file\r
+  * for the on disk dictionary.\r
+  * <p>\r
+  * The intermediate sorted word file is a temporary file; it is deleted\r
+  * even when building the database fails, so that failed builds do not\r
+  * leave temporary files behind.\r
+  * @throws FileNotFoundException if a file needed for the build cannot be opened\r
+  * @throws IOException indicates problems reading or writing the files\r
+  */\r
+ protected void buildNewDictionaryDatabase() throws FileNotFoundException, IOException {\r
+     /* combine all dictionary files into one sorted file */\r
+     File sortedFile = buildSortedFile();\r
+\r
+     try {\r
+         /* create the db for the sorted file */\r
+         buildCodeDb(sortedFile);\r
+     } finally {\r
+         sortedFile.delete();\r
+     }\r
+\r
+     /* build contents file */\r
+     buildContentsFile();\r
+ }\r
+\r
+ /**\r
+  * Adds another word to the dictionary. <em>This method is not yet\r
+  * implemented for this class</em> and always fails.\r
+  * @param word The word to add.\r
+  * @throws UnsupportedOperationException always\r
+  */\r
+ public void addWord(String word) {\r
+     final String reason = "addWord not yet implemented (sorry)";\r
+     throw new UnsupportedOperationException(reason);\r
+ }\r
+\r
+ /**\r
+ * Returns a list of words that have the same phonetic code.\r
+ * @param code The phonetic code common to the list of words\r
+ * @return A list of words having the same phonetic code\r
+ */\r
+ @Override\r
+@SuppressWarnings("unchecked")\r
+public List getWords(String code) {\r
+ Vector words = new Vector();\r
+\r
+ int[] posLen = getStartPosAndLen(code);\r
+ if (posLen != null) {\r
+ try {\r
+ InputStream input = new FileInputStream(new File(db, FILE_DB));\r
+ input.skip(posLen[0]);\r
+ byte[] bytes = new byte[posLen[1]];\r
+ input.read(bytes, 0, posLen[1]);\r
+ input.close();\r
+\r
+ String data = new String(bytes);\r
+ String[] lines = split(data, "\n");\r
+ for (String line : lines) {\r
+ String[] s = split(line, ",");\r
+ if (s[0].equals(code)) words.addElement(s[1]);\r
+ }\r
+ } catch (Exception e) {\r
+ e.printStackTrace();\r
+ }\r
+ }\r
+\r
+ return words;\r
+ }\r
+\r
+ /**\r
+  * Tells whether the initial preparation or loading of the on disk\r
+  * dictionary has finished. This matters when the dictionary was created\r
+  * with <code>block == false</code>, where the build runs in its own thread.\r
+  * @return <code>true</code> once the dictionary initial setup is done.\r
+  */\r
+ public boolean isReady() {\r
+     return this.ready;\r
+ }\r
+\r
+ /**\r
+  * Decides whether the word database has to be (re)built, by comparing the\r
+  * word list files currently in the words directory with the\r
+  * <code>contents</code> file written by the last build.\r
+  * <p>\r
+  * Sizes are parsed as <code>long</code>, matching how they are written. A\r
+  * contents file that cannot be parsed is treated as out of date, which\r
+  * forces a rebuild instead of failing with an unchecked exception.\r
+  * @return <code>true</code> if the number of word files, or the name or\r
+  * size of any of them, differs from what the contents file records\r
+  * @throws FileNotFoundException if the words directory cannot be listed\r
+  * @throws IOException indicates problems reading the contents file\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ private boolean newDictionaryFiles() throws FileNotFoundException, IOException {\r
+     /* load in contents file, which indicates the files and sizes of the last db build */\r
+     List contents = new ArrayList();\r
+     File c = new File(db, FILE_CONTENTS);\r
+     if (c.exists()) {\r
+         BufferedReader reader = new BufferedReader(new FileReader(c));\r
+         try {\r
+             String line;\r
+             while ((line = reader.readLine()) != null) {\r
+                 // format of file should be [filename],[size]\r
+                 String[] s = split(line, ",");\r
+                 if (s.length < 2) {\r
+                     return true;\r
+                 }\r
+                 try {\r
+                     contents.add(new FileSize(s[0], Long.parseLong(s[1])));\r
+                 } catch (NumberFormatException e) {\r
+                     // unreadable size: the contents file cannot be trusted\r
+                     return true;\r
+                 }\r
+             }\r
+         } finally {\r
+             reader.close();\r
+         }\r
+     }\r
+\r
+     /* compare this to the actual directory */\r
+     File[] wordFiles = words.listFiles();\r
+     if (wordFiles == null) {\r
+         // listFiles() returns null when the path is not a readable directory\r
+         throw new FileNotFoundException("Couldn't list required path '" + words + "'");\r
+     }\r
+     if (contents.size() != wordFiles.length) {\r
+         // if the size of the contents list and the number of word files are different, it\r
+         // means we've definitely got to reindex\r
+         return true;\r
+     }\r
+\r
+     // check and make sure that all the word files haven't changed on us\r
+     for (File wordFile : wordFiles) {\r
+         FileSize fs = new FileSize(wordFile.getName(), wordFile.length());\r
+         if (!contents.contains(fs)) {\r
+             return true;\r
+         }\r
+     }\r
+     return false;\r
+ }\r
+\r
+ /**\r
+  * Merges every file of the words directory into one temporary file that\r
+  * holds each distinct word once, in sorted order, one word per line.\r
+  * <p>\r
+  * Words are trimmed before the empty check, so blank and whitespace-only\r
+  * lines no longer produce an empty word in the output. Readers and the\r
+  * writer are closed even when an I/O error occurs.\r
+  * @return the temporary file; the caller is responsible for deleting it\r
+  * @throws FileNotFoundException if a word file cannot be opened\r
+  * @throws IOException indicates problems reading or writing the files\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ private File buildSortedFile() throws FileNotFoundException, IOException {\r
+     List w = new ArrayList();\r
+\r
+     /*\r
+      * read every single word into the list. eeek. if this causes problems,\r
+      * we may wish to explore disk-based sorting or more efficient memory-based storage\r
+      */\r
+     File[] wordFiles = words.listFiles();\r
+     for (File wordFile : wordFiles) {\r
+         BufferedReader r = new BufferedReader(new FileReader(wordFile));\r
+         try {\r
+             String line;\r
+             while ((line = r.readLine()) != null) {\r
+                 String word = line.trim();\r
+                 if (word.length() > 0) {\r
+                     w.add(word);\r
+                 }\r
+             }\r
+         } finally {\r
+             r.close();\r
+         }\r
+     }\r
+\r
+     Collections.sort(w);\r
+\r
+     // FIXME - error handling for running out of disk space would be nice.\r
+     File file = File.createTempFile("jazzy", "sorted");\r
+     BufferedWriter writer = new BufferedWriter(new FileWriter(file));\r
+     try {\r
+         // the list is sorted, so duplicates are adjacent\r
+         String prev = null;\r
+         for (int i = 0; i < w.size(); i++) {\r
+             String word = (String) w.get(i);\r
+             if (prev == null || !prev.equals(word)) {\r
+                 writer.write(word);\r
+                 writer.newLine();\r
+             }\r
+             prev = word;\r
+         }\r
+     } finally {\r
+         writer.close();\r
+     }\r
+\r
+     return file;\r
+ }\r
+\r
+ /**\r
+  * Writes the words database (<code>words.db</code>) and its index\r
+  * (<code>words.idx</code>) from the sorted word file.\r
+  * <p>\r
+  * Every word is paired with its phonetic code, the pairs are sorted by\r
+  * code and written as "code,word" lines. Consecutive lines sharing the\r
+  * same index code (see <code>getIndexCode</code>) form one group; the\r
+  * index stores "indexCode,position,length" for each group, with position\r
+  * and length counted in bytes of the db file.\r
+  * <p>\r
+  * The last group is always written to the index. Previously it was\r
+  * skipped when it started at position 0, i.e. when all words fell into a\r
+  * single group, which left the index empty. Streams are closed even when\r
+  * an I/O error occurs.\r
+  * @param sortedWords file with one word per line\r
+  * @throws FileNotFoundException if a file cannot be opened\r
+  * @throws IOException indicates problems reading or writing the files\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ private void buildCodeDb(File sortedWords) throws FileNotFoundException, IOException {\r
+     List codeList = new ArrayList();\r
+\r
+     BufferedReader reader = new BufferedReader(new FileReader(sortedWords));\r
+     try {\r
+         String word;\r
+         while ((word = reader.readLine()) != null) {\r
+             codeList.add(new CodeWord(this.getCode(word), word));\r
+         }\r
+     } finally {\r
+         reader.close();\r
+     }\r
+\r
+     Collections.sort(codeList);\r
+\r
+     // entries are {index code, {position, length}}\r
+     List entries = new ArrayList();\r
+\r
+     String currentCode = null;\r
+     int currentPosition = 0;\r
+     int currentLength = 0;\r
+     BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(new File(db, FILE_DB)));\r
+     try {\r
+         for (int i = 0; i < codeList.size(); i++) {\r
+             CodeWord cw = (CodeWord) codeList.get(i);\r
+             String thisCode = getIndexCode(cw.getCode(), codeList);\r
+             String toWrite = cw.getCode() + "," + cw.getWord() + "\n";\r
+             byte[] bytes = toWrite.getBytes();\r
+\r
+             if (currentCode == null) currentCode = thisCode;\r
+             if (!currentCode.equals(thisCode)) {\r
+                 // a new group starts: record the one just finished\r
+                 entries.add(new Object[]{currentCode, new int[]{currentPosition, currentLength}});\r
+                 currentPosition += currentLength;\r
+                 currentLength = bytes.length;\r
+                 currentCode = thisCode;\r
+             } else {\r
+                 currentLength += bytes.length;\r
+             }\r
+             out.write(bytes);\r
+         }\r
+     } finally {\r
+         out.close();\r
+     }\r
+\r
+     // Record the group that was still open when the loop ended. Its\r
+     // position is legitimately 0 when it is the only group.\r
+     if (currentCode != null && currentLength != 0)\r
+         entries.add(new Object[]{currentCode, new int[]{currentPosition, currentLength}});\r
+\r
+     BufferedWriter writer = new BufferedWriter(new FileWriter(new File(db, FILE_INDEX)));\r
+     try {\r
+         for (int i = 0; i < entries.size(); i++) {\r
+             Object[] o = (Object[]) entries.get(i);\r
+             writer.write(o[0].toString());\r
+             writer.write(",");\r
+             writer.write(String.valueOf(((int[]) o[1])[0]));\r
+             writer.write(",");\r
+             writer.write(String.valueOf(((int[]) o[1])[1]));\r
+             writer.newLine();\r
+         }\r
+     } finally {\r
+         writer.close();\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Records the current word list files in the <code>contents</code> file,\r
+  * one "filename,size" line per file. When the words directory holds no\r
+  * files, the contents file is deleted instead. The writer is closed even\r
+  * when an I/O error occurs.\r
+  * @throws IOException indicates problems writing the contents file\r
+  */\r
+ private void buildContentsFile() throws IOException {\r
+     File[] wordFiles = words.listFiles();\r
+     if (wordFiles.length > 0) {\r
+         BufferedWriter writer = new BufferedWriter(new FileWriter(new File(db, FILE_CONTENTS)));\r
+         try {\r
+             for (File wordFile : wordFiles) {\r
+                 writer.write(wordFile.getName());\r
+                 writer.write(",");\r
+                 writer.write(String.valueOf(wordFile.length()));\r
+                 writer.newLine();\r
+             }\r
+         } finally {\r
+             writer.close();\r
+         }\r
+     } else {\r
+         new File(db, FILE_CONTENTS).delete();\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Loads the index file from disk. The index file accelerates words lookup\r
+  * into the dictionary db file: each "code,position,length" line becomes an\r
+  * entry mapping the code to an <code>int[]{position, length}</code>.\r
+  * The reader is closed even when an I/O error occurs.\r
+  * @throws IOException indicates problems reading the index file\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ protected void loadIndex() throws IOException {\r
+     index = new HashMap();\r
+     File idx = new File(db, FILE_INDEX);\r
+     BufferedReader reader = new BufferedReader(new FileReader(idx));\r
+     try {\r
+         String line;\r
+         while ((line = reader.readLine()) != null) {\r
+             String[] fields = split(line, ",");\r
+             index.put(fields[0], new int[]{Integer.parseInt(fields[1]), Integer.parseInt(fields[2])});\r
+         }\r
+     } finally {\r
+         reader.close();\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Finds the index entry for a phonetic code. The code itself is tried\r
+  * first, then the code shortened by one trailing character at a time; the\r
+  * empty string is never looked up.\r
+  * @param code The phonetic code to locate\r
+  * @return the <code>{position, length}</code> pair stored in the index for\r
+  * the longest matching prefix, or <code>null</code> if none matches\r
+  */\r
+ private int[] getStartPosAndLen(String code) {\r
+     for (String key = code; key.length() > 0; key = key.substring(0, key.length() - 1)) {\r
+         int[] posLen = (int[]) index.get(key);\r
+         if (posLen != null) {\r
+             return posLen;\r
+         }\r
+     }\r
+     return null;\r
+ }\r
+\r
+ /**\r
+ * Chooses the index code (a prefix of <code>code</code>, or\r
+ * <code>code</code> itself) under which entries with this phonetic code\r
+ * are grouped in the index.\r
+ * <p>\r
+ * Codes of length 0 or 1 are returned unchanged. Otherwise a previously\r
+ * chosen prefix from <code>indexCodeCache</code> is reused when\r
+ * <code>code</code> starts with it. Failing that, proper prefixes of\r
+ * increasing length are tried and the first one shared by at most\r
+ * <code>INDEX_SIZE_MAX</code> counted entries of <code>codes</code> is\r
+ * chosen and cached; if no proper prefix qualifies, the full code is\r
+ * returned without being cached.\r
+ * @param code the phonetic code of the entry being written\r
+ * @param codes the <code>CodeWord</code> list being indexed; it is passed\r
+ * to <code>Collections.binarySearch</code>, so it must be sorted by code\r
+ * @return the index code to use for <code>code</code>\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+private String getIndexCode(String code, List codes) {\r
+ if (indexCodeCache == null) indexCodeCache = new ArrayList();\r
+\r
+ if (code.length() <= 1) return code;\r
+\r
+ // reuse an index code chosen earlier if it is a prefix of this code\r
+ for (int i = 0; i < indexCodeCache.size(); i++) {\r
+ String c = (String) indexCodeCache.get(i);\r
+ if (code.startsWith(c)) return c;\r
+ }\r
+\r
+ int foundSize = -1;\r
+ boolean cacheable = false;\r
+ // try proper prefixes of length 1 .. code.length() - 1, shortest first\r
+ for (int z = 1; z < code.length(); z++) {\r
+ String thisCode = code.substring(0, z);\r
+ int count = 0;\r
+ for (int i = 0; i < codes.size();) {\r
+ if (i == 0) {\r
+ // start at an entry whose code equals the prefix if there is one;\r
+ // a negative (not found) result restarts the scan at index 0\r
+ i = Collections.binarySearch(codes, new CodeWord(thisCode, ""));\r
+ if (i < 0) i = 0;\r
+ }\r
+\r
+ CodeWord cw = (CodeWord) codes.get(i);\r
+ if (cw.getCode().startsWith(thisCode)) {\r
+ count++;\r
+ // too many entries for this prefix: stop counting, try a longer one\r
+ if (count > INDEX_SIZE_MAX) break;\r
+ } else if (cw.getCode().compareTo(thisCode) > 0) break; // past the prefix range\r
+ i++;\r
+ }\r
+ if (count <= INDEX_SIZE_MAX) {\r
+ cacheable = true;\r
+ foundSize = z;\r
+ break;\r
+ }\r
+ }\r
+\r
+ // no proper prefix was small enough: fall back to the full code (not cached)\r
+ String newCode = (foundSize == -1) ? code : code.substring(0, foundSize);\r
+ if (cacheable) indexCodeCache.add(newCode);\r
+ return newCode;\r
+ }\r
+\r
+ /**\r
+  * Splits a string into tokens with a <code>StringTokenizer</code>: every\r
+  * character of <code>delimiter</code> separates tokens and empty tokens\r
+  * are not reported.\r
+  * @param input the text to split\r
+  * @param delimiter the delimiter characters\r
+  * @return the tokens in order of appearance (possibly an empty array)\r
+  */\r
+ private static String[] split(String input, String delimiter) {\r
+     StringTokenizer tokens = new StringTokenizer(input, delimiter);\r
+     String[] parts = new String[tokens.countTokens()];\r
+     int next = 0;\r
+     while (next < parts.length) {\r
+         parts[next++] = tokens.nextToken();\r
+     }\r
+     return parts;\r
+ }\r
+\r
+ /**\r
+  * A word together with its phonetic code. Instances are ordered by code\r
+  * (<code>compareTo</code>) but compared for equality by word only, so the\r
+  * ordering is not consistent with <code>equals</code>.\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ private class CodeWord implements Comparable {\r
+     private final String code;\r
+     private final String word;\r
+\r
+     public CodeWord(String code, String word) {\r
+         this.code = code;\r
+         this.word = word;\r
+     }\r
+\r
+     /** @return the phonetic code */\r
+     public String getCode() {\r
+         return this.code;\r
+     }\r
+\r
+     /** @return the word */\r
+     public String getWord() {\r
+         return this.word;\r
+     }\r
+\r
+     /** Two instances are equal when their words are equal. */\r
+     @Override\r
+     public boolean equals(Object o) {\r
+         if (this == o) {\r
+             return true;\r
+         }\r
+         if (o instanceof CodeWord) {\r
+             return word.equals(((CodeWord) o).word);\r
+         }\r
+         return false;\r
+     }\r
+\r
+     @Override\r
+     public int hashCode() {\r
+         return word.hashCode();\r
+     }\r
+\r
+     /** Orders instances by phonetic code; the argument must be a CodeWord. */\r
+     public int compareTo(Object o) {\r
+         CodeWord other = (CodeWord) o;\r
+         return code.compareTo(other.getCode());\r
+     }\r
+ }\r
+\r
+ private class FileSize {\r
+ private final String filename;\r
+ private final long size;\r
+\r
+ public FileSize(String filename, long size) {\r
+ this.filename = filename;\r
+ this.size = size;\r
+ }\r
+\r
+ @SuppressWarnings("unused")\r
+ public String getFilename() {\r
+ return filename;\r
+ }\r
+\r
+ @SuppressWarnings("unused")\r
+ public long getSize() {\r
+ return size;\r
+ }\r
+\r
+ @Override\r
+ public boolean equals(Object o) {\r
+ if (this == o) return true;\r
+ if (!(o instanceof FileSize)) return false;\r
+\r
+ final FileSize fileSize = (FileSize) o;\r
+\r
+ if (size != fileSize.size) return false;\r
+ if (!filename.equals(fileSize.filename)) return false;\r
+\r
+ return true;\r
+ }\r
+\r
+ @Override\r
+ public int hashCode() {\r
+ int result;\r
+ result = filename.hashCode();\r
+ result = (int) (29 * result + size);\r
+ return result;\r
+ }\r
+ }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+/*\r
+ * put your module comment here\r
+ * formatted with JxBeauty (c) johann.langhofer@nextra.at\r
+ */\r
+\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.BufferedReader;\r
+import java.io.File;\r
+import java.io.FileNotFoundException;\r
+import java.io.FileReader;\r
+import java.io.FileWriter;\r
+import java.io.IOException;\r
+import java.io.Reader;\r
+import java.util.Hashtable;\r
+import java.util.List;\r
+import java.util.Vector;\r
+\r
+/**\r
+ * The SpellDictionaryHashMap holds the dictionary\r
+ * <p/>\r
+ * This class is thread safe. Derived classes should ensure that this is preserved.\r
+ * <p/>\r
+ * There are many open source dictionary files. For just a few see:\r
+ * http://wordlist.sourceforge.net/\r
+ * <p/>\r
+ * This dictionary class reads words one per line. Make sure that your word list\r
+ * is formatted in this way (most are).\r
+ * <p/>\r
+ * Note that you must create the dictionary with a word list for the added\r
+ * words to persist.\r
+ */\r
+public class SpellDictionaryHashMap extends SpellDictionaryASpell {\r
+ /** The initial capacity (16 * 1024 entries) of the main dictionary hash\r
+ * table. It would be interesting to see what the performance of a\r
+ * smaller initial capacity is like.\r
+ */\r
+ private final static int INITIAL_CAPACITY = 16 * 1024;\r
+\r
+ /**\r
+ * The hashmap that contains the word dictionary. The map is hashed on the doublemeta\r
+ * code. The map entry contains a LinkedList of words that have the same double meta code.\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+protected Hashtable mainDictionary = new Hashtable(INITIAL_CAPACITY);\r
+\r
+ /** Holds the dictionary file for appending*/\r
+ private File dictFile = null;\r
+\r
+ /**\r
+ * Dictionary Constructor. Creates an empty dictionary: no phonetic file is\r
+ * handed to the superclass and no word list is loaded.\r
+ * @throws java.io.IOException indicates a problem with the file system\r
+ */\r
+ public SpellDictionaryHashMap() throws IOException {\r
+ super((File) null);\r
+ }\r
+\r
+ /**\r
+  * Dictionary Constructor. Loads the words supplied by a reader; no\r
+  * phonetic file is handed to the superclass. The reader is not closed.\r
+  * @param wordList The reader supplying the words list, one word per line\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  */\r
+ public SpellDictionaryHashMap(Reader wordList) throws IOException {\r
+     super((File) null);\r
+     BufferedReader lines = new BufferedReader(wordList);\r
+     createDictionary(lines);\r
+ }\r
+\r
+ /**\r
+  * Dictionary convenience Constructor. Loads the words of a file and\r
+  * remembers the file as the target for <code>addWord</code>. The file\r
+  * reader is closed once the words are loaded; it used to stay open.\r
+  * @param wordList The file containing the words list for the dictionary\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * words list file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * file\r
+  */\r
+ public SpellDictionaryHashMap(File wordList) throws FileNotFoundException, IOException {\r
+     super((File) null);\r
+     BufferedReader in = new BufferedReader(new FileReader(wordList));\r
+     try {\r
+         createDictionary(in);\r
+     } finally {\r
+         in.close();\r
+     }\r
+     dictFile = wordList;\r
+ }\r
+\r
+ /**\r
+  * Dictionary constructor that uses an aspell phonetic file to\r
+  * build the transformation table. The word list file is remembered as the\r
+  * target for <code>addWord</code>, and its reader is closed once the words\r
+  * are loaded (also when loading fails); it used to stay open.\r
+  * @param wordList The file containing the words list for the dictionary\r
+  * @param phonetic The file to use for phonetic transformation of the\r
+  * wordlist.\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * file\r
+  */\r
+ public SpellDictionaryHashMap(File wordList, File phonetic) throws FileNotFoundException, IOException {\r
+     super(phonetic);\r
+     dictFile = wordList;\r
+     BufferedReader in = new BufferedReader(new FileReader(wordList));\r
+     try {\r
+         createDictionary(in);\r
+     } finally {\r
+         in.close();\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Dictionary constructor that uses an aspell phonetic file to\r
+  * build the transformation table. Encoding is used for phonetic file only;\r
+  * default encoding is used for wordList. The word list file is remembered\r
+  * as the target for <code>addWord</code>, and its reader is closed once\r
+  * the words are loaded (also when loading fails); it used to stay open.\r
+  * @param wordList The file containing the words list for the dictionary\r
+  * @param phonetic The file to use for phonetic transformation of the\r
+  * wordlist.\r
+  * @param phoneticEncoding Uses the character set encoding specified\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * or phonetic information\r
+  */\r
+ public SpellDictionaryHashMap(File wordList, File phonetic, String phoneticEncoding) throws FileNotFoundException, IOException {\r
+     super(phonetic, phoneticEncoding);\r
+     dictFile = wordList;\r
+     BufferedReader in = new BufferedReader(new FileReader(wordList));\r
+     try {\r
+         createDictionary(in);\r
+     } finally {\r
+         in.close();\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Dictionary constructor that uses an aspell phonetic reader to\r
+  * build the transformation table. No dictionary file is associated with\r
+  * the instance, so <code>addWord</code> only changes the in-memory\r
+  * dictionary. The word list reader is not closed.\r
+  * @param wordList The reader supplying the words list for the dictionary\r
+  * @param phonetic The reader to use for phonetic transformation of the\r
+  * wordlist.\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * or phonetic information\r
+  */\r
+ public SpellDictionaryHashMap(Reader wordList, Reader phonetic) throws IOException {\r
+     super(phonetic);\r
+     this.dictFile = null;\r
+     BufferedReader lines = new BufferedReader(wordList);\r
+     createDictionary(lines);\r
+ }\r
+\r
+ /**\r
+ * Add words from a file to existing dictionary hashmap.\r
+ * This function can be called as many times as needed to\r
+ * build the internal word list. Duplicates are not added.\r
+ * <p>\r
+ * Note that adding a dictionary does not affect the target\r
+ * dictionary file for the addWord method. That is, addWord() continues\r
+ * to make additions to the dictionary file specified in createDictionary()\r
+ * <P>\r
+ * @param wordList a File object that contains the words, on word per line.\r
+ * @throws FileNotFoundException\r
+ * @throws IOException\r
+ */\r
+ public void addDictionary(File wordList) throws FileNotFoundException, IOException {\r
+ addDictionaryHelper(new BufferedReader(new FileReader(wordList)));\r
+ }\r
+\r
+ /**\r
+ * Add words from a Reader to existing dictionary hashmap.\r
+ * This function can be called as many times as needed to\r
+ * build the internal word list. Duplicates are not added.\r
+ * <p>\r
+ * Note that adding a dictionary does not affect the target\r
+ * dictionary file for the addWord method. That is, addWord() continues\r
+ * to make additions to the dictionary file specified in createDictionary()\r
+ * <P>\r
+ * @param wordList a Reader object that contains the words, on word per line.\r
+ * @throws IOException\r
+ */\r
+ public void addDictionary(Reader wordList) throws IOException {\r
+ addDictionaryHelper(new BufferedReader(wordList));\r
+ }\r
+\r
+ /**\r
+  * Add a word permanently to the dictionary (and the dictionary file).\r
+  * <p>\r
+  * The word is always added to the in-memory dictionary. When a dictionary\r
+  * file is associated with this instance, the word is also appended to it;\r
+  * a failure to write is only reported on <code>System.out</code>. The\r
+  * writer is closed even when writing fails.\r
+  * <p>This needs to be made thread safe (synchronized)</p>\r
+  * @param word The word to add.\r
+  */\r
+ public void addWord(String word) {\r
+     putWord(word);\r
+     if (dictFile == null)\r
+         return;\r
+     try {\r
+         // Open with append.\r
+         FileWriter w = new FileWriter(dictFile.toString(), true);\r
+         try {\r
+             w.write(word);\r
+             w.write("\n");\r
+         } finally {\r
+             w.close();\r
+         }\r
+     } catch (IOException ex) {\r
+         System.out.println("Error writing to dictionary file");\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Constructs the dictionary from a word list.\r
+  * <p>\r
+  * Each word in the reader should be on a separate line; empty lines are\r
+  * skipped. Words are added without checking for duplicates.\r
+  * <p>\r
+  * This is a very slow function. On my machine it takes quite a while to\r
+  * load the data in. I suspect that we could speed this up quite a lot.\r
+  * @param in the reader supplying the words; it is read to the end but not closed\r
+  * @throws IOException indicates problems reading the words\r
+  */\r
+ protected void createDictionary(BufferedReader in) throws IOException {\r
+     for (String line = in.readLine(); line != null; line = in.readLine()) {\r
+         if (line.length() > 0) {\r
+             // store a fresh copy of the line's characters\r
+             putWord(new String(line.toCharArray()));\r
+         }\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Adds the words of a word list to the existing dictionary. A word that\r
+  * is already present in the dictionary is not added again.\r
+  * <p>\r
+  * Each word in the reader should be on a separate line; empty lines are\r
+  * skipped.\r
+  * <p>\r
+  * Note: the phonetic code of a word maps to a vector of words rather than\r
+  * to a hash table. Checking for duplicates therefore means iterating over\r
+  * all the words that share the phonetic code. If the vector-based\r
+  * implementation is important, it may be better to subclass for the cases\r
+  * where duplicates are bad.\r
+  *\r
+  * @param in the reader that supplies one word per line; it is read until\r
+  *           end of stream and is not closed by this method\r
+  * @throws IOException if reading a line fails\r
+  */\r
+ protected void addDictionaryHelper(BufferedReader in) throws IOException {\r
+     String entry;\r
+     while ((entry = in.readLine()) != null) {\r
+         if (entry.length() > 0) {\r
+             // The word is stored as a fresh copy of the line's characters.\r
+             putWordUnique(new String(entry.toCharArray()));\r
+         }\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Stores a word in the dictionary under its code (as computed by\r
+  * getCode). No duplicate check is made: the word is always appended to\r
+  * the list kept for that code.\r
+  * @param word The word to add\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ protected void putWord(String word) {\r
+     String code = getCode(word);\r
+     Vector bucket = (Vector) mainDictionary.get(code);\r
+     if (bucket == null) {\r
+         // First word for this code: register a new list for it.\r
+         bucket = new Vector();\r
+         mainDictionary.put(code, bucket);\r
+     }\r
+     bucket.addElement(word);\r
+ }\r
+\r
+ /**\r
+  * Stores a word in the dictionary unless it is already present. Words are\r
+  * compared ignoring case, so a word that differs only in case from a\r
+  * stored one is considered the same and is not added.\r
+  * @param word The word to add\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ protected void putWordUnique(String word) {\r
+     String code = getCode(word);\r
+     Vector bucket = (Vector) mainDictionary.get(code);\r
+\r
+     if (bucket == null) {\r
+         // First word for this code: nothing to compare against.\r
+         bucket = new Vector();\r
+         bucket.addElement(word);\r
+         mainDictionary.put(code, bucket);\r
+         return;\r
+     }\r
+\r
+     // Only words sharing the same code can be duplicates of this word.\r
+     for (int idx = 0; idx < bucket.size(); idx++) {\r
+         if (word.equalsIgnoreCase((String) bucket.elementAt(idx))) {\r
+             return;\r
+         }\r
+     }\r
+     bucket.addElement(word);\r
+ }\r
+\r
+ /**\r
+  * Returns the list of strings (words) stored for the code.\r
+  * @param code the code to look up in the main dictionary\r
+  * @return the list stored for the code itself (not a copy), or a new\r
+  *         empty list when no word is stored for the code\r
+  */\r
+ @Override\r
+ @SuppressWarnings("unchecked")\r
+ public List getWords(String code) {\r
+     //Check the main dictionary.\r
+     Vector bucket = (Vector) mainDictionary.get(code);\r
+     return (bucket != null) ? bucket : new Vector();\r
+ }\r
+\r
+ /**\r
+ * Returns true if the word is correctly spelled against the current word list.\r
+ */\r
+ @Override\r
+@SuppressWarnings("unchecked")\r
+public boolean isCorrect(String word) {\r
+ List possible = getWords(getCode(word));\r
+ if (possible.contains(word))\r
+ return true;\r
+ //JMH should we always try the lowercase version. If I dont then capitalised\r
+ //words are always returned as incorrect.\r
+ else if (possible.contains(word.toLowerCase()))\r
+ return true;\r
+ return false;\r
+ }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+/**\r
+ * An interface for all Transformators, which take a dictionary word and convert it into its\r
+ * phonetic hash. These phonetic hashes are useful for determining which other words are\r
+ * similar to it, so that those words can be listed as suggestions.\r
+ *\r
+ * @author Robert Gustavsson (robert@lindesign.se)\r
+ */\r
+public interface Transformator {\r
+\r
+ /**\r
+ * Takes the given word and returns the best phonetic hash for it.\r
+ * @param word the word to transform\r
+ * @return the phonetic transformation of the word\r
+ */\r
+ public String transform(String word);\r
+\r
+ /**\r
+ * Gets the list of characters that should be swapped into the misspelled word\r
+ * in order to try to find more suggestions.\r
+ * In general, this list represents all of the unique phonetic characters\r
+ * for this Transformator.\r
+ * <p>\r
+ * The replace list is used in the getSuggestions method.\r
+ * Each letter in the misspelled word is replaced with each character from\r
+ * this list to try to generate more suggestions, which implies l*n tries,\r
+ * where l is the size of the string and n is the size of this list.\r
+ * <p>\r
+ * In addition to that, each of these characters is added to the misspelled word.\r
+ *\r
+ * @return char[] the characters that misspelled words should try as replacements to get more suggestions\r
+ */\r
+ public char[] getReplaceList();\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.util.Comparator;\r
+\r
+/**\r
+ * The Word object holds information for one suggested spelling.\r
+ * It contains both the suggested word string and the distance cost, which represents how different the suggested\r
+ * word is from the misspelling.\r
+ * <p>This class is now immutable.\r
+ * </p>\r
+ */\r
+@SuppressWarnings("unchecked")\r
+public class Word implements Comparator {\r
+ private String word;\r
+ private final int score;\r
+\r
+ /**\r
+ * Constructs a new Word.\r
+ * @param word The text of a word.\r
+ * @param score The word's distance cost\r
+ */\r
+ public Word(String word, int score) {\r
+ this.word = word;\r
+ this.score = score;\r
+ }\r
+\r
+ /**\r
+ * Constructs a new Word.\r
+ */\r
+ public Word() {\r
+ this.word = "";\r
+ this.score = 0;\r
+ }\r
+\r
+ /**\r
+ * Compares two words, mostly for the purpose of sorting words.\r
+ * @param o1 the first word\r
+ * @param o2 the second word\r
+ * @return -1 if the first word is more similar to the misspelled word\r
+ * <br>1 if the second word is more similar to the misspelled word\r
+ * <br>0 if both words are equally similar\r
+ *\r
+ */\r
+ public int compare(Object o1, Object o2) {\r
+ if (((Word) o1).getCost() < ((Word) o2).getCost()) return -1;\r
+ if (((Word) o1).getCost() == ((Word) o2).getCost()) return 0;\r
+ return 1;\r
+ }\r
+\r
+ /**\r
+ * Indicates if this word is equal to another one.\r
+ * @param o The other word to compare\r
+ * @return The indication of equality\r
+ */\r
+ @Override\r
+public boolean equals(Object o) {\r
+ if (o instanceof Word) // added by bd\r
+ return(((Word)o).getWord().equals(getWord()));\r
+ return false;\r
+ }\r
+ \r
+ /**\r
+ * gets suggested spelling\r
+ * @return the actual text of the suggest spelling\r
+ */\r
+ public String getWord() {\r
+ return word;\r
+ }\r
+\r
+ /**\r
+ * sets suggested spelling\r
+ * @param word The text to set for suggestd spelling\r
+ */\r
+ public void setWord(String word) {\r
+ this.word = word;\r
+ }\r
+\r
+ /**\r
+ * A cost measures how close a match this word was to the original word\r
+ * @return 0 if an exact match. Higher numbers are worse matches.\r
+ * @see EditDistance\r
+ */\r
+ public int getCost() {\r
+ return score;\r
+ }\r
+\r
+ /**\r
+ * returns the suggested spelling\r
+ * @return The word's text \r
+ */\r
+ @Override\r
+public String toString() {\r
+ return word;\r
+ }\r
+}\r
+\r
--- /dev/null
+EDIT_DEL1=95\r
+EDIT_DEL2=95\r
+EDIT_SWAP=90\r
+EDIT_SUB=100\r
+EDIT_CASE=10\r
+\r
+#DMV: the following commented out settings do not seem to be used at all\r
+#EDIT_SIMILAR=10\r
+#EDIT_MIN=90\r
+#EDIT_MAX=100\r
+\r
+SPELL_THRESHOLD=140\r
+SPELL_IGNOREUPPERCASE=true\r
+SPELL_IGNOREMIXEDCASE=false\r
+SPELL_IGNOREINTERNETADDRESS=true\r
+SPELL_IGNOREDIGITWORDS=true\r
+SPELL_IGNOREMULTIPLEWORDS=false\r
+SPELL_IGNORESENTENCECAPTILIZATION=true\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.text.BreakIterator;\r
+\r
+/**\r
+ * Defines common methods and behaviour for the various word finding\r
+ * subclasses.\r
+ *\r
+ * @author Anthony Roy (ajr@antroy.co.uk)\r
+ */\r
+public abstract class AbstractWordFinder implements WordFinder {\r
+\r
+ //~ Instance/static variables .............................................\r
+\r
+ /** The word being analyzed */\r
+ protected Word currentWord;\r
+ /** The word following the current one */\r
+ protected Word nextWord;\r
+ /** Indicate if the current word starts a new sentence */\r
+ protected boolean startsSentence;\r
+ /** Holds the text to analyze */\r
+ protected String text;\r
+ /** An iterator to work through the sentence */\r
+ protected BreakIterator sentenceIterator;\r
+\r
+ //~ Constructors ..........................................................\r
+\r
+ /**\r
+ * Creates a new AbstractWordFinder object.\r
+ *\r
+ * @param inText the String to iterate through.\r
+ */\r
+ public AbstractWordFinder(String inText) {\r
+ text = inText;\r
+ setup();\r
+ }\r
+\r
+ /**\r
+ * Creates a new default AbstractWordFinder object.\r
+ */\r
+ public AbstractWordFinder() {\r
+ text = "";\r
+ setup();\r
+ }\r
+ //~ Methods ...............................................................\r
+\r
+ /**\r
+ * This method scans the text from the end of the last word, and returns\r
+ * a new Word object corresponding to the next word.\r
+ *\r
+ * @return the following word.\r
+ */\r
+ public abstract Word next();\r
+\r
+ /**\r
+ * Return the text being searched. May have changed since first set\r
+ * through calls to replace.\r
+ *\r
+ * @return the text being searched.\r
+ */\r
+ public String getText() {\r
+\r
+ return text;\r
+ }\r
+ \r
+ /**\r
+ * Defines the text to search.\r
+ * @param newText The text to be analyzed\r
+ */\r
+ public void setText(String newText) {\r
+ text = newText;\r
+ setup();\r
+ }\r
+\r
+ /**\r
+ * Returns the current word in the iteration .\r
+ *\r
+ * @return the current word.\r
+ * @throws WordNotFoundException current word has not yet been set.\r
+ */\r
+ public Word current() {\r
+\r
+ if (currentWord == null) {\r
+ throw new WordNotFoundException("No Words in current String");\r
+ }\r
+\r
+ return currentWord;\r
+ }\r
+\r
+ /**\r
+ * Indicates if there is some more word to analyze\r
+ * @return true if there are further words in the string.\r
+ */\r
+ public boolean hasNext() {\r
+\r
+ return nextWord != null;\r
+\r
+ }\r
+\r
+ /**\r
+ * Replace the current word in the search with a replacement string.\r
+ *\r
+ * @param newWord the replacement string.\r
+ * @throws WordNotFoundException current word has not yet been set.\r
+ */\r
+ public void replace(String newWord) {\r
+\r
+ if (currentWord == null) {\r
+ throw new WordNotFoundException("No Words in current String");\r
+ }\r
+\r
+ StringBuffer sb = new StringBuffer(text.substring(0, currentWord.getStart()));\r
+ sb.append(newWord);\r
+ sb.append(text.substring(currentWord.getEnd()));\r
+ int diff = newWord.length() - currentWord.getText().length();\r
+ currentWord.setText(newWord);\r
+ /* Added Conditional to ensure a NullPointerException is avoided (11 Feb 2003) */\r
+ if (nextWord != null) {\r
+ nextWord.setStart(nextWord.getStart() + diff);\r
+ }\r
+ text = sb.toString();\r
+\r
+ sentenceIterator.setText(text);\r
+ int start = currentWord.getStart();\r
+ sentenceIterator.following(start);\r
+ startsSentence = sentenceIterator.current() == start;\r
+\r
+ }\r
+\r
+ /**\r
+ * @return true if the current word starts a new sentence.\r
+ * @throws WordNotFoundException current word has not yet been set.\r
+ */\r
+ public boolean startsSentence() {\r
+\r
+ if (currentWord == null) {\r
+ throw new WordNotFoundException("No Words in current String");\r
+ }\r
+\r
+ return startsSentence;\r
+ }\r
+\r
+ /**\r
+ * Return the text being searched. May have changed since first set\r
+ * through calls to replace.\r
+ *\r
+ * @return the text being searched.\r
+ */\r
+ public String toString() {\r
+\r
+ return text;\r
+ }\r
+\r
+ /**\r
+ * Adjusts the sentence iterator and the startSentence flag according to the\r
+ * currentWord.\r
+ * @param wd the wd parameter is not presently used.\r
+ */\r
+ protected void setSentenceIterator(Word wd) {\r
+ int current = sentenceIterator.current();\r
+\r
+ if (current == currentWord.getStart())\r
+ startsSentence = true;\r
+ else {\r
+ startsSentence = false;\r
+\r
+ if (currentWord.getEnd() > current) {\r
+ sentenceIterator.next();\r
+ }\r
+ }\r
+ }\r
+\r
+ /**\r
+ * Indicates if the character at the specified position is acceptable as\r
+ * part of a word. To be acceptable, the character need to be a letter\r
+ * or a digit. It is also acceptable if the character is one of ''', '@',\r
+ * '.' or '_' and is preceded and followed by letter or digit.\r
+ * @param posn The character position to analyze.\r
+ * @return true if the character is a letter or digit\r
+ */\r
+ //Added more intelligent character recognition (11 Feb '03)\r
+ protected boolean isWordChar(int posn) {\r
+ boolean out = false;\r
+\r
+ char curr = text.charAt(posn);\r
+\r
+ if ((posn == 0) || (posn == text.length() - 1)) {\r
+ return Character.isLetterOrDigit(curr);\r
+ }\r
+\r
+ char prev = text.charAt(posn - 1);\r
+ char next = text.charAt(posn + 1);\r
+\r
+\r
+ switch (curr) {\r
+ case '\'':\r
+ case '@':\r
+ case '.':\r
+ case '_':\r
+ out = (Character.isLetterOrDigit(prev) && Character.isLetterOrDigit(next));\r
+ break;\r
+ default :\r
+ out = Character.isLetterOrDigit(curr);\r
+ }\r
+\r
+ return out;\r
+ }\r
+\r
+ /**\r
+ * Indicates if the character at the specified character is acceptable as\r
+ * part of a word. To be acceptable, the character need to be a letter\r
+ * or a digit or a ' (an apostrophe).\r
+ * @param c The character to evaluates if it can be part of a word\r
+ * @return true if the character is a letter, digit or a ' (an apostrophe).\r
+ */\r
+ protected boolean isWordChar(char c) {\r
+ boolean out = false;\r
+\r
+ if (Character.isLetterOrDigit(c) || (c == '\'')) {\r
+ out = true;\r
+ }\r
+\r
+ return out;\r
+ }\r
+\r
+ /**\r
+ * Ignores or skip over text starting from the index position specified \r
+ * if it contains the <code>startIgnore</code>, and until the \r
+ * first non letter or digit character is encountered or end of text is \r
+ * detected.\r
+ * @param index The start position in text.\r
+ * @param startIgnore The character that should be at <code>index</code> \r
+ * position to start skipping through.\r
+ * @return The index position pointing after the skipped characters or the\r
+ * original index if the ignore condition could not be met.\r
+ */\r
+ protected int ignore(int index, char startIgnore) {\r
+ return ignore(index, new Character(startIgnore), null);\r
+ }\r
+\r
+ /**\r
+ * Ignores or skip over text starting from the index position specified \r
+ * if it contains the <code>startIgnore</code>, and until the \r
+ * <code>endIgnore</code> character is encountered or end of text is \r
+ * detected.\r
+ * @param index The start position in text.\r
+ * @param startIgnore The character that should be at <code>index</code> \r
+ * position to start skipping through.\r
+ * @param endIgnore The character which mark the end of skipping through. If\r
+ * the value of endIgnore is <code>null</code>, skipping characters stop\r
+ * at first non letter or digit character.\r
+ * @return The index position pointing after the skipped characters or the\r
+ * original index if the ignore condition could not be met.\r
+ */\r
+ protected int ignore(int index, char startIgnore, char endIgnore) {\r
+ return ignore(index, new Character(startIgnore), new Character(endIgnore));\r
+ }\r
+\r
+ /**\r
+ * Ignores or skip over text starting from the index position specified \r
+ * if it contains the <code>startIgnore</code>, and until the \r
+ * <code>endIgnore</code> character is encountered or end of text is \r
+ * detected.\r
+ * @param index The start position in text.\r
+ * @param startIgnore The character that should be at <code>index</code> \r
+ * position to start skipping through.\r
+ * @param endIgnore The character which mark the end of skipping through. If\r
+ * the value of endIgnore is <code>null</code>, skipping characters stop\r
+ * at first non letter or digit character.\r
+ * @return The index position pointing after the skipped characters or the\r
+ * original index if the ignore condition could not be met.\r
+ */\r
+ protected int ignore(int index, Character startIgnore, Character endIgnore) {\r
+ int newIndex = index;\r
+\r
+ if (newIndex < text.length()) {\r
+ Character curChar = new Character(text.charAt(newIndex));\r
+\r
+ if (curChar.equals(startIgnore)) {\r
+ newIndex++;\r
+ while (newIndex < text.length()) {\r
+ curChar = new Character(text.charAt(newIndex));\r
+ if (endIgnore != null && curChar.equals(endIgnore)){\r
+ newIndex++;\r
+ break;\r
+ } else if (endIgnore == null && !Character.isLetterOrDigit(curChar.charValue())){\r
+ break;\r
+ }\r
+ newIndex++;\r
+ }\r
+ }\r
+ }\r
+\r
+ return newIndex;\r
+ }\r
+\r
+ /**\r
+ * Ignores or skip over text starting from the index position specified \r
+ * if it contains the <code>startIgnore</code> string, and until the \r
+ * <code>endIgnore</code> string is encountered or end of text is \r
+ * detected.\r
+ * @param index The start position in text.\r
+ * @param startIgnore The string that should be at <code>index</code> \r
+ * position to start skipping through.\r
+ * @param endIgnore The string which mark the end of skipping through.\r
+ * @return The index position pointing after the skipped characters or the\r
+ * original index if the ignore condition could not be met.\r
+ */\r
+ protected int ignore(int index, String startIgnore, String endIgnore) {\r
+\r
+ //{{{\r
+ int newIndex = index;\r
+ int len = text.length();\r
+ int slen = startIgnore.length();\r
+ int elen = endIgnore.length();\r
+\r
+ if (!((newIndex + slen) >= len)) {\r
+ String seg = text.substring(newIndex, newIndex + slen);\r
+\r
+ // System.out.println(seg + ":" + seg.length()+ ":" + startIgnore + ":" + slen);\r
+ if (seg.equals(startIgnore)) {\r
+ newIndex += slen;\r
+ cycle: while (true) {\r
+\r
+ if (newIndex == (text.length() - elen)) {\r
+\r
+ break cycle;\r
+ }\r
+\r
+ String ss = text.substring(newIndex, newIndex + elen);\r
+\r
+ if (ss.equals(endIgnore)) {\r
+ newIndex += elen;\r
+\r
+ break cycle;\r
+ } else {\r
+ newIndex++;\r
+ }\r
+ }\r
+ }\r
+ }\r
+\r
+ return newIndex;\r
+ } //}}}\r
+\r
+ /**\r
+ * Initializes the sentenseIterator\r
+ */\r
+ protected void init() {\r
+ sentenceIterator = BreakIterator.getSentenceInstance();\r
+ sentenceIterator.setText(text);\r
+ }\r
+ \r
+ /**\r
+ * Defines the starting positions for text analysis\r
+ */\r
+ private void setup() {\r
+ currentWord = new Word("", 0);\r
+ nextWord = new Word("", 0);\r
+ startsSentence = true;\r
+\r
+ init();\r
+\r
+ try {\r
+ next();\r
+ } catch (WordNotFoundException e) {\r
+ currentWord = null;\r
+ nextWord = null;\r
+ }\r
+ }\r
+\r
+ \r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.text.BreakIterator;\r
+\r
+\r
+/**\r
+ * This class tokenizes an input string.\r
+ *\r
+ * <p>\r
+ * It also allows for the string to be mutated. The text as it stands after\r
+ * spell checking, including any changes made, is available through\r
+ * getContext.\r
+ * </p>\r
+ *\r
+ * @author Jason Height(jheight@chariot.net.au)\r
+ * @author Anthony Roy (ajr@antroy.co.uk)\r
+ */\r
+public abstract class AbstractWordTokenizer implements WordTokenizer {\r
+\r
+    //~ Instance/static variables ...............................................\r
+\r
+    /** The word being analyzed */\r
+    protected Word currentWord;\r
+    /** The word finder used to filter out words which are non pertinent to\r
+     * spell checking */\r
+    protected WordFinder finder;\r
+    /** An iterator to work through the sentence */\r
+    protected BreakIterator sentenceIterator;\r
+\r
+    /** The cumulative count of words that have been processed */\r
+    protected int wordCount = 0;\r
+\r
+    //~ Constructors ............................................................\r
+\r
+    /**\r
+     * Creates a new AbstractWordTokenizer object that searches the text\r
+     * with a DefaultWordFinder.\r
+     *\r
+     * @param text the text to process.\r
+     */\r
+    public AbstractWordTokenizer(String text) {\r
+        this(new DefaultWordFinder(text));\r
+    }\r
+\r
+    /**\r
+     * Creates a new AbstractWordTokenizer object.\r
+     *\r
+     * @param wf the custom WordFinder to use in searching for words.\r
+     */\r
+    public AbstractWordTokenizer(WordFinder wf) {\r
+        this.finder = wf;\r
+    }\r
+\r
+    //~ Methods .................................................................\r
+\r
+    /**\r
+     * Returns the current number of words that have been processed\r
+     *\r
+     * @return number of words so far iterated.\r
+     */\r
+    public int getCurrentWordCount() {\r
+        return wordCount;\r
+    }\r
+\r
+    /**\r
+     * Returns the end of the current word in the text\r
+     *\r
+     * @return index in string of the end of the current word.\r
+     * @throws WordNotFoundException current word has not yet been set.\r
+     */\r
+    public int getCurrentWordEnd() {\r
+        return requireCurrentWord().getEnd();\r
+    }\r
+\r
+    /**\r
+     * Returns the index of the start of the current word in the text\r
+     *\r
+     * @return index in string of the start of the current word.\r
+     * @throws WordNotFoundException current word has not yet been set.\r
+     */\r
+    public int getCurrentWordPosition() {\r
+        return requireCurrentWord().getStart();\r
+    }\r
+\r
+    /**\r
+     * Returns true if there are more words that can be processed in the string\r
+     *\r
+     * @return true if there are further words in the text.\r
+     */\r
+    public boolean hasMoreWords() {\r
+        return finder.hasNext();\r
+    }\r
+\r
+    /**\r
+     * Searches for the next word in the text, makes it the current word\r
+     * and returns its text.\r
+     *\r
+     * @return the string representing the current word.\r
+     * @throws WordNotFoundException search string contains no more words.\r
+     */\r
+    public String nextWord() {\r
+        Word found = finder.next();\r
+        currentWord = found;\r
+        return found.getText();\r
+    }\r
+\r
+    /**\r
+     * Replaces the current word token\r
+     *\r
+     * @param newWord replacement word.\r
+     * @throws WordNotFoundException current word has not yet been set.\r
+     */\r
+    public abstract void replaceWord(String newWord);\r
+\r
+    /**\r
+     * Returns the current text that is being tokenized (includes any changes\r
+     * that have been made)\r
+     *\r
+     * @return the text being tokenized.\r
+     */\r
+    public String getContext() {\r
+        return finder.toString();\r
+    }\r
+\r
+    /**\r
+     * returns true if the current word is at the start of a sentence\r
+     *\r
+     * @return true if the current word starts a sentence.\r
+     * @throws WordNotFoundException current word has not yet been set.\r
+     */\r
+    public boolean isNewSentence() {\r
+        return finder.startsSentence();\r
+    }\r
+\r
+    /**\r
+     * Returns the current word, failing when none has been set yet.\r
+     *\r
+     * @return the current word, never null.\r
+     * @throws WordNotFoundException current word has not yet been set.\r
+     */\r
+    private Word requireCurrentWord() {\r
+        if (currentWord == null) {\r
+            throw new WordNotFoundException("No Words in current String");\r
+        }\r
+        return currentWord;\r
+    }\r
+}
\ No newline at end of file
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.util.List;\r
+\r
+/** This event is fired off by the SpellChecker and is passed to the\r
+ * registered SpellCheckListeners.\r
+ * <p>\r
+ * A listener answers the event by calling exactly one of replaceWord,\r
+ * ignoreWord, addToDictionary or cancel; a second call throws an\r
+ * IllegalStateException.\r
+ *\r
+ * @author Jason Height (jheight@chariot.net.au)\r
+ */\r
+class BasicSpellCheckEvent implements SpellCheckEvent {\r
+\r
+    /**The list holding the suggested Word objects for the misspelt word*/\r
+    @SuppressWarnings("unchecked")\r
+    private final List suggestions;\r
+    /**The misspelt word*/\r
+    private final String invalidWord;\r
+    /**The action to be done when the event returns*/\r
+    private short action = INITIAL;\r
+    /**Contains the word to be replaced if the action is REPLACE or REPLACEALL*/\r
+    private String replaceWord = null;\r
+\r
+    /**The tokenizer's text at the time the event was created*/\r
+    private final String context;\r
+    /**The start of the misspelt word within the context*/\r
+    private final int startPosition;\r
+\r
+\r
+    /**Constructs the SpellCheckEvent\r
+     * @param invalidWord The word that is misspelt\r
+     * @param suggestions A list of Word objects that are suggested to replace the currently misspelt word\r
+     * @param tokenizer The reference to the tokenizer that caused this\r
+     * event to fire. Its context and current word position are captured.\r
+     */\r
+    @SuppressWarnings("unchecked")\r
+    public BasicSpellCheckEvent(String invalidWord, List suggestions, WordTokenizer tokenizer) {\r
+        this.invalidWord = invalidWord;\r
+        this.suggestions = suggestions;\r
+        this.context = tokenizer.getContext();\r
+        this.startPosition = tokenizer.getCurrentWordPosition();\r
+    }\r
+\r
+    /** Returns the list of suggested Word objects\r
+     * @return A list of words phonetically close to the misspelt word\r
+     */\r
+    @SuppressWarnings("unchecked")\r
+    public List getSuggestions() {\r
+        return suggestions;\r
+    }\r
+\r
+    /** Returns the currently misspelt word\r
+     * @return The text misspelt\r
+     */\r
+    public String getInvalidWord() {\r
+        return invalidWord;\r
+    }\r
+\r
+    /** Returns the context in which the misspelt word is used. This is the\r
+     * tokenizer's text as captured when the event was created;\r
+     * getWordContextPosition() gives the position of the word within it.\r
+     * @return The text containing the context\r
+     */\r
+    public String getWordContext() {\r
+        return context;\r
+    }\r
+\r
+    /** Returns the start position of the misspelt word in the context\r
+     * @return The position of the word\r
+     */\r
+    public int getWordContextPosition() {\r
+        return startPosition;\r
+    }\r
+\r
+    /** Returns the action type the user has to handle\r
+     * @return The type of action the event is carrying\r
+     */\r
+    public short getAction() {\r
+        return action;\r
+    }\r
+\r
+    /** Returns the text to replace\r
+     * @return the text of the word to replace\r
+     */\r
+    public String getReplaceWord() {\r
+        return replaceWord;\r
+    }\r
+\r
+    /** Set the action to replace the currently misspelt word with the new word\r
+     * @param newWord The word to replace the currently misspelt word\r
+     * @param replaceAll If set to true, the SpellChecker will replace all\r
+     * further occurrences of the misspelt word without firing a SpellCheckEvent.\r
+     * @throws IllegalStateException if an action has already been set\r
+     */\r
+    public void replaceWord(String newWord, boolean replaceAll) {\r
+        ensureActionUnset();\r
+        if (replaceAll)\r
+            action = REPLACEALL;\r
+        else\r
+            action = REPLACE;\r
+        replaceWord = newWord;\r
+    }\r
+\r
+    /**\r
+     * Set the action to ignore the currently misspelt word.\r
+     * @param ignoreAll If set to true, the SpellChecker will ignore all\r
+     * further occurrences of the misspelt word without firing a SpellCheckEvent.\r
+     * @throws IllegalStateException if an action has already been set\r
+     */\r
+    public void ignoreWord(boolean ignoreAll) {\r
+        ensureActionUnset();\r
+        if (ignoreAll)\r
+            action = IGNOREALL;\r
+        else\r
+            action = IGNORE;\r
+    }\r
+\r
+    /** Set the action to add a new word into the dictionary. This will also replace the\r
+     * currently misspelt word.\r
+     * @param newWord The new word to add to the dictionary.\r
+     * @throws IllegalStateException if an action has already been set\r
+     */\r
+    public void addToDictionary(String newWord) {\r
+        ensureActionUnset();\r
+        action = ADDTODICT;\r
+        replaceWord = newWord;\r
+    }\r
+\r
+    /** Set the action to terminate processing of the spellchecker.\r
+     * @throws IllegalStateException if an action has already been set\r
+     */\r
+    public void cancel() {\r
+        ensureActionUnset();\r
+        action = CANCEL;\r
+    }\r
+\r
+    /** Rejects a second answer to the same event.\r
+     * @throws IllegalStateException if the action is no longer INITIAL\r
+     */\r
+    private void ensureActionUnset() {\r
+        if (action != INITIAL)\r
+            throw new IllegalStateException("The action can can only be set once");\r
+    }\r
+}
\ No newline at end of file
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+/**\r
+ * A basic word finder, which searches text for sequences of letters.\r
+ * \r
+ * @author Anthony Roy (ajr@antroy.co.uk)\r
+ */\r
+public class DefaultWordFinder extends AbstractWordFinder {\r
+\r
+    //~ Instance/static variables ...............................................\r
+\r
+    /**\r
+     * Number of leading characters that can influence\r
+     * <code>SpellChecker.isINETWord</code>: the longest prefix it tests for is\r
+     * "https://", which is 8 characters long.\r
+     */\r
+    private static final int MAX_INET_PREFIX_LENGTH = 8;\r
+\r
+    //~ Constructors ............................................................\r
+\r
+    /**\r
+     * Creates a new DefaultWordFinder object.\r
+     *\r
+     * @param inText the String to search\r
+     */\r
+    public DefaultWordFinder(String inText) {\r
+        super(inText);\r
+    }\r
+\r
+    /**\r
+     * Creates a new DefaultWordFinder object.\r
+     */\r
+    public DefaultWordFinder() {\r
+        super();\r
+    }\r
+\r
+    //~ Methods .................................................................\r
+\r
+    /**\r
+     * This method scans the text from the end of the last word, and returns a\r
+     * new Word object corresponding to the next word.\r
+     *\r
+     * @return the next word.\r
+     * @throws WordNotFoundException search string contains no more words.\r
+     */\r
+    @Override\r
+    public Word next() {\r
+        if (nextWord == null) {\r
+            throw new WordNotFoundException("No more words found.");\r
+        }\r
+        // The previously located word becomes the word handed back to the caller.\r
+        currentWord.copy(nextWord);\r
+        setSentenceIterator(currentWord);\r
+\r
+        // Look ahead, starting at the end of the returned word, for the word after it.\r
+        int i = currentWord.getEnd();\r
+        boolean finished = false;\r
+\r
+        while (i < text.length() && !finished) {\r
+            if (isWordChar(i)) {\r
+                nextWord.setStart(i);\r
+                int end = getNextWordEnd(text, i);\r
+                nextWord.setText(text.substring(i, end));\r
+                finished = true;\r
+            }\r
+            i++;\r
+        }\r
+        if (!finished) {\r
+            // Nothing left: the following call to next() will throw.\r
+            nextWord = null;\r
+        }\r
+\r
+        return currentWord;\r
+    }\r
+\r
+    /**\r
+     * Returns the position in the string <em>after</em> the end of the next word.\r
+     * Note that this return value should not be used as an index into the string\r
+     * without checking first that it is in range, since it is possible for the\r
+     * value <code>text.length()</code> to be returned by this method.\r
+     *\r
+     * @param text the text being scanned; the non-internet branch relies on\r
+     *        <code>isWordChar</code>, which reads the finder's own text field\r
+     * @param startPos index of the first character of the word\r
+     * @return index one past the last character of the word\r
+     */\r
+    private int getNextWordEnd(String text, int startPos) {\r
+        // If we're dealing with a possible 'internet word' we need to provide\r
+        // some special handling. Only the first few characters decide that, so\r
+        // pass a bounded prefix rather than copying (and lower-casing) the whole\r
+        // remainder of the text once per word.\r
+        int prefixEnd = Math.min(text.length(), startPos + MAX_INET_PREFIX_LENGTH);\r
+        if (SpellChecker.isINETWord(text.substring(startPos, prefixEnd))) {\r
+            for (int i = startPos; i < text.length(); i++) {\r
+                char ch = text.charAt(i);\r
+                if (Character.isLetterOrDigit(ch)) {\r
+                    continue;\r
+                }\r
+\r
+                if (ch == '\r' || ch == '\n') {\r
+                    return i;\r
+                }\r
+                // Chop off any characters that might be enclosing the 'internet word'. eg ',",),]\r
+                if (Character.isSpaceChar(ch)) {\r
+                    if (i > 0 && Character.isLetterOrDigit(text.charAt(i - 1))) {\r
+                        return i;\r
+                    } else {\r
+                        // Drop the single non-alphanumeric character just before the space.\r
+                        return i - 1;\r
+                    }\r
+                }\r
+                // Any other punctuation (':', '/', '.', ...) is part of the address.\r
+            }\r
+            return text.length();\r
+        } else {\r
+            for (int i = startPos; i < text.length(); i++) {\r
+                if (!isWordChar(i)) {\r
+                    return i;\r
+                }\r
+            }\r
+            return text.length();\r
+        }\r
+    }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+\r
+import java.text.BreakIterator;\r
+\r
+import javax.swing.text.AttributeSet;\r
+import javax.swing.text.BadLocationException;\r
+import javax.swing.text.Document;\r
+import javax.swing.text.Segment;\r
+import javax.swing.text.StyledDocument;\r
+\r
+\r
+/** This class tokenizes a swing document model. It also allows for the\r
+ * document model to be changed when corrections occur.\r
+ *\r
+ * @author Jason Height (jheight@chariot.net.au)\r
+ */\r
+public class DocumentWordTokenizer implements WordTokenizer {\r
+    /** Holds the start character position of the current word */\r
+    private int currentWordPos = 0;\r
+    /** Holds the end character position of the current word */\r
+    private int currentWordEnd = 0;\r
+    /** Holds the start character position of the next word, or -1 if none was found */\r
+    private int nextWordPos = -1;\r
+    /** The actual text that is being tokenized */\r
+    private final Document document;\r
+    /** The character iterator over the document */\r
+    private final Segment text;\r
+    /** The cumulative count of words that have been processed */\r
+    private int wordCount = 0;\r
+    /** Flag indicating if there are any more tokens (words) left */\r
+    private boolean moreTokens = true;\r
+    /** Is this a special case where the currentWordPos, currentWordEnd and\r
+     * nextWordPos have already been calculated. (see nextWord)\r
+     */\r
+    private boolean first = true;\r
+    /** Sentence iterator over the same segment, used by nextWord to track sentence starts */\r
+    private final BreakIterator sentenceIterator;\r
+    /** Set by nextWord when the current word begins at the sentence iterator's current boundary */\r
+    private boolean startsSentence = true;\r
+\r
+    /**\r
+     * Creates a new DocumentWordTokenizer to work on a document\r
+     * @param document The document to spell check\r
+     */\r
+    public DocumentWordTokenizer(Document document) {\r
+        this.document = document;\r
+        //Create a text segment over the entire document\r
+        text = new Segment();\r
+        sentenceIterator = BreakIterator.getSentenceInstance();\r
+        try {\r
+            document.getText(0, document.getLength(), text);\r
+            sentenceIterator.setText(text);\r
+            currentWordPos = getNextWordStart(text, 0);\r
+            //If the current word pos is -1 then no letter or digit was found\r
+            if (currentWordPos != -1) {\r
+                currentWordEnd = getNextWordEnd(text, currentWordPos);\r
+                nextWordPos = getNextWordStart(text, currentWordEnd);\r
+            } else {\r
+                moreTokens = false;\r
+            }\r
+        } catch (BadLocationException ex) {\r
+            // The document could not be read; report that there is nothing to tokenize.\r
+            moreTokens = false;\r
+        }\r
+    }\r
+\r
+    /** This helper method will return the start character of the next\r
+     * word in the buffer from the start position\r
+     *\r
+     * @param text the segment to scan\r
+     * @param startPos index at which to start scanning\r
+     * @return index of the first letter or digit at or after startPos, or -1 if there is none\r
+     */\r
+    private static int getNextWordStart(Segment text, int startPos) {\r
+        if (startPos <= text.getEndIndex()) {\r
+            for (char ch = text.setIndex(startPos); ch != Segment.DONE; ch = text.next()) {\r
+                if (Character.isLetterOrDigit(ch)) {\r
+                    return text.getIndex();\r
+                }\r
+            }\r
+        }\r
+        return -1;\r
+    }\r
+\r
+    /** This helper method will return the end of the next word in the buffer.\r
+     *\r
+     * @param text the segment to scan\r
+     * @param startPos index of the first character of the word\r
+     * @return index of the first character that is not part of the word, or the\r
+     *         segment's end index if the word runs to the end\r
+     */\r
+    private static int getNextWordEnd(Segment text, int startPos) {\r
+        for (char ch = text.setIndex(startPos); ch != Segment.DONE; ch = text.next()) {\r
+            if (!Character.isLetterOrDigit(ch)) {\r
+                if (ch == '-' || ch == '\'') { // handle ' and - inside words\r
+                    // Peek at the following character, then step back so the\r
+                    // loop's own next() call stays in sync.\r
+                    char ch2 = text.next();\r
+                    text.previous();\r
+                    if (ch2 != Segment.DONE && Character.isLetterOrDigit(ch2))\r
+                        continue;\r
+                }\r
+                return text.getIndex();\r
+            }\r
+        }\r
+        return text.getEndIndex();\r
+    }\r
+\r
+    /**\r
+     * Indicates if there are more words left\r
+     * @return true if more words can be found in the text.\r
+     */\r
+    public boolean hasMoreWords() {\r
+        return moreTokens;\r
+    }\r
+\r
+    /**\r
+     * Sets the current word position at the start of the word containing\r
+     * the char at position pos. This way a call to nextWord() will return\r
+     * this word.\r
+     * <p>\r
+     * NOTE(review): <code>first</code> is only set when the word starts at\r
+     * index 0; for any other position nextWord() advances to nextWordPos\r
+     * before reading. Confirm whether that is intended.\r
+     *\r
+     * @param pos position in the word we want to set as current.\r
+     */\r
+    public void posStartFullWordFrom(int pos){\r
+        currentWordPos=text.getBeginIndex();\r
+        if(pos>text.getEndIndex())\r
+            pos=text.getEndIndex();\r
+        // Walk backwards from pos until a character that cannot be part of a word.\r
+        for (char ch = text.setIndex(pos); ch != Segment.DONE; ch = text.previous()) {\r
+            if (!Character.isLetterOrDigit(ch)) {\r
+                if (ch == '-' || ch == '\'') { // handle ' and - inside words\r
+                    char ch2 = text.previous();\r
+                    text.next();\r
+                    if (ch2 != Segment.DONE && Character.isLetterOrDigit(ch2))\r
+                        continue;\r
+                }\r
+                currentWordPos=text.getIndex()+1;\r
+                break;\r
+            }\r
+        }\r
+        //System.out.println("CurPos:"+currentWordPos);\r
+        if(currentWordPos==0)\r
+            first=true;\r
+        moreTokens=true;\r
+        currentWordEnd = getNextWordEnd(text, currentWordPos);\r
+        nextWordPos = getNextWordStart(text, currentWordEnd + 1);\r
+    }\r
+\r
+    /**\r
+     * Returns the start character position of the current word in the text.\r
+     * @return index of the start of the current word in the text.\r
+     */\r
+    public int getCurrentWordPosition() {\r
+        return currentWordPos;\r
+    }\r
+\r
+    /**\r
+     * Returns an index representing the end location of the current word in the text.\r
+     * @return index of the end of the current word in the text.\r
+     */\r
+    public int getCurrentWordEnd() {\r
+        return currentWordEnd;\r
+    }\r
+\r
+    /**\r
+     * This returns the next word in the iteration. Note that any implementation should return\r
+     * the current word, and then replace the current word with the next word found in the\r
+     * input text (if one exists).\r
+     * @return the next word in the iteration, or null if the word could not be\r
+     *         read from the document.\r
+     */\r
+    public String nextWord() {\r
+        if (!first) {\r
+            currentWordPos = nextWordPos;\r
+            currentWordEnd = getNextWordEnd(text, currentWordPos);\r
+            nextWordPos = getNextWordStart(text, currentWordEnd + 1);\r
+        }\r
+        int current = sentenceIterator.current();\r
+        if (current == currentWordPos)\r
+            startsSentence = true;\r
+        else {\r
+            startsSentence = false;\r
+            // The word runs past the current boundary, so move on to the next one.\r
+            if (currentWordEnd > current)\r
+                sentenceIterator.next();\r
+        }\r
+        //The nextWordPos has already been populated\r
+        String word = null;\r
+        try {\r
+            word = document.getText(currentWordPos, currentWordEnd - currentWordPos);\r
+        } catch (BadLocationException ex) {\r
+            moreTokens = false;\r
+        }\r
+        wordCount++;\r
+        first = false;\r
+        if (nextWordPos == -1)\r
+            moreTokens = false;\r
+        return word;\r
+    }\r
+\r
+    /**\r
+     * Returns the number of word tokens that have been processed thus far\r
+     * @return the number of words found so far.\r
+     */\r
+    public int getCurrentWordCount() {\r
+        return wordCount;\r
+    }\r
+\r
+    /** Replaces the current word token\r
+     * @param newWord The new word to replace the misspelt one\r
+     * @throws RuntimeException wrapping the BadLocationException if the document\r
+     *         rejects the edit\r
+     */\r
+    public void replaceWord(String newWord) {\r
+        // NOTE(review): attr is looked up but insertString below is passed null,\r
+        // so the inserted text does not receive these attributes.\r
+        @SuppressWarnings("unused")\r
+        AttributeSet attr=null;\r
+        if (currentWordPos != -1) {\r
+            try {\r
+                if(document instanceof StyledDocument)\r
+                    attr=((StyledDocument)document).getCharacterElement(currentWordPos).getAttributes();\r
+                document.remove(currentWordPos, currentWordEnd - currentWordPos);\r
+                document.insertString(currentWordPos, newWord, null);\r
+                //Need to reset the segment\r
+                document.getText(0, document.getLength(), text);\r
+            } catch (BadLocationException ex) {\r
+                // Keep the original exception as the cause so the stack trace is not lost.\r
+                throw new RuntimeException(ex.getMessage(), ex);\r
+            }\r
+            //Position after the newly replaced word(s)\r
+            first = true;\r
+            currentWordPos = getNextWordStart(text, currentWordPos + newWord.length());\r
+            if (currentWordPos != -1) {\r
+                currentWordEnd = getNextWordEnd(text, currentWordPos);\r
+                nextWordPos = getNextWordStart(text, currentWordEnd);\r
+                sentenceIterator.setText(text);\r
+                sentenceIterator.following(currentWordPos);\r
+            } else\r
+                moreTokens = false;\r
+        }\r
+    }\r
+\r
+    /** Returns the current text that is being tokenized (includes any changes\r
+     * that have been made)\r
+     * @return The text, including changes.\r
+     */\r
+    public String getContext() {\r
+        return text.toString();\r
+    }\r
+\r
+    /** Indicates if the current word is at the start of a sentence\r
+     * @return true if the current word is at the start of a sentence\r
+     */\r
+    public boolean isNewSentence() {\r
+        // BreakIterator doesn't work when the first word in a sentence is not capitalised,\r
+        // but we need to check for capitalisation\r
+        if (startsSentence || currentWordPos < 2)\r
+            return(true);\r
+\r
+        // Fall back to looking at the two characters before the word: a full\r
+        // stop, optionally surrounded by whitespace, counts as a sentence break.\r
+        String textBefore = null;\r
+        try {\r
+            textBefore = document.getText(currentWordPos-2, 2);\r
+        } catch (BadLocationException ex) {\r
+            return(false);\r
+        }\r
+        return(textBefore != null && ".".equals(textBefore.trim()));\r
+    }\r
+}
\ No newline at end of file
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.io.BufferedReader;\r
+import java.io.File;\r
+import java.io.FileReader;\r
+import java.io.IOException;\r
+\r
+\r
+/**\r
+ * This class tokenizes an input file.\r
+ *\r
+ * <p>\r
+ * Any takers to do this efficiently?? doesn't need to replace any words to\r
+ * start with. I need this to get an idea of how quick the spell checker is.\r
+ * </p>\r
+ */\r
+public class FileWordTokenizer extends AbstractWordTokenizer {\r
+\r
+    //~ Instance/static variables ...............................................\r
+\r
+//  private File inFile;\r
+\r
+    //~ Constructors ............................................................\r
+\r
+    /**\r
+     * Creates a new FileWordTokenizer object.\r
+     *\r
+     * @param inputFile the file to work upon\r
+     */\r
+    public FileWordTokenizer(File inputFile) {\r
+        super(stringValue(inputFile));\r
+    }\r
+\r
+    /**\r
+     * Creates a new FileWordTokenizer object and associates a WordFinder with its\r
+     * processing.\r
+     *\r
+     * @param inputFile the file to work upon.\r
+     * @param finder the specialized processing for words.\r
+     */\r
+    public FileWordTokenizer(File inputFile, WordFinder finder) {\r
+        super(finder);\r
+        finder.setText(stringValue(inputFile));\r
+    }\r
+    //~ Methods .................................................................\r
+\r
+    /**\r
+     * Replacing words is not supported for files: this implementation\r
+     * deliberately does nothing and never throws.\r
+     *\r
+     * @param s the new string (ignored)\r
+     */\r
+    @Override\r
+    public void replaceWord(String s) {\r
+    }\r
+\r
+    /**\r
+     * Reads the whole file into a String using the platform default charset\r
+     * (the behaviour of <code>FileReader</code>).\r
+     * <p>\r
+     * I/O failures are reported on <code>System.err</code> and whatever was\r
+     * read up to that point is returned; the reader is always closed.\r
+     *\r
+     * @param inFile the file to read\r
+     * @return the characters read from the file, possibly empty\r
+     */\r
+    private static String stringValue(File inFile) {\r
+        StringBuilder out = new StringBuilder();\r
+        BufferedReader in = null;\r
+\r
+        try {\r
+            in = new BufferedReader(new FileReader(inFile));\r
+            char[] c = new char[100];\r
+            int count;\r
+            while ((count = in.read(c, 0, c.length)) != -1) {\r
+                out.append(c, 0, count);\r
+            }\r
+        } catch (IOException e) {\r
+            System.err.println("File input error trying to open " + inFile.toString() + " : " + e);\r
+        } finally {\r
+            // Close in finally so a failed read does not leak the file handle.\r
+            if (in != null) {\r
+                try {\r
+                    in.close();\r
+                } catch (IOException e) {\r
+                    System.err.println("File input error trying to open " + inFile.toString() + " : " + e);\r
+                }\r
+            }\r
+        }\r
+        return out.toString();\r
+    }\r
+}
\ No newline at end of file
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+\r
+/**\r
+ * A word finder Java source files, which searches text for sequences of \r
+ * letters formated as Java comments.\r
+ *\r
+ * @author Anthony Roy (ajr@antroy.co.uk)\r
+ */\r
+public class JavaWordFinder extends AbstractWordFinder {\r
+\r
+ //~ Instance/static variables ...............................................\r
+\r
+ private boolean inComment;\r
+\r
+ //~ Constructors ............................................................\r
+\r
+ /**\r
+ * Creates a new JavaWordFinder object.\r
+ *\r
+ * @param inText the String to search\r
+ */\r
+ public JavaWordFinder(String inText) {\r
+ super(inText);\r
+ }\r
+\r
+ /**\r
+ * Creates a new JavaWordFinder object.\r
+ */\r
+ public JavaWordFinder() {\r
+ super();\r
+ }\r
+\r
+ //~ Methods .................................................................\r
+\r
+\r
+ /**\r
+ * This method scans the text from the end of the last word, and returns a\r
+ * new Word object corresponding to the next word.\r
+ *\r
+ * @return the next word.\r
+ * @throws WordNotFoundException search string contains no more words.\r
+ */\r
+ @Override\r
+public Word next() {\r
+\r
+ if (nextWord == null) {\r
+ throw new WordNotFoundException("No more words found.");\r
+ }\r
+\r
+ currentWord.copy(nextWord);\r
+\r
+ @SuppressWarnings("unused")\r
+ int current = sentenceIterator.current();\r
+ setSentenceIterator(currentWord);\r
+\r
+ int i = currentWord.getEnd();\r
+ boolean finished = false;\r
+ boolean started = false;\r
+\r
+ search:\r
+ while (i < text.length() && !finished) {\r
+\r
+ i = ignore(i, '@');\r
+ i = ignore(i, "<code>", "</code>");\r
+ i = ignore(i, "<CODE>", "</CODE>");\r
+ i = ignore(i, '<', '>');\r
+\r
+ if (i >= text.length()) break search;\r
+\r
+ char currentLetter = text.charAt(i);\r
+ if (inComment) {\r
+ //Reset on new line.\r
+ if (currentLetter == '\n') {\r
+ inComment = false;\r
+ i++;\r
+ continue search;\r
+ } else if (!isWordChar(i)) {\r
+ i++;\r
+ continue search;\r
+ }\r
+ //Find words.\r
+ while (i < text.length() - 1) {\r
+ if (!started && isWordChar(i)) {\r
+ nextWord.setStart(i);\r
+ started = true;\r
+ } else if (started && !isWordChar(i)) {\r
+ nextWord.setText(text.substring(nextWord.getStart(), i));\r
+ finished = true;\r
+ break search;\r
+ }\r
+\r
+ currentLetter = text.charAt(++i);\r
+ }\r
+ } else if (currentLetter == '*') {\r
+ inComment = true;\r
+ i++;\r
+ } else {\r
+ i++;\r
+ }\r
+ }\r
+\r
+ if (!started) {\r
+ nextWord = null;\r
+ } else if (!finished) {\r
+ nextWord.setText(text.substring(nextWord.getStart(), i));\r
+ }\r
+\r
+ return currentWord;\r
+ }\r
+\r
+ /**\r
+ * Initializes this word finder\r
+ */\r
+\r
+ @Override\r
+protected void init() {\r
+// sentenceIterator = BreakIterator.getSentenceInstance();\r
+// sentenceIterator.setText(text);\r
+ super.init();\r
+ inComment = false;\r
+ }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.util.List;\r
+\r
+/**\r
+ * This event is fired off by the SpellChecker and is passed to the\r
+ * registered SpellCheckListeners\r
+ * <p/>\r
+ * As far as I know, we will only require one implementation of the SpellCheckEvent\r
+ * (BasicSpellCheckEvent) but I have defined this interface just in case. The\r
+ * BasicSpellCheckEvent implementation is currently package private.\r
+ *\r
+ * @author Jason Height (jheight@chariot.net.au)\r
+ */\r
+public interface SpellCheckEvent {\r
+ /** Field indicating that the incorrect word should be ignored*/\r
+ public static final short IGNORE = 0;\r
+ /** Field indicating that the incorrect word should be ignored forever*/\r
+ public static final short IGNOREALL = 1;\r
+ /** Field indicating that the incorrect word should be replaced*/\r
+ public static final short REPLACE = 2;\r
+ /** Field indicating that the incorrect word should be replaced always*/\r
+ public static final short REPLACEALL = 3;\r
+ /** Field indicating that the incorrect word should be added to the dictionary*/\r
+ public static final short ADDTODICT = 4;\r
+ /** Field indicating that the spell checking should be terminated*/\r
+ public static final short CANCEL = 5;\r
+ /** Initial case for the action */\r
+ public static final short INITIAL = -1;\r
+\r
+ /** Returns the list of suggested Word objects\r
+ * @return A list of words phonetically close to the misspelt word\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+public List getSuggestions();\r
+\r
+ /** Returns the currently misspelt word\r
+ * @return The text misspelt\r
+ */\r
+ public String getInvalidWord();\r
+\r
+ /** Returns the context in which the misspelt word is used\r
+ * @return The text containing the context\r
+ */\r
+ public String getWordContext();\r
+\r
+ /** Returns the start position of the misspelt word in the context\r
+ * @return The position of the word\r
+ */\r
+ public int getWordContextPosition();\r
+\r
+ /** Returns the action type the user has to handle\r
+ * @return The type of action the event is carrying\r
+ */\r
+ public short getAction();\r
+\r
+ /** Returns the text to replace\r
+ * @return the text of the word to replace\r
+ */\r
+ public String getReplaceWord();\r
+\r
+ /** Set the action to replace the currently misspelt word with the new word\r
+ * @param newWord The word to replace the currently misspelt word\r
+ * @param replaceAll If set to true, the SpellChecker will replace all\r
+ * further occurrences of the misspelt word without firing a SpellCheckEvent.\r
+ */\r
+ public void replaceWord(String newWord, boolean replaceAll);\r
+\r
+ /** Set the action it ignore the currently misspelt word.\r
+ * @param ignoreAll If set to true, the SpellChecker will replace all\r
+ * further occurrences of the misspelt word without firing a SpellCheckEvent.\r
+ */\r
+ public void ignoreWord(boolean ignoreAll);\r
+\r
+ /** Set the action to add a new word into the dictionary. This will also replace the\r
+ * currently misspelt word.\r
+ *@param newWord The new word to add\r
+ */\r
+ public void addToDictionary(String newWord);\r
+\r
+ /** Set the action to terminate processing of the spell checker.\r
+ */\r
+ public void cancel();\r
+}
\ No newline at end of file
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.util.EventListener;\r
+\r
+/**\r
+ * Listener interface for the event based spell checker. Implementations are\r
+ * notified of each spelling error through {@link #spellingError}.\r
+ *\r
+ * @author Jason Height (jheight@chariot.net.au)\r
+ */\r
+public interface SpellCheckListener extends EventListener {\r
+\r
+    /**\r
+     * Called to hand a spelling error to this listener.\r
+     *\r
+     * @param event The event to handle\r
+     */\r
+    void spellingError(SpellCheckEvent event);\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.io.IOException;\r
+import java.util.ArrayList;\r
+import java.util.Enumeration;\r
+import java.util.HashMap;\r
+import java.util.Hashtable;\r
+import java.util.Iterator;\r
+import java.util.List;\r
+import java.util.Map;\r
+import java.util.Vector;\r
+\r
+import com.swabunga.spell.engine.Configuration;\r
+import com.swabunga.spell.engine.SpellDictionary;\r
+import com.swabunga.spell.engine.SpellDictionaryHashMap;\r
+import com.swabunga.spell.engine.Word;\r
+import com.swabunga.util.VectorUtility;\r
+\r
+\r
+/**\r
+ * This is the main class for spell checking (using the new event based spell\r
+ * checking). \r
+ * <p/>\r
+ * By default, the class creates a user dictionary to accumulate added words.\r
+ * Since this user dictionary has no file assigned to persist added words, they\r
+ * will only be retained for the lifetime of the spell checker instance.\r
+ * If you set a user dictionary like \r
+ * {@link com.swabunga.spell.engine.SpellDictionaryHashMap SpellDictionaryHashMap}\r
+ * to persist the added words, the user dictionary will be able to\r
+ * grow and be available across different invocations of the spell checker.\r
+ *\r
+ * @author Jason Height (jheight@chariot.net.au)\r
+ * 19 June 2002\r
+ */\r
+public class SpellChecker {\r
+ /** Flag indicating that the Spell Check completed without any errors present*/\r
+ public static final int SPELLCHECK_OK = -1;\r
+ /** Flag indicating that the Spell Check completed due to user cancellation*/\r
+ public static final int SPELLCHECK_CANCEL = -2;\r
+\r
+ /** Registered listeners; elements are cast to SpellCheckListener when events are fired. */\r
+ @SuppressWarnings("unchecked")\r
+private final Vector eventListeners = new Vector();\r
+ /** Dictionaries used for looking up words, in the order they were added via addDictionary. */\r
+ @SuppressWarnings("unchecked")\r
+private final Vector dictionaries = new Vector();\r
+ /** Dictionary that receives words added by the user; created in the constructor, replaceable via setUserDictionary. */\r
+ private SpellDictionary userdictionary;\r
+\r
+ /** Configuration obtained from Configuration.getConfiguration() and exposed through getConfiguration(). */\r
+ private final Configuration config = Configuration.getConfiguration();\r
+\r
+ /**This variable holds all of the words that are to be always ignored */\r
+ @SuppressWarnings("unchecked")\r
+private Vector ignoredWords = new Vector();\r
+ /** Words remembered as "Replace All"; cleared together with ignoredWords by reset(). */\r
+ @SuppressWarnings("unchecked")\r
+private Hashtable autoReplaceWords = new Hashtable();\r
+ \r
+ // added caching - bd\r
+ // For cached operation a separate user dictionary is required\r
+ // NOTE(review): cache, threshold and cacheSize are not used in the code visible here;\r
+ // presumably they are managed by the caching methods further down - confirm.\r
+ @SuppressWarnings("unchecked")\r
+private Map cache;\r
+ private int threshold = 0;\r
+ private int cacheSize = 0;\r
+ \r
+\r
+ /**\r
+  * Constructs the SpellChecker with an empty in-memory user dictionary and\r
+  * no lookup dictionaries; add those with addDictionary.\r
+  *\r
+  * @throws RuntimeException if the user dictionary cannot be created; this is\r
+  *         not expected because no phonetic file is involved. The underlying\r
+  *         IOException is attached as the cause.\r
+  */\r
+ public SpellChecker() {\r
+     try {\r
+         userdictionary = new SpellDictionaryHashMap();\r
+     } catch (IOException e) {\r
+         // Keep the IOException as the cause so the failure can be diagnosed if it ever happens.\r
+         throw new RuntimeException("this exception should never happen because we are using null phonetic file", e);\r
+     }\r
+ }\r
+\r
+ /**\r
+ * Constructs the SpellChecker with one initial lookup dictionary. Delegates\r
+ * to the no-argument constructor and then registers the dictionary through\r
+ * addDictionary. This constructor does not set a threshold itself.\r
+ *\r
+ * @param dictionary The dictionary used for looking up words.\r
+ * @throws IllegalArgumentException if dictionary is null (raised by addDictionary)\r
+ */\r
+ public SpellChecker(SpellDictionary dictionary) {\r
+ this();\r
+ addDictionary(dictionary);\r
+ }\r
+\r
+\r
+ /**\r
+ * Constructs the SpellChecker with a threshold. The threshold is written to\r
+ * the configuration under Configuration.SPELL_THRESHOLD.\r
+ * NOTE(review): the configuration comes from Configuration.getConfiguration();\r
+ * if that instance is shared, this setting affects other checkers too - confirm.\r
+ *\r
+ * @param dictionary the dictionary used for looking up words.\r
+ * @param threshold the cost value above which any suggestions are \r
+ * thrown away\r
+ * @throws IllegalArgumentException if dictionary is null (raised by addDictionary)\r
+ */\r
+ public SpellChecker(SpellDictionary dictionary, int threshold) {\r
+ this(dictionary);\r
+ config.setInteger(Configuration.SPELL_THRESHOLD, threshold);\r
+ }\r
+\r
+ /**\r
+  * Appends a dictionary to the end of the list of dictionaries used for\r
+  * looking up words. This makes it possible to register the base language\r
+  * dictionary first, then a more technical one, and so on.\r
+  *\r
+  * @param dictionary the dictionary to add at the end of the dictionary list.\r
+  * @throws IllegalArgumentException if dictionary is null\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ public void addDictionary(SpellDictionary dictionary) {\r
+     if (null == dictionary)\r
+         throw new IllegalArgumentException("dictionary must be non-null");\r
+     dictionaries.add(dictionary);\r
+ }\r
+\r
+ /**\r
+ * Registers the user dictionary to which words are added, replacing the\r
+ * one created by the constructor. The argument is stored as given; no null\r
+ * check is performed here.\r
+ *\r
+ * @param dictionary the dictionary to use when the user specifies a new word\r
+ * to add.\r
+ */\r
+ public void setUserDictionary(SpellDictionary dictionary) {\r
+ userdictionary = dictionary;\r
+ }\r
+\r
+ /**\r
+ * Supplies the instance of the configuration holding the spell checking engine\r
+ * parameters. The same instance is returned on every call, so changes made\r
+ * through it are seen by this checker.\r
+ *\r
+ * @return Current Configuration\r
+ */\r
+ public Configuration getConfiguration() {\r
+ return config;\r
+ }\r
+\r
+ /**\r
+  * Registers a SpellCheckListener by appending it to the listener list.\r
+  *\r
+  * @param listener the listener to register\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ public void addSpellCheckListener(SpellCheckListener listener) {\r
+     this.eventListeners.add(listener);\r
+ }\r
+\r
+\r
+ /**\r
+  * Unregisters a SpellCheckListener. Only the first matching entry in the\r
+  * listener list is removed; nothing happens if the listener is not registered.\r
+  *\r
+  * @param listener The listener to be removed from the listeners list.\r
+  */\r
+ public void removeSpellCheckListener(SpellCheckListener listener) {\r
+     this.eventListeners.remove(listener);\r
+ }\r
+\r
+\r
+ /**\r
+  * Delivers a spell check event to every registered listener, starting with\r
+  * the most recently added listener and ending with the first one.\r
+  *\r
+  * @param event The event that needs to be processed by the spell checking\r
+  * system.\r
+  */\r
+ protected void fireSpellCheckEvent(SpellCheckEvent event) {\r
+     int index = eventListeners.size();\r
+     while (--index >= 0) {\r
+         SpellCheckListener listener = (SpellCheckListener) eventListeners.get(index);\r
+         listener.spellingError(event);\r
+     }\r
+ }\r
+\r
+\r
+ /**\r
+ * This method clears the words that are currently being remembered as\r
+ * <code>Ignore All</code> words and <code>Replace All</code> words.\r
+ * Both collections are replaced by fresh empty instances rather than\r
+ * cleared in place.\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+public void reset() {\r
+ ignoredWords = new Vector();\r
+ autoReplaceWords = new Hashtable();\r
+ }\r
+\r
+\r
+ /**\r
+ * Checks the text string.\r
+ * <p>\r
+ * Wraps the text in a StringWordTokenizer, runs checkSpelling over it and\r
+ * returns the tokenizer's context afterwards. The value returned by\r
+ * checkSpelling is discarded.\r
+ *\r
+ * @param text The text that needs to be spell checked\r
+ * @return The text after spell checking\r
+ * @deprecated use checkSpelling(WordTokenizer)\r
+ */\r
+ @Deprecated\r
+public String checkString(String text) {\r
+ StringWordTokenizer tokens = new StringWordTokenizer(text);\r
+ checkSpelling(tokens);\r
+ return tokens.getContext();\r
+ }\r
+\r
+\r
+ /**\r
+  * Tells whether the word being spell checked contains at least one digit.\r
+  *\r
+  * @param word The word to analyze for digits.\r
+  * @return true if the word contains at least one digit; false otherwise,\r
+  *         including for the empty string.\r
+  */\r
+ private final static boolean isDigitWord(String word) {\r
+     for (char letter : word.toCharArray()) {\r
+         if (Character.isDigit(letter))\r
+             return true;\r
+     }\r
+     return false;\r
+ }\r
+\r
+\r
+ /**\r
+  * Verifies if the word that is being spell checked starts like an Internet\r
+  * address. The method looks, ignoring case, for a typical protocol or the\r
+  * habitual string at the start of the word:\r
+  * <ul>\r
+  * <li>http://</li>\r
+  * <li>ftp://</li>\r
+  * <li>https://</li>\r
+  * <li>ftps://</li>\r
+  * <li>www.</li>\r
+  * </ul>\r
+  *\r
+  * Only the leading characters are compared. Since the 'word' that is\r
+  * passed in may in fact contain the rest of the document to be checked,\r
+  * no lower-cased copy of the whole argument is created.\r
+  *\r
+  * One limitation is that this method cannot currently recognize email\r
+  * addresses. For the same reason, it is not (yet!) a good idea to scan\r
+  * for the @ character.\r
+  *\r
+  * @param word The word to analyze for an Internet address.\r
+  * @return true if this word looks like an Internet address.\r
+  */\r
+ public final static boolean isINETWord(String word) {\r
+   final String[] prefixes = {"http://", "www.", "ftp://", "https://", "ftps://"};\r
+   for (String prefix : prefixes) {\r
+     // regionMatches is false when the word is shorter than the prefix, and its\r
+     // case-insensitive comparison does not depend on the default locale.\r
+     if (word.regionMatches(true, 0, prefix, 0, prefix.length())) {\r
+       return true;\r
+     }\r
+   }\r
+   return false;\r
+ }\r
+\r
+\r
+ /**\r
+  * Tells whether the word being spell checked is free of lower case\r
+  * characters. Characters that are not lower case according to\r
+  * <code>Character.isLowerCase</code> (digits, punctuation, ...) do not\r
+  * prevent a true result, and an empty word also yields true.\r
+  *\r
+  * @param word The word to analyze for upper case characters\r
+  * @return true if this word contains no lower case character\r
+  */\r
+ private final static boolean isUpperCaseWord(String word) {\r
+   for (char letter : word.toCharArray()) {\r
+     if (Character.isLowerCase(letter)) {\r
+       return false;\r
+     }\r
+   }\r
+   return true;\r
+ }\r
+\r
+\r
+ /**\r
+  * Verifies if the word that is being spell checked contains lower and\r
+  * upper cased characters. Note that a word beginning with an upper cased\r
+  * character at the start of a sentence is not considered a mixed case\r
+  * word because of that first character alone.\r
+  * <p>\r
+  * An empty word is never mixed case; false is returned for it.\r
+  *\r
+  * @param word The word to analyze for mixed case characters\r
+  * @param startsSentence True if this word is at the start of a sentence\r
+  * @return true if this word contains mixed case characters\r
+  */\r
+ private final static boolean isMixedCaseWord(String word, boolean startsSentence) {\r
+   final int strLen = word.length();\r
+   // Without this guard charAt(0) below would throw for an empty word.\r
+   if (strLen == 0) {\r
+     return false;\r
+   }\r
+   boolean expectUpper = Character.isUpperCase(word.charAt(0));\r
+   // Ignore the first character if this word starts the sentence and the first\r
+   // character was upper cased, since this is normal behaviour; the second\r
+   // character then decides which case the rest is expected to have.\r
+   if (startsSentence && expectUpper && strLen > 1) {\r
+     expectUpper = Character.isUpperCase(word.charAt(1));\r
+   }\r
+   // Index 0 is deliberately left out: only the remaining characters are\r
+   // compared against the expected case.\r
+   for (int i = strLen - 1; i > 0; i--) {\r
+     final char current = word.charAt(i);\r
+     final boolean oppositeCase = expectUpper\r
+         ? Character.isLowerCase(current)\r
+         : Character.isUpperCase(current);\r
+     if (oppositeCase) {\r
+       return true;\r
+     }\r
+   }\r
+   return false;\r
+ }\r
+\r
+\r
+ /**\r
+ * This method will fire the spell check event and then handle the event\r
+ * action that has been selected by the user.\r
+ *\r
+ * @param tokenizer Description of the Parameter\r
+ * @param event The event to handle\r
+ * @return Returns true if the event action is to cancel the current spell checking, false if the spell checking should continue\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+protected boolean fireAndHandleEvent(WordTokenizer tokenizer, SpellCheckEvent event) {\r
+ fireSpellCheckEvent(event);\r
+ String word = event.getInvalidWord();\r
+ //Work out what to do in response to the event.\r
+ switch (event.getAction()) {\r
+ case SpellCheckEvent.INITIAL:\r
+ break;\r
+ case SpellCheckEvent.IGNORE:\r
+ break;\r
+ case SpellCheckEvent.IGNOREALL:\r
+ ignoreAll(word);\r
+ break;\r
+ case SpellCheckEvent.REPLACE:\r
+ tokenizer.replaceWord(event.getReplaceWord());\r
+ break;\r
+ case SpellCheckEvent.REPLACEALL:\r
+ String replaceAllWord = event.getReplaceWord();\r
+ if (!autoReplaceWords.containsKey(word)) {\r
+ autoReplaceWords.put(word, replaceAllWord);\r
+ }\r
+ tokenizer.replaceWord(replaceAllWord);\r
+ break;\r
+ case SpellCheckEvent.ADDTODICT:\r
+ String addWord = event.getReplaceWord();\r
+ if (!addWord.equals(word))\r
+ tokenizer.replaceWord(addWord);\r
+ userdictionary.addWord(addWord);\r
+ break;\r
+ case SpellCheckEvent.CANCEL:\r
+ return true;\r
+ default:\r
+ throw new IllegalArgumentException("Unhandled case.");\r
+ }\r
+ return false;\r
+ }\r
+\r
+ /**\r
+  * Puts a word on the list of ignored words unless the list already\r
+  * contains it.\r
+  * @param word The text of the word to ignore\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ public void ignoreAll(String word) {\r
+   final boolean alreadyIgnored = ignoredWords.contains(word);\r
+   if (alreadyIgnored) {\r
+     return;\r
+   }\r
+   ignoredWords.addElement(word);\r
+ }\r
+ \r
+ /**\r
+  * Adds a word to the user dictionary unless the user dictionary already\r
+  * reports the word as correct.\r
+  * @param word The text of the word to add\r
+  */\r
+ public void addToDictionary(String word) {\r
+   final boolean alreadyKnown = userdictionary.isCorrect(word);\r
+   if (alreadyKnown) {\r
+     return;\r
+   }\r
+   userdictionary.addWord(word);\r
+ }\r
+ \r
+ /**\r
+ * Indicates if a word is in the list of ignored words\r
+ * @param word The text of the word to check\r
+ * @return true if the list of ignored words contains the word, false otherwise\r
+ */\r
+ public boolean isIgnored(String word){\r
+ return ignoredWords.contains(word);\r
+ }\r
+ \r
+ /**\r
+ * Verifies if the word to analyze is contained in dictionaries. The order \r
+