+++ /dev/null
-/*\r
-Jazzy - a Java library for Spell Checking\r
-Copyright (C) 2001 Mindaugas Idzelis\r
-Full text of license can be found in LICENSE.txt\r
-\r
-This library is free software; you can redistribute it and/or\r
-modify it under the terms of the GNU Lesser General Public\r
-License as published by the Free Software Foundation; either\r
-version 2.1 of the License, or (at your option) any later version.\r
-\r
-This library is distributed in the hope that it will be useful,\r
-but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-Lesser General Public License for more details.\r
-\r
-You should have received a copy of the GNU Lesser General Public\r
-License along with this library; if not, write to the Free Software\r
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
-*/\r
-package com.swabunga.spell.event;\r
-\r
-import java.io.IOException;\r
-import java.util.ArrayList;\r
-import java.util.Enumeration;\r
-import java.util.HashMap;\r
-import java.util.Hashtable;\r
-import java.util.Iterator;\r
-import java.util.List;\r
-import java.util.Map;\r
-import java.util.Vector;\r
-\r
-import com.swabunga.spell.engine.Configuration;\r
-import com.swabunga.spell.engine.SpellDictionary;\r
-import com.swabunga.spell.engine.SpellDictionaryHashMap;\r
-import com.swabunga.spell.engine.Word;\r
-import com.swabunga.util.VectorUtility;\r
-\r
-\r
-/**\r
- * This is the main class for spell checking (using the new event based spell\r
- * checking). \r
- * <p/>\r
- * By default, the class makes a user dictionary to accumulate added words.\r
- * Since this user directory has no file assign to persist added words, they\r
- * will be retained for the duration of the spell checker instance.\r
- * If you set a user dictionary like \r
- * {@link com.swabunga.spell.engine.SpellDictionaryHashMap SpellDictionaryHashMap}\r
- * to persist the added word, the user dictionary will have the possibility to\r
- * grow and be available across differents invocations of the spell checker.\r
- *\r
- * @author Jason Height (jheight@chariot.net.au)\r
- * 19 June 2002\r
- */\r
-public class SpellChecker {\r
- /** Flag indicating that the Spell Check completed without any errors present*/\r
- public static final int SPELLCHECK_OK = -1;\r
- /** Flag indicating that the Spell Check completed due to user cancellation*/\r
- public static final int SPELLCHECK_CANCEL = -2;\r
-\r
- @SuppressWarnings("unchecked")\r
-private final Vector eventListeners = new Vector();\r
- @SuppressWarnings("unchecked")\r
-private final Vector dictionaries = new Vector();\r
- private SpellDictionary userdictionary;\r
-\r
- private final Configuration config = Configuration.getConfiguration();\r
-\r
- /**This variable holds all of the words that are to be always ignored */\r
- @SuppressWarnings("unchecked")\r
-private Vector ignoredWords = new Vector();\r
- @SuppressWarnings("unchecked")\r
-private Hashtable autoReplaceWords = new Hashtable();\r
- \r
- // added caching - bd\r
- // For cached operation a separate user dictionary is required\r
- @SuppressWarnings("unchecked")\r
-private Map cache;\r
- private int threshold = 0;\r
- private int cacheSize = 0;\r
- \r
-\r
- /**\r
- * Constructs the SpellChecker.\r
- */\r
- public SpellChecker() {\r
- try {\r
- userdictionary = new SpellDictionaryHashMap();\r
- } catch (IOException e) {\r
- throw new RuntimeException("this exception should never happen because we are using null phonetic file");\r
- }\r
- }\r
-\r
- /**\r
- * Constructs the SpellChecker. The default threshold is used\r
- *\r
- * @param dictionary The dictionary used for looking up words.\r
- */\r
- public SpellChecker(SpellDictionary dictionary) {\r
- this();\r
- addDictionary(dictionary);\r
- }\r
-\r
-\r
- /**\r
- * Constructs the SpellChecker with a threshold\r
- *\r
- * @param dictionary the dictionary used for looking up words.\r
- * @param threshold the cost value above which any suggestions are \r
- * thrown away\r
- */\r
- public SpellChecker(SpellDictionary dictionary, int threshold) {\r
- this(dictionary);\r
- config.setInteger(Configuration.SPELL_THRESHOLD, threshold);\r
- }\r
-\r
- /**\r
- * Accumulates a dictionary at the end of the dictionaries list used\r
- * for looking up words. Adding a dictionary give the flexibility to\r
- * assign the base language dictionary, then a more technical, then...\r
- *\r
- * @param dictionary the dictionary to add at the end of the dictionary list.\r
- */\r
- @SuppressWarnings("unchecked")\r
-public void addDictionary(SpellDictionary dictionary) {\r
- if (dictionary == null) {\r
- throw new IllegalArgumentException("dictionary must be non-null");\r
- }\r
- this.dictionaries.addElement(dictionary);\r
- }\r
-\r
- /**\r
- * Registers the user dictionary to which words are added.\r
- *\r
- * @param dictionary the dictionary to use when the user specify a new word\r
- * to add.\r
- */\r
- public void setUserDictionary(SpellDictionary dictionary) {\r
- userdictionary = dictionary;\r
- }\r
-\r
- /**\r
- * Supply the instance of the configuration holding the spell checking engine\r
- * parameters.\r
- *\r
- * @return Current Configuration\r
- */\r
- public Configuration getConfiguration() {\r
- return config;\r
- }\r
-\r
- /**\r
- * Adds a SpellCheckListener to the listeners list.\r
- *\r
- * @param listener The feature to be added to the SpellCheckListener attribute\r
- */\r
- @SuppressWarnings("unchecked")\r
-public void addSpellCheckListener(SpellCheckListener listener) {\r
- eventListeners.addElement(listener);\r
- }\r
-\r
-\r
- /**\r
- * Removes a SpellCheckListener from the listeners list.\r
- *\r
- * @param listener The listener to be removed from the listeners list.\r
- */\r
- public void removeSpellCheckListener(SpellCheckListener listener) {\r
- eventListeners.removeElement(listener);\r
- }\r
-\r
-\r
- /**\r
- * Fires off a spell check event to the listeners.\r
- *\r
- * @param event The event that need to be processed by the spell checking\r
- * system.\r
- */\r
- protected void fireSpellCheckEvent(SpellCheckEvent event) {\r
- for (int i = eventListeners.size() - 1; i >= 0; i--) {\r
- ((SpellCheckListener) eventListeners.elementAt(i)).spellingError(event);\r
- }\r
- }\r
-\r
-\r
- /**\r
- * This method clears the words that are currently being remembered as\r
- * <code>Ignore All</code> words and <code>Replace All</code> words.\r
- */\r
- @SuppressWarnings("unchecked")\r
-public void reset() {\r
- ignoredWords = new Vector();\r
- autoReplaceWords = new Hashtable();\r
- }\r
-\r
-\r
- /**\r
- * Checks the text string.\r
- * <p>\r
- * Returns the corrected string.\r
- *\r
- * @param text The text that need to be spelled checked\r
- * @return The text after spell checking\r
- * @deprecated use checkSpelling(WordTokenizer)\r
- */\r
- @Deprecated\r
-public String checkString(String text) {\r
- StringWordTokenizer tokens = new StringWordTokenizer(text);\r
- checkSpelling(tokens);\r
- return tokens.getContext();\r
- }\r
-\r
-\r
- /**\r
- * Verifies if the word that is being spell checked contains at least a\r
- * digit.\r
- * Returns true if this word contains a digit.\r
- *\r
- * @param word The word to analyze for digit.\r
- * @return true if the word contains at least a digit.\r
- */\r
- private final static boolean isDigitWord(String word) {\r
- for (int i = word.length() - 1; i >= 0; i--) {\r
- if (Character.isDigit(word.charAt(i))) {\r
- return true;\r
- }\r
- }\r
- return false;\r
- }\r
-\r
-\r
- /**\r
- * Verifies if the word that is being spell checked contains an Internet \r
- * address. The method look for typical protocol or the habitual string \r
- * in the word:\r
- * <ul>\r
- * <li>http://</li>\r
- * <li>ftp://</li>\r
- * <li>https://</li>\r
- * <li>ftps://</li>\r
- * <li>www.</li>\r
- * </ul>\r
- *\r
- * One limitation is that this method cannot currently recognize email\r
- * addresses. Since the 'word' that is passed in, may in fact contain\r
- * the rest of the document to be checked, it is not (yet!) a good\r
- * idea to scan for the @ character.\r
- *\r
- * @param word The word to analyze for an Internet address.\r
- * @return true if this word looks like an Internet address.\r
- */\r
- public final static boolean isINETWord(String word) {\r
- String lowerCaseWord = word.toLowerCase();\r
- return lowerCaseWord.startsWith("http://") ||\r
- lowerCaseWord.startsWith("www.") ||\r
- lowerCaseWord.startsWith("ftp://") ||\r
- lowerCaseWord.startsWith("https://") ||\r
- lowerCaseWord.startsWith("ftps://");\r
- }\r
-\r
-\r
- /**\r
- * Verifies if the word that is being spell checked contains all\r
- * uppercases characters.\r
- *\r
- * @param word The word to analyze for uppercases characters\r
- * @return true if this word contains all upper case characters\r
- */\r
- private final static boolean isUpperCaseWord(String word) {\r
- for (int i = word.length() - 1; i >= 0; i--) {\r
- if (Character.isLowerCase(word.charAt(i))) {\r
- return false;\r
- }\r
- }\r
- return true;\r
- }\r
-\r
-\r
- /**\r
- * Verifies if the word that is being spell checked contains lower and\r
- * upper cased characters. Note that a phrase beginning with an upper cased\r
- * character is not considered a mixed case word.\r
- *\r
- * @param word The word to analyze for mixed cases characters\r
- * @param startsSentence True if this word is at the start of a sentence\r
- * @return true if this word contains mixed case characters\r
- */\r
- private final static boolean isMixedCaseWord(String word, boolean startsSentence) {\r
- int strLen = word.length();\r
- boolean isUpper = Character.isUpperCase(word.charAt(0));\r
- //Ignore the first character if this word starts the sentence and the first\r
- //character was upper cased, since this is normal behaviour\r
- if ((startsSentence) && isUpper && (strLen > 1))\r
- isUpper = Character.isUpperCase(word.charAt(1));\r
- if (isUpper) {\r
- for (int i = word.length() - 1; i > 0; i--) {\r
- if (Character.isLowerCase(word.charAt(i))) {\r
- return true;\r
- }\r
- }\r
- } else {\r
- for (int i = word.length() - 1; i > 0; i--) {\r
- if (Character.isUpperCase(word.charAt(i))) {\r
- return true;\r
- }\r
- }\r
- }\r
- return false;\r
- }\r
-\r
-\r
- /**\r
- * This method will fire the spell check event and then handle the event\r
- * action that has been selected by the user.\r
- *\r
- * @param tokenizer Description of the Parameter\r
- * @param event The event to handle\r
- * @return Returns true if the event action is to cancel the current spell checking, false if the spell checking should continue\r
- */\r
- @SuppressWarnings("unchecked")\r
-protected boolean fireAndHandleEvent(WordTokenizer tokenizer, SpellCheckEvent event) {\r
- fireSpellCheckEvent(event);\r
- String word = event.getInvalidWord();\r
- //Work out what to do in response to the event.\r
- switch (event.getAction()) {\r
- case SpellCheckEvent.INITIAL:\r
- break;\r
- case SpellCheckEvent.IGNORE:\r
- break;\r
- case SpellCheckEvent.IGNOREALL:\r
- ignoreAll(word);\r
- break;\r
- case SpellCheckEvent.REPLACE:\r
- tokenizer.replaceWord(event.getReplaceWord());\r
- break;\r
- case SpellCheckEvent.REPLACEALL:\r
- String replaceAllWord = event.getReplaceWord();\r
- if (!autoReplaceWords.containsKey(word)) {\r
- autoReplaceWords.put(word, replaceAllWord);\r
- }\r
- tokenizer.replaceWord(replaceAllWord);\r
- break;\r
- case SpellCheckEvent.ADDTODICT:\r
- String addWord = event.getReplaceWord();\r
- if (!addWord.equals(word))\r
- tokenizer.replaceWord(addWord);\r
- userdictionary.addWord(addWord);\r
- break;\r
- case SpellCheckEvent.CANCEL:\r
- return true;\r
- default:\r
- throw new IllegalArgumentException("Unhandled case.");\r
- }\r
- return false;\r
- }\r
-\r
- /**\r
- * Adds a word to the list of ignored words\r
- * @param word The text of the word to ignore\r
- */\r
- @SuppressWarnings("unchecked")\r
-public void ignoreAll(String word) {\r
- if (!ignoredWords.contains(word)) {\r
- ignoredWords.addElement(word);\r
- }\r
- }\r
- \r
- /**\r
- * Adds a word to the user dictionary\r
- * @param word The text of the word to add\r
- */\r
- public void addToDictionary(String word) {\r
- if (!userdictionary.isCorrect(word))\r
- userdictionary.addWord(word);\r
- }\r
- \r
- /**\r
- * Indicates if a word is in the list of ignored words\r
- * @param word The text of the word check\r
- */\r
- public boolean isIgnored(String word){\r
- return ignoredWords.contains(word);\r
- }\r
- \r
- /**\r
- * Verifies if the word to analyze is contained in dictionaries. The order \r
- * of dictionary lookup is:\r
- * <ul>\r
- * <li>The default user dictionary or the one set through \r
- * {@link SpellChecker#setUserDictionary}</li>\r
- * <li>The dictionary specified at construction time, if any.</li>\r
- * <li>Any dictionary in the order they were added through \r
- * {@link SpellChecker#addDictionary}</li>\r
- * </ul>\r
- *\r
- * @param word The word to verify that it's spelling is known.\r
- * @return true if the word is in a dictionary.\r
- */\r
- @SuppressWarnings("unchecked")\r
-public boolean isCorrect(String word) {\r
- if (userdictionary.isCorrect(word)) return true;\r
- for (Enumeration e = dictionaries.elements(); e.hasMoreElements();) {\r
- SpellDictionary dictionary = (SpellDictionary) e.nextElement();\r
- if (dictionary.isCorrect(word)) return true;\r
- }\r
- return false;\r
- }\r
-\r
- /**\r
- * Produces a list of suggested word after looking for suggestions in various\r
- * dictionaries. The order of dictionary lookup is:\r
- * <ul>\r
- * <li>The default user dictionary or the one set through \r
- * {@link SpellChecker#setUserDictionary}</li>\r
- * <li>The dictionary specified at construction time, if any.</li>\r
- * <li>Any dictionary in the order they were added through \r
- * {@link SpellChecker#addDictionary}</li>\r
- * </ul>\r
- *\r
- * @param word The word for which we want to gather suggestions\r
- * @param threshold the cost value above which any suggestions are \r
- * thrown away\r
- * @return the list of words suggested\r
- */\r
- @SuppressWarnings("unchecked")\r
-public List getSuggestions(String word, int threshold) {\r
- if (this.threshold != threshold && cache != null) {\r
- this.threshold = threshold;\r
- cache.clear();\r
- }\r
- \r
- ArrayList suggestions = null;\r
- \r
- if (cache != null)\r
- suggestions = (ArrayList) cache.get(word);\r
-\r
- if (suggestions == null) {\r
- suggestions = new ArrayList(50);\r
- \r
- for (Enumeration e = dictionaries.elements(); e.hasMoreElements();) {\r
- SpellDictionary dictionary = (SpellDictionary) e.nextElement();\r
- \r
- if (dictionary != userdictionary)\r
- VectorUtility.addAll(suggestions, dictionary.getSuggestions(word, threshold), false);\r
- }\r
-\r
- if (cache != null && cache.size() < cacheSize)\r
- cache.put(word, suggestions);\r
- }\r
- \r
- VectorUtility.addAll(suggestions, userdictionary.getSuggestions(word, threshold), false);\r
- suggestions.trimToSize();\r
- \r
- return suggestions;\r
- }\r
-\r
- /**\r
- * Activates a cache with the maximum number of entries set to 300\r
- */\r
- public void setCache() {\r
- setCache(300);\r
- }\r
-\r
- /**\r
- * Activates a cache with specified size\r
- * @param size - max. number of cache entries (0 to disable chache)\r
- */\r
- @SuppressWarnings("unchecked")\r
-public void setCache(int size) {\r
- cacheSize = size;\r
- if (size == 0)\r
- cache = null;\r
- else\r
- cache = new HashMap((size + 2) / 3 * 4);\r
- }\r
-\r
- /**\r
- * This method is called to check the spelling of the words that are returned\r
- * by the WordTokenizer.\r
- * <p/>\r
- * For each invalid word the action listeners will be informed with a new \r
- * SpellCheckEvent.<p>\r
- *\r
- * @param tokenizer The media containing the text to analyze.\r
- * @return Either SPELLCHECK_OK, SPELLCHECK_CANCEL or the number of errors found. The number of errors are those that\r
- * are found BEFORE any corrections are made.\r
- */\r
- @SuppressWarnings("unchecked")\r
-public final int checkSpelling(WordTokenizer tokenizer) {\r
- int errors = 0;\r
- boolean terminated = false;\r
- //Keep track of the previous word\r
-// String previousWord = null;\r
- while (tokenizer.hasMoreWords() && !terminated) {\r
- String word = tokenizer.nextWord();\r
- //Check the spelling of the word\r
- if (!isCorrect(word)) {\r
- if ((config.getBoolean(Configuration.SPELL_IGNOREMIXEDCASE) && isMixedCaseWord(word, tokenizer.isNewSentence())) ||\r
- (config.getBoolean(Configuration.SPELL_IGNOREUPPERCASE) && isUpperCaseWord(word)) ||\r
- (config.getBoolean(Configuration.SPELL_IGNOREDIGITWORDS) && isDigitWord(word)) ||\r
- (config.getBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES) && isINETWord(word))) {\r
- //Null event. Since we are ignoring this word due\r
- //to one of the above cases.\r
- } else {\r
- //We cant ignore this misspelt word\r
- //For this invalid word are we ignoring the misspelling?\r
- if (!isIgnored(word)) {\r
- errors++;\r
- //Is this word being automagically replaced\r
- if (autoReplaceWords.containsKey(word)) {\r
- tokenizer.replaceWord((String) autoReplaceWords.get(word));\r
- } else {\r
- //JMH Need to somehow capitalise the suggestions if\r
- //ignoreSentenceCapitalisation is not set to true\r
- //Fire the event.\r
- List suggestions = getSuggestions(word, config.getInteger(Configuration.SPELL_THRESHOLD));\r
- if (capitalizeSuggestions(word, tokenizer))\r
- suggestions = makeSuggestionsCapitalized(suggestions);\r
- SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestions, tokenizer);\r
- terminated = fireAndHandleEvent(tokenizer, event);\r
- }\r
- }\r
- }\r
- } else {\r
- //This is a correctly spelt word. However perform some extra checks\r
- /*\r
- * JMH TBD //Check for multiple words\r
- * if (!ignoreMultipleWords &&) {\r
- * }\r
- */\r
- //Check for capitalisation\r
- if (isSupposedToBeCapitalized(word, tokenizer)) {\r
- errors++;\r
- StringBuffer buf = new StringBuffer(word);\r
- buf.setCharAt(0, Character.toUpperCase(word.charAt(0)));\r
- Vector suggestion = new Vector();\r
- suggestion.addElement(new Word(buf.toString(), 0));\r
- SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestion, tokenizer);\r
- terminated = fireAndHandleEvent(tokenizer, event);\r
- }\r
- }\r
- }\r
- if (terminated)\r
- return SPELLCHECK_CANCEL;\r
- else if (errors == 0)\r
- return SPELLCHECK_OK;\r
- else\r
- return errors;\r
- }\r
- \r
- \r
- @SuppressWarnings("unchecked")\r
-private List makeSuggestionsCapitalized(List suggestions) {\r
- Iterator iterator = suggestions.iterator();\r
- while(iterator.hasNext()) {\r
- Word word = (Word)iterator.next();\r
- String suggestion = word.getWord();\r
- StringBuffer stringBuffer = new StringBuffer(suggestion);\r
- stringBuffer.setCharAt(0, Character.toUpperCase(suggestion.charAt(0)));\r
- word.setWord(stringBuffer.toString());\r
- }\r
- return suggestions;\r
- }\r
-\r
- \r
- private boolean isSupposedToBeCapitalized(String word, WordTokenizer wordTokenizer) {\r
- boolean configCapitalize = !config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION);\r
- return configCapitalize && wordTokenizer.isNewSentence() && Character.isLowerCase(word.charAt(0));\r
- } \r
-\r
- private boolean capitalizeSuggestions(String word, WordTokenizer wordTokenizer) {\r
- // if SPELL_IGNORESENTENCECAPITALIZATION and the initial word is capitalized, suggestions should also be capitalized\r
- // if !SPELL_IGNORESENTENCECAPITALIZATION, capitalize suggestions only for the first word in a sentence\r
- boolean configCapitalize = !config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION);\r
- boolean uppercase = Character.isUpperCase(word.charAt(0));\r
- return (configCapitalize && wordTokenizer.isNewSentence()) || (!configCapitalize && uppercase);\r
- }\r
-}\r