--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.BufferedReader;\r
+import java.io.InputStreamReader;\r
+\r
+/**\r
+ * This class is based on Levenshtein Distance algorithms, and it calculates how similar two words are.\r
+ * If the words are identical, then the distance is 0. The more that the words have in common, the lower the distance value.\r
+ * The distance value is based on how many operations it takes to get from one word to the other. Possible operations are\r
+ * swapping characters, adding a character, deleting a character, and substituting a character.\r
+ * The resulting distance is the sum of these operations weighted by their cost, which can be set in the Configuration object.\r
+ * When there are multiple ways to convert one word into the other, the lowest cost distance is returned.\r
+ * <br/>\r
+ * Another way to think about this: what are the cheapest operations that would have to be done on the "original" word to end up\r
+ * with the "similar" word? Each operation has a cost, and these are added up to get the distance.\r
+ * <br/>\r
+ *\r
+ * @see com.swabunga.spell.engine.Configuration#COST_REMOVE_CHAR\r
+ * @see com.swabunga.spell.engine.Configuration#COST_INSERT_CHAR\r
+ * @see com.swabunga.spell.engine.Configuration#COST_SUBST_CHARS\r
+ * @see com.swabunga.spell.engine.Configuration#COST_SWAP_CHARS\r
+ *\r
+ */\r
+\r
+public class EditDistance {\r
+\r
+ /**\r
+ * Fetches the spell engine configuration properties.\r
+ */\r
+ public static Configuration config = Configuration.getConfiguration();\r
+\r
+ /**\r
+ * get the weights for each possible operation\r
+ */\r
+ static final int costOfDeletingSourceCharacter = config.getInteger(Configuration.COST_REMOVE_CHAR);\r
+ static final int costOfInsertingSourceCharacter = config.getInteger(Configuration.COST_INSERT_CHAR);\r
+ static final int costOfSubstitutingLetters = config.getInteger(Configuration.COST_SUBST_CHARS);\r
+ static final int costOfSwappingLetters = config.getInteger(Configuration.COST_SWAP_CHARS);\r
+ static final int costOfChangingCase = config.getInteger(Configuration.COST_CHANGE_CASE); \r
+\r
+ /**\r
+ * Evaluates the distance between two words.\r
+ * \r
+ * @param word One word to evaluates\r
+ * @param similar The other word to evaluates\r
+ * @return a number representing how easy or complex it is to transform on\r
+ * word into a similar one.\r
+ */\r
+ public static final int getDistance(String word, String similar) {\r
+ return getDistance(word,similar,null);\r
+ } \r
+ \r
+ /**\r
+ * Evaluates the distance between two words.\r
+ * \r
+ * @param word One word to evaluates\r
+ * @param similar The other word to evaluates\r
+ * @return a number representing how easy or complex it is to transform on\r
+ * word into a similar one.\r
+ */\r
+ public static final int getDistance(String word, String similar, int[][] matrix) {\r
+ /* JMH Again, there is no need to have a global class matrix variable\r
+ * in this class. I have removed it and made the getDistance static final\r
+ * DMV: I refactored this method to make it more efficient, more readable, and simpler.\r
+ * I also fixed a bug with how the distance was being calculated. You could get wrong\r
+ * distances if you compared ("abc" to "ab") depending on what you had setup your\r
+ * COST_REMOVE_CHAR and EDIT_INSERTION_COST values to - that is now fixed.\r
+ * WRS: I added a distance for case comparison, so a misspelling of "i" would be closer to "I" than\r
+ * to "a".\r
+ */\r
+\r
+ //Allocate memory outside of the loops. \r
+ int i;\r
+ int j;\r
+ int costOfSubst;\r
+ int costOfSwap;\r
+ int costOfDelete;\r
+ int costOfInsertion;\r
+ int costOfCaseChange;\r
+ \r
+ boolean isSwap;\r
+ char sourceChar = 0;\r
+ char otherChar = 0;\r
+ \r
+ int a_size = word.length() + 1;\r
+ int b_size = similar.length() + 1;\r
+ \r
+ \r
+ //Only allocate new memory if we need a bigger matrix. \r
+ if (matrix == null || matrix.length < a_size || matrix[0].length < b_size)\r
+ matrix = new int[a_size][b_size];\r
+ \r
+ matrix[0][0] = 0;\r
+\r
+ for (i = 1; i != a_size; ++i)\r
+ matrix[i][0] = matrix[i - 1][0] + costOfInsertingSourceCharacter; //initialize the first column\r
+\r
+ for (j = 1; j != b_size; ++j)\r
+ matrix[0][j] = matrix[0][j - 1] + costOfDeletingSourceCharacter; //initalize the first row\r
+\r
+ for (i = 1; i != a_size; ++i) {\r
+ sourceChar = word.charAt(i-1);\r
+ for (j = 1; j != b_size; ++j) {\r
+\r
+ otherChar = similar.charAt(j-1);\r
+ if (sourceChar == otherChar) {\r
+ matrix[i][j] = matrix[i - 1][j - 1]; //no change required, so just carry the current cost up\r
+ continue;\r
+ }\r
+\r
+ costOfSubst = costOfSubstitutingLetters + matrix[i - 1][j - 1];\r
+ //if needed, add up the cost of doing a swap\r
+ costOfSwap = Integer.MAX_VALUE;\r
+\r
+ isSwap = (i != 1) && (j != 1) && sourceChar == similar.charAt(j - 2) && word.charAt(i - 2) == otherChar;\r
+ if (isSwap)\r
+ costOfSwap = costOfSwappingLetters + matrix[i - 2][j - 2];\r
+\r
+ costOfDelete = costOfDeletingSourceCharacter + matrix[i][j - 1];\r
+ costOfInsertion = costOfInsertingSourceCharacter + matrix[i - 1][j];\r
+\r
+ costOfCaseChange = Integer.MAX_VALUE;\r
+ \r
+ if (equalIgnoreCase(sourceChar, otherChar))\r
+ costOfCaseChange = costOfChangingCase + matrix[i - 1][j - 1];\r
+ \r
+ matrix[i][j] = minimum(costOfSubst, costOfSwap, costOfDelete, costOfInsertion, costOfCaseChange);\r
+ }\r
+ }\r
+\r
+ return matrix[a_size - 1][b_size - 1];\r
+ }\r
+\r
+ /**\r
+ * checks to see if the two charactors are equal ignoring case. \r
+ * @param ch1\r
+ * @param ch2\r
+ * @return boolean\r
+ */\r
+ private static boolean equalIgnoreCase(char ch1, char ch2) {\r
+ if (ch1 == ch2)\r
+ {\r
+ return true;\r
+ }\r
+ else\r
+ {\r
+ return (Character.toLowerCase(ch1) == Character.toLowerCase(ch2));\r
+ }\r
+ }\r
+ \r
+ /**\r
+ * For debugging, this creates a string that represents the matrix. To read the matrix, look at any square. That is the cost to get from\r
+ * the partial letters along the top to the partial letters along the side.\r
+ * @param src - the source string that the matrix columns are based on\r
+ * @param dest - the dest string that the matrix rows are based on\r
+ * @param matrix - a two dimensional array of costs (distances)\r
+ * @return String\r
+ */\r
+ @SuppressWarnings("unused")\r
+static private String dumpMatrix(String src, String dest, int matrix[][]) {\r
+ StringBuffer s = new StringBuffer("");\r
+\r
+ int cols = matrix.length -1;\r
+ int rows = matrix[0].length -1;\r
+\r
+ for (int i = 0; i < cols + 1; i++) {\r
+ for (int j = 0; j < rows + 1; j++) {\r
+ if (i == 0 && j == 0) {\r
+ s.append("\n ");\r
+ continue;\r
+\r
+ }\r
+ if (i == 0) {\r
+ s.append("| ");\r
+ s.append(dest.charAt(j - 1));\r
+ continue;\r
+ }\r
+ if (j == 0) {\r
+ s.append(src.charAt(i - 1));\r
+ continue;\r
+ }\r
+ String num = Integer.toString(matrix[i - 1][j - 1]);\r
+ int padding = 4 - num.length();\r
+ s.append("|");\r
+ for (int k = 0; k < padding; k++)\r
+ s.append(' ');\r
+ s.append(num);\r
+ }\r
+ s.append('\n');\r
+ }\r
+ return s.toString();\r
+\r
+ }\r
+\r
+\r
+ static private int minimum(int a, int b, int c, int d, int e) {\r
+ int mi = a;\r
+ if (b < mi)\r
+ mi = b;\r
+ if (c < mi)\r
+ mi = c;\r
+ if (d < mi)\r
+ mi = d;\r
+ if (e < mi)\r
+ mi = e;\r
+\r
+ return mi;\r
+ }\r
+\r
+ /**\r
+ * For testing edit distances\r
+ * @param args an array of two strings we want to evaluate their distances.\r
+ * @throws java.lang.Exception when problems occurs during reading args.\r
+ */\r
+ public static void main(String[] args) throws Exception {\r
+ BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));\r
+ int[][] matrix = new int[0][0]; \r
+ while (true) {\r
+\r
+ String input1 = stdin.readLine();\r
+ if (input1 == null || input1.length() == 0)\r
+ break;\r
+\r
+ String input2 = stdin.readLine();\r
+ if (input2 == null || input2.length() == 0)\r
+ break;\r
+\r
+ System.out.println(EditDistance.getDistance(input1, input2,matrix));\r
+ }\r
+ System.out.println("done");\r
+ }\r
+}\r
+\r
+\r