-NeverNote:: Evernote client clone for Linux, Mac OS X and Windows
+NixNote:: Evernote client clone for Linux, Mac OS X and Windows
Copyright 2009-2010, Randy Baumgarte
Licensed under GNU General Public License version 2
<?xml version='1.0' encoding='utf-8'?>
<project default="jar" basedir=".">
<!-- program name -->
- <property name="project.name" value="nevernote"/>
+ <property name="project.name" value="nixnote"/>
<property name="application.name" value="sandbox"/>
<!-- targeted QtJambi library version -->
<property name="jvm.gcs" value="incgc"/><!-- GC strategy -->
<!-- product jar file -->
- <property name="jar.name" value="nevernote.jar"/>
+ <property name="jar.name" value="nixnote.jar"/>
<!-- product exec command -->
- <property name="shell.command" value="nevernote.sh"/>
- <property name="bat.command" value="nevernote.bat"/>
+ <property name="shell.command" value="nixnote.sh"/>
+ <property name="bat.command" value="nixnote.bat"/>
<!-- qt utils -->
<property name="linguist.update" value="lupdate"/>
<property name="linguist.release" value="lrelease"/>
<!-- translations -->
- <property name="linguist.project" value="nevernote.pro"/>
+ <property name="linguist.project" value="nixnote.pro"/>
<!-- documents -->
<property name="doc.changelog" value="changelog.txt"/>
</target>
<target name="resources" depends="init,release-tr">
- <mkdir dir="${classes.dir}/cx/fbn/nevernote/icons"/>
- <copy todir="${classes.dir}/cx/fbn/nevernote/icons">
- <fileset dir="${src.dir}/cx/fbn/nevernote/icons"/>
+ <mkdir dir="${classes.dir}/cx/fbn/nixnote/icons"/>
+ <copy todir="${classes.dir}/cx/fbn/nixnote/icons">
+ <fileset dir="${src.dir}/cx/fbn/nixnote/icons"/>
</copy>
<mkdir dir="${classes.dir}/translations"/>
<copy todir="${classes.dir}/translations">
<fileset dir="${doc.dir}"/>
</copy>
<exec dir="dist" executable="tar">
- <arg line="czf ../../nevernote-bin.tar.gz ."/>
+ <arg line="czf ../../nixnote-bin.tar.gz ."/>
</exec>
</target>
+NixNote 1.0
+Changes from 0.99
+
+- Changed from NeverNote to NixNote
+- Added support for note linking. It still feels a little clunky, but it seems usable.
+- Added indexing options to restrict what is indexed and to permit special characters that won't be removed from words. You need to reindex your database when these values change.
+- Cleaned up the Android HTML Entities fix. It is still disabled by default and has a few issues, but it seems to be a little better now.
+- Fixed a problem where the LaTeX editor wouldn't be shown under some circumstances.
+- Fixed a problem where opening the current note in an external window might cause recent edits to be lost.
+- Fixed a problem where linked notebooks were not authorizing properly.
+- Fixed a problem where linked notebooks would interrupt a sync if authority for the user was revoked. It will now prompt on what should be done.
+- Fixed a problem where changes made to automatic import folders were not saved.
+- Fixed a problem where Cut was not Ctrl+C, but center text was. Center text was changed to Ctrl+E.
+- Fixed a problem where encrypting the database resulted in problems with images & attachments and the index table.
+- Added the ability to edit a note's HTML source (VERY EXPERIMENTAL).
+
+-------------------------------------------------------------
+
+
NeverNote 0.98
Changes from 0.97
exit 1
fi
-cp $package_dir/usr/share/applications/nevernote.desktop /usr/share/applications/nevernote.desktop
-mkdir /usr/share/nevernote
-cp -r $package_dir/usr/share/nevernote/* /usr/share/nevernote/
+cp $package_dir/usr/share/applications/nixnote.desktop /usr/share/applications/nixnote.desktop
+mkdir /usr/share/nixnote
+cp -r $package_dir/usr/share/nixnote/* /usr/share/nixnote/
echo "Install complete"
-Welcome to NeverNote.
+Welcome to NixNote.
This is a very basic clone of Evernote designed to run on Linux.
It is written in Java so it will also run on other platforms as well but the primary focus has been to try
-- NOTE: I don't have OS-X so I can't verify the stability or usability of this under OS-X
1.) Download the Mac install version.
2.) Run the install program.
-3.) Run nevernote.sh from the installation directory.
+3.) Run nixnote.sh from the installation directory.
=====================
There are additional options if you wish to run multiple copies under the same userid and options which impact how Java works. These settings are optional and, depending upon your needs, you probably don't need to touch them.
-To run under multiple IDs, you need to pass a parameter NN_NAME="<name>" to the nevernote.sh shell script where <name> is whatever name you want to identify this instance as. For example, ./nevernote.sh NN_NAME="test" will create a separate database called "test". Anything you put in there will be separate from the default NeverNote database, so the username can also be different.
+To run under multiple IDs, you need to pass a parameter NN_NAME="<name>" to the nixnote.sh shell script where <name> is whatever name you want to identify this instance as. For example, ./nixnote.sh NN_NAME="test" will create a separate database called "test". Anything you put in there will be separate from the default NixNote database, so the username can also be different.
<html><head></head>
<body>
-NeverNote Version 0.90
+NixNote 1.0
<p>
-NeverNote is licensed under the Gnu Public License (GPL) version 2.
+NixNote is licensed under the Gnu Public License (GPL) version 2.
<p>
Evernote is Copyright © 2000-2010 Evernote Corporation. All rights reserved.
<p>
-SQLite database engine is in the public domain.
-<p>
Qt and Jambi are the licensed property of Nokia Corporation and/or its subsidiaries. Nokia, Qt and their respective logos are trademarks of Nokia Corporation in Finland and/or other countries worldwide.
<p>
All other trademarks are property of their respective owners.
-NeverNote Version 0.80
+NixNote
NixNote is licensed under the Gnu Public License (GPL) version 2.
Evernote is Copyright © 2000-2010 Evernote Corporation. All rights reserved.
-SQLite database engine is in the public domain.
-
Qt and Jambi are the licensed property of Nokia Corporation and/or its subsidiaries. Nokia, Qt and their respective logos are trademarks of Nokia Corporation in Finland and/or other countries worldwide.
All other trademarks are property of their respective owners.
+++ /dev/null
-#! /bin/sh
-/usr/share/nevernote/nevernote.sh $*
#! /bin/sh
###########################################
-# NeverNote Startup script for OS-X
+# NixNote Startup script for OS-X
###########################################
# The ones below are examples only. #
###########################################
-NEVERNOTE=$(cd `dirname $0` && pwd)
-# NEVERNOTE=/usr/share/nevernote
+NIXNOTE=$(cd `dirname $0` && pwd)
+# NIXNOTE=/usr/share/nixnote
########################################
# Memory settings. These can be tuned #
########################################
# This next variable is optional. It #
# is only needed if you want to run #
-# multiple copies of NeverNote under #
+# multiple copies of NixNote under #
# the same Linux user id. Each #
# additional copy (after the first) #
# should have a unique name. This #
#####################
# Setup environment #
#####################
-NN_CLASSPATH=$NEVERNOTE/nevernote.jar
-
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/PDFRenderer.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/apache-mime4j-0.6.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/commons-codec-1.3.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/commons-compress-1.1.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/commons-lang-2.4.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/commons-logging-1.1.1.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/evernote.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/h2-1.2.147.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/httpclient-4.0.3.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/httpcore-4.0.1.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/httpmime-4.0.3.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/jaxen-1.1.3.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/jazzy.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/jtidy-r938.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/libthrift.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/log4j-1.2.14.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/pdfbox-app-1.3.1.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/poi-3.7-20101029.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/poi-ooxml-3.7.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/poi-ooxml-schemas-3.7-20101029.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/poi-scratchpad-3.7-20101029.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/tika.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/xmlbeans-2.3.0.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/xsdlib-20060615.jar
-
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/qtjambi-macosx-4.5.2_01.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/qtjambi-macosx-gcc-4.5.2_01.jar
+NN_CLASSPATH=$NIXNOTE/nixnote.jar
+
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/PDFRenderer.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/apache-mime4j-0.6.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/commons-codec-1.3.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/commons-compress-1.1.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/commons-lang-2.4.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/commons-logging-1.1.1.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/evernote.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/h2-1.2.147.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/httpclient-4.0.3.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/httpcore-4.0.1.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/httpmime-4.0.3.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/jaxen-1.1.3.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/jazzy.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/jtidy-r938.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/libthrift.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/log4j-1.2.14.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/pdfbox-app-1.3.1.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/poi-3.7-20101029.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/poi-ooxml-3.7.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/poi-ooxml-schemas-3.7-20101029.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/poi-scratchpad-3.7-20101029.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/tika.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/xmlbeans-2.3.0.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/xsdlib-20060615.jar
+
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/qtjambi-macosx-4.5.2_01.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/qtjambi-macosx-gcc-4.5.2_01.jar
###################
# Run the program #
###################
-cd $NEVERNOTE
+cd $NIXNOTE
java -Xmx$NN_XMX -Xms$NN_XMS -XX:NewRatio=$NN_NEW_RATIO $NN_GC_OPT $NN_DEBUG -classpath $NN_CLASSPATH cx.fbn.nevernote.NeverNote --name=$NN_NAME -XstartOnFirstThread -d32 -client
rem #####################\r
rem # Install variables #\r
rem #####################\r
-set NEVERNOTE=%~dp0\r
+set NIXNOTE=%~dp0\r
\r
rem ########################################\r
rem # Memory settings. These can be tuned #\r
rem ########################################\r
rem # This next variable is optional. It #\r
rem # is only needed if you want to run #\r
-rem # multiple copies of NeverNote under #\r
-rem # the same Linux user id. Each #\r
+rem # multiple copies of NixNote under #\r
+rem # the same user id. Each #\r
rem # additional copy (after the first) #\r
rem # should have a unique name. This #\r
rem # permits the settings to be saved #\r
rem #####################\r
rem # Setup environment #\r
rem #####################\r
-set NN_CLASSPATH=%NEVERNOTE%nevernote.jar\r
-\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\PDFRenderer.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\apache-mime4j-0.6.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\commons-codec-1.3.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\commons-compress-1.1.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\commons-lang-2.4.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\commons-logging-1.1.1.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\evernote.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\h2-1.2.147.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\httpclient-4.0.3.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\httpcore-4.0.1.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\httpmime-4.0.3.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\jaxen-1.1.3.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\jazzy.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\jtidy-r938.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\libthrift.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\log4j-1.2.14.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\pdfbox-app-1.3.1.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\poi-3.7-20101029.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\poi-ooxml-3.7.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\poi-ooxml-schemas-3.7-20101029.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\poi-scratchpad-3.7-20101029.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\tika.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\xmlbeans-2.3.0.jar\r
-set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\xsdlib-20060615.jar\r
-\r
-if exist "%NEVERNOTE%lib\qtjambi-win32-4.5.2_01.jar" set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\qtjambi-win32-4.5.2_01.jar\r
-if exist "%NEVERNOTE%lib\qtjambi-win32-msvc2005-4.5.2_01.jar" set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\qtjambi-win32-msvc2005-4.5.2_01.jar\r
-if exist "%NEVERNOTE%lib\qtjambi-win64-4.5.2_01.jar" set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\qtjambi-win64-4.5.2_01.jar\r
-if exist "%NEVERNOTE%lib\qtjambi-win64-msvc2005x64-4.5.2_01.jar" set NN_CLASSPATH=%NN_CLASSPATH%;%NEVERNOTE%lib\qtjambi-win64-msvc2005x64-4.5.2_01.jar\r
+set NN_CLASSPATH=%NIXNOTE%nixnote.jar\r
+\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\PDFRenderer.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\apache-mime4j-0.6.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\commons-codec-1.3.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\commons-compress-1.1.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\commons-lang-2.4.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\commons-logging-1.1.1.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\evernote.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\h2-1.2.147.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\httpclient-4.0.3.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\httpcore-4.0.1.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\httpmime-4.0.3.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\jaxen-1.1.3.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\jazzy.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\jtidy-r938.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\libthrift.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\log4j-1.2.14.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\pdfbox-app-1.3.1.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\poi-3.7-20101029.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\poi-ooxml-3.7.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\poi-ooxml-schemas-3.7-20101029.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\poi-scratchpad-3.7-20101029.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\tika.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\xmlbeans-2.3.0.jar\r
+set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\xsdlib-20060615.jar\r
+\r
+if exist "%NIXNOTE%lib\qtjambi-win32-4.5.2_01.jar" set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\qtjambi-win32-4.5.2_01.jar\r
+if exist "%NIXNOTE%lib\qtjambi-win32-msvc2005-4.5.2_01.jar" set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\qtjambi-win32-msvc2005-4.5.2_01.jar\r
+if exist "%NIXNOTE%lib\qtjambi-win64-4.5.2_01.jar" set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\qtjambi-win64-4.5.2_01.jar\r
+if exist "%NIXNOTE%lib\qtjambi-win64-msvc2005x64-4.5.2_01.jar" set NN_CLASSPATH=%NN_CLASSPATH%;%NIXNOTE%lib\qtjambi-win64-msvc2005x64-4.5.2_01.jar\r
\r
rem set NN_CLASSPATH="%NN_CLASSPATH%"\r
\r
[Desktop Entry]
-Name=NeverNote
+Name=NixNote
Comment=Use with Evernote to remember everything
GenericName=Evernote-clone
-Exec=/usr/share/nevernote/nevernote.sh
-Icon=/usr/share/nevernote/nevernote.png
+Exec=/usr/share/nixnote/nixnote.sh
+Icon=/usr/share/nixnote/nixnote.png
StartupNotify=true
Terminal=false
Type=Application
# The ones below are examples only. #
###########################################
-NEVERNOTE=$(cd `dirname $0` && pwd)
-# NEVERNOTE=/usr/share/nevernote
+NIXNOTE=$(cd `dirname $0` && pwd)
+# NIXNOTE=/usr/share/nixnote
########################################
# Memory settings. These can be tuned #
########################################
# This next variable is optional. It #
# is only needed if you want to run #
-# multiple copies of NeverNote under #
+# multiple copies of NixNote under #
# the same Linux user id. Each #
# additional copy (after the first) #
# should have a unique name. This #
#####################
# Setup environment #
#####################
-NN_CLASSPATH=$NEVERNOTE/nevernote.jar
-
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/PDFRenderer.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/apache-mime4j-0.6.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/commons-codec-1.3.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/commons-compress-1.1.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/commons-lang-2.4.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/commons-logging-1.1.1.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/evernote.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/h2-1.2.147.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/httpclient-4.0.3.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/httpcore-4.0.1.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/httpmime-4.0.3.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/jaxen-1.1.3.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/jazzy.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/jtidy-r938.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/libthrift.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/log4j-1.2.14.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/pdfbox-app-1.3.1.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/poi-3.7-20101029.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/poi-ooxml-3.7.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/poi-ooxml-schemas-3.7-20101029.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/poi-scratchpad-3.7-20101029.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/tika.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/xmlbeans-2.3.0.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/xsdlib-20060615.jar
-
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/qtjambi-linux32-4.5.2_01.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/qtjambi-linux32-gcc-4.5.2_01.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/qtjambi-linux64-4.5.2_01.jar
-NN_CLASSPATH=$NN_CLASSPATH:$NEVERNOTE/lib/qtjambi-linux64-gcc-4.5.2_01.jar
+NN_CLASSPATH=$NIXNOTE/nixnote.jar
+
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/PDFRenderer.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/apache-mime4j-0.6.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/commons-codec-1.3.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/commons-compress-1.1.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/commons-lang-2.4.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/commons-logging-1.1.1.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/evernote.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/h2-1.2.147.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/httpclient-4.0.3.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/httpcore-4.0.1.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/httpmime-4.0.3.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/jaxen-1.1.3.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/jazzy.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/jtidy-r938.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/libthrift.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/log4j-1.2.14.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/pdfbox-app-1.3.1.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/poi-3.7-20101029.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/poi-ooxml-3.7.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/poi-ooxml-schemas-3.7-20101029.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/poi-scratchpad-3.7-20101029.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/tika.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/xmlbeans-2.3.0.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/xsdlib-20060615.jar
+
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/qtjambi-linux32-4.5.2_01.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/qtjambi-linux32-gcc-4.5.2_01.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/qtjambi-linux64-4.5.2_01.jar
+NN_CLASSPATH=$NN_CLASSPATH:$NIXNOTE/lib/qtjambi-linux64-gcc-4.5.2_01.jar
###################
# Run the program #
###################
-cd $NEVERNOTE
+cd $NIXNOTE
java -Xmx$NN_XMX -Xms$NN_XMS -XX:NewRatio=$NN_NEW_RATIO $NN_GC_OPT $NN_DEBUG -classpath $NN_CLASSPATH cx.fbn.nevernote.NeverNote --name=$NN_NAME
cd -
--- /dev/null
+#! /bin/sh
+/usr/share/nixnote/nixnote.sh $*
-Package: nevernote
-Version: 0.98
+Package: nixnote
+Version: 1.0
Section: x11
Priority: optional
Architecture: amd64
#!/bin/sh
-version="0.98"
+version="1.0"
arch="i386"
qtversion="4.5.2_01"
package_dir=$(cd `dirname $0` && pwd)
-destination="$package_dir/contents/usr/share/nevernote"
+destination="$package_dir/contents/usr/share/nixnote"
source_dir="../.."
qtlibs="../../../bitrock/lib"
qtarch="32"
# Cleanup any old stuff
-if [ -e "$package_dir/nevernote-${version}_${arch}.deb" ]
+if [ -e "$package_dir/nixnote-${version}_${arch}.deb" ]
then
- rm $package_dir/nevernote-${version}_${arch}.deb
+ rm $package_dir/nixnote-${version}_${arch}.deb
fi
-if [ -e "$package_dir/nevernote-${version}_${arch}.rpm" ]
+if [ -e "$package_dir/nixnote-${version}_${arch}.rpm" ]
then
- rm $package_dir/nevernote-${version}_${arch}.rpm
+ rm $package_dir/nixnote-${version}_${arch}.rpm
fi
############################
mkdir $package_dir/contents/usr/
mkdir $package_dir/contents/usr/share
mkdir $package_dir/contents/usr/share/applications
-mkdir $package_dir/contents/usr/share/nevernote
+mkdir $package_dir/contents/usr/share/nixnote
mkdir $package_dir/contents/usr/share/man
mkdir $package_dir/contents/usr/bin/
# Copy startup script & images
-cp $source_dir/nevernote.sh $package_dir/contents/usr/share/nevernote/
-cp $source_dir/*.txt $package_dir/contents/usr/share/nevernote/
-cp $source_dir/*.html $package_dir/contents/usr/share/nevernote/
-cp $source_dir/*.png $package_dir/contents/usr/share/nevernote/
-cp $source_dir/nevernote.desktop $package_dir/contents/usr/share/applications
-cp $source_dir/nevernote_path.sh $package_dir/contents/usr/bin/nevernote.sh
+cp $source_dir/nixnote.sh $package_dir/contents/usr/share/nixnote/
+cp $source_dir/*.txt $package_dir/contents/usr/share/nixnote/
+cp $source_dir/*.html $package_dir/contents/usr/share/nixnote/
+cp $source_dir/*.png $package_dir/contents/usr/share/nixnote/
+cp $source_dir/nixnote.desktop $package_dir/contents/usr/share/applications
+cp $source_dir/nixnote_path.sh $package_dir/contents/usr/bin/nixnote.sh
# Copy subdirectories
-cp -r $source_dir/images $package_dir/contents/usr/share/nevernote/
-cp -r $source_dir/lib $package_dir/contents/usr/share/nevernote/
-cp -r $source_dir/qss $package_dir/contents/usr/share/nevernote/
-cp -r $source_dir/spell $package_dir/contents/usr/share/nevernote/
-cp -r $source_dir/translations $package_dir/contents/usr/share/nevernote/
-cp -r $source_dir/xml $package_dir/contents/usr/share/nevernote/
+cp -r $source_dir/images $package_dir/contents/usr/share/nixnote/
+cp -r $source_dir/lib $package_dir/contents/usr/share/nixnote/
+cp -r $source_dir/qss $package_dir/contents/usr/share/nixnote/
+cp -r $source_dir/spell $package_dir/contents/usr/share/nixnote/
+cp -r $source_dir/translations $package_dir/contents/usr/share/nixnote/
+cp -r $source_dir/xml $package_dir/contents/usr/share/nixnote/
# Copy QT libraries.
-cp $qtlibs/qtjambi-linux$qtarch-$qtversion.jar $package_dir/contents/usr/share/nevernote/lib/
-cp $qtlibs/qtjambi-linux$qtarch-gcc-$qtversion.jar $package_dir/contents/usr/share/nevernote/lib/
+cp $qtlibs/qtjambi-linux$qtarch-$qtversion.jar $package_dir/contents/usr/share/nixnote/lib/
+cp $qtlibs/qtjambi-linux$qtarch-gcc-$qtversion.jar $package_dir/contents/usr/share/nixnote/lib/
-# Copy NeverNote itself
-cp $qtlibs/../nevernote.jar $package_dir/contents/usr/share/nevernote/
+# Copy NixNote itself
+cp $qtlibs/../nixnote.jar $package_dir/contents/usr/share/nixnote/
# Reset user permissions
chown -R root:root $package_dir/contents/
cp $package_dir/$arch/control ./contents/DEBIAN/
-dpkg -b $package_dir/contents $package_dir/nevernote-${version}_${arch}.deb
-alien -r $package_dir/nevernote-${version}_${arch}.deb
+dpkg -b $package_dir/contents $package_dir/nixnote-${version}_${arch}.deb
+alien -r $package_dir/nixnote-${version}_${arch}.deb
# Cleanup
rm -rf $package_dir/contents
-Package: nevernote
-Version: 0.98
+Package: nixnote
+Version: 1.0
Section: x11
Priority: optional
Architecture: i386
#!/bin/sh
-version="0.97"
+version="1.0"
arch="i386"
qtversion="4.5.2_01"
package_dir=$(cd `dirname $0` && pwd)
-destination="$package_dir/nevernote/usr/share/nevernote"
+destination="$package_dir/nixnote/usr/share/nixnote"
source_dir="../.."
qtlibs="../../../bitrock/lib"
qtarch="32"
# Cleanup any old stuff
-if [ -e "$package_dir/nevernote-${version}_${arch}.tar.gz" ]
+if [ -e "$package_dir/nixnote-${version}_${arch}.tar.gz" ]
then
- rm $package_dir/nevernote-${version}_${arch}.tar.gz
+ rm $package_dir/nixnote-${version}_${arch}.tar.gz
fi
############################
############################
# Create directories
-mkdir $package_dir/nevernote
-mkdir $package_dir/nevernote/usr/
-mkdir $package_dir/nevernote/usr/share
-mkdir $package_dir/nevernote/usr/share/applications
-mkdir $package_dir/nevernote/usr/share/nevernote
-mkdir $package_dir/contents/usr/share/man
-mkdir $package_dir/contents/usr/bin/
+mkdir $package_dir/nixnote
+mkdir $package_dir/nixnote/usr/
+mkdir $package_dir/nixnote/usr/share
+mkdir $package_dir/nixnote/usr/share/applications
+mkdir $package_dir/nixnote/usr/share/nixnote
+mkdir $package_dir/nixnote/usr/share/man
+mkdir $package_dir/nixnote/usr/bin/
# Copy startup script & images
-cp $source_dir/install.sh $package_dir/nevernote/
-cp $source_dir/*.sh $package_dir/nevernote/usr/share/nevernote/
-cp $source_dir/*.txt $package_dir/nevernote/usr/share/nevernote/
-cp $source_dir/*.html $package_dir/nevernote/usr/share/nevernote/
-cp $source_dir/*.png $package_dir/nevernote/usr/share/nevernote/
-cp $source_dir/nevernote.desktop $package_dir/nevernote/usr/share/applications
-cp $source_dir/nevernote_path.sh $package_dir/contents/usr/bin/nevernote.sh
+cp $source_dir/install.sh $package_dir/nixnote/
+cp $source_dir/*.sh $package_dir/nixnote/usr/share/nixnote/
+cp $source_dir/*.txt $package_dir/nixnote/usr/share/nixnote/
+cp $source_dir/*.html $package_dir/nixnote/usr/share/nixnote/
+cp $source_dir/*.png $package_dir/nixnote/usr/share/nixnote/
+cp $source_dir/nixnote.desktop $package_dir/nixnote/usr/share/applications
+cp $source_dir/nixnote_path.sh $package_dir/nixnote/usr/bin/nixnote.sh
# Copy subdirectories
-cp -r $source_dir/images $package_dir/nevernote/usr/share/nevernote/
-cp -r $source_dir/lib $package_dir/nevernote/usr/share/nevernote/
-cp -r $source_dir/qss $package_dir/nevernote/usr/share/nevernote/
-cp -r $source_dir/spell $package_dir/nevernote/usr/share/nevernote/
-cp -r $source_dir/translations $package_dir/nevernote/usr/share/nevernote/
-cp -r $source_dir/xml $package_dir/nevernote/usr/share/nevernote/
+cp -r $source_dir/images $package_dir/nixnote/usr/share/nixnote/
+cp -r $source_dir/lib $package_dir/nixnote/usr/share/nixnote/
+cp -r $source_dir/qss $package_dir/nixnote/usr/share/nixnote/
+cp -r $source_dir/spell $package_dir/nixnote/usr/share/nixnote/
+cp -r $source_dir/translations $package_dir/nixnote/usr/share/nixnote/
+cp -r $source_dir/xml $package_dir/nixnote/usr/share/nixnote/
# Copy QT libraries.
-cp $qtlibs/qtjambi-linux$qtarch-$qtversion.jar $package_dir/nevernote/usr/share/nevernote/lib/
-cp $qtlibs/qtjambi-linux$qtarch-gcc-$qtversion.jar $package_dir/nevernote/usr/share/nevernote/lib/
+cp $qtlibs/qtjambi-linux$qtarch-$qtversion.jar $package_dir/nixnote/usr/share/nixnote/lib/
+cp $qtlibs/qtjambi-linux$qtarch-gcc-$qtversion.jar $package_dir/nixnote/usr/share/nixnote/lib/
-# Copy NeverNote itself
-cp $qtlibs/../nevernote.jar $package_dir/nevernote/usr/share/nevernote/
+# Copy NixNote itself
+cp $qtlibs/../nixnote.jar $package_dir/nixnote/usr/share/nixnote/
# Reset user permissions
-chown -R root:root $package_dir/nevernote/
+chown -R root:root $package_dir/nixnote/
cd $package_dir
-tar -czf $package_dir/nevernote-${version}_${arch}.tar.gz ./nevernote
+tar -czf $package_dir/nixnote-${version}_${arch}.tar.gz ./nixnote
cd -
# Cleanup
-rm -rf $package_dir/nevernote
+rm -rf $package_dir/nixnote
border-image: none;
image: none;
}
+/*
QTreeView {
- border: 0.1em solid #666;
+ border: 0.05em solid #666;
padding: 0 0 0 0.3em;
height: 1.4em;
background: #B0C4DE;
background: #EFEBE7;
}
+*/
+ QTreeView {
+ border-top: 0.05em solid #666;
+ padding: 0 0 0 0.0em;
+ height: 0.0em;
+ background: #B0C4DE;
+ background: #EFEBE7;
+ }
background: transparent;
}
QHeaderView::section {
- border: 0.1em solid #666;
+ border: 0.05em solid #666;
padding: 0 0 0 0.3em;
height: 1.4em;
min-width: 20px;
-Welcome to NeverNote
+Welcome to NixNote
-This is an open source clone of Evernote designed to run on Linux. It is written in Java so it will also run on Windows & OS-X, but the primary focus has been to try and get a usableenvironment for Linux. While this is designed to work with Evernote, it is in no way connectedwith or supported by Evernote. Any problems you encounter will not be corrected by them and,since this is GPL software, you are using this software at your own risk.
+This is an open source clone of Evernote designed to run on Linux. It is written in Java so it will also run on Windows & OS-X, but the primary focus has been to try and get a usable environment for Linux. While this is designed to work with Evernote, it is in no way connected with or supported by Evernote. Any problems you encounter will not be corrected by them and, since this is GPL software, you are using this software at your own risk.
People have used this with both 64 & 32 bit versions of Linux as well as OpenJDK & Sun's Java and (so far) have not encountered any problems with these different environments. You can download NeverNote from http://www.nevernote.org.
-I've tried to add the ability to customize your NeverNote menu shortcuts,
+I've tried to add the ability to customize your NixNote menu shortcuts,
but I don't want to take the time to setup a new dialog box and all the
junk that entails. So, I chose a text file config instead. Eventually I
may change it so the text file can be edited within the running program, but
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.security.AccessControlException;\r
+\r
+\r
+/**\r
+ * The various settings used to control how a spell checker works are read from here.\r
+ * Includes the COST_* constants that decide how to figure the cost of converting one word to\r
+ * another in the EditDistance class.\r
+ * <p/>\r
+ * Also includes SPELL_* constants that control how misspellings are detected, for example, how to handle\r
+ * mixed-case words, etc.\r
+ *\r
+ * @author aim4min\r
+ * @see EditDistance\r
+ */\r
+public abstract class Configuration {\r
+\r
+  /** used by EditDistance: the cost of having to remove a character <br/>(integer greater than 0) */\r
+  public static final String COST_REMOVE_CHAR = "EDIT_DEL1";\r
+\r
+  /** used by EditDistance: the cost of having to insert a character <br/>(integer greater than 0) */\r
+  public static final String COST_INSERT_CHAR = "EDIT_DEL2";\r
+\r
+  /**\r
+   * used by EditDistance: the cost of having to swap two adjoining characters.\r
+   * For the swap value to ever be used, it should be smaller than the COST_REMOVE_CHAR or COST_INSERT_CHAR values\r
+   * <br/>(integer greater than 0)\r
+   */\r
+  public static final String COST_SWAP_CHARS = "EDIT_SWAP";\r
+\r
+  /**\r
+   * used by EditDistance: the cost of having to change case, for example, from i to I.\r
+   * <br/>(integer greater than 0)\r
+   */\r
+  public static final String COST_CHANGE_CASE = "EDIT_CASE";\r
+\r
+  /**\r
+   * used by EditDistance: the cost of having to substitute one character for another.\r
+   * For the sub value to ever be used, it should be smaller than the COST_REMOVE_CHAR or COST_INSERT_CHAR values\r
+   * <br/>(integer greater than 0)\r
+   */\r
+  public static final String COST_SUBST_CHARS = "EDIT_SUB";\r
+\r
+//    public static final String EDIT_SIMILAR = "EDIT_SIMILAR"; //DMV: these do not seem to be used at all\r
+//    public static final String EDIT_MIN = "EDIT_MIN";\r
+//    public static final String EDIT_MAX = "EDIT_MAX";\r
+\r
+  /** the maximum cost of suggested spelling. Any suggestions that cost more are thrown away\r
+   * <br/>(integer greater than 1)\r
+   */\r
+  public static final String SPELL_THRESHOLD = "SPELL_THRESHOLD";\r
+\r
+  /** words that are all upper case are not spell checked, example: "CIA" <br/>(boolean) */\r
+  public static final String SPELL_IGNOREUPPERCASE = "SPELL_IGNOREUPPERCASE";\r
+  /** words that have mixed case are not spell checked, example: "SpellChecker"<br/>(boolean) */\r
+  public static final String SPELL_IGNOREMIXEDCASE = "SPELL_IGNOREMIXEDCASE";\r
+  /** words that look like an Internet address are not spell checked, example: "http://www.google.com" <br/>(boolean) */\r
+  // NOTE: the key text intentionally differs from the constant name; it is a runtime lookup key and must not be "corrected" here.\r
+  public static final String SPELL_IGNOREINTERNETADDRESSES = "SPELL_IGNOREINTERNETADDRESS";\r
+  /** words that have digits in them are not spell checked, example: "mach5" <br/>(boolean) */\r
+  public static final String SPELL_IGNOREDIGITWORDS = "SPELL_IGNOREDIGITWORDS";\r
+  /** I don't know what this does. It doesn't seem to be used <br/>(boolean) */\r
+  public static final String SPELL_IGNOREMULTIPLEWORDS = "SPELL_IGNOREMULTIPLEWORDS";\r
+  /** the first word of a sentence is expected to start with an upper case letter <br/>(boolean) */\r
+  // NOTE: the misspelling inside the key text is part of the runtime key and is kept as-is.\r
+  public static final String SPELL_IGNORESENTENCECAPITALIZATION = "SPELL_IGNORESENTENCECAPTILIZATION";\r
+\r
+  /**\r
+   * Gets one of the integer constants\r
+   * @param key one of the integer constants defined in this class\r
+   * @return int value of the setting\r
+   */\r
+  public abstract int getInteger(String key);\r
+\r
+  /**\r
+   * Gets one of the boolean constants\r
+   * @param key one of the boolean constants defined in this class\r
+   * @return boolean value of the setting\r
+   */\r
+  public abstract boolean getBoolean(String key);\r
+\r
+  /**\r
+   * Sets one of the integer constants\r
+   * @param key one of the integer constants defined in this class\r
+   * @param value new integer value of the constant\r
+   */\r
+  public abstract void setInteger(String key, int value);\r
+\r
+  /**\r
+   * Sets one of the boolean constants\r
+   * @param key one of the boolean constants defined in this class\r
+   * @param value new boolean value of this setting\r
+   */\r
+  public abstract void setBoolean(String key, boolean value);\r
+\r
+  /**\r
+   * Gets a new default Configuration. If the system property <code>jazzy.config</code>\r
+   * names a class, that class is tried first; otherwise, or if the property cannot be\r
+   * read, the built-in default is used.\r
+   * @return Configuration\r
+   */\r
+  public static final Configuration getConfiguration() {\r
+    try {\r
+      String config = System.getProperty("jazzy.config"); // added by bd\r
+      if (config != null && config.length() > 0)\r
+        return getConfiguration(config);\r
+    } catch (SecurityException e) {\r
+      // Reading the property was refused (AccessControlException is a SecurityException);\r
+      // report it and continue with the default configuration.\r
+      e.printStackTrace();\r
+    }\r
+    return getConfiguration(null);\r
+  }\r
+\r
+  /**\r
+   * Returns a new instance of a Configuration class. When the class name is null or empty,\r
+   * or the named class cannot be found, cannot be instantiated, or is not a Configuration,\r
+   * a new PropertyConfiguration is returned instead.\r
+   * @param className - the class to return, must be based on Configuration\r
+   * @return Configuration\r
+   */\r
+  public static final Configuration getConfiguration(String className) {\r
+\r
+    if (className != null && className.length() > 0) {\r
+      try {\r
+        return (Configuration) Class.forName(className).newInstance();\r
+      } catch (InstantiationException e) {\r
+        // fall through to the default below\r
+      } catch (IllegalAccessException e) {\r
+        // fall through to the default below\r
+      } catch (ClassNotFoundException e) {\r
+        // fall through to the default below\r
+      } catch (ClassCastException e) {\r
+        // the named class exists but is not a Configuration: treat it like any other\r
+        // unusable class name instead of letting the exception escape\r
+      }\r
+    }\r
+    return new PropertyConfiguration();\r
+  }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+\r
+/**\r
+ * A phonetic encoding algorithm that takes an English word and computes a phonetic version of it. This\r
+ * allows for phonetic matches in a spell checker. This class is a port of the C++ DoubleMetaphone() class,\r
+ * which was intended to return two possible phonetic translations for certain words, although the Java version\r
+ * only seems to be concerned with one, making the "double" part erroneous.\r
+ * <br>\r
+ * source code for the original C++ can be found\r
+ * here: <a href="http://aspell.sourceforge.net/metaphone/"/>http://aspell.sourceforge.net/metaphone/</a>\r
+ * DoubleMetaphone does some processing, such as uppercasing, on the input string first to normalize it. Then, to\r
+ * create the key, the function traverses the input string in a while loop, sending successive characters into a giant\r
+ * switch statement. Before determining the appropriate pronunciation, the algorithm considers the context\r
+ * surrounding each character within the input string.\r
+ * <p>\r
+ * Things that were changed:\r
+ * <br/>The alternate flag could be set to true but was never checked so why bother with it. REMOVED\r
+ * <br/>Why was this class serializable?\r
+ * <br/>The primary, in, length and last variables could be initialized and local to the\r
+ * process method and references passed around the appropriate methods. As such there are\r
+ * no class variables and this class becomes firstly threadsafe and secondly could be static final.\r
+ * <br/>The function call SlavoGermaic was called repeatedly in the process function, it is now only called once.\r
+ *\r
+ */\r
+public class DoubleMeta implements Transformator {\r
+\r
+  /**\r
+   * The replace list is used in the getSuggestions method.\r
+   * All of the letters in the misspelled word are replaced with the characters from\r
+   * this list to try and generate more suggestions, which implies l*n tries,\r
+   * if l is the size of the string, and n is the size of this list.\r
+   *\r
+   * In addition to that, each of these letters is added to the misspelled word.\r
+   */\r
+  private static final char[] replaceList = {'A', 'B', 'X', 'S', 'K', 'J', 'T', 'F', 'H', 'L', 'M', 'N', 'P', 'R', '0'};\r
+\r
+\r
+  // Lookup tables for stringAt(). Each table ends with an empty-string entry; the\r
+  // numbering follows the order in which the tables are consulted in transform().\r
+  private static final String[] myList = {"GN", "KN", "PN", "WR", "PS", ""};\r
+  private static final String[] list1 = {"ACH", ""};\r
+  private static final String[] list2 = {"BACHER", "MACHER", ""};\r
+  private static final String[] list3 = {"CAESAR", ""};\r
+  private static final String[] list4 = {"CHIA", ""};\r
+  private static final String[] list5 = {"CH", ""};\r
+  private static final String[] list6 = {"CHAE", ""};\r
+  private static final String[] list7 = {"HARAC", "HARIS", ""};\r
+  private static final String[] list8 = {"HOR", "HYM", "HIA", "HEM", ""};\r
+  private static final String[] list9 = {"CHORE", ""};\r
+  private static final String[] list10 = {"VAN ", "VON ", ""};\r
+  private static final String[] list11 = {"SCH", ""};\r
+  private static final String[] list12 = {"ORCHES", "ARCHIT", "ORCHID", ""};\r
+  private static final String[] list13 = {"T", "S", ""};\r
+  private static final String[] list14 = {"A", "O", "U", "E", ""};\r
+  private static final String[] list15 = {"L", "R", "N", "M", "B", "H", "F", "V", "W", " ", ""};\r
+  private static final String[] list16 = {"MC", ""};\r
+  private static final String[] list17 = {"CZ", ""};\r
+  private static final String[] list18 = {"WICZ", ""};\r
+  private static final String[] list19 = {"CIA", ""};\r
+  private static final String[] list20 = {"CC", ""};\r
+  private static final String[] list21 = {"I", "E", "H", ""};\r
+  private static final String[] list22 = {"HU", ""};\r
+  private static final String[] list23 = {"UCCEE", "UCCES", ""};\r
+  private static final String[] list24 = {"CK", "CG", "CQ", ""};\r
+  private static final String[] list25 = {"CI", "CE", "CY", ""};\r
+// DMV: used by the original code which returned two phonetic code, but not the current code\r
+//    private static final String[] list26 = {\r
+//        "CIO", "CIE", "CIA", ""\r
+//    };\r
+  private static final String[] list27 = {" C", " Q", " G", ""};\r
+  private static final String[] list28 = {"C", "K", "Q", ""};\r
+  private static final String[] list29 = {"CE", "CI", ""};\r
+  private static final String[] list30 = {"DG", ""};\r
+  private static final String[] list31 = {"I", "E", "Y", ""};\r
+  private static final String[] list32 = {"DT", "DD", ""};\r
+  private static final String[] list33 = {"B", "H", "D", ""};\r
+  private static final String[] list34 = {"B", "H", "D", ""};\r
+  private static final String[] list35 = {"B", "H", ""};\r
+  private static final String[] list36 = {"C", "G", "L", "R", "T", ""};\r
+  private static final String[] list37 = {"EY", ""};\r
+  private static final String[] list38 = {"LI", ""};\r
+  private static final String[] list39 = {"ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER", ""};\r
+  private static final String[] list40 = {"ER", ""};\r
+  private static final String[] list41 = {"DANGER", "RANGER", "MANGER", ""};\r
+  private static final String[] list42 = {"E", "I", ""};\r
+  private static final String[] list43 = {"RGY", "OGY", ""};\r
+  private static final String[] list44 = {"E", "I", "Y", ""};\r
+  private static final String[] list45 = {"AGGI", "OGGI", ""};\r
+  private static final String[] list46 = {"VAN ", "VON ", ""};\r
+  private static final String[] list47 = {"SCH", ""};\r
+  private static final String[] list48 = {"ET", ""};\r
+\r
+// DMV: used by the original code which returned two phonetic code, but not the current code\r
+//    private static final String[] list49 = {\r
+//        "IER ", ""\r
+//    };\r
+  private static final String[] list50 = {"JOSE", ""};\r
+  private static final String[] list51 = {"SAN ", ""};\r
+  private static final String[] list52 = {"SAN ", ""};\r
+  private static final String[] list53 = {"JOSE", ""};\r
+  private static final String[] list54 = {"L", "T", "K", "S", "N", "M", "B", "Z", ""};\r
+  private static final String[] list55 = {"S", "K", "L", ""};\r
+  private static final String[] list56 = {"ILLO", "ILLA", "ALLE", ""};\r
+  private static final String[] list57 = {"AS", "OS", ""};\r
+  private static final String[] list58 = {"A", "O", ""};\r
+  private static final String[] list59 = {"ALLE", ""};\r
+  private static final String[] list60 = {"UMB", ""};\r
+  private static final String[] list61 = {"ER", ""};\r
+  private static final String[] list62 = {"P", "B", ""};\r
+  private static final String[] list63 = {"IE", ""};\r
+  private static final String[] list64 = {"ME", "MA", ""};\r
+  private static final String[] list65 = {"ISL", "YSL", ""};\r
+  private static final String[] list66 = {"SUGAR", ""};\r
+  private static final String[] list67 = {"SH", ""};\r
+  private static final String[] list68 = {"HEIM", "HOEK", "HOLM", "HOLZ", ""};\r
+  private static final String[] list69 = {"SIO", "SIA", ""};\r
+  private static final String[] list70 = {"SIAN", ""};\r
+  private static final String[] list71 = {"M", "N", "L", "W", ""};\r
+  private static final String[] list72 = {"Z", ""};\r
+  private static final String[] list73 = {"Z", ""};\r
+  private static final String[] list74 = {"SC", ""};\r
+  private static final String[] list75 = {"OO", "ER", "EN", "UY", "ED", "EM", ""};\r
+  private static final String[] list76 = {"ER", "EN", ""};\r
+  private static final String[] list77 = {"I", "E", "Y", ""};\r
+  private static final String[] list78 = {"AI", "OI", ""};\r
+  private static final String[] list79 = {"S", "Z", ""};\r
+  private static final String[] list80 = {"TION", ""};\r
+  private static final String[] list81 = {"TIA", "TCH", ""};\r
+  private static final String[] list82 = {"TH", ""};\r
+  private static final String[] list83 = {"TTH", ""};\r
+  private static final String[] list84 = {"OM", "AM", ""};\r
+  private static final String[] list85 = {"VAN ", "VON ", ""};\r
+  private static final String[] list86 = {"SCH", ""};\r
+  private static final String[] list87 = {"T", "D", ""};\r
+  private static final String[] list88 = {"WR", ""};\r
+  private static final String[] list89 = {"WH", ""};\r
+  private static final String[] list90 = {"EWSKI", "EWSKY", "OWSKI", "OWSKY", ""};\r
+  private static final String[] list91 = {"SCH", ""};\r
+  private static final String[] list92 = {"WICZ", "WITZ", ""};\r
+  private static final String[] list93 = {"IAU", "EAU", ""};\r
+  private static final String[] list94 = {"AU", "OU", ""};\r
+  private static final String[] list95 = {"C", "X", ""};\r
+\r
+// DMV: used by the original code which returned two phonetic code, but not the current code\r
+//    private static final String[] list96 = {\r
+//        "ZO", "ZI", "ZA", ""\r
+//    };\r
+\r
+  /**\r
+   * Tells whether the (upper-cased) input contains one of the letter groups\r
+   * "W", "K", "CZ" or "WITZ".\r
+   * @param in the upper-cased word being encoded\r
+   * @return true if any of those letter groups occurs anywhere in the input\r
+   */\r
+  private final static boolean SlavoGermanic(String in) {\r
+    return (in.indexOf("W") > -1) || (in.indexOf("K") > -1) || (in.indexOf("CZ") > -1) || (in.indexOf("WITZ") > -1);\r
+  }\r
+\r
+  /**\r
+   * Appends a string to the phonetic code being built; a null string is ignored.\r
+   * @param primary the phonetic code built so far\r
+   * @param main the text to append, may be null\r
+   */\r
+  private final static void MetaphAdd(StringBuffer primary, String main) {\r
+    if (main != null) {\r
+      primary.append(main);\r
+    }\r
+  }\r
+\r
+  /**\r
+   * Appends a single character to the phonetic code being built.\r
+   * @param primary the phonetic code built so far\r
+   * @param main the character to append\r
+   */\r
+  private final static void MetaphAdd(StringBuffer primary, char main) {\r
+    primary.append(main);\r
+  }\r
+\r
+  /**\r
+   * Tells whether the character at the given position is one of A, E, I, O, U or Y.\r
+   * @param in the upper-cased word being encoded\r
+   * @param at the position to test; positions outside 0..length-1 are never vowels\r
+   * @param length the length to check the position against\r
+   * @return true if the position is in range and holds a vowel\r
+   */\r
+  private final static boolean isVowel(String in, int at, int length) {\r
+    if ((at < 0) || (at >= length))\r
+      return false;\r
+    char it = in.charAt(at);\r
+    if ((it == 'A') || (it == 'E') || (it == 'I') || (it == 'O') || (it == 'U') || (it == 'Y'))\r
+      return true;\r
+    return false;\r
+  }\r
+\r
+  /**\r
+   * Tells whether the <code>length</code> characters of <code>string</code> starting at\r
+   * <code>start</code> equal one of the entries of <code>list</code>.\r
+   * @param string the text to look into\r
+   * @param start index of the first character of the window\r
+   * @param length number of characters in the window\r
+   * @param list the candidate strings\r
+   * @return true if the window lies inside the string and matches an entry; false if the\r
+   *         window starts before the string, starts at or after its end, or extends past its end\r
+   */\r
+  private final static boolean stringAt(String string, int start, int length, String[] list) {\r
+    if ((start < 0) || (start >= string.length()) || list.length == 0)\r
+      return false;\r
+    // A window that runs past the end of the string cannot be extracted; report "no match"\r
+    // instead of letting substring() throw StringIndexOutOfBoundsException.\r
+    if (start + length > string.length())\r
+      return false;\r
+    String substr = string.substring(start, start + length);\r
+    for (int i = 0; i < list.length; i++) {\r
+      if (list[i].equals(substr))\r
+        return true;\r
+    }\r
+    return false;\r
+  }\r
+\r
+  /**\r
+   * Take the given word, and return the best phonetic hash for it.\r
+   * Vowels are minimized as much as possible, and consonants\r
+   * that have similar sounds are converted to the same consonant\r
+   * for example, 'v' and 'f' are both converted to 'f'\r
+   * @param word the text to transform\r
+   * @return the result of the phonetic transformation\r
+   */\r
+  public final String transform(String word) {\r
+    StringBuffer primary = new StringBuffer(word.length() + 5);\r
+    // Upper-case with a fixed locale so the code produced for a word does not depend on the\r
+    // JVM's default locale (e.g. the Turkish dotted/dotless i mapping). The appended padding\r
+    // lets the look-ahead tests below read one position past the last letter.\r
+    String in = word.toUpperCase(java.util.Locale.ENGLISH) + " ";\r
+    int current = 0;\r
+    // NOTE: length and last are measured on the padded string, not on the bare word.\r
+    int length = in.length();\r
+    if (length < 1)\r
+      return "";\r
+    int last = length - 1;\r
+    boolean isSlavoGermaic = SlavoGermanic(in);\r
+    // Skip the first letter of the initial pairs listed in myList (GN, KN, PN, WR, PS).\r
+    if (stringAt(in, 0, 2, myList))\r
+      current += 1;\r
+    // An initial X is encoded as S.\r
+    if (in.charAt(0) == 'X') {\r
+      MetaphAdd(primary, 'S');\r
+      current += 1;\r
+    }\r
+    while (current < length) {\r
+      switch (in.charAt(current)) {\r
+        case 'A':\r
+        case 'E':\r
+        case 'I':\r
+        case 'O':\r
+        case 'U':\r
+        case 'Y':\r
+          // Vowels only contribute (as 'A') when they start the word.\r
+          if (current == 0)\r
+            MetaphAdd(primary, 'A');\r
+          current += 1;\r
+          break;\r
+        case 'B':\r
+          MetaphAdd(primary, 'P');\r
+          if (in.charAt(current + 1) == 'B')\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          break;\r
+        case '\u00C7':\r
+          MetaphAdd(primary, 'S');\r
+          current += 1;\r
+          break;\r
+        case 'C':\r
+          if ((current > 1) && !isVowel(in, current - 2, length) && stringAt(in, (current - 1), 3, list1) && (in.charAt(current + 2) != 'I') && (in.charAt(current + 2) != 'E') || stringAt(in, (current - 2), 6, list2)) {\r
+            MetaphAdd(primary, 'K');\r
+            current += 2;\r
+            break;\r
+          }\r
+          if ((current == 0) && stringAt(in, current, 6, list3)) {\r
+            MetaphAdd(primary, 'S');\r
+            current += 2;\r
+            break;\r
+          }\r
+          if (stringAt(in, current, 4, list4)) {\r
+            MetaphAdd(primary, 'K');\r
+            current += 2;\r
+            break;\r
+          }\r
+          if (stringAt(in, current, 2, list5)) {\r
+            if ((current > 0) && stringAt(in, current, 4, list6)) {\r
+              MetaphAdd(primary, 'K');\r
+              current += 2;\r
+              break;\r
+            }\r
+            if ((current == 0) && stringAt(in, (current + 1), 5, list7) || stringAt(in, current + 1, 3, list8) && !stringAt(in, 0, 5, list9)) {\r
+              MetaphAdd(primary, 'K');\r
+              current += 2;\r
+              break;\r
+            }\r
+            if (stringAt(in, 0, 4, list10) || stringAt(in, 0, 3, list11) || stringAt(in, current - 2, 6, list12) || stringAt(in, current + 2, 1, list13) || (stringAt(in, current - 1, 1, list14) || (current == 0)) && stringAt(in, current + 2, 1, list15)) {\r
+              MetaphAdd(primary, 'K');\r
+            } else {\r
+              if (current > 0) {\r
+                if (stringAt(in, 0, 2, list16))\r
+                  MetaphAdd(primary, 'K');\r
+                else\r
+                  MetaphAdd(primary, 'X');\r
+              } else {\r
+                MetaphAdd(primary, 'X');\r
+              }\r
+            }\r
+            current += 2;\r
+            break;\r
+          }\r
+          if (stringAt(in, current, 2, list17) && !stringAt(in, current, 4, list18)) {\r
+            MetaphAdd(primary, 'S');\r
+            current += 2;\r
+            break;\r
+          }\r
+          if (stringAt(in, current, 2, list19)) {\r
+            MetaphAdd(primary, 'X');\r
+            current += 2;\r
+            break;\r
+          }\r
+          if (stringAt(in, current, 2, list20) && !((current == 1) && in.charAt(0) == 'M')) {\r
+            if (stringAt(in, current + 2, 1, list21) && !stringAt(in, current + 2, 2, list22)) {\r
+              if (((current == 1) && (in.charAt(current - 1) == 'A')) || stringAt(in, (current - 1), 5, list23))\r
+                MetaphAdd(primary, "KS");\r
+              else\r
+                MetaphAdd(primary, 'X');\r
+              current += 3;\r
+              break;\r
+            } else {\r
+              MetaphAdd(primary, 'K');\r
+              current += 2;\r
+              break;\r
+            }\r
+          }\r
+          if (stringAt(in, current, 2, list24)) {\r
+            MetaphAdd(primary, 'K');\r
+            current += 2;\r
+            break;\r
+          } else if (stringAt(in, current, 2, list25)) {\r
+            MetaphAdd(primary, 'S');\r
+            current += 2;\r
+            break;\r
+          }\r
+\r
+          MetaphAdd(primary, 'K');\r
+          if (stringAt(in, current + 1, 2, list27))\r
+            current += 3;\r
+          else if (stringAt(in, current + 1, 1, list28) && !stringAt(in, current + 1, 2, list29))\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          break;\r
+        case 'D':\r
+          if (stringAt(in, current, 2, list30)) {\r
+            if (stringAt(in, current + 2, 1, list31)) {\r
+              MetaphAdd(primary, 'J');\r
+              current += 3;\r
+              break;\r
+            } else {\r
+              MetaphAdd(primary, "TK");\r
+              current += 2;\r
+              break;\r
+            }\r
+          }\r
+          MetaphAdd(primary, 'T');\r
+          if (stringAt(in, current, 2, list32)) {\r
+            current += 2;\r
+          } else {\r
+            current += 1;\r
+          }\r
+          break;\r
+        case 'F':\r
+          if (in.charAt(current + 1) == 'F')\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          MetaphAdd(primary, 'F');\r
+          break;\r
+        case 'G':\r
+          if (in.charAt(current + 1) == 'H') {\r
+            if ((current > 0) && !isVowel(in, current - 1, length)) {\r
+              MetaphAdd(primary, 'K');\r
+              current += 2;\r
+              break;\r
+            }\r
+            if (current < 3) {\r
+              if (current == 0) {\r
+                if (in.charAt(current + 2) == 'I')\r
+                  MetaphAdd(primary, 'J');\r
+                else\r
+                  MetaphAdd(primary, 'K');\r
+                current += 2;\r
+                break;\r
+              }\r
+            }\r
+            if ((current > 1) && stringAt(in, current - 2, 1, list33) || ((current > 2) && stringAt(in, current - 3, 1, list34)) || ((current > 3) && stringAt(in, current - 4, 1, list35))) {\r
+              current += 2;\r
+              break;\r
+            } else {\r
+              if ((current > 2) && (in.charAt(current - 1) == 'U') && stringAt(in, current - 3, 1, list36)) {\r
+                MetaphAdd(primary, 'F');\r
+              } else {\r
+                if ((current > 0) && (in.charAt(current - 1) != 'I'))\r
+                  MetaphAdd(primary, 'K');\r
+              }\r
+              current += 2;\r
+              break;\r
+            }\r
+          }\r
+          if (in.charAt(current + 1) == 'N') {\r
+            if ((current == 1) && isVowel(in, 0, length) && !isSlavoGermaic) {\r
+              MetaphAdd(primary, "KN");\r
+            } else {\r
+              if (!stringAt(in, current + 2, 2, list37) && (in.charAt(current + 1) != 'Y') && !isSlavoGermaic) {\r
+                MetaphAdd(primary, "N");\r
+              } else {\r
+                MetaphAdd(primary, "KN");\r
+              }\r
+            }\r
+            current += 2;\r
+            break;\r
+          }\r
+          if (stringAt(in, current + 1, 2, list38) && !isSlavoGermaic) {\r
+            MetaphAdd(primary, "KL");\r
+            current += 2;\r
+            break;\r
+          }\r
+          if ((current == 0) && ((in.charAt(current + 1) == 'Y') || stringAt(in, current + 1, 2, list39))) {\r
+            MetaphAdd(primary, 'K');\r
+            current += 2;\r
+            break;\r
+          }\r
+          if ((stringAt(in, current + 1, 2, list40) || (in.charAt(current + 1) == 'Y')) && !stringAt(in, 0, 6, list41) && !stringAt(in, current - 1, 1, list42) && !stringAt(in, current - 1, 3, list43)) {\r
+            MetaphAdd(primary, 'K');\r
+            current += 2;\r
+            break;\r
+          }\r
+          if (stringAt(in, current + 1, 1, list44) || stringAt(in, current - 1, 4, list45)) {\r
+            if (stringAt(in, 0, 4, list46) || stringAt(in, 0, 3, list47) || stringAt(in, current + 1, 2, list48)) {\r
+              MetaphAdd(primary, 'K');\r
+            } else {\r
+              MetaphAdd(primary, 'J');\r
+            }\r
+            current += 2;\r
+            break;\r
+          }\r
+          if (in.charAt(current + 1) == 'G')\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          MetaphAdd(primary, 'K');\r
+          break;\r
+        case 'H':\r
+          // H is kept only at the start of the word or after a vowel, and only before a vowel.\r
+          if (((current == 0) || isVowel(in, current - 1, length)) && isVowel(in, current + 1, length)) {\r
+            MetaphAdd(primary, 'H');\r
+            current += 2;\r
+          } else {\r
+            current += 1;\r
+          }\r
+          break;\r
+        case 'J':\r
+          if (stringAt(in, current, 4, list50) || stringAt(in, 0, 4, list51)) {\r
+            if ((current == 0) && (in.charAt(current + 4) == ' ') || stringAt(in, 0, 4, list52)) {\r
+              MetaphAdd(primary, 'H');\r
+            } else {\r
+              MetaphAdd(primary, 'J');\r
+            }\r
+            current += 1;\r
+            break;\r
+          }\r
+          if ((current == 0) && !stringAt(in, current, 4, list53)) {\r
+            MetaphAdd(primary, 'J');\r
+          } else {\r
+            if (isVowel(in, current - 1, length) && !isSlavoGermaic && ((in.charAt(current + 1) == 'A') || in.charAt(current + 1) == 'O')) {\r
+              MetaphAdd(primary, 'J');\r
+            } else {\r
+              if (current == last) {\r
+                MetaphAdd(primary, 'J');\r
+              } else {\r
+                if (!stringAt(in, current + 1, 1, list54) && !stringAt(in, current - 1, 1, list55)) {\r
+                  MetaphAdd(primary, 'J');\r
+                }\r
+              }\r
+            }\r
+          }\r
+          if (in.charAt(current + 1) == 'J')\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          break;\r
+        case 'K':\r
+          if (in.charAt(current + 1) == 'K')\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          MetaphAdd(primary, 'K');\r
+          break;\r
+        case 'L':\r
+          if (in.charAt(current + 1) == 'L') {\r
+            if (((current == (length - 3)) && stringAt(in, current - 1, 4, list56)) || ((stringAt(in, last - 1, 2, list57) || stringAt(in, last, 1, list58)) && stringAt(in, current - 1, 4, list59))) {\r
+              MetaphAdd(primary, 'L');\r
+              current += 2;\r
+              break;\r
+            }\r
+            current += 2;\r
+          } else\r
+            current += 1;\r
+          MetaphAdd(primary, 'L');\r
+          break;\r
+        case 'M':\r
+          if ((stringAt(in, current - 1, 3, list60) && (((current + 1) == last) || stringAt(in, current + 2, 2, list61))) || (in.charAt(current + 1) == 'M'))\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          MetaphAdd(primary, 'M');\r
+          break;\r
+        case 'N':\r
+          if (in.charAt(current + 1) == 'N')\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          MetaphAdd(primary, 'N');\r
+          break;\r
+        case '\u00D1':\r
+          current += 1;\r
+          MetaphAdd(primary, 'N');\r
+          break;\r
+        case 'P':\r
+          if (in.charAt(current + 1) == 'N') {\r
+            MetaphAdd(primary, 'F');\r
+            current += 2;\r
+            break;\r
+          }\r
+          if (stringAt(in, current + 1, 1, list62))\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          MetaphAdd(primary, 'P');\r
+          break;\r
+        case 'Q':\r
+          if (in.charAt(current + 1) == 'Q')\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          MetaphAdd(primary, 'K');\r
+          break;\r
+        case 'R':\r
+          if ((current == last) && !isSlavoGermaic && stringAt(in, current - 2, 2, list63) && !stringAt(in, current - 4, 2, list64)) {\r
+            // nothing is added to the code in this case\r
+          } else\r
+            MetaphAdd(primary, 'R');\r
+          if (in.charAt(current + 1) == 'R')\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          break;\r
+        case 'S':\r
+          if (stringAt(in, current - 1, 3, list65)) {\r
+            current += 1;\r
+            break;\r
+          }\r
+          if ((current == 0) && stringAt(in, current, 5, list66)) {\r
+            MetaphAdd(primary, 'X');\r
+            current += 1;\r
+            break;\r
+          }\r
+          if (stringAt(in, current, 2, list67)) {\r
+            if (stringAt(in, current + 1, 4, list68))\r
+              MetaphAdd(primary, 'S');\r
+            else\r
+              MetaphAdd(primary, 'X');\r
+            current += 2;\r
+            break;\r
+          }\r
+          if (stringAt(in, current, 3, list69) || stringAt(in, current, 4, list70)) {\r
+            MetaphAdd(primary, 'S');\r
+            current += 3;\r
+            break;\r
+          }\r
+          if (((current == 0) && stringAt(in, current + 1, 1, list71)) || stringAt(in, current + 1, 1, list72)) {\r
+            MetaphAdd(primary, 'S');\r
+            if (stringAt(in, current + 1, 1, list73))\r
+              current += 2;\r
+            else\r
+              current += 1;\r
+            break;\r
+          }\r
+          if (stringAt(in, current, 2, list74)) {\r
+            // Both branches of the inner if/else leave the switch, so the code after\r
+            // this block is only reached when the letter after "SC" is not 'H'.\r
+            if (in.charAt(current + 2) == 'H') {\r
+              if (stringAt(in, current + 3, 2, list75)) {\r
+                if (stringAt(in, current + 3, 2, list76)) {\r
+                  MetaphAdd(primary, "X");\r
+                } else {\r
+                  MetaphAdd(primary, "SK");\r
+                }\r
+                current += 3;\r
+                break;\r
+              } else {\r
+                MetaphAdd(primary, 'X');\r
+                current += 3;\r
+                break;\r
+              }\r
+            }\r
+            if (stringAt(in, current + 2, 1, list77)) {\r
+              MetaphAdd(primary, 'S');\r
+              current += 3;\r
+              break;\r
+            }\r
+            MetaphAdd(primary, "SK");\r
+            current += 3;\r
+            break;\r
+          }\r
+          if ((current == last) && stringAt(in, current - 2, 2, list78)) {\r
+            // nothing is added to the code in this case\r
+          } else\r
+            MetaphAdd(primary, 'S');\r
+          if (stringAt(in, current + 1, 1, list79))\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          break;\r
+        case 'T':\r
+          if (stringAt(in, current, 4, list80)) {\r
+            MetaphAdd(primary, 'X');\r
+            current += 3;\r
+            break;\r
+          }\r
+          if (stringAt(in, current, 3, list81)) {\r
+            MetaphAdd(primary, 'X');\r
+            current += 3;\r
+            break;\r
+          }\r
+          if (stringAt(in, current, 2, list82) || stringAt(in, current, 3, list83)) {\r
+            if (stringAt(in, (current + 2), 2, list84) || stringAt(in, 0, 4, list85) || stringAt(in, 0, 3, list86)) {\r
+              MetaphAdd(primary, 'T');\r
+            } else {\r
+              MetaphAdd(primary, '0');\r
+            }\r
+            current += 2;\r
+            break;\r
+          }\r
+          if (stringAt(in, current + 1, 1, list87)) {\r
+            current += 2;\r
+          } else\r
+            current += 1;\r
+          MetaphAdd(primary, 'T');\r
+          break;\r
+        case 'V':\r
+          if (in.charAt(current + 1) == 'V')\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          MetaphAdd(primary, 'F');\r
+          break;\r
+        case 'W':\r
+          if (stringAt(in, current, 2, list88)) {\r
+            MetaphAdd(primary, 'R');\r
+            current += 2;\r
+            break;\r
+          }\r
+          if ((current == 0) && (isVowel(in, current + 1, length) || stringAt(in, current, 2, list89))) {\r
+            MetaphAdd(primary, 'A');\r
+          }\r
+          if (((current == last) && isVowel(in, current - 1, length)) || stringAt(in, current - 1, 5, list90) || stringAt(in, 0, 3, list91)) {\r
+            MetaphAdd(primary, 'F');\r
+            current += 1;\r
+            break;\r
+          }\r
+          if (stringAt(in, current, 4, list92)) {\r
+            MetaphAdd(primary, "TS");\r
+            current += 4;\r
+            break;\r
+          }\r
+          current += 1;\r
+          break;\r
+        case 'X':\r
+          if (!((current == last) && (stringAt(in, current - 3, 3, list93) || stringAt(in, current - 2, 2, list94))))\r
+            MetaphAdd(primary, "KS");\r
+          if (stringAt(in, current + 1, 1, list95))\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          break;\r
+        case 'Z':\r
+          if (in.charAt(current + 1) == 'H') {\r
+            MetaphAdd(primary, 'J');\r
+            current += 2;\r
+            break;\r
+          } else {\r
+            MetaphAdd(primary, 'S');\r
+          }\r
+          if (in.charAt(current + 1) == 'Z')\r
+            current += 2;\r
+          else\r
+            current += 1;\r
+          break;\r
+        default:\r
+          // Any other character (including the padding) contributes nothing.\r
+          current += 1;\r
+      }\r
+    }\r
+    return primary.toString();\r
+  }\r
+\r
+  /**\r
+   * Returns a copy of the replace list, so callers cannot alter the shared table.\r
+   * @see com.swabunga.spell.engine.Transformator#getReplaceList()\r
+   */\r
+  public char[] getReplaceList() {\r
+    return (char[]) replaceList.clone();\r
+  }\r
+}\r
+\r
+\r
+\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.BufferedReader;\r
+import java.io.InputStreamReader;\r
+\r
+/**\r
+ * This class is based on Levenshtein Distance algorithms, and it calculates how similar two words are.\r
+ * If the words are identical, then the distance is 0. The more that the words have in common, the lower the distance value.\r
+ * The distance value is based on how many operations it takes to get from one word to the other. Possible operations are\r
+ * swapping characters, adding a character, deleting a character, and substituting a character.\r
+ * The resulting distance is the sum of these operations weighted by their cost, which can be set in the Configuration object.\r
+ * When there are multiple ways to convert one word into the other, the lowest cost distance is returned.\r
+ * <br/>\r
+ * Another way to think about this: what are the cheapest operations that would have to be done on the "original" word to end up\r
+ * with the "similar" word? Each operation has a cost, and these are added up to get the distance.\r
+ * <br/>\r
+ *\r
+ * @see com.swabunga.spell.engine.Configuration#COST_REMOVE_CHAR\r
+ * @see com.swabunga.spell.engine.Configuration#COST_INSERT_CHAR\r
+ * @see com.swabunga.spell.engine.Configuration#COST_SUBST_CHARS\r
+ * @see com.swabunga.spell.engine.Configuration#COST_SWAP_CHARS\r
+ *\r
+ */\r
+\r
+public class EditDistance {\r
+\r
+ /**\r
+ * The spell engine configuration, obtained once from\r
+ * {@link Configuration#getConfiguration()} when this class is initialized.\r
+ * All operation weights below are read from it.\r
+ */\r
+ public static Configuration config = Configuration.getConfiguration();\r
+\r
+ /**\r
+ * Weight read from {@link Configuration#COST_REMOVE_CHAR}.\r
+ * Like all weights below it is read once, at class initialization.\r
+ */\r
+ static final int costOfDeletingSourceCharacter = config.getInteger(Configuration.COST_REMOVE_CHAR);\r
+ /** Weight read from {@link Configuration#COST_INSERT_CHAR}. */\r
+ static final int costOfInsertingSourceCharacter = config.getInteger(Configuration.COST_INSERT_CHAR);\r
+ /** Weight read from {@link Configuration#COST_SUBST_CHARS}; charged when two characters differ. */\r
+ static final int costOfSubstitutingLetters = config.getInteger(Configuration.COST_SUBST_CHARS);\r
+ /** Weight read from {@link Configuration#COST_SWAP_CHARS}; charged when two adjacent characters are transposed. */\r
+ static final int costOfSwappingLetters = config.getInteger(Configuration.COST_SWAP_CHARS);\r
+ /** Weight read from {@link Configuration#COST_CHANGE_CASE}; charged when two characters differ only by case. */\r
+ static final int costOfChangingCase = config.getInteger(Configuration.COST_CHANGE_CASE); \r
+\r
+ /**\r
+  * Computes the weighted edit distance between two words, letting the\r
+  * three-argument overload allocate its own cost matrix.\r
+  *\r
+  * @param word the first word of the comparison\r
+  * @param similar the second word of the comparison\r
+  * @return the cost of the cheapest sequence of edit operations that turns\r
+  *         one word into the other; 0 when the two words are identical\r
+  */\r
+ public static final int getDistance(String word, String similar) {\r
+   // A null matrix makes the overload allocate a fresh one of the right size.\r
+   final int[][] noScratchMatrix = null;\r
+   return getDistance(word, similar, noScratchMatrix);\r
+ }\r
+ \r
+ /**\r
+ * Evaluates the weighted edit distance between two words by filling a\r
+ * cost matrix in which cell <code>[i][j]</code> holds the distance between the first\r
+ * <code>i</code> characters of <code>word</code> and the first <code>j</code> characters of\r
+ * <code>similar</code>.\r
+ * \r
+ * @param word One word to evaluate\r
+ * @param similar The other word to evaluate\r
+ * @param matrix an optional scratch matrix. It is used as-is when it has at least\r
+ * <code>word.length() + 1</code> rows and its first row has at least\r
+ * <code>similar.length() + 1</code> columns; otherwise (or when <code>null</code>)\r
+ * a new matrix is allocated locally. The caller's reference is not updated\r
+ * in that case.\r
+ * @return a number representing how easy or complex it is to transform one\r
+ * word into a similar one; 0 when both words are identical.\r
+ */\r
+ public static final int getDistance(String word, String similar, int[][] matrix) {\r
+ /* JMH Again, there is no need to have a global class matrix variable\r
+ * in this class. I have removed it and made the getDistance static final\r
+ * DMV: I refactored this method to make it more efficient, more readable, and simpler.\r
+ * I also fixed a bug with how the distance was being calculated. You could get wrong\r
+ * distances if you compared ("abc" to "ab") depending on what you had setup your\r
+ * COST_REMOVE_CHAR and EDIT_INSERTION_COST values to - that is now fixed.\r
+ * WRS: I added a distance for case comparison, so a misspelling of "i" would be closer to "I" than\r
+ * to "a".\r
+ */\r
+\r
+ //Allocate memory outside of the loops. \r
+ int i;\r
+ int j;\r
+ int costOfSubst;\r
+ int costOfSwap;\r
+ int costOfDelete;\r
+ int costOfInsertion;\r
+ int costOfCaseChange;\r
+ \r
+ boolean isSwap;\r
+ char sourceChar = 0;\r
+ char otherChar = 0;\r
+ \r
+ // One extra row and column for the empty prefix of each word.\r
+ int a_size = word.length() + 1;\r
+ int b_size = similar.length() + 1;\r
+ \r
+ \r
+ //Only allocate new memory if we need a bigger matrix. \r
+ if (matrix == null || matrix.length < a_size || matrix[0].length < b_size)\r
+ matrix = new int[a_size][b_size];\r
+ \r
+ matrix[0][0] = 0;\r
+\r
+ for (i = 1; i != a_size; ++i)\r
+ matrix[i][0] = matrix[i - 1][0] + costOfInsertingSourceCharacter; //initialize the first column\r
+\r
+ for (j = 1; j != b_size; ++j)\r
+ matrix[0][j] = matrix[0][j - 1] + costOfDeletingSourceCharacter; //initalize the first row\r
+\r
+ for (i = 1; i != a_size; ++i) {\r
+ sourceChar = word.charAt(i-1);\r
+ for (j = 1; j != b_size; ++j) {\r
+\r
+ otherChar = similar.charAt(j-1);\r
+ if (sourceChar == otherChar) {\r
+ matrix[i][j] = matrix[i - 1][j - 1]; //no change required, so just carry the current cost up\r
+ continue;\r
+ }\r
+\r
+ costOfSubst = costOfSubstitutingLetters + matrix[i - 1][j - 1];\r
+ //if needed, add up the cost of doing a swap\r
+ costOfSwap = Integer.MAX_VALUE;\r
+\r
+ // A swap applies when the last two characters of both prefixes are the same pair in opposite order.\r
+ isSwap = (i != 1) && (j != 1) && sourceChar == similar.charAt(j - 2) && word.charAt(i - 2) == otherChar;\r
+ if (isSwap)\r
+ costOfSwap = costOfSwappingLetters + matrix[i - 2][j - 2];\r
+\r
+ costOfDelete = costOfDeletingSourceCharacter + matrix[i][j - 1];\r
+ costOfInsertion = costOfInsertingSourceCharacter + matrix[i - 1][j];\r
+\r
+ // Candidates that do not apply stay at Integer.MAX_VALUE so minimum() never selects them.\r
+ costOfCaseChange = Integer.MAX_VALUE;\r
+ \r
+ if (equalIgnoreCase(sourceChar, otherChar))\r
+ costOfCaseChange = costOfChangingCase + matrix[i - 1][j - 1];\r
+ \r
+ matrix[i][j] = minimum(costOfSubst, costOfSwap, costOfDelete, costOfInsertion, costOfCaseChange);\r
+ }\r
+ }\r
+\r
+ // Read the cell for the two full words; a reused matrix may be larger than needed.\r
+ return matrix[a_size - 1][b_size - 1];\r
+ }\r
+\r
+ /**\r
+  * Tells whether two characters are the same letter once case is ignored.\r
+  *\r
+  * @param ch1 the first character\r
+  * @param ch2 the second character\r
+  * @return <code>true</code> if the characters are identical or share the same\r
+  *         lower-case form\r
+  */\r
+ private static boolean equalIgnoreCase(char ch1, char ch2) {\r
+   return ch1 == ch2 || Character.toLowerCase(ch1) == Character.toLowerCase(ch2);\r
+ }\r
+ \r
+ /**\r
+ * For debugging, this creates a string that represents the matrix. To read the matrix, look at any square. That is the cost to get from\r
+ * the partial letters along the top to the partial letters along the side.\r
+ * @param src - the source string that the matrix columns are based on\r
+ * @param dest - the dest string that the matrix rows are based on\r
+ * @param matrix - a two dimensional array of costs (distances)\r
+ * @return String\r
+ */\r
+ @SuppressWarnings("unused")\r
+static private String dumpMatrix(String src, String dest, int matrix[][]) {\r
+ StringBuffer s = new StringBuffer("");\r
+\r
+ int cols = matrix.length -1;\r
+ int rows = matrix[0].length -1;\r
+\r
+ for (int i = 0; i < cols + 1; i++) {\r
+ for (int j = 0; j < rows + 1; j++) {\r
+ if (i == 0 && j == 0) {\r
+ s.append("\n ");\r
+ continue;\r
+\r
+ }\r
+ if (i == 0) {\r
+ s.append("| ");\r
+ s.append(dest.charAt(j - 1));\r
+ continue;\r
+ }\r
+ if (j == 0) {\r
+ s.append(src.charAt(i - 1));\r
+ continue;\r
+ }\r
+ String num = Integer.toString(matrix[i - 1][j - 1]);\r
+ int padding = 4 - num.length();\r
+ s.append("|");\r
+ for (int k = 0; k < padding; k++)\r
+ s.append(' ');\r
+ s.append(num);\r
+ }\r
+ s.append('\n');\r
+ }\r
+ return s.toString();\r
+\r
+ }\r
+\r
+\r
+ /**\r
+  * Returns the smallest of the five given values.\r
+  */\r
+ static private int minimum(int a, int b, int c, int d, int e) {\r
+   return Math.min(Math.min(Math.min(a, b), Math.min(c, d)), e);\r
+ }\r
+\r
+ /**\r
+  * Interactive driver for testing edit distances. Repeatedly reads two lines\r
+  * from standard input and prints their distance. Stops at end of input or at\r
+  * the first empty line, then prints "done".\r
+  *\r
+  * @param args not used; the words are read from standard input\r
+  * @throws java.lang.Exception when a problem occurs while reading standard input.\r
+  */\r
+ public static void main(String[] args) throws Exception {\r
+   final BufferedReader console = new BufferedReader(new InputStreamReader(System.in));\r
+   final int[][] scratch = new int[0][0];\r
+\r
+   for (;;) {\r
+     final String first = console.readLine();\r
+     // The second line is only read when the first one is usable.\r
+     final String second = (first == null || first.length() == 0) ? null : console.readLine();\r
+     if (second == null || second.length() == 0) {\r
+       break;\r
+     }\r
+     System.out.println(getDistance(first, second, scratch));\r
+   }\r
+   System.out.println("done");\r
+ }\r
+}\r
+\r
+\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.BufferedReader;\r
+import java.io.File;\r
+import java.io.FileNotFoundException;\r
+import java.io.FileReader;\r
+import java.io.FileWriter;\r
+import java.io.IOException;\r
+import java.util.HashMap;\r
+import java.util.LinkedList;\r
+import java.util.List;\r
+import java.util.Vector;\r
+\r
+/**\r
+ * The SpellDictionary class holds the instance of the dictionary.\r
+ * <p>\r
+ * This class is thread safe. Derived classes should ensure that this is preserved.\r
+ * </p>\r
+ * <p>\r
+ * There are many open source dictionary files. For just a few see:\r
+ * http://wordlist.sourceforge.net/\r
+ * </p>\r
+ * <p>\r
+ * This dictionary class reads words one per line. Make sure that your word list\r
+ * is formatted in this way (most are).\r
+ * </p>\r
+ */\r
+public class GenericSpellDictionary extends SpellDictionaryASpell {\r
+\r
+//tech_monkey: the alphabet / replace list stuff has been moved into the Transformator classes,\r
+//since they are so closely tied to how the phonetic transformations are done.\r
+// /**\r
+// * This replace list is used if no phonetic file is supplied or it doesn't\r
+// * contain the alphabet.\r
+// */\r
+// protected static final char[] englishAlphabet =\r
+\r
+\r
+ /** A field indicating the initial hash map capacity (16KB) for the main\r
+ * dictionary hash map. Interested to see what the performance of a\r
+ * smaller initial capacity is like.\r
+ */\r
+ private final static int INITIAL_CAPACITY = 16 * 1024;\r
+\r
+ /**\r
+ * The hashmap that contains the word dictionary. The map is hashed on the doublemeta\r
+ * code. The map entry contains a LinkedList of words that have the same double meta code.\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+protected HashMap mainDictionary = new HashMap(INITIAL_CAPACITY);\r
+\r
+ /** Holds the dictionary file for appending*/\r
+ private File dictFile = null;\r
+\r
+\r
+ /**\r
+ * Dictionary constructor that uses the DoubleMeta class with the\r
+ * English alphabet. Equivalent to calling\r
+ * {@link #GenericSpellDictionary(File, File)} with a <code>null</code> phonetic file.\r
+ * @param wordList The file containing the dictionary as a word list.\r
+ * @throws java.io.FileNotFoundException when the word list file could not \r
+ * be located on the system.\r
+ * @throws java.io.IOException when a problem occurs while reading the word \r
+ * list file\r
+ */\r
+ public GenericSpellDictionary(File wordList) throws FileNotFoundException, IOException {\r
+ this(wordList, (File) null);\r
+ }\r
+\r
+ /**\r
+  * Dictionary constructor that uses an aspell phonetic file to\r
+  * build the transformation table.\r
+  * If phonetic is null, then DoubleMeta is used with the English alphabet.\r
+  * The word list file is also remembered so that {@link #addWord(String)} can\r
+  * append to it.\r
+  * @param wordList The file containing the dictionary as a word list.\r
+  * @param phonetic The file containing the phonetic transformation \r
+  * information.\r
+  * @throws java.io.FileNotFoundException when the word list or phonetic \r
+  * file could not be located on the system\r
+  * @throws java.io.IOException when a problem occurs while reading the \r
+  * word list or phonetic file\r
+  */\r
+ public GenericSpellDictionary(File wordList, File phonetic) throws FileNotFoundException, IOException {\r
+\r
+   super(phonetic);\r
+   dictFile = wordList;\r
+   BufferedReader in = new BufferedReader(new FileReader(wordList));\r
+   try {\r
+     createDictionary(in);\r
+   } finally {\r
+     // Release the file handle whether or not loading succeeded.\r
+     in.close();\r
+   }\r
+ }\r
+\r
+\r
+ /**\r
+  * Add a word permanently to the dictionary (and the dictionary file).\r
+  * The word is first added to the in-memory dictionary and then appended,\r
+  * followed by a newline, to the word list file. A failure to write the file\r
+  * is reported on standard output and does not undo the in-memory addition.\r
+  * <p>This needs to be made thread safe (synchronized)</p>\r
+  * @param word The word to add to the dictionary\r
+  */\r
+ public void addWord(String word) {\r
+   putWord(word);\r
+   if (dictFile == null)\r
+     return;\r
+   try {\r
+     // Open with append.\r
+     FileWriter w = new FileWriter(dictFile.toString(), true);\r
+     try {\r
+       w.write(word);\r
+       w.write("\n");\r
+     } finally {\r
+       // Close even when a write fails so the file handle is not leaked.\r
+       w.close();\r
+     }\r
+   } catch (IOException ex) {\r
+     System.out.println("Error writing to dictionary file");\r
+   }\r
+ }\r
+\r
+ /**\r
+  * Fills the dictionary from a word list, reading until the end of the stream.\r
+  * <p>\r
+  * Each line of the reader is treated as one word and handed to\r
+  * {@link #putWord(String)}; the reader is not closed here.\r
+  * <p>\r
+  * Loading a large list this way is slow and could probably be sped up.\r
+  *\r
+  * @param in the reader supplying one word per line\r
+  * @throws IOException when reading from <code>in</code> fails\r
+  */\r
+ protected void createDictionary(BufferedReader in) throws IOException {\r
+   for (String entry = in.readLine(); entry != null; entry = in.readLine()) {\r
+     // The word is stored as a fresh copy of the line's characters.\r
+     putWord(new String(entry.toCharArray()));\r
+   }\r
+ }\r
+\r
+ /**\r
+ * Allocates a word in the dictionary\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+protected void putWord(String word) {\r
+ String code = getCode(word);\r
+ LinkedList list = (LinkedList) mainDictionary.get(code);\r
+ if (list != null) {\r
+ list.add(word);\r
+ } else {\r
+ list = new LinkedList();\r
+ list.add(word);\r
+ mainDictionary.put(code, list);\r
+ }\r
+ }\r
+\r
+ /**\r
+  * Looks up the words stored under a phonetic code.\r
+  * @param code The phonetic code we want to find words for\r
+  * @return the dictionary's own list of words having that code, or a new\r
+  *         empty list when no word has it\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ @Override\r
+ public List getWords(String code) {\r
+   final Object sameCodeWords = mainDictionary.get(code);\r
+   return (sameCodeWords != null) ? (List) sameCodeWords : new Vector();\r
+ }\r
+\r
+ /**\r
+ * Returns true if the word is correctly spelled against the current word list.\r
+ * @param word The word to checked in the dictionary\r
+ * @return indication if the word is in the dictionary\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+@Override\r
+public boolean isCorrect(String word) {\r
+ List possible = getWords(getCode(word));\r
+ if (possible.contains(word))\r
+ return true;\r
+ //JMH should we always try the lowercase version. If I dont then capitalised\r
+ //words are always returned as incorrect.\r
+ else if (possible.contains(word.toLowerCase()))\r
+ return true;\r
+ return false;\r
+ }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.BufferedReader;\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileReader;\r
+import java.io.IOException;\r
+import java.io.InputStreamReader;\r
+import java.io.Reader;\r
+import java.util.HashMap;\r
+import java.util.Vector;\r
+\r
+import com.swabunga.util.StringUtility;\r
+\r
+/**\r
+ * A Generic implementation of a transformator takes an \r
+ * <a href="http://aspell.net/man-html/Phonetic-Code.html">\r
+ * aspell phonetics file</a> and constructs some sort of transformation \r
+ * table using the inner class TransformationRule.\r
+ * <p>\r
+ * Basically, each transformation rule represent a line in the phonetic file.\r
+ * One line contains two groups of characters separated by white space(s).\r
+ * The first group is the <em>match expression</em>. \r
+ * The <em>match expression</em> describe letters to associate with a syllable.\r
+ * The second group is the <em>replacement expression</em> giving the phonetic \r
+ * equivalent of the <em>match expression</em>.\r
+ *\r
+ * @see SpellDictionaryASpell SpellDictionaryASpell for information on getting\r
+ * phonetic files for aspell.\r
+ *\r
+ * @author Robert Gustavsson (robert@lindesign.se)\r
+ */\r
+public class GenericTransformator implements Transformator {\r
+\r
+\r
+ /**\r
+ * This replace list is used if no phonetic file is supplied or it doesn't\r
+ * contain the alphabet.\r
+ */\r
+ private static final char[] defaultEnglishAlphabet = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'};\r
+\r
+ /**\r
+ * The alphabet start marker.\r
+ * @see GenericTransformator#KEYWORD_ALPHBET KEYWORD_ALPHBET\r
+ */\r
+ public static final char ALPHABET_START = '[';\r
+ /**\r
+ * The alphabet end marker.\r
+ * @see GenericTransformator#KEYWORD_ALPHBET KEYWORD_ALPHBET\r
+ */\r
+ public static final char ALPHABET_END = ']';\r
+ /**\r
+ * Phonetic file keyword indicating that a different alphabet is used \r
+ * for this language. The keyword must be followed an\r
+ * {@link GenericTransformator#ALPHABET_START ALPHABET_START} marker, \r
+ * a list of characters defining the alphabet and a\r
+ * {@link GenericTransformator#ALPHABET_END ALPHABET_END} marker.\r
+ */\r
+ public static final String KEYWORD_ALPHBET = "alphabet";\r
+ /**\r
+ * Phonetic file lines starting with the keywords are skipped. \r
+ * The key words are: version, followup, collapse_result.\r
+ * Comments, starting with '#', are also skipped to the end of line.\r
+ */\r
+ public static final String[] IGNORED_KEYWORDS = {"version", "followup", "collapse_result"};\r
+\r
+ /**\r
+ * Start a group of characters which can be appended to the match expression\r
+ * of the phonetic file.\r
+ */\r
+ public static final char STARTMULTI = '(';\r
+ /**\r
+ * End a group of characters which can be appended to the match expression\r
+ * of the phonetic file.\r
+ */\r
+ public static final char ENDMULTI = ')';\r
+ /**\r
+ * During phonetic transformation of a word each numeric character is\r
+ * replaced by this DIGITCODE.\r
+ */\r
+ public static final String DIGITCODE = "0";\r
+ /**\r
+ * Phonetic file character code indicating that the replace expression\r
+ * is empty.\r
+ */\r
+ public static final String REPLACEVOID = "_";\r
+\r
+ private Object[] ruleArray = null;\r
+ private char[] alphabetString = defaultEnglishAlphabet;\r
+\r
+ /**\r
+  * Construct a transformation table from the phonetic file. The file is read\r
+  * with {@link FileReader} and closed before the constructor returns.\r
+  * @param phonetic the phonetic file as specified in aspell\r
+  * @throws java.io.IOException indicates a problem while reading\r
+  * the phonetic file\r
+  */\r
+ public GenericTransformator(File phonetic) throws IOException {\r
+   BufferedReader in = new BufferedReader(new FileReader(phonetic));\r
+   try {\r
+     buildRules(in);\r
+   } finally {\r
+     // Release the file handle whether or not parsing succeeded.\r
+     in.close();\r
+   }\r
+   alphabetString = washAlphabetIntoReplaceList(getReplaceList());\r
+ }\r
+\r
+ /**\r
+  * Construct a transformation table from the phonetic file, decoding it with\r
+  * the given character set. The file is closed before the constructor returns.\r
+  * @param phonetic the phonetic file as specified in aspell\r
+  * @param encoding the character set required\r
+  * @throws java.io.IOException indicates a problem while reading\r
+  * the phonetic file\r
+  */\r
+ public GenericTransformator(File phonetic, String encoding) throws IOException {\r
+   // Open the stream first so it is closed even if the encoding is rejected.\r
+   FileInputStream stream = new FileInputStream(phonetic);\r
+   try {\r
+     buildRules(new BufferedReader(new InputStreamReader(stream, encoding)));\r
+   } finally {\r
+     stream.close();\r
+   }\r
+   alphabetString = washAlphabetIntoReplaceList(getReplaceList());\r
+ }\r
+\r
+ /**\r
+ * Construct a transformation table from the phonetic file\r
+ * @param phonetic the phonetic file as specified in aspell. The file is\r
+ * supplied as a reader.\r
+ * @throws java.io.IOException indicates a problem while reading\r
+ * the phonetic information\r
+ */\r
+ public GenericTransformator(Reader phonetic) throws IOException {\r
+ buildRules(new BufferedReader(phonetic));\r
+ alphabetString = washAlphabetIntoReplaceList(getReplaceList());\r
+ }\r
+\r
+ /**\r
+ * Goes through an alphabet and makes sure that only one of those letters\r
+ * that are coded equally will be in the replace list.\r
+ * In other words, it removes any letters in the alphabet\r
+ * that are redundant phonetically.\r
+ *\r
+ * This is done to improve speed in the getSuggestion method.\r
+ *\r
+ * @param alphabet The complete alphabet to wash.\r
+ * @return The washed alphabet to be used as replace list.\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+private char[] washAlphabetIntoReplaceList(char[] alphabet) {\r
+\r
+ HashMap letters = new HashMap(alphabet.length);\r
+\r
+ for (char element : alphabet) {\r
+ String tmp = String.valueOf(element);\r
+ String code = transform(tmp);\r
+ if (!letters.containsKey(code)) {\r
+ letters.put(code, new Character(element));\r
+ }\r
+ }\r
+\r
+ Object[] tmpCharacters = letters.values().toArray();\r
+ char[] washedArray = new char[tmpCharacters.length];\r
+\r
+ for (int i = 0; i < tmpCharacters.length; i++) {\r
+ washedArray[i] = ((Character) tmpCharacters[i]).charValue();\r
+ }\r
+\r
+ return washedArray;\r
+ }\r
+\r
+\r
+ /**\r
+  * Collects the replacement expressions that are exactly one character long,\r
+  * in rule order, into a char array.\r
+  * This array can later be used for adding or changing letters in getSuggestion().\r
+  * @return char[] An array with the single-character replacements, or\r
+  *         <code>null</code> when no rules have been built\r
+  */\r
+ public char[] getCodeReplaceList() {\r
+   if (ruleArray == null) {\r
+     return null;\r
+   }\r
+\r
+   final StringBuffer singles = new StringBuffer();\r
+   for (int i = 0; i < ruleArray.length; i++) {\r
+     final String replacement = ((TransformationRule) ruleArray[i]).getReplaceExp();\r
+     if (replacement.length() == 1) {\r
+       singles.append(replacement.charAt(0));\r
+     }\r
+   }\r
+   return singles.toString().toCharArray();\r
+ }\r
+\r
+ /**\r
+  * Returns the alphabet currently held by this transformator. The field\r
+  * starts as the default English alphabet, is replaced when the phonetic file\r
+  * declares an alphabet, and is finally replaced by the washed replace list\r
+  * at the end of construction.\r
+  * @return char[] the internal alphabet array itself, not a copy\r
+  */\r
+ public char[] getReplaceList() {\r
+   return this.alphabetString;\r
+ }\r
+\r
+ /**\r
+ * Builds the phonetic code of the word.\r
+ * <p>\r
+ * The word is upper-cased into a buffer that is rewritten in place from left\r
+ * to right. At each position a digit is replaced by {@link #DIGITCODE};\r
+ * otherwise the rules are tried in order and the first matching one is applied.\r
+ * @param word the word to transform\r
+ * @return the phonetic transformation of the word, or <code>null</code> when no\r
+ * rules have been built\r
+ */\r
+ public String transform(String word) {\r
+\r
+ if (ruleArray == null)\r
+ return null;\r
+\r
+ TransformationRule rule;\r
+ StringBuffer str = new StringBuffer(word.toUpperCase());\r
+ // strLength tracks the buffer length as replacements grow or shrink it.\r
+ int strLength = str.length();\r
+ // add is how far the cursor moves after handling the current position.\r
+ int startPos = 0, add = 1;\r
+\r
+ while (startPos < strLength) {\r
+\r
+ // Default: move one character forward when no rule matches.\r
+ add = 1;\r
+ if (Character.isDigit(str.charAt(startPos))) {\r
+ // presumably replaces the range [startPos, startPos + DIGITCODE.length()) in the buffer - verify against StringUtility\r
+ StringUtility.replace(str, startPos, startPos + DIGITCODE.length(), DIGITCODE);\r
+ startPos += add;\r
+ continue;\r
+ }\r
+\r
+ for (Object element : ruleArray) {\r
+ //System.out.println("Testing rule#:"+i);\r
+ rule = (TransformationRule) element;\r
+ // Rules anchored at the start of the word only apply at position 0.\r
+ if (rule.startsWithExp() && startPos > 0)\r
+ continue;\r
+ // Skip rules that need more characters than remain in the buffer.\r
+ if (startPos + rule.lengthOfMatch() > strLength) {\r
+ continue;\r
+ }\r
+ if (rule.isMatching(str, startPos)) {\r
+ String replaceExp = rule.getReplaceExp();\r
+\r
+ // The cursor advances past the replacement text; an empty replacement\r
+ // leaves it in place, so the same position is examined again.\r
+ add = replaceExp.length();\r
+ StringUtility.replace(str, startPos, startPos + rule.getTakeOut(), replaceExp);\r
+ strLength -= rule.getTakeOut();\r
+ strLength += add;\r
+ //System.out.println("Replacing with rule#:"+i+" add="+add);\r
+ // Only the first matching rule is applied at a position.\r
+ break;\r
+ }\r
+ }\r
+ startPos += add;\r
+ }\r
+ //System.out.println(word);\r
+ //System.out.println(str.toString());\r
+ return str.toString();\r
+ }\r
+\r
+ // Used to build up the transformastion table.\r
+ @SuppressWarnings("unchecked")\r
+private void buildRules(BufferedReader in) throws IOException {\r
+ String read = null;\r
+ Vector ruleList = new Vector();\r
+ while ((read = in.readLine()) != null) {\r
+ buildRule(realTrimmer(read), ruleList);\r
+ }\r
+ ruleArray = new TransformationRule[ruleList.size()];\r
+ ruleList.copyInto(ruleArray);\r
+ }\r
+\r
+ // Here is where the real work of reading the phonetics file is done.\r
+ // Parses one already-trimmed line of the phonetic file. Empty lines and lines\r
+ // starting with an ignored keyword are skipped, an alphabet line updates\r
+ // alphabetString, and any other line is turned into a TransformationRule that\r
+ // is appended to ruleList.\r
+ @SuppressWarnings("unchecked")\r
+private void buildRule(String str, Vector ruleList) {\r
+ if (str.length() < 1)\r
+ return;\r
+ for (String element : IGNORED_KEYWORDS) {\r
+ if (str.startsWith(element))\r
+ return;\r
+ }\r
+\r
+ // A different alphabet is used for this language, will be read into\r
+ // the alphabetString variable.\r
+ if (str.startsWith(KEYWORD_ALPHBET)) {\r
+ int start = str.indexOf(ALPHABET_START);\r
+ int end = str.lastIndexOf(ALPHABET_END);\r
+ // The alphabet is only taken when both markers are present; the line is consumed either way.\r
+ if (end != -1 && start != -1) {\r
+ alphabetString = str.substring(++start, end).toCharArray();\r
+ }\r
+ return;\r
+ }\r
+\r
+ // str contains two groups of characters separated by white space(s).\r
+ // The first group is the "match expression". The second group is the \r
+ // "replacement expression" giving the phonetic equivalent of the \r
+ // "match expression".\r
+ TransformationRule rule = null;\r
+ StringBuffer matchExp = new StringBuffer();\r
+ StringBuffer replaceExp = new StringBuffer();\r
+ // start / end: the match expression contained '^' / '$'.\r
+ boolean start = false,\r
+ end = false;\r
+ // takeOutPart: number of characters the rule replaces; matchLength: length of\r
+ // the match expression with each multi group counted once.\r
+ int takeOutPart = 0,\r
+ matchLength = 0;\r
+ // match: still reading the match expression; it ends at the first whitespace.\r
+ boolean match = true,\r
+ inMulti = false;\r
+ for (int i = 0; i < str.length(); i++) {\r
+ if (Character.isWhitespace(str.charAt(i))) {\r
+ match = false;\r
+ } else {\r
+ if (match) {\r
+ if (!isReservedChar(str.charAt(i))) {\r
+ matchExp.append(str.charAt(i));\r
+ // Characters inside a multi group are not counted; the opening\r
+ // bracket is counted once for the whole group.\r
+ if (!inMulti) {\r
+ takeOutPart++;\r
+ matchLength++;\r
+ }\r
+ if (str.charAt(i) == STARTMULTI || str.charAt(i) == ENDMULTI)\r
+ inMulti = !inMulti;\r
+ }\r
+ // Each '-' reduces the number of characters that will be replaced by one.\r
+ if (str.charAt(i) == '-')\r
+ takeOutPart--;\r
+ if (str.charAt(i) == '^')\r
+ start = true;\r
+ if (str.charAt(i) == '$')\r
+ end = true;\r
+ } else {\r
+ // After the first whitespace every non-blank character belongs to the replacement.\r
+ replaceExp.append(str.charAt(i));\r
+ }\r
+ }\r
+ }\r
+ // A replacement consisting only of REPLACEVOID means "replace with nothing".\r
+ if (replaceExp.toString().equals(REPLACEVOID)) {\r
+ replaceExp = new StringBuffer("");\r
+ //System.out.println("Changing _ to \"\" for "+matchExp.toString());\r
+ }\r
+ rule = new TransformationRule(matchExp.toString(), replaceExp.toString(), takeOutPart, matchLength, start, end);\r
+ //System.out.println(rule.toString());\r
+ ruleList.addElement(rule);\r
+ }\r
+\r
+ // Tells whether a character has a special meaning in an aspell match\r
+ // expression: '<', '>', '^', '$', '-' or any digit. Not every aspell\r
+ // special character is handled here.\r
+ private boolean isReservedChar(char ch) {\r
+   switch (ch) {\r
+     case '<':\r
+     case '>':\r
+     case '^':\r
+     case '$':\r
+     case '-':\r
+       return true;\r
+     default:\r
+       return Character.isDigit(ch);\r
+   }\r
+ }\r
+\r
+ // Trims off everything we don't care about.\r
+ private String realTrimmer(String row) {\r
+ int pos = row.indexOf('#');\r
+ if (pos != -1) {\r
+ row = row.substring(0, pos);\r
+ }\r
+ return row.trim();\r
+ }\r
+\r
+ // Inner Classes\r
+ /*\r
+ * Holds the match string and the replace string and all the rule attributes.\r
+ * Is responsible for indicating matches.\r
+ * All fields are final and set once in the constructor.\r
+ */\r
+ private class TransformationRule {\r
+\r
+ // The replacement expression (empty when the rule replaces with nothing).\r
+ private final String replace;\r
+ // The match expression, including any multi group brackets.\r
+ private final char[] match;\r
+ // takeOut=number of chars to replace;\r
+ // matchLength=length of matching string counting multies as one.\r
+ private final int takeOut, matchLength;\r
+ // start=rule only applies at the beginning of a word ('^');\r
+ // end=rule only applies when the match reaches the end of the word ('$').\r
+ private final boolean start, end;\r
+\r
+ // Constructor: stores the parsed rule attributes as given.\r
+ public TransformationRule(String match, String replace, int takeout, int matchLength, boolean start, boolean end) {\r
+ this.match = match.toCharArray();\r
+ this.replace = replace;\r
+ this.takeOut = takeout;\r
+ this.matchLength = matchLength;\r
+ this.start = start;\r
+ this.end = end;\r
+ }\r
+\r
+ /*\r
+ * Returns true if word from pos and forward matches the match string.\r
+ * Characters between STARTMULTI and ENDMULTI form a group of alternatives\r
+ * that are all compared against the same word position; the group matches\r
+ * if any alternative does.\r
+ * Precondition: wordPos+matchLength<=word.length(), as checked by the\r
+ * caller in transform().\r
+ */\r
+ public boolean isMatching(StringBuffer word, int wordPos) {\r
+ boolean matching = true, inMulti = false, multiMatch = false;\r
+ char matchCh;\r
+\r
+ for (char element : match) {\r
+ matchCh = element;\r
+ if (matchCh == STARTMULTI || matchCh == ENDMULTI) {\r
+ inMulti = !inMulti;\r
+ if (!inMulti)\r
+ // Leaving a group: the overall match also requires that an alternative matched.\r
+ // NOTE(review): wordPos is not advanced here, so a character following the\r
+ // group is compared against the same word position as the group - confirm intended.\r
+ // NOTE(review): a failed group does not break out of the loop, and a later\r
+ // matching character sets matching back to true - confirm intended.\r
+ matching = matching & multiMatch;\r
+ else\r
+ // Entering a group: no alternative has matched yet.\r
+ multiMatch = false;\r
+ } else {\r
+ if (matchCh != word.charAt(wordPos)) {\r
+ if (inMulti)\r
+ multiMatch = multiMatch | false;\r
+ else\r
+ matching = false;\r
+ } else {\r
+ if (inMulti)\r
+ multiMatch = multiMatch | true;\r
+ else\r
+ matching = true;\r
+ }\r
+ // Only characters outside a group consume a character of the word.\r
+ if (!inMulti)\r
+ wordPos++;\r
+ if (!matching)\r
+ break;\r
+ }\r
+ }\r
+ // An end-anchored rule must have consumed the word up to its last character.\r
+ if (end && wordPos != word.length())\r
+ matching = false;\r
+ return matching;\r
+ }\r
+\r
+ // Returns the replacement expression.\r
+ public String getReplaceExp() {\r
+ return replace;\r
+ }\r
+\r
+ // Returns the number of characters this rule replaces.\r
+ public int getTakeOut() {\r
+ return takeOut;\r
+ }\r
+\r
+ // Returns true if the rule is anchored at the start of a word.\r
+ public boolean startsWithExp() {\r
+ return start;\r
+ }\r
+\r
+ // Returns the length of the match expression, counting each multi group as one.\r
+ public int lengthOfMatch() {\r
+ return matchLength;\r
+ }\r
+\r
+ // Just for debugging purposes.\r
+ @Override\r
+ public String toString() {\r
+ return "Match:" + String.valueOf(match) + " Replace:" + replace + " TakeOut:" + takeOut + " MatchLength:" + matchLength + " Start:" + start + " End:" + end;\r
+ }\r
+\r
+ }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.*;\r
+import java.net.URL;\r
+import java.util.Properties;\r
+\r
+\r
+/**\r
+ * Implementation class to read the properties controlling the spell engine. \r
+ * The properties are read form the <code>configuration.properties</code> file.\r
+ *\r
+ * @author aim4min\r
+ */\r
+public class PropertyConfiguration extends Configuration {\r
+\r
+ /**\r
+ * The persistent set of properties supported by the spell engine\r
+ */\r
+ public Properties prop;\r
+ /**\r
+ * The name of the file containing spell engine properties\r
+ */\r
+ public URL filename;\r
+\r
+ /**\r
+  * Constructs and loads spell engine properties configuration from the\r
+  * <code>configuration.properties</code> class path resource.\r
+  * Any failure, including a missing resource, is reported on standard output\r
+  * and leaves the property set empty or partially loaded.\r
+  */\r
+ public PropertyConfiguration() {\r
+   prop = new Properties();\r
+   try {\r
+     filename = getClass().getClassLoader().getResource("com/swabunga/spell/engine/configuration.properties");\r
+     // A missing resource leaves filename null; the resulting exception is\r
+     // reported by the catch block below.\r
+     InputStream in = filename.openStream();\r
+     try {\r
+       prop.load(in);\r
+     } finally {\r
+       // Release the stream whether or not loading succeeded.\r
+       in.close();\r
+     }\r
+   } catch (Exception e) {\r
+     System.out.println("Could not load Properties file :\n" + e);\r
+   }\r
+ }\r
+\r
+ /**\r
+  * Reads a property as a boolean: <code>true</code> only when the stored text\r
+  * equals "true" ignoring case, <code>false</code> otherwise (including when\r
+  * the key is absent).\r
+  * @see com.swabunga.spell.engine.Configuration#getBoolean(String)\r
+  */\r
+ public boolean getBoolean(String key) {\r
+   final String text = prop.getProperty(key);\r
+   return Boolean.parseBoolean(text);\r
+ }\r
+\r
+ /**\r
+  * Reads a property as a decimal integer. A missing or non-numeric value\r
+  * results in a {@link NumberFormatException}.\r
+  * @see com.swabunga.spell.engine.Configuration#getInteger(String)\r
+  */\r
+ public int getInteger(String key) {\r
+   final String text = prop.getProperty(key);\r
+   return Integer.parseInt(text);\r
+ }\r
+\r
+ /**\r
+ * @see com.swabunga.spell.engine.Configuration#setBoolean(String, boolean)\r
+ */\r
+ public void setBoolean(String key, boolean value) {\r
+ String string = null;\r
+ if (value)\r
+ string = "true";\r
+ else\r
+ string = "false";\r
+\r
+ prop.setProperty(key, string);\r
+ save();\r
+ }\r
+\r
+ /**\r
+  * Stores an integer property in its decimal text form and then saves the\r
+  * whole property set.\r
+  * @see com.swabunga.spell.engine.Configuration#setInteger(String, int)\r
+  */\r
+ public void setInteger(String key, int value) {\r
+   final String text = String.valueOf(value);\r
+   prop.setProperty(key, text);\r
+   save();\r
+ }\r
+\r
+ /**\r
+  * Writes the property list (key and element pairs) in the \r
+  * PropertyConfiguration file.\r
+  * Saving is best effort: when the file location is unknown or the file\r
+  * cannot be written, the problem is reported on standard output and the\r
+  * in-memory properties are left unchanged.\r
+  */\r
+ public void save() {\r
+   // filename stays null when the resource could not be located at construction.\r
+   if (filename == null) {\r
+     System.out.println("Could not save Properties file :\nconfiguration file location is unknown");\r
+     return;\r
+   }\r
+   try {\r
+     File file = new File(filename.getFile());\r
+     FileOutputStream fout = new FileOutputStream(file);\r
+     try {\r
+       prop.store(fout, "HEADER");\r
+     } finally {\r
+       // Properties.store leaves the stream open, so close it here.\r
+       fout.close();\r
+     }\r
+   } catch (IOException e) {\r
+     // Also covers FileNotFoundException, e.g. when the location is not a writable file.\r
+     System.out.println("Could not save Properties file :\n" + e);\r
+   }\r
+ }\r
+\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.util.List;\r
+\r
+/**\r
+ * The contract shared by every dictionary implementation. It covers the\r
+ * three basic operations on a dictionary: adding a word, checking whether a\r
+ * word is spelled correctly, and listing suggestions for a misspelled word.\r
+ */\r
+public interface SpellDictionary {\r
+\r
+ /**\r
+  * Adds a word permanently to the dictionary.\r
+  *\r
+  * @param word the word to add to the dictionary\r
+  */\r
+ void addWord(String word);\r
+\r
+ /**\r
+  * Checks the spelling of a word against the dictionary.\r
+  *\r
+  * @param word the word whose spelling is to be verified\r
+  * @return <code>true</code> if the word is present in the dictionary\r
+  */\r
+ boolean isCorrect(String word);\r
+\r
+ /**\r
+  * Returns a list of Word objects that are suggestions for the given word.\r
+  * If the word is correctly spelled, this method could return just that\r
+  * one word, or it could still return a list of similarly spelled words.\r
+  * <br/>\r
+  * Each suggested word has a score, an integer that represents how\r
+  * different the suggestion is from the sourceWord. Identical words score 0.\r
+  * A low threshold restricts the result to the most similar words; a\r
+  * threshold that is too low may produce no suggestions at all.\r
+  * <p>\r
+  * This method is only needed to provide backward compatibility.\r
+  *\r
+  * @param sourceWord the string to get spelling suggestions for\r
+  * @param scoreThreshold words scoring less than this number are returned\r
+  * @return a List of suggested words\r
+  * @see #getSuggestions(String, int, int[][])\r
+  * @see com.swabunga.spell.engine.Word\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ List getSuggestions(String sourceWord, int scoreThreshold);\r
+\r
+ /**\r
+  * Returns a list of Word objects that are suggestions for the given word.\r
+  * If the word is correctly spelled, this method could return just that\r
+  * one word, or it could still return a list of similarly spelled words.\r
+  * <br/>\r
+  * Each suggested word has a score, an integer that represents how\r
+  * different the suggestion is from the sourceWord. Identical words score 0.\r
+  * A low threshold restricts the result to the most similar words; a\r
+  * threshold that is too low may produce no suggestions at all.\r
+  *\r
+  * @param sourceWord the string to get spelling suggestions for\r
+  * @param scoreThreshold words scoring less than this number are returned\r
+  * @param matrix two dimensional int array used to calculate edit distance;\r
+  * allocating this memory outside of the method greatly improves efficiency\r
+  * @return a List of suggested words\r
+  * @see com.swabunga.spell.engine.Word\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ List getSuggestions(String sourceWord, int scoreThreshold, int[][] matrix);\r
+\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+/* Created by bgalbs on Jan 30, 2003 at 11:45:25 PM */\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.File;\r
+import java.io.IOException;\r
+import java.io.Reader;\r
+import java.security.InvalidParameterException;\r
+import java.util.Collections;\r
+import java.util.Enumeration;\r
+import java.util.Hashtable;\r
+import java.util.Iterator;\r
+import java.util.LinkedList;\r
+import java.util.List;\r
+import java.util.Vector;\r
+\r
+/**\r
+ * Container for various methods that any <code>SpellDictionary</code> will use.\r
+ * This class is based on the original Jazzy aspell port.\r
+ * <p/>\r
+ * Derived classes will need words list files as spell checking reference. \r
+ * Words list file is a dictionary with one word per line. There are many \r
+ * open source dictionary files, see: \r
+ * <a href="http://wordlist.sourceforge.net/">\r
+ * http://wordlist.sourceforge.net/</a>\r
+ * <p/>\r
+ * You can obtain word lists for many different languages from \r
+ * <a href="http://aspell.net/">aspell</a>. To grab some, install \r
+ * <code>aspell</code> and the dictionaries you require. Then run aspell \r
+ * specifying the name of the dictionary and the words list file to dump it \r
+ * into, for example:\r
+ * <pre>\r
+ * aspell --master=fr-40 dump master > fr-40.txt\r
+ * </pre>\r
+ * Note: the number following the language is the size indicator. A bigger\r
+ * number gives a more extensive language coverage. Size 40 is more than \r
+ * adequate for many usages.\r
+ * <p/>\r
+ * For some languages, Aspell can also supply you with the phonetic file. \r
+ * On Windows, go into aspell <code>data</code> directory and copy the \r
+ * phonetic file corresponding to your language, for example the \r
+ * <code>fr_phonet.dat</code> for the <code>fr</code> language. The phonetic\r
+ * file should be in directory <code>/usr/share/aspell</code> on Unix.\r
+ *\r
+ * @see GenericTransformator GenericTransformator for information on \r
+ * phonetic files.\r
+ */\r
+public abstract class SpellDictionaryASpell implements SpellDictionary {\r
+\r
+\r
+ /** The reference to a Transformator, used to transform a word into its phonetic code. */\r
+ protected Transformator tf;\r
+\r
+ /**\r
+  * Creates a dictionary whose phonetic codes are driven by a phonetic file.\r
+  *\r
+  * @param phonetic the file to use for phonetic transformation of the\r
+  * words list; when <code>null</code>, a {@link DoubleMeta} transformation\r
+  * is used instead\r
+  * @throws java.io.IOException indicates problems reading the phonetic\r
+  * information\r
+  */\r
+ public SpellDictionaryASpell(File phonetic) throws IOException {\r
+   tf = (phonetic == null) ? new DoubleMeta() : new GenericTransformator(phonetic);\r
+ }\r
+\r
+ /**\r
+  * Creates a dictionary whose phonetic codes are driven by a phonetic file\r
+  * read with an explicit character encoding.\r
+  *\r
+  * @param phonetic the file to use for phonetic transformation of the\r
+  * words list; when <code>null</code>, a {@link DoubleMeta} transformation\r
+  * is used instead and <code>encoding</code> is not consulted\r
+  * @param encoding the character set encoding passed on to the\r
+  * {@link GenericTransformator}\r
+  * @throws java.io.IOException indicates problems reading the phonetic\r
+  * information\r
+  */\r
+ public SpellDictionaryASpell(File phonetic, String encoding) throws IOException {\r
+   tf = (phonetic == null) ? new DoubleMeta() : new GenericTransformator(phonetic, encoding);\r
+ }\r
+\r
+ /**\r
+  * Creates a dictionary whose phonetic codes are driven by phonetic rules\r
+  * supplied through a Reader.\r
+  *\r
+  * @param phonetic the Reader to use for phonetic transformation of the\r
+  * words list; when <code>null</code>, a {@link DoubleMeta} transformation\r
+  * is used instead\r
+  * @throws java.io.IOException indicates problems reading the phonetic\r
+  * information\r
+  */\r
+ public SpellDictionaryASpell(Reader phonetic) throws IOException {\r
+   tf = (phonetic == null) ? new DoubleMeta() : new GenericTransformator(phonetic);\r
+ }\r
+\r
+ /**\r
+  * Returns a list of Word objects that are the suggestions to an\r
+  * incorrect word.\r
+  * <p>\r
+  * Kept for backward compatibility; it simply delegates to\r
+  * {@link #getSuggestions(String, int, int[][])} without a scratch matrix.\r
+  *\r
+  * @param word the misspelt word to find suggestions for\r
+  * @param threshold the lower boundary of similarity to the misspelt word\r
+  * @return a List of suggestions\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ public List getSuggestions(String word, int threshold) {\r
+   // No scratch matrix supplied: the three-argument overload allocates one.\r
+   final int[][] noMatrix = null;\r
+   return getSuggestions(word, threshold, noMatrix);\r
+ }\r
+\r
+ /**\r
+  * Returns a list of Word objects that are the suggestions to an\r
+  * incorrect word.\r
+  * <p>\r
+  * Words sharing the phonetic code of <code>word</code> come first. They are\r
+  * followed by words whose code matches one of the "near miss" variants of\r
+  * <code>word</code>: two adjacent characters swapped, one character\r
+  * replaced, one character inserted, or one character deleted. If neither\r
+  * group yields anything, the closest words with the same phonetic code are\r
+  * returned as a best guess.\r
+  * <p>\r
+  * Note that this method does not read <code>threshold</code>: candidates\r
+  * are filtered by <code>getWordsFromCode</code> against the configured\r
+  * <code>Configuration.SPELL_THRESHOLD</code>.\r
+  *\r
+  * @param word the misspelt word to find suggestions for\r
+  * @param threshold the lower boundary of similarity to the misspelt word\r
+  * @param matrix two dimensional int array used to calculate edit distance;\r
+  * allocating this memory outside of the method greatly improves efficiency.\r
+  * May be <code>null</code>.\r
+  * @return a List of suggestions, phonetic matches first\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ public List getSuggestions(String word, int threshold, int[][] matrix) {\r
+\r
+   int i;\r
+   int j;\r
+\r
+   if (matrix == null)\r
+     matrix = new int[0][0];\r
+\r
+   Hashtable nearmisscodes = new Hashtable();\r
+   String code = getCode(word);\r
+\r
+   // add all words that have the same phonetics\r
+   nearmisscodes.put(code, code);\r
+   Vector phoneticList = getWordsFromCode(word, nearmisscodes);\r
+\r
+   // do some transformations to pick up more results\r
+   // interchange: swap each pair of adjacent characters\r
+   nearmisscodes = new Hashtable();\r
+   char[] charArray = word.toCharArray();\r
+   char a;\r
+   char b;\r
+\r
+   for (i = 0; i < charArray.length - 1; i++) {\r
+     a = charArray[i];\r
+     b = charArray[i + 1];\r
+     charArray[i] = b;\r
+     charArray[i + 1] = a;\r
+     String s = getCode(new String(charArray));\r
+     nearmisscodes.put(s, s);\r
+     charArray[i] = a;\r
+     charArray[i + 1] = b;\r
+   }\r
+\r
+   char[] replacelist = tf.getReplaceList();\r
+\r
+   // change: replace each character by every replacement character\r
+   charArray = word.toCharArray();\r
+   char original;\r
+   for (i = 0; i < charArray.length; i++) {\r
+     original = charArray[i];\r
+     for (j = 0; j < replacelist.length; j++) {\r
+       charArray[i] = replacelist[j];\r
+       String s = getCode(new String(charArray));\r
+       nearmisscodes.put(s, s);\r
+     }\r
+     charArray[i] = original;\r
+   }\r
+\r
+   // add: open a one-character gap at the end and walk it to the front,\r
+   // filling it with every replacement character at each position\r
+   charArray = (word + " ").toCharArray();\r
+   int iy = charArray.length - 1;\r
+   while (true) {\r
+     for (j = 0; j < replacelist.length; j++) {\r
+       charArray[iy] = replacelist[j];\r
+       String s = getCode(new String(charArray));\r
+       nearmisscodes.put(s, s);\r
+     }\r
+     if (iy == 0)\r
+       break;\r
+     charArray[iy] = charArray[iy - 1];\r
+     --iy;\r
+   }\r
+\r
+   // delete: drop one character at a time, starting with the last one.\r
+   // Skipped for words shorter than two characters, where nothing\r
+   // meaningful is left after a deletion (and where sizing the shortened\r
+   // array used to fail for an empty word).\r
+   word = word.trim();\r
+   charArray = word.toCharArray();\r
+   if (charArray.length > 1) {\r
+     char[] shortened = new char[charArray.length - 1];\r
+     System.arraycopy(charArray, 0, shortened, 0, shortened.length);\r
+\r
+     a = charArray[charArray.length - 1]; // the character currently left out\r
+     int ii = shortened.length;\r
+     while (true) {\r
+       // Encode the shortened word. Encoding the full word here would only\r
+       // reproduce the original code, which is removed again below.\r
+       String s = getCode(new String(shortened));\r
+       nearmisscodes.put(s, s);\r
+       if (ii == 0)\r
+         break;\r
+       // put the omitted character back and omit its left neighbour instead\r
+       b = a;\r
+       a = shortened[ii - 1];\r
+       shortened[ii - 1] = b;\r
+       --ii;\r
+     }\r
+   }\r
+\r
+   nearmisscodes.remove(code); //already accounted for in phoneticList\r
+\r
+   Vector wordlist = getWordsFromCode(word, nearmisscodes);\r
+\r
+   if (wordlist.size() == 0 && phoneticList.size() == 0)\r
+     addBestGuess(word, phoneticList, matrix);\r
+\r
+   // We sort a Vector at the end instead of maintaining a continuously\r
+   // sorted TreeSet, because every time a collection is added to a TreeSet\r
+   // it has to be re-sorted. It is better to sort once at the end.\r
+\r
+   Collections.sort(phoneticList, new Word()); //always sort phonetic matches along the top\r
+   Collections.sort(wordlist, new Word()); //the non-phonetic matches can be listed below\r
+\r
+   phoneticList.addAll(wordlist);\r
+   return phoneticList;\r
+ }\r
+\r
+ /**\r
+ * When we don't come up with any suggestions (probably because the threshold was too strict),\r
+ * then pick the best guesses from the those words that have the same phonetic code.\r
+ * <p>\r
+ * This method is only needed to provide backward compatibility.\r
+ * @see addBestGuess(String word, Vector wordList, int[][] matrix)\r
+ * @param word - the word we are trying spell correct\r
+ * @param wordList - the linked list that will get the best guess\r
+ */\r
+ @SuppressWarnings({ "unused", "unchecked" })\r
+private void addBestGuess(String word, Vector wordList) {\r
+ addBestGuess(word,wordList,null);\r
+ }\r
+ \r
+ /**\r
+ * When we don't come up with any suggestions (probably because the threshold was too strict),\r
+ * then pick the best guesses from the those words that have the same phonetic code.\r
+ * @param word - the word we are trying spell correct\r
+ * @param Two dimensional array of int used to calculate \r
+ * edit distance. Allocating this memory outside of the function will greatly improve efficiency. \r
+ * @param wordList - the linked list that will get the best guess\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+private void addBestGuess(String word, Vector wordList, int[][] matrix) {\r
+ if(matrix == null)\r
+ matrix = new int[0][0];\r
+ \r
+ if (wordList.size() != 0)\r
+ throw new InvalidParameterException("the wordList vector must be empty");\r
+\r
+ int bestScore = Integer.MAX_VALUE;\r
+ \r
+ String code = getCode(word);\r
+ List simwordlist = getWords(code);\r
+\r
+ LinkedList candidates = new LinkedList();\r
+\r
+ for (Iterator j = simwordlist.iterator(); j.hasNext();) {\r
+ String similar = (String) j.next();\r
+ int distance = EditDistance.getDistance(word, similar, matrix);\r
+ if (distance <= bestScore) {\r
+ bestScore = distance;\r
+ Word goodGuess = new Word(similar, distance);\r
+ candidates.add(goodGuess);\r
+ }\r
+ }\r
+\r
+ //now, only pull out the guesses that had the best score\r
+ for (Iterator iter = candidates.iterator(); iter.hasNext();) {\r
+ Word candidate = (Word) iter.next();\r
+ if (candidate.getCost() == bestScore)\r
+ wordList.add(candidate);\r
+ }\r
+\r
+ }\r
+\r
+ /**\r
+  * Collects, for every phonetic code among the keys of <code>codes</code>,\r
+  * the dictionary words whose edit distance to <code>word</code> is below\r
+  * the configured <code>Configuration.SPELL_THRESHOLD</code>.\r
+  *\r
+  * @param word the word the candidates are compared with\r
+  * @param codes table whose keys are the phonetic codes to look up\r
+  * @return a Vector of Word objects, one per accepted candidate\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ private Vector getWordsFromCode(String word, Hashtable codes) {\r
+   final int maxDistance =\r
+       Configuration.getConfiguration().getInteger(Configuration.SPELL_THRESHOLD);\r
+   final int[][] scratch = new int[0][0];\r
+   Vector matches = new Vector();\r
+\r
+   Enumeration keys = codes.keys();\r
+   while (keys.hasMoreElements()) {\r
+     String code = (String) keys.nextElement();\r
+     for (Object entry : getWords(code)) {\r
+       String similar = (String) entry;\r
+       int distance = EditDistance.getDistance(word, similar, scratch);\r
+       if (distance < maxDistance)\r
+         matches.addElement(new Word(similar, distance));\r
+     }\r
+   }\r
+   return matches;\r
+ }\r
+\r
+ /**\r
+  * Computes the phonetic code of a word using this dictionary's\r
+  * Transformator.\r
+  *\r
+  * @param word the word to encode\r
+  * @return the phonetic code for the word\r
+  */\r
+ public String getCode(String word) {\r
+   final String phoneticCode = tf.transform(word);\r
+   return phoneticCode;\r
+ }\r
+\r
+ /**\r
+  * Returns a list of words that have the same phonetic code.\r
+  * <p>\r
+  * Subclasses supply the actual lookup. Within this class the result is\r
+  * used by <code>isCorrect</code>, <code>getWordsFromCode</code> and\r
+  * <code>addBestGuess</code>, which cast every element to\r
+  * <code>String</code>.\r
+  *\r
+  * @param phoneticCode The phonetic code common to the list of words\r
+  * @return A list of words having the same phonetic code\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+protected abstract List getWords(String phoneticCode);\r
+\r
+ /**\r
+ * Returns true if the word is correctly spelled against the current word list.\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+public boolean isCorrect(String word) {\r
+ List possible = getWords(getCode(word));\r
+ if (possible.contains(word))\r
+ return true;\r
+ //JMH should we always try the lowercase version. If I dont then capitalised\r
+ //words are always returned as incorrect.\r
+ else if (possible.contains(word.toLowerCase()))\r
+ return true;\r
+ return false;\r
+ }\r
+}\r
--- /dev/null
+/*\rJazzy - a Java library for Spell Checking\rCopyright (C) 2001 Mindaugas Idzelis\rFull text of license can be found in LICENSE.txt\r\rThis library is free software; you can redistribute it and/or\rmodify it under the terms of the GNU Lesser General Public\rLicense as published by the Free Software Foundation; either\rversion 2.1 of the License, or (at your option) any later version.\r\rThis library is distributed in the hope that it will be useful,\rbut WITHOUT ANY WARRANTY; without even the implied warranty of\rMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\rLesser General Public License for more details.\r\rYou should have received a copy of the GNU Lesser General Public\rLicense along with this library; if not, write to the Free Software\rFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r*/\rpackage com.swabunga.spell.engine;\r\rimport java.io.File;\rimport java.io.FileInputStream;\rimport java.io.FileNotFoundException;\rimport java.io.FileOutputStream;\rimport java.io.IOException;\rimport java.io.ObjectInputStream;\rimport java.io.ObjectOutputStream;\rimport java.io.Serializable;\rimport java.util.HashMap;\rimport java.util.Iterator;\rimport java.util.List;\r\r/**\r * Yet another <code>SpellDictionary</code> this one is based on Damien Guillaume's\r * Diskbased dictionary but adds a cache to try to improve abit on performance.\r *\r * @author Robert Gustavsson\r * @version 0.01\r */\r\rpublic class SpellDictionaryCachedDichoDisk extends SpellDictionaryDichoDisk {\r \r // Only used for testing to measure the effectiveness of the cache.\r static public int hits=0;\r static public int codes=0;\r\r public static final String JAZZY_DIR=".jazzy";\r public static final String PRE_CACHE_FILE_EXT=".pre";\r\r private static int MAX_CACHED=10000;\r\r @SuppressWarnings("unchecked")\r private final HashMap suggestionCache=new HashMap(MAX_CACHED);\r private String preCacheFileName;\r private String preCacheDir;\r\r /**\r * 
Dictionary Convienence Constructor.\r */\r public SpellDictionaryCachedDichoDisk(File wordList)\r throws FileNotFoundException, IOException {\r super(wordList);\r loadPreCache(wordList);\r }\r \r /**\r * Dictionary Convienence Constructor.\r */\r public SpellDictionaryCachedDichoDisk(File wordList, String encoding)\r throws FileNotFoundException, IOException {\r super(wordList, encoding);\r loadPreCache(wordList);\r }\r\r /**\r * Dictionary constructor that uses an aspell phonetic file to\r * build the transformation table.\r */\r\r public SpellDictionaryCachedDichoDisk(File wordList, File phonetic)\r throws FileNotFoundException, IOException {\r super(wordList, phonetic);\r loadPreCache(wordList);\r }\r\r /**\r * Dictionary constructor that uses an aspell phonetic file to\r * build the transformation table.\r */\r public SpellDictionaryCachedDichoDisk(File wordList, File phonetic, String encoding)\r throws FileNotFoundException, IOException {\r super(wordList, phonetic, encoding);\r loadPreCache(wordList);\r }\r\r /**\r * Add a word permanantly to the dictionary (and the dictionary file).\r * <i>not implemented !</i>\r */\r @Override\r public void addWord(String word) {\r System.err.println("error: addWord is not implemented for SpellDictionaryCachedDichoDisk");\r }\r\r /**\r * Clears the cache.\r */\r public void clearCache(){\r suggestionCache.clear();\r }\r\r /**\r * Returns a list of strings (words) for the code.\r */\r @Override\r @SuppressWarnings("unchecked")\r public List getWords(String code) {\r List list;\r codes++;\r if(suggestionCache.containsKey(code)){\r hits++;\r list=getCachedList(code);\r return list;\r }\r list=super.getWords(code);\r addToCache(code,list);\r \r return list;\r }\r /**\r * This method returns the cached suggestionlist and also moves the code to\r * the top of the codeRefQueue to indicate this code has resentlly been\r * referenced.\r */\r @SuppressWarnings("unchecked")\r private List getCachedList(String code){\r CacheObject 
obj=(CacheObject)suggestionCache.get(code);\r obj.setRefTime();\r return obj.getSuggestionList();\r }\r\r /**\r * Adds a code and it's suggestion list to the cache.\r */\r @SuppressWarnings("unchecked")\r private void addToCache(String code, List l){\r String c=null;\r String lowestCode=null;\r long lowestTime=Long.MAX_VALUE;\r Iterator it;\r CacheObject obj;\r\r if(suggestionCache.size()>=MAX_CACHED){\r it=suggestionCache.keySet().iterator();\r while(it.hasNext()){\r c=(String)it.next();\r obj=(CacheObject)suggestionCache.get(c);\r if(obj.getRefTime()==0){\r lowestCode=c;\r break;\r }\r if(lowestTime>obj.getRefTime()){\r lowestCode=c;\r lowestTime=obj.getRefTime();\r }\r }\r suggestionCache.remove(lowestCode);\r } \r suggestionCache.put(code,new CacheObject(l));\r }\r\r /**\r * Load the cache from file. The cach file has the same name as the \r * dico file with the .pre extension added.\r */\r @SuppressWarnings("unchecked")\r private void loadPreCache(File dicoFile)throws IOException{\r String code;\r List suggestions;\r long size,\r time;\r File preFile;\r ObjectInputStream in;\r\r preCacheDir=System.getProperty("user.home")+"/"+JAZZY_DIR;\r preCacheFileName=preCacheDir+"/"+dicoFile.getName()+PRE_CACHE_FILE_EXT;\r //System.out.println(preCacheFileName);\r preFile=new File(preCacheFileName);\r if(!preFile.exists()){\r System.err.println("No precache file");\r return;\r }\r //System.out.println("Precaching...");\r in=new ObjectInputStream(new FileInputStream(preFile));\r try{\r size=in.readLong();\r for(int i=0;i<size;i++){\r code=(String)in.readObject();\r time=in.readLong();\r suggestions=(List)in.readObject();\r suggestionCache.put(code,new CacheObject(suggestions,time));\r }\r }catch(ClassNotFoundException ex){\r System.out.println(ex.getMessage());\r }\r in.close();\r }\r\r /**\r * Saves the current cache to file.\r */\r @SuppressWarnings("unchecked")\r public void saveCache() throws IOException{\r String code;\r CacheObject obj;\r File preFile,\r preDir;\r 
ObjectOutputStream out;\r Iterator it;\r\r if(preCacheFileName==null || preCacheDir==null){\r System.err.println("Precache filename has not been set.");\r return;\r }\r //System.out.println("Saving cache to precache file...");\r preDir=new File(preCacheDir);\r if(!preDir.exists())\r preDir.mkdir();\r preFile=new File(preCacheFileName);\r out=new ObjectOutputStream(new FileOutputStream(preFile));\r it=suggestionCache.keySet().iterator();\r out.writeLong(suggestionCache.size());\r while(it.hasNext()){\r code=(String)it.next();\r obj=(CacheObject)suggestionCache.get(code);\r out.writeObject(code);\r out.writeLong(obj.getRefTime());\r out.writeObject(obj.getSuggestionList());\r }\r out.close();\r }\r\r // INNER CLASSES\r // ------------------------------------------------------------------------\r @SuppressWarnings("serial")\r private class CacheObject implements Serializable{\r \r @SuppressWarnings("unchecked")\r private List suggestions=null;\r private long refTime=0;\r\r @SuppressWarnings("unchecked")\r public CacheObject(List list){\r this.suggestions=list;\r }\r\r @SuppressWarnings("unchecked")\r public CacheObject(List list, long time){\r this.suggestions=list;\r this.refTime=time;\r }\r \r @SuppressWarnings("unchecked")\r public List getSuggestionList(){\r return suggestions;\r }\r\r public void setRefTime(){\r refTime=System.currentTimeMillis();\r }\r\r public long getRefTime(){\r return refTime;\r }\r }\r}\r
\ No newline at end of file
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.EOFException;\r
+import java.io.File;\r
+import java.io.FileNotFoundException;\r
+import java.io.IOException;\r
+import java.io.RandomAccessFile;\r
+import java.util.LinkedList;\r
+import java.util.List;\r
+\r
+/**\r
+ * Another implementation of <code>SpellDictionary</code> that doesn't cache any words in memory. Avoids the huge\r
+ * footprint of <code>SpellDictionaryHashMap</code> at the cost of relatively minor latency. A future version\r
+ * of this class that implements some caching strategies might be a good idea in the future, if there's any\r
+ * demand for it.\r
+ *\r
+ * This implementation requires a special dictionary file, with "code*word" lines sorted by code.\r
+ * It's using a dichotomy algorithm to search for words in the dictionary\r
+ *\r
+ * @author Damien Guillaume\r
+ * @version 0.1\r
+ */\r
+public class SpellDictionaryDichoDisk extends SpellDictionaryASpell {\r
+\r
+ /** Holds the dictionary file for reading*/\r
+ private RandomAccessFile dictFile = null;\r
+\r
+ /** dictionary and phonetic file encoding */\r
+ private String encoding = null;\r
+\r
+ /**\r
+  * Opens a dictionary that uses the default phonetic transformation and\r
+  * reads the words list without an explicit encoding.\r
+  *\r
+  * @param wordList The file containing the words list for the dictionary\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * words list file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * file\r
+  */\r
+ public SpellDictionaryDichoDisk(File wordList)\r
+     throws FileNotFoundException, IOException {\r
+   super((File) null);\r
+   this.dictFile = new RandomAccessFile(wordList, "r");\r
+ }\r
+\r
+ /**\r
+  * Opens a dictionary that uses the default phonetic transformation and\r
+  * decodes the words list with the given character encoding.\r
+  *\r
+  * @param wordList The file containing the words list for the dictionary\r
+  * @param encoding Uses the character set encoding specified\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * words list file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * file\r
+  */\r
+ public SpellDictionaryDichoDisk(File wordList, String encoding)\r
+     throws FileNotFoundException, IOException {\r
+   super((File) null);\r
+   this.dictFile = new RandomAccessFile(wordList, "r");\r
+   this.encoding = encoding;\r
+ }\r
+\r
+ /**\r
+  * Opens a dictionary that builds its transformation table from an aspell\r
+  * phonetic file.\r
+  *\r
+  * @param wordList The file containing the words list for the dictionary\r
+  * @param phonetic The file to use for phonetic transformation of the\r
+  * wordlist.\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * file\r
+  */\r
+ public SpellDictionaryDichoDisk(File wordList, File phonetic)\r
+     throws FileNotFoundException, IOException {\r
+   super(phonetic);\r
+   this.dictFile = new RandomAccessFile(wordList, "r");\r
+ }\r
+ \r
+ /**\r
+  * Opens a dictionary that builds its transformation table from an aspell\r
+  * phonetic file, using the given character encoding for both the phonetic\r
+  * file and the words list.\r
+  *\r
+  * @param wordList The file containing the words list for the dictionary\r
+  * @param phonetic The file to use for phonetic transformation of the\r
+  * wordlist.\r
+  * @param encoding Uses the character set encoding specified\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * file\r
+  */\r
+ public SpellDictionaryDichoDisk(File wordList, File phonetic, String encoding)\r
+     throws FileNotFoundException, IOException {\r
+   super(phonetic, encoding);\r
+   this.dictFile = new RandomAccessFile(wordList, "r");\r
+   this.encoding = encoding;\r
+ }\r
+ \r
+ /**\r
+ * Add a word permanently to the dictionary (and the dictionary file).\r
+ * <i>not implemented !</i>\r
+ * @param word The word to add.\r
+ */\r
+ public void addWord(String word) {\r
+ System.err.println("error: addWord is not implemented for SpellDictionaryDichoDisk");\r
+ }\r
+\r
+ /**\r
+ * Search the dictionary file for the words corresponding to the code\r
+ * within positions p1 - p2\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+private LinkedList dichoFind(String code, long p1, long p2) throws IOException {\r
+ //System.out.println("dichoFind("+code+","+p1+","+p2+")");\r
+ long pm = (p1 + p2) / 2;\r
+ dictFile.seek(pm);\r
+ String l;\r
+ if (encoding == null)\r
+ l = dictFile.readLine();\r
+ else\r
+ l = dictReadLine();\r
+ pm = dictFile.getFilePointer();\r
+ if (encoding == null)\r
+ l = dictFile.readLine();\r
+ else\r
+ l = dictReadLine();\r
+ long pm2 = dictFile.getFilePointer();\r
+ if (pm2 >= p2)\r
+ return(seqFind(code, p1, p2));\r
+ int istar = l.indexOf('*');\r
+ if (istar == -1)\r
+ throw new IOException("bad format: no * !");\r
+ String testcode = l.substring(0, istar);\r
+ int comp = code.compareTo(testcode);\r
+ if (comp < 0)\r
+ return(dichoFind(code, p1, pm-1));\r
+ else if (comp > 0)\r
+ return(dichoFind(code, pm2, p2));\r
+ else {\r
+ LinkedList l1 = dichoFind(code, p1, pm-1);\r
+ LinkedList l2 = dichoFind(code, pm2, p2);\r
+ String word = l.substring(istar+1);\r
+ l1.add(word);\r
+ l1.addAll(l2);\r
+ return(l1);\r
+ }\r
+ }\r
+ \r
+ @SuppressWarnings("unchecked")\r
+private LinkedList seqFind(String code, long p1, long p2) throws IOException {\r
+ //System.out.println("seqFind("+code+","+p1+","+p2+")");\r
+ LinkedList list = new LinkedList();\r
+ dictFile.seek(p1);\r
+ while (dictFile.getFilePointer() < p2) {\r
+ String l;\r
+ if (encoding == null)\r
+ l = dictFile.readLine();\r
+ else\r
+ l = dictReadLine();\r
+ int istar = l.indexOf('*');\r
+ if (istar == -1)\r
+ throw new IOException("bad format: no * !");\r
+ String testcode = l.substring(0, istar);\r
+ if (testcode.equals(code)) {\r
+ String word = l.substring(istar+1);\r
+ list.add(word);\r
+ }\r
+ }\r
+ return(list);\r
+ }\r
+ \r
+ /**\r
+  * Reads a line of dictFile and decodes it with the configured encoding.\r
+  * <p>\r
+  * A line ends at '\n', at '\r', at "\r\n" (both bytes are consumed, as\r
+  * <code>RandomAccessFile.readLine()</code> does), or at the end of the\r
+  * file. The terminator is not part of the returned text and lines may be\r
+  * of any length.\r
+  *\r
+  * @return the decoded line without its terminator; the empty string when\r
+  * the line is empty or the end of the file has been reached\r
+  * @throws IOException if the file cannot be read or the encoding is not\r
+  * supported\r
+  */\r
+ private String dictReadLine() throws IOException {\r
+   java.io.ByteArrayOutputStream line = new java.io.ByteArrayOutputStream(64);\r
+   int b;\r
+   while ((b = dictFile.read()) != -1) {\r
+     if (b == '\n')\r
+       break;\r
+     if (b == '\r') {\r
+       // Swallow the '\n' of a "\r\n" pair; otherwise step back so the\r
+       // byte after a lone '\r' starts the next line.\r
+       long afterCr = dictFile.getFilePointer();\r
+       if (dictFile.read() != '\n')\r
+         dictFile.seek(afterCr);\r
+       break;\r
+     }\r
+     line.write(b);\r
+   }\r
+   return line.toString(encoding);\r
+ }\r
+ \r
+ /**\r
+  * Returns a list of strings (words) for the code, looked up in the whole\r
+  * dictionary file. A read problem is reported on <code>System.err</code>\r
+  * and results in an empty list.\r
+  *\r
+  * @param code The phonetic code common to the list of words\r
+  * @return A list of words having the same phonetic code\r
+  */\r
+ @Override\r
+ @SuppressWarnings("unchecked")\r
+ public List getWords(String code) {\r
+   try {\r
+     return dichoFind(code, 0, dictFile.length() - 1);\r
+   } catch (IOException ex) {\r
+     System.err.println("IOException: " + ex.getMessage());\r
+     return new LinkedList();\r
+   }\r
+ }\r
+\r
+}\r
+\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+/* Created by bgalbs on Jan 30, 2003 at 11:38:39 PM */\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.BufferedOutputStream;\r
+import java.io.BufferedReader;\r
+import java.io.BufferedWriter;\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
+import java.io.FileOutputStream;\r
+import java.io.FileReader;\r
+import java.io.FileWriter;\r
+import java.io.IOException;\r
+import java.io.InputStream;\r
+import java.util.ArrayList;\r
+import java.util.Collections;\r
+import java.util.HashMap;\r
+import java.util.List;\r
+import java.util.Map;\r
+import java.util.StringTokenizer;\r
+import java.util.Vector;\r
+\r
+/**\r
+ * An implementation of <code>SpellDictionary</code> that doesn't cache any words in memory. Avoids the huge\r
+ * footprint of <code>SpellDictionaryHashMap</code> at the cost of relatively minor latency. A later version\r
+ * of this class that implements some caching strategies might be a good idea, if there's any\r
+ * demand for it.\r
+ * <p>\r
+ * This class makes use of the "classic" Java IO library (java.io). However, it could probably benefit from\r
+ * the new IO APIs (java.nio) and it is anticipated that a future version of this class, probably called\r
+ * <code>SpellDictionaryDiskNIO</code> will appear at some point.\r
+ *\r
+ * @author Ben Galbraith (ben@galbraiths.org)\r
+ * @version 0.1\r
+ * @since 0.5\r
+ */\r
+public class SpellDictionaryDisk extends SpellDictionaryASpell {\r
+ private final static String DIRECTORY_WORDS = "words";\r
+ private final static String DIRECTORY_DB = "db";\r
+ private final static String FILE_CONTENTS = "contents";\r
+ private final static String FILE_DB = "words.db";\r
+ private final static String FILE_INDEX = "words.idx";\r
+\r
+ /* maximum number of words an index entry can represent */\r
+ private final static int INDEX_SIZE_MAX = 200;\r
+\r
+ private final File base;\r
+ private final File words;\r
+ private final File db;\r
+ @SuppressWarnings("unchecked")\r
+private Map index;\r
+ /**\r
+ * The flag indicating if the initial preparation or loading of the on \r
+ * disk dictionary is complete.\r
+ */\r
+ protected boolean ready;\r
+\r
+ /* used at time of creation of index to speed up determining the number of words per index entry */\r
+ @SuppressWarnings("unchecked")\r
+private List indexCodeCache = null;\r
+\r
+ /**\r
+  * Constructs a spell dictionary that is kept on disk.\r
+  * <p>\r
+  * The dictionary is generated from the word list files found in the\r
+  * <code>base/words</code> directory; each of these files holds one word per\r
+  * line. The generated files are written to the <code>base/db</code>\r
+  * directory (created if missing):\r
+  * <ul>\r
+  * <li><code>contents</code> lists the word files used for spelling.</li>\r
+  * <li><code>words.db</code> holds the content of the word files organized\r
+  * as a <em>database</em> of words.</li>\r
+  * <li><code>words.idx</code> is an index into <code>words.db</code>.</li>\r
+  * </ul>\r
+  * The <code>contents</code> file stores one <code>filename,size</code> line\r
+  * per file of the <code>base/words</code> directory. If one of these files\r
+  * was changed, added or deleted since the last build, <code>words.db</code>\r
+  * and <code>words.idx</code> are generated again; otherwise only the\r
+  * existing index is loaded.\r
+  * <p>\r
+  * NOTE: Do *not* create two instances of this class pointing to the same\r
+  * <code>base</code> unless you are sure that a new dictionary does not have\r
+  * to be created. No locking protects the generated files.\r
+  *\r
+  * @param base the base directory in which <code>SpellDictionaryDisk</code>\r
+  * expects to find its files.\r
+  * @param phonetic the phonetic file, handed to the superclass constructor.\r
+  * @param block decides how a required rebuild is run: if true, the\r
+  * constructor returns only after the rebuild; if false, the rebuild runs on\r
+  * a new thread, the constructor returns immediately and {@link #isReady()}\r
+  * stays false until the rebuild has finished.\r
+  * @throws java.io.FileNotFoundException if <code>base</code> or\r
+  * <code>base/words</code> does not exist\r
+  * @throws java.io.IOException indicates problems reading the files\r
+  */\r
+ public SpellDictionaryDisk(File base, File phonetic, boolean block) throws FileNotFoundException, IOException {\r
+     super(phonetic);\r
+     this.ready = false;\r
+\r
+     this.base = base;\r
+     this.words = new File(base, DIRECTORY_WORDS);\r
+     this.db = new File(base, DIRECTORY_DB);\r
+\r
+     if (!this.base.exists()) throw new FileNotFoundException("Couldn't find required path '" + this.base + "'");\r
+     if (!this.words.exists()) throw new FileNotFoundException("Couldn't find required path '" + this.words + "'");\r
+     if (!this.db.exists()) db.mkdirs();\r
+\r
+     // Nothing changed since the last build: the existing index is enough.\r
+     if (!newDictionaryFiles()) {\r
+         loadIndex();\r
+         ready = true;\r
+         return;\r
+     }\r
+\r
+     // Rebuild requested in the calling thread.\r
+     if (block) {\r
+         buildNewDictionaryDatabase();\r
+         loadIndex();\r
+         ready = true;\r
+         return;\r
+     }\r
+\r
+     // Rebuild in the background; failures are only reported on the console.\r
+     Thread builder = new Thread() {\r
+         @Override\r
+         public void run() {\r
+             try {\r
+                 buildNewDictionaryDatabase();\r
+                 loadIndex();\r
+                 ready = true;\r
+             } catch (Exception e) {\r
+                 e.printStackTrace();\r
+             }\r
+         }\r
+     };\r
+     builder.start();\r
+ }\r
+\r
+ /**\r
+  * Rebuilds the on disk dictionary: the words database file, its index and\r
+  * the contents file.\r
+  * @throws FileNotFoundException if one of the involved files cannot be opened\r
+  * @throws IOException if reading or writing one of the files fails\r
+  */\r
+ protected void buildNewDictionaryDatabase() throws FileNotFoundException, IOException {\r
+     // Step 1: merge all word files into one sorted temporary file.\r
+     File merged = buildSortedFile();\r
+\r
+     // Step 2: create the db from the sorted file, then drop the temporary file.\r
+     buildCodeDb(merged);\r
+     merged.delete();\r
+\r
+     // Step 3: record which word files this build was made from.\r
+     buildContentsFile();\r
+ }\r
+\r
+ /**\r
+  * Would add another word to the dictionary. <em>Not implemented for this\r
+  * class</em>: every call fails.\r
+  * @param word The word to add.\r
+  * @throws UnsupportedOperationException always\r
+  */\r
+ public void addWord(String word) {\r
+     throw new UnsupportedOperationException("addWord not yet implemented (sorry)");\r
+ }\r
+\r
+ /**\r
+ * Returns a list of words that have the same phonetic code.\r
+ * @param code The phonetic code common to the list of words\r
+ * @return A list of words having the same phonetic code\r
+ */\r
+ @Override\r
+@SuppressWarnings("unchecked")\r
+public List getWords(String code) {\r
+ Vector words = new Vector();\r
+\r
+ int[] posLen = getStartPosAndLen(code);\r
+ if (posLen != null) {\r
+ try {\r
+ InputStream input = new FileInputStream(new File(db, FILE_DB));\r
+ input.skip(posLen[0]);\r
+ byte[] bytes = new byte[posLen[1]];\r
+ input.read(bytes, 0, posLen[1]);\r
+ input.close();\r
+\r
+ String data = new String(bytes);\r
+ String[] lines = split(data, "\n");\r
+ for (String line : lines) {\r
+ String[] s = split(line, ",");\r
+ if (s[0].equals(code)) words.addElement(s[1]);\r
+ }\r
+ } catch (Exception e) {\r
+ e.printStackTrace();\r
+ }\r
+ }\r
+\r
+ return words;\r
+ }\r
+\r
+ /**\r
+  * Tells whether the initial preparation or loading of the on disk\r
+  * dictionary has completed.\r
+  * @return the current value of the <code>ready</code> flag.\r
+  */\r
+ public boolean isReady() {\r
+     return this.ready;\r
+ }\r
+\r
+ /**\r
+  * Decides whether the word database has to be (re)built, by comparing the\r
+  * <code>filename,size</code> entries of the contents file with the files\r
+  * currently present in the words directory.\r
+  * @return true if the number of files differs, if a file name or size is\r
+  * not recorded in the contents file, or if the contents file is malformed\r
+  * @throws FileNotFoundException if the words directory cannot be listed\r
+  * @throws IOException if the contents file cannot be read\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ private boolean newDictionaryFiles() throws FileNotFoundException, IOException {\r
+     /* load in contents file, which indicates the files and sizes of the last db build */\r
+     List contents = new ArrayList();\r
+     File c = new File(db, FILE_CONTENTS);\r
+     if (c.exists()) {\r
+         BufferedReader reader = new BufferedReader(new FileReader(c));\r
+         try {\r
+             String line;\r
+             while ((line = reader.readLine()) != null) {\r
+                 // format of file should be [filename],[size]\r
+                 String[] s = split(line, ",");\r
+                 // An unreadable entry means the record of the last build\r
+                 // cannot be trusted: ask for a rebuild.\r
+                 if (s.length < 2) return true;\r
+                 // Sizes are written from File.length(), i.e. as long values.\r
+                 contents.add(new FileSize(s[0], Long.parseLong(s[1])));\r
+             }\r
+         } catch (NumberFormatException e) {\r
+             // Same reasoning as above for a size that is not a number.\r
+             return true;\r
+         } finally {\r
+             reader.close();\r
+         }\r
+     }\r
+\r
+     /* compare this to the actual directory */\r
+     File[] wordFiles = words.listFiles();\r
+     if (wordFiles == null) throw new FileNotFoundException("Couldn't list the files of '" + words + "'");\r
+\r
+     // if the size of the contents list and the number of word files are different, it\r
+     // means we've definitely got to reindex\r
+     if (contents.size() != wordFiles.length) return true;\r
+\r
+     // check and make sure that all the word files haven't changed on us\r
+     for (File wordFile : wordFiles) {\r
+         FileSize fs = new FileSize(wordFile.getName(), wordFile.length());\r
+         if (!contents.contains(fs)) return true;\r
+     }\r
+\r
+     return false;\r
+ }\r
+\r
+ /**\r
+  * Merges all files of the words directory into one temporary file that\r
+  * holds every distinct, trimmed, non-empty word once, in sorted order.\r
+  * @return the temporary file; the caller is expected to delete it\r
+  * @throws FileNotFoundException if a word file cannot be opened\r
+  * @throws IOException if reading a word file or writing the result fails\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ private File buildSortedFile() throws FileNotFoundException, IOException {\r
+     List w = new ArrayList();\r
+\r
+     /*\r
+      * read every single word into the list. eeek. if this causes problems,\r
+      * we may wish to explore disk-based sorting or more efficient memory-based storage\r
+      */\r
+     File[] wordFiles = words.listFiles();\r
+     for (File wordFile : wordFiles) {\r
+         BufferedReader r = new BufferedReader(new FileReader(wordFile));\r
+         try {\r
+             String word;\r
+             while ((word = r.readLine()) != null) {\r
+                 // Trim first, so that whitespace-only lines are dropped\r
+                 // instead of being stored as an empty word.\r
+                 word = word.trim();\r
+                 if (word.length() > 0) {\r
+                     w.add(word);\r
+                 }\r
+             }\r
+         } finally {\r
+             r.close();\r
+         }\r
+     }\r
+\r
+     Collections.sort(w);\r
+\r
+     // FIXME - error handling for running out of disk space would be nice.\r
+     File file = File.createTempFile("jazzy", "sorted");\r
+     BufferedWriter writer = new BufferedWriter(new FileWriter(file));\r
+     try {\r
+         // The list is sorted, so duplicates are adjacent: comparing with\r
+         // the previous word is enough to write each word once.\r
+         String prev = null;\r
+         for (int i = 0; i < w.size(); i++) {\r
+             String word = (String) w.get(i);\r
+             if (prev == null || !prev.equals(word)) {\r
+                 writer.write(word);\r
+                 writer.newLine();\r
+             }\r
+             prev = word;\r
+         }\r
+     } finally {\r
+         writer.close();\r
+     }\r
+\r
+     return file;\r
+ }\r
+\r
+ /**\r
+  * Builds the words database file and its index file from a file of words.\r
+  * <p>\r
+  * Every word is paired with its phonetic code and the pairs are sorted by\r
+  * code. They are written to the database as <code>code,word</code> lines.\r
+  * Consecutive lines that share the same index code form one group; for\r
+  * each group the index file receives a\r
+  * <code>indexCode,startOffset,byteLength</code> line.\r
+  * @param sortedWords the file to read the words from, one word per line\r
+  * @throws FileNotFoundException if one of the files cannot be opened\r
+  * @throws IOException if reading or writing fails\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ private void buildCodeDb(File sortedWords) throws FileNotFoundException, IOException {\r
+     List codeList = new ArrayList();\r
+\r
+     BufferedReader reader = new BufferedReader(new FileReader(sortedWords));\r
+     try {\r
+         String word;\r
+         while ((word = reader.readLine()) != null) {\r
+             codeList.add(new CodeWord(this.getCode(word), word));\r
+         }\r
+     } finally {\r
+         reader.close();\r
+     }\r
+\r
+     Collections.sort(codeList);\r
+\r
+     // Index entries collected while the database is written. Named so that\r
+     // it does not shadow the index field of the class.\r
+     List entries = new ArrayList();\r
+\r
+     String currentCode = null;\r
+     int currentPosition = 0;\r
+     int currentLength = 0;\r
+     BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(new File(db, FILE_DB)));\r
+     try {\r
+         for (int i = 0; i < codeList.size(); i++) {\r
+             CodeWord cw = (CodeWord) codeList.get(i);\r
+             String thisCode = getIndexCode(cw.getCode(), codeList);\r
+             String toWrite = cw.getCode() + "," + cw.getWord() + "\n";\r
+             byte[] bytes = toWrite.getBytes();\r
+\r
+             if (currentCode == null) currentCode = thisCode;\r
+             if (!currentCode.equals(thisCode)) {\r
+                 // A new group starts: record the finished one.\r
+                 entries.add(new Object[]{currentCode, new int[]{currentPosition, currentLength}});\r
+                 currentPosition += currentLength;\r
+                 currentLength = bytes.length;\r
+                 currentCode = thisCode;\r
+             } else {\r
+                 currentLength += bytes.length;\r
+             }\r
+             out.write(bytes);\r
+         }\r
+     } finally {\r
+         out.close();\r
+     }\r
+\r
+     // Output the last group. Its start offset is 0 when the whole database\r
+     // consists of a single group, so the offset must not be used to decide\r
+     // whether there is something to record.\r
+     if (currentCode != null && currentLength != 0)\r
+         entries.add(new Object[]{currentCode, new int[]{currentPosition, currentLength}});\r
+\r
+     BufferedWriter writer = new BufferedWriter(new FileWriter(new File(db, FILE_INDEX)));\r
+     try {\r
+         for (int i = 0; i < entries.size(); i++) {\r
+             Object[] o = (Object[]) entries.get(i);\r
+             int[] posLen = (int[]) o[1];\r
+             writer.write(o[0].toString());\r
+             writer.write(",");\r
+             writer.write(String.valueOf(posLen[0]));\r
+             writer.write(",");\r
+             writer.write(String.valueOf(posLen[1]));\r
+             writer.newLine();\r
+         }\r
+     } finally {\r
+         writer.close();\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Writes the contents file: one <code>filename,size</code> line for each\r
+  * file of the words directory. When the directory holds no file, the\r
+  * contents file is deleted instead.\r
+  * @throws IOException if the contents file cannot be written\r
+  */\r
+ private void buildContentsFile() throws IOException {\r
+     File[] wordFiles = words.listFiles();\r
+     if (wordFiles.length > 0) {\r
+         BufferedWriter writer = new BufferedWriter(new FileWriter(new File(db, FILE_CONTENTS)));\r
+         try {\r
+             for (File wordFile : wordFiles) {\r
+                 writer.write(wordFile.getName());\r
+                 writer.write(",");\r
+                 writer.write(String.valueOf(wordFile.length()));\r
+                 writer.newLine();\r
+             }\r
+         } finally {\r
+             // Release the file handle even when a write fails.\r
+             writer.close();\r
+         }\r
+     } else {\r
+         new File(db, FILE_CONTENTS).delete();\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Loads the index file from disk. The index file accelerates words lookup\r
+  * into the dictionary db file.\r
+  * <p>\r
+  * Each line is expected to be <code>indexCode,startOffset,byteLength</code>;\r
+  * it is stored in the index map as indexCode to\r
+  * <code>{startOffset, byteLength}</code>.\r
+  * @throws IOException if the index file cannot be opened or read\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ protected void loadIndex() throws IOException {\r
+     index = new HashMap();\r
+     File idx = new File(db, FILE_INDEX);\r
+     BufferedReader reader = new BufferedReader(new FileReader(idx));\r
+     try {\r
+         String line;\r
+         while ((line = reader.readLine()) != null) {\r
+             String[] fields = split(line, ",");\r
+             index.put(fields[0], new int[]{Integer.parseInt(fields[1]), Integer.parseInt(fields[2])});\r
+         }\r
+     } finally {\r
+         // Release the file handle even when a line cannot be read or parsed.\r
+         reader.close();\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Finds the index entry for a phonetic code. The code is looked up as is\r
+  * and then shortened from the end, one character at a time, until an entry\r
+  * is found.\r
+  * @param code the phonetic code to locate\r
+  * @return the <code>{startOffset, byteLength}</code> pair of the matching\r
+  * index entry, or null if no prefix of the code is indexed or the index\r
+  * has not been loaded yet\r
+  */\r
+ private int[] getStartPosAndLen(String code) {\r
+     // The index stays null until loadIndex() has run, which may happen on\r
+     // a background thread after the constructor returned.\r
+     Map current = index;\r
+     if (current == null) return null;\r
+\r
+     while (code.length() > 0) {\r
+         int[] posLen = (int[]) current.get(code);\r
+         if (posLen != null) return posLen;\r
+         code = code.substring(0, code.length() - 1);\r
+     }\r
+     return null;\r
+ }\r
+\r
+ /**\r
+  * Chooses the index code (a prefix of <code>code</code>, or the code\r
+  * itself) under which a phonetic code is grouped in the index.\r
+  * <p>\r
+  * Codes of at most one character are returned unchanged. Otherwise the\r
+  * first cached index code that <code>code</code> starts with is reused. If\r
+  * there is none, prefixes of increasing length (excluding the full code)\r
+  * are tried, and the first one counted as matching at most\r
+  * <code>INDEX_SIZE_MAX</code> entries of <code>codes</code> is returned and\r
+  * cached. When no prefix qualifies, the full code is returned uncached.\r
+  * @param code the phonetic code to find an index code for\r
+  * @param codes the <code>CodeWord</code> entries to count in; it is searched\r
+  * with <code>Collections.binarySearch</code>, so it has to be sorted by code\r
+  * @return the index code for <code>code</code>\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+private String getIndexCode(String code, List codes) {\r
+ if (indexCodeCache == null) indexCodeCache = new ArrayList();\r
+\r
+ if (code.length() <= 1) return code;\r
+\r
+ // Reuse an index code chosen earlier if this code falls under it.\r
+ for (int i = 0; i < indexCodeCache.size(); i++) {\r
+ String c = (String) indexCodeCache.get(i);\r
+ if (code.startsWith(c)) return c;\r
+ }\r
+\r
+ int foundSize = -1;\r
+ boolean cacheable = false;\r
+ // Try prefixes from the shortest to the longest proper prefix.\r
+ for (int z = 1; z < code.length(); z++) {\r
+ String thisCode = code.substring(0, z);\r
+ int count = 0;\r
+ // i is advanced at the end of the body, not in the loop header.\r
+ for (int i = 0; i < codes.size();) {\r
+ if (i == 0) {\r
+ // First pass only: start at the position reported for an entry whose\r
+ // code equals the prefix. A negative result (no such entry) makes the\r
+ // scan start at the beginning of the list.\r
+ // NOTE(review): with an exact match the scan starts at the reported\r
+ // position, so equal codes located before it look uncounted - confirm.\r
+ i = Collections.binarySearch(codes, new CodeWord(thisCode, ""));\r
+ if (i < 0) i = 0;\r
+ }\r
+\r
+ CodeWord cw = (CodeWord) codes.get(i);\r
+ if (cw.getCode().startsWith(thisCode)) {\r
+ count++;\r
+ // No need to count further once the limit is exceeded.\r
+ if (count > INDEX_SIZE_MAX) break;\r
+ // A code greater than the prefix that does not start with it ends the scan.\r
+ } else if (cw.getCode().compareTo(thisCode) > 0) break;\r
+ i++;\r
+ }\r
+ if (count <= INDEX_SIZE_MAX) {\r
+ cacheable = true;\r
+ foundSize = z;\r
+ break;\r
+ }\r
+ }\r
+\r
+ // No prefix qualified: fall back to the full code, which is not cached.\r
+ String newCode = (foundSize == -1) ? code : code.substring(0, foundSize);\r
+ if (cacheable) indexCodeCache.add(newCode);\r
+ return newCode;\r
+ }\r
+\r
+ /**\r
+  * Splits a string into its tokens with a <code>StringTokenizer</code>:\r
+  * every character of <code>delimiter</code> separates tokens and empty\r
+  * tokens are never produced.\r
+  * @param input the string to split\r
+  * @param delimiter the delimiter characters\r
+  * @return the tokens in their order of appearance\r
+  */\r
+ private static String[] split(String input, String delimiter) {\r
+     StringTokenizer tokens = new StringTokenizer(input, delimiter);\r
+     String[] parts = new String[tokens.countTokens()];\r
+\r
+     int next = 0;\r
+     while (tokens.hasMoreTokens()) {\r
+         parts[next++] = tokens.nextToken();\r
+     }\r
+\r
+     return parts;\r
+ }\r
+\r
+ /**\r
+  * A word together with its phonetic code.\r
+  * <p>\r
+  * Ordering and equality deliberately use different fields: instances are\r
+  * ordered by code only, while equality and hash code use the word only.\r
+  * <p>\r
+  * Declared static because it uses nothing of the enclosing dictionary; one\r
+  * instance is created per word, so a reference to the enclosing instance\r
+  * in each of them would be wasted.\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ private static class CodeWord implements Comparable {\r
+     private final String code;\r
+     private final String word;\r
+\r
+     /**\r
+      * @param code the phonetic code of the word\r
+      * @param word the word itself\r
+      */\r
+     public CodeWord(String code, String word) {\r
+         this.code = code;\r
+         this.word = word;\r
+     }\r
+\r
+     /** @return the phonetic code */\r
+     public String getCode() {\r
+         return code;\r
+     }\r
+\r
+     /** @return the word */\r
+     public String getWord() {\r
+         return word;\r
+     }\r
+\r
+     /** Two instances are equal when their words are equal; the code is ignored. */\r
+     @Override\r
+     public boolean equals(Object o) {\r
+         if (this == o) return true;\r
+         if (!(o instanceof CodeWord)) return false;\r
+\r
+         final CodeWord codeWord = (CodeWord) o;\r
+         return word.equals(codeWord.word);\r
+     }\r
+\r
+     /** Hash code of the word, in line with {@link #equals(Object)}. */\r
+     @Override\r
+     public int hashCode() {\r
+         return word.hashCode();\r
+     }\r
+\r
+     /** Orders instances by their code; the word is ignored. */\r
+     public int compareTo(Object o) {\r
+         return code.compareTo(((CodeWord) o).getCode());\r
+     }\r
+ }\r
+\r
+ /**\r
+  * The name and the size of a file; used to compare the entries of the\r
+  * contents file with the files found in the words directory.\r
+  * <p>\r
+  * Declared static because it uses nothing of the enclosing dictionary.\r
+  */\r
+ private static class FileSize {\r
+     private final String filename;\r
+     private final long size;\r
+\r
+     /**\r
+      * @param filename the name of the file\r
+      * @param size the size of the file\r
+      */\r
+     public FileSize(String filename, long size) {\r
+         this.filename = filename;\r
+         this.size = size;\r
+     }\r
+\r
+     /** @return the name of the file */\r
+     @SuppressWarnings("unused")\r
+     public String getFilename() {\r
+         return filename;\r
+     }\r
+\r
+     /** @return the size of the file */\r
+     @SuppressWarnings("unused")\r
+     public long getSize() {\r
+         return size;\r
+     }\r
+\r
+     /** Two instances are equal when both the size and the file name are equal. */\r
+     @Override\r
+     public boolean equals(Object o) {\r
+         if (this == o) return true;\r
+         if (!(o instanceof FileSize)) return false;\r
+\r
+         final FileSize fileSize = (FileSize) o;\r
+         return size == fileSize.size && filename.equals(fileSize.filename);\r
+     }\r
+\r
+     /** Combines the hash code of the file name with the size. */\r
+     @Override\r
+     public int hashCode() {\r
+         int result;\r
+         result = filename.hashCode();\r
+         result = (int) (29 * result + size);\r
+         return result;\r
+     }\r
+ }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+/*\r
+ * put your module comment here\r
+ * formatted with JxBeauty (c) johann.langhofer@nextra.at\r
+ */\r
+\r
+package com.swabunga.spell.engine;\r
+\r
+import java.io.BufferedReader;\r
+import java.io.File;\r
+import java.io.FileNotFoundException;\r
+import java.io.FileReader;\r
+import java.io.FileWriter;\r
+import java.io.IOException;\r
+import java.io.Reader;\r
+import java.util.Hashtable;\r
+import java.util.List;\r
+import java.util.Vector;\r
+\r
+/**\r
+ * The SpellDictionaryHashMap holds the dictionary\r
+ * <p/>\r
+ * This class is thread safe. Derived classes should ensure that this is preserved.\r
+ * <p/>\r
+ * There are many open source dictionary files. For just a few see:\r
+ * http://wordlist.sourceforge.net/\r
+ * <p/>\r
+ * This dictionary class reads words one per line. Make sure that your word list\r
+ * is formatted in this way (most are).\r
+ * <p/>\r
+ * Note that you must create the dictionary with a word list for the added\r
+ * words to persist.\r
+ */\r
+public class SpellDictionaryHashMap extends SpellDictionaryASpell {\r
+ /** A field indicating the initial hash map capacity (16 * 1024 buckets) for the main\r
+ * dictionary hash map. Interested to see what the performance of a\r
+ * smaller initial capacity is like.\r
+ */\r
+ private final static int INITIAL_CAPACITY = 16 * 1024;\r
+\r
+ /**\r
+ * The hashmap that contains the word dictionary. The map is hashed on the doublemeta\r
+ * code. The map entry contains a LinkedList of words that have the same double meta code.\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+protected Hashtable mainDictionary = new Hashtable(INITIAL_CAPACITY);\r
+\r
+ /** Holds the dictionary file for appending*/\r
+ private File dictFile = null;\r
+\r
+ /**\r
+  * Creates a dictionary without any word list: no words are loaded and no\r
+  * dictionary file is set. A null phonetic file is passed to the superclass.\r
+  * @throws java.io.IOException propagated from the superclass constructor\r
+  */\r
+ public SpellDictionaryHashMap() throws IOException {\r
+     super((File) null);\r
+ }\r
+\r
+ /**\r
+  * Creates a dictionary from the words delivered by a reader. No dictionary\r
+  * file is set, and a null phonetic file is passed to the superclass.\r
+  * @param wordList The reader supplying the words list, one word per line;\r
+  * it is not closed by this constructor\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  */\r
+ public SpellDictionaryHashMap(Reader wordList) throws IOException {\r
+     super((File) null);\r
+     BufferedReader lines = new BufferedReader(wordList);\r
+     createDictionary(lines);\r
+ }\r
+\r
+ /**\r
+  * Dictionary convenience Constructor: loads the words of a file and keeps\r
+  * the file as the target for words added later with <code>addWord</code>.\r
+  * @param wordList The file containing the words list for the dictionary\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * words list file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * file\r
+  */\r
+ public SpellDictionaryHashMap(File wordList) throws FileNotFoundException, IOException {\r
+     super((File) null);\r
+     dictFile = wordList;\r
+     // The reader is opened here, so it is closed here as well, whether or\r
+     // not loading succeeds.\r
+     BufferedReader in = new BufferedReader(new FileReader(wordList));\r
+     try {\r
+         createDictionary(in);\r
+     } finally {\r
+         in.close();\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Dictionary constructor that uses an aspell phonetic file to\r
+  * build the transformation table.\r
+  * @param wordList The file containing the words list for the dictionary;\r
+  * it is also kept as the target for words added with <code>addWord</code>\r
+  * @param phonetic The file to use for phonetic transformation of the\r
+  * wordlist.\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * file\r
+  */\r
+ public SpellDictionaryHashMap(File wordList, File phonetic) throws FileNotFoundException, IOException {\r
+     super(phonetic);\r
+     dictFile = wordList;\r
+     // The reader is opened here, so it is closed here as well, whether or\r
+     // not loading succeeds.\r
+     BufferedReader in = new BufferedReader(new FileReader(wordList));\r
+     try {\r
+         createDictionary(in);\r
+     } finally {\r
+         in.close();\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Dictionary constructor that uses an aspell phonetic file to\r
+  * build the transformation table. Encoding is used for phonetic file only;\r
+  * default encoding is used for wordList\r
+  * @param wordList The file containing the words list for the dictionary;\r
+  * it is also kept as the target for words added with <code>addWord</code>\r
+  * @param phonetic The file to use for phonetic transformation of the\r
+  * wordlist.\r
+  * @param phoneticEncoding Uses the character set encoding specified\r
+  * @throws java.io.FileNotFoundException indicates problems locating the\r
+  * file on the system\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * or phonetic information\r
+  */\r
+ public SpellDictionaryHashMap(File wordList, File phonetic, String phoneticEncoding) throws FileNotFoundException, IOException {\r
+     super(phonetic, phoneticEncoding);\r
+     dictFile = wordList;\r
+     // The reader is opened here, so it is closed here as well, whether or\r
+     // not loading succeeds.\r
+     BufferedReader in = new BufferedReader(new FileReader(wordList));\r
+     try {\r
+         createDictionary(in);\r
+     } finally {\r
+         in.close();\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Dictionary constructor that takes both the words and the aspell phonetic\r
+  * information from readers. No dictionary file is set.\r
+  * @param wordList The reader supplying the words list, one word per line;\r
+  * it is not closed by this constructor\r
+  * @param phonetic The reader to use for phonetic transformation of the\r
+  * wordlist.\r
+  * @throws java.io.IOException indicates problems reading the words list\r
+  * or phonetic information\r
+  */\r
+ public SpellDictionaryHashMap(Reader wordList, Reader phonetic) throws IOException {\r
+     super(phonetic);\r
+     this.dictFile = null;\r
+     BufferedReader lines = new BufferedReader(wordList);\r
+     createDictionary(lines);\r
+ }\r
+\r
+ /**\r
+ * Add words from a file to existing dictionary hashmap.\r
+ * This function can be called as many times as needed to\r
+ * build the internal word list. Duplicates are not added.\r
+ * <p>\r
+ * Note that adding a dictionary does not affect the target\r
+ * dictionary file for the addWord method. That is, addWord() continues\r
+ * to make additions to the dictionary file specified in createDictionary()\r
+ * <P>\r
+ * @param wordList a File object that contains the words, on word per line.\r
+ * @throws FileNotFoundException\r
+ * @throws IOException\r
+ */\r
+ public void addDictionary(File wordList) throws FileNotFoundException, IOException {\r
+ addDictionaryHelper(new BufferedReader(new FileReader(wordList)));\r
+ }\r
+\r
+ /**\r
+ * Add words from a Reader to existing dictionary hashmap.\r
+ * This function can be called as many times as needed to\r
+ * build the internal word list. Duplicates are not added.\r
+ * <p>\r
+ * Note that adding a dictionary does not affect the target\r
+ * dictionary file for the addWord method. That is, addWord() continues\r
+ * to make additions to the dictionary file specified in createDictionary()\r
+ * <P>\r
+ * @param wordList a Reader object that contains the words, on word per line.\r
+ * @throws IOException\r
+ */\r
+ public void addDictionary(Reader wordList) throws IOException {\r
+ addDictionaryHelper(new BufferedReader(wordList));\r
+ }\r
+\r
+ /**\r
+  * Add a word permanently to the dictionary (and the dictionary file).\r
+  * <p>\r
+  * The word is always added to the in-memory dictionary. If a dictionary\r
+  * file is set, the word is also appended to it as a new line; a failure to\r
+  * do so is only reported on standard output.\r
+  * <p>This needs to be made thread safe (synchronized)</p>\r
+  * @param word The word to add\r
+  */\r
+ public void addWord(String word) {\r
+     putWord(word);\r
+     if (dictFile == null)\r
+         return;\r
+     try {\r
+         // Open with append.\r
+         FileWriter w = new FileWriter(dictFile.toString(), true);\r
+         try {\r
+             w.write(word);\r
+             w.write("\n");\r
+         } finally {\r
+             // Release the file handle even when a write fails.\r
+             w.close();\r
+         }\r
+     } catch (IOException ex) {\r
+         System.out.println("Error writing to dictionary file");\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Fills the dictionary with the words delivered by a reader.\r
+  * <p>\r
+  * Each word is expected on a line of its own; empty lines are skipped and\r
+  * every other line is stored with <code>putWord</code>, without any check\r
+  * for duplicates.\r
+  * <p>\r
+  * This is a very slow function; loading a large word list takes quite a\r
+  * while and could probably be sped up quite a lot.\r
+  * @param in the reader supplying the words\r
+  * @throws IOException if reading a line fails\r
+  */\r
+ protected void createDictionary(BufferedReader in) throws IOException {\r
+     for (String line = in.readLine(); line != null; line = in.readLine()) {\r
+         if (line.length() == 0)\r
+             continue;\r
+         // Store a copy built from the characters of the line.\r
+         putWord(new String(line.toCharArray()));\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Adds the words delivered by a reader to the existing dictionary. A word\r
+  * that already exists in the dictionary is not added again.\r
+  * <p>\r
+  * Each word is expected on a line of its own; empty lines are skipped.\r
+  * <p>\r
+  * Note: the phonetic code of a word maps to a vector of words rather than\r
+  * to a hash table, so checking for a duplicate means iterating through all\r
+  * the words that share the phonetic code. If the vector-based\r
+  * implementation is important, it may be better to subclass for the cases\r
+  * where duplicates are bad.\r
+  * @param in the reader supplying the words\r
+  * @throws IOException if reading a line fails\r
+  */\r
+ protected void addDictionaryHelper(BufferedReader in) throws IOException {\r
+     for (String line = in.readLine(); line != null; line = in.readLine()) {\r
+         if (line.length() == 0)\r
+             continue;\r
+         // Store a copy built from the characters of the line.\r
+         putWordUnique(new String(line.toCharArray()));\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Stores a word in the dictionary, in the list kept for its phonetic code.\r
+  * The word is stored even if it is already present.\r
+  * @param word The word to add\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ protected void putWord(String word) {\r
+     String code = getCode(word);\r
+     Vector bucket = (Vector) mainDictionary.get(code);\r
+     if (bucket == null) {\r
+         // First word for this code: register a new list that holds it.\r
+         Vector created = new Vector();\r
+         created.addElement(word);\r
+         mainDictionary.put(code, created);\r
+         return;\r
+     }\r
+     bucket.addElement(word);\r
+ }\r
+\r
+ /**\r
+  * Stores a word in the dictionary unless it is already present. Words that\r
+  * differ only in case count as the same word.\r
+  * @param word The word to add\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ protected void putWordUnique(String word) {\r
+     String code = getCode(word);\r
+     Vector bucket = (Vector) mainDictionary.get(code);\r
+\r
+     if (bucket == null) {\r
+         // First word for this code: register a new list that holds it.\r
+         Vector created = new Vector();\r
+         created.addElement(word);\r
+         mainDictionary.put(code, created);\r
+         return;\r
+     }\r
+\r
+     // Only the words sharing the code can be duplicates of the new word.\r
+     int position = 0;\r
+     while (position < bucket.size()) {\r
+         if (word.equalsIgnoreCase((String) bucket.elementAt(position)))\r
+             return;\r
+         position++;\r
+     }\r
+     bucket.addElement(word);\r
+ }\r
+\r
+ /**\r
+  * Returns the words stored for a phonetic code.\r
+  * @param code The phonetic code common to the wanted words\r
+  * @return the list kept in the main dictionary for the code, or a new empty\r
+  * list if the code is unknown\r
+  */\r
+ @Override\r
+ @SuppressWarnings("unchecked")\r
+ public List getWords(String code) {\r
+     // Check the main dictionary.\r
+     Vector found = (Vector) mainDictionary.get(code);\r
+     return (found != null) ? found : new Vector();\r
+ }\r
+\r
+ /**\r
+ * Returns true if the word is correctly spelled against the current word list.\r
+ */\r
+ @Override\r
+@SuppressWarnings("unchecked")\r
+public boolean isCorrect(String word) {\r
+ List possible = getWords(getCode(word));\r
+ if (possible.contains(word))\r
+ return true;\r
+ //JMH should we always try the lowercase version. If I dont then capitalised\r
+ //words are always returned as incorrect.\r
+ else if (possible.contains(word.toLowerCase()))\r
+ return true;\r
+ return false;\r
+ }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+/**\r
+ * Contract for all Transformators, which take a dictionary word and convert\r
+ * it into its phonetic hash. Words sharing a phonetic hash are considered\r
+ * similar and are offered as spelling suggestions.\r
+ *\r
+ * @author Robert Gustavsson (robert@lindesign.se)\r
+ */\r
+public interface Transformator {\r
+\r
+    /**\r
+     * Computes the best phonetic hash for the given word.\r
+     * @param word the word to transform\r
+     * @return the phonetic transformation of the word\r
+     */\r
+    String transform(String word);\r
+\r
+    /**\r
+     * Gets the characters that should be swapped in to a misspelled word in\r
+     * order to find more suggestions. In general, this list holds all of the\r
+     * unique phonetic characters of this Transformator.\r
+     * <p>\r
+     * The replace list is used in the getSuggestions method: every letter of\r
+     * the misspelled word is replaced with every character of this list,\r
+     * which implies l*n tries, if l is the size of the string and n is the\r
+     * size of this list.\r
+     * <p>\r
+     * In addition to that, each of these characters is added to the\r
+     * misspelled word.\r
+     * @return char[] the characters to try as replacements in misspelled words\r
+     */\r
+    char[] getReplaceList();\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.engine;\r
+\r
+import java.util.Comparator;\r
+\r
+/**\r
+ * The Word object holds information for one suggested spelling.\r
+ * It contains both the suggested word string and the distance cost, which represents how different the suggested\r
+ * word is from the misspelling.\r
+ * <p>This class is now immutable.\r
+ * </p>\r
+ */\r
+@SuppressWarnings("unchecked")\r
+public class Word implements Comparator {\r
+ private String word;\r
+ private final int score;\r
+\r
+ /**\r
+ * Constructs a new Word.\r
+ * @param word The text of a word.\r
+ * @param score The word's distance cost\r
+ */\r
+ public Word(String word, int score) {\r
+ this.word = word;\r
+ this.score = score;\r
+ }\r
+\r
+ /**\r
+ * Constructs a new Word.\r
+ */\r
+ public Word() {\r
+ this.word = "";\r
+ this.score = 0;\r
+ }\r
+\r
+ /**\r
+ * Compares two words, mostly for the purpose of sorting words.\r
+ * @param o1 the first word\r
+ * @param o2 the second word\r
+ * @return -1 if the first word is more similar to the misspelled word\r
+ * <br>1 if the second word is more similar to the misspelled word\r
+ * <br>0 if both words are equally similar\r
+ *\r
+ */\r
+ public int compare(Object o1, Object o2) {\r
+ if (((Word) o1).getCost() < ((Word) o2).getCost()) return -1;\r
+ if (((Word) o1).getCost() == ((Word) o2).getCost()) return 0;\r
+ return 1;\r
+ }\r
+\r
+ /**\r
+ * Indicates if this word is equal to another one.\r
+ * @param o The other word to compare\r
+ * @return The indication of equality\r
+ */\r
+ @Override\r
+public boolean equals(Object o) {\r
+ if (o instanceof Word) // added by bd\r
+ return(((Word)o).getWord().equals(getWord()));\r
+ return false;\r
+ }\r
+ \r
+ /**\r
+ * gets suggested spelling\r
+ * @return the actual text of the suggest spelling\r
+ */\r
+ public String getWord() {\r
+ return word;\r
+ }\r
+\r
+ /**\r
+ * sets suggested spelling\r
+ * @param word The text to set for suggestd spelling\r
+ */\r
+ public void setWord(String word) {\r
+ this.word = word;\r
+ }\r
+\r
+ /**\r
+ * A cost measures how close a match this word was to the original word\r
+ * @return 0 if an exact match. Higher numbers are worse matches.\r
+ * @see EditDistance\r
+ */\r
+ public int getCost() {\r
+ return score;\r
+ }\r
+\r
+ /**\r
+ * returns the suggested spelling\r
+ * @return The word's text \r
+ */\r
+ @Override\r
+public String toString() {\r
+ return word;\r
+ }\r
+}\r
+\r
--- /dev/null
+EDIT_DEL1=95\r
+EDIT_DEL2=95\r
+EDIT_SWAP=90\r
+EDIT_SUB=100\r
+EDIT_CASE=10\r
+\r
+#DMV: the following commented out settings do not seem to be used at all\r
+#EDIT_SIMILAR=10\r
+#EDIT_MIN=90\r
+#EDIT_MAX=100\r
+\r
+SPELL_THRESHOLD=140\r
+SPELL_IGNOREUPPERCASE=true\r
+SPELL_IGNOREMIXEDCASE=false\r
+SPELL_IGNOREINTERNETADDRESS=true\r
+SPELL_IGNOREDIGITWORDS=true\r
+SPELL_IGNOREMULTIPLEWORDS=false\r
+SPELL_IGNORESENTENCECAPTILIZATION=true\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.text.BreakIterator;\r
+\r
+/**\r
+ * Defines common methods and behaviour for the various word finding\r
+ * subclasses.\r
+ *\r
+ * @author Anthony Roy (ajr@antroy.co.uk)\r
+ */\r
+public abstract class AbstractWordFinder implements WordFinder {\r
+\r
+    //~ Instance/static variables .............................................\r
+\r
+    /** The word being analyzed */\r
+    protected Word currentWord;\r
+    /** The word following the current one */\r
+    protected Word nextWord;\r
+    /** Indicate if the current word starts a new sentence */\r
+    protected boolean startsSentence;\r
+    /** Holds the text to analyze */\r
+    protected String text;\r
+    /** An iterator to work through the sentence */\r
+    protected BreakIterator sentenceIterator;\r
+\r
+    //~ Constructors ..........................................................\r
+\r
+    /**\r
+     * Creates a new AbstractWordFinder object and positions it on the first\r
+     * word of the text.\r
+     *\r
+     * @param inText the String to iterate through.\r
+     */\r
+    public AbstractWordFinder(String inText) {\r
+        text = inText;\r
+        setup();\r
+    }\r
+\r
+    /**\r
+     * Creates a new default AbstractWordFinder object working on an empty text.\r
+     */\r
+    public AbstractWordFinder() {\r
+        text = "";\r
+        setup();\r
+    }\r
+    //~ Methods ...............................................................\r
+\r
+    /**\r
+     * This method scans the text from the end of the last word, and returns\r
+     * a new Word object corresponding to the next word.\r
+     *\r
+     * @return the following word.\r
+     */\r
+    public abstract Word next();\r
+\r
+    /**\r
+     * Return the text being searched. May have changed since first set\r
+     * through calls to replace.\r
+     *\r
+     * @return the text being searched.\r
+     */\r
+    public String getText() {\r
+        return text;\r
+    }\r
+\r
+    /**\r
+     * Defines the text to search and restarts the iteration from its beginning.\r
+     * @param newText The text to be analyzed\r
+     */\r
+    public void setText(String newText) {\r
+        text = newText;\r
+        setup();\r
+    }\r
+\r
+    /**\r
+     * Returns the current word in the iteration .\r
+     *\r
+     * @return the current word.\r
+     * @throws WordNotFoundException current word has not yet been set.\r
+     */\r
+    public Word current() {\r
+        if (currentWord == null) {\r
+            throw new WordNotFoundException("No Words in current String");\r
+        }\r
+        return currentWord;\r
+    }\r
+\r
+    /**\r
+     * Indicates if there is some more word to analyze\r
+     * @return true if there are further words in the string.\r
+     */\r
+    public boolean hasNext() {\r
+        return nextWord != null;\r
+    }\r
+\r
+    /**\r
+     * Replace the current word in the search with a replacement string.\r
+     * The start of the next word is shifted by the length difference and the\r
+     * sentence iterator is re-synchronised with the modified text.\r
+     *\r
+     * @param newWord the replacement string.\r
+     * @throws WordNotFoundException current word has not yet been set.\r
+     */\r
+    public void replace(String newWord) {\r
+        if (currentWord == null) {\r
+            throw new WordNotFoundException("No Words in current String");\r
+        }\r
+\r
+        // Build the new text before touching currentWord: getStart()/getEnd()\r
+        // must still describe the word that is being replaced.\r
+        StringBuilder sb = new StringBuilder(text.substring(0, currentWord.getStart()));\r
+        sb.append(newWord);\r
+        sb.append(text.substring(currentWord.getEnd()));\r
+        int diff = newWord.length() - currentWord.getText().length();\r
+        currentWord.setText(newWord);\r
+        /* Added Conditional to ensure a NullPointerException is avoided (11 Feb 2003) */\r
+        if (nextWord != null) {\r
+            nextWord.setStart(nextWord.getStart() + diff);\r
+        }\r
+        text = sb.toString();\r
+\r
+        sentenceIterator.setText(text);\r
+        // following(start) is always strictly greater than start, so comparing\r
+        // it with start could never report a sentence start. isBoundary() tests\r
+        // the offset itself and leaves the iterator on the first boundary at or\r
+        // after it, which is the position setSentenceIterator() expects.\r
+        startsSentence = sentenceIterator.isBoundary(currentWord.getStart());\r
+    }\r
+\r
+    /**\r
+     * @return true if the current word starts a new sentence.\r
+     * @throws WordNotFoundException current word has not yet been set.\r
+     */\r
+    public boolean startsSentence() {\r
+        if (currentWord == null) {\r
+            throw new WordNotFoundException("No Words in current String");\r
+        }\r
+        return startsSentence;\r
+    }\r
+\r
+    /**\r
+     * Return the text being searched. May have changed since first set\r
+     * through calls to replace.\r
+     *\r
+     * @return the text being searched.\r
+     */\r
+    @Override\r
+    public String toString() {\r
+        return text;\r
+    }\r
+\r
+    /**\r
+     * Adjusts the sentence iterator and the startSentence flag according to the\r
+     * currentWord.\r
+     * @param wd the wd parameter is not presently used.\r
+     */\r
+    protected void setSentenceIterator(Word wd) {\r
+        int current = sentenceIterator.current();\r
+\r
+        if (current == currentWord.getStart()) {\r
+            startsSentence = true;\r
+        } else {\r
+            startsSentence = false;\r
+\r
+            // The word extends past the iterator position: advance to the next boundary.\r
+            if (currentWord.getEnd() > current) {\r
+                sentenceIterator.next();\r
+            }\r
+        }\r
+    }\r
+\r
+    /**\r
+     * Indicates if the character at the specified position is acceptable as\r
+     * part of a word. To be acceptable, the character need to be a letter\r
+     * or a digit. It is also acceptable if the character is one of ''', '@',\r
+     * '.' or '_' and is preceded and followed by letter or digit.\r
+     * The first and last characters of the text are only accepted when they\r
+     * are a letter or a digit.\r
+     * @param posn The character position to analyze.\r
+     * @return true if the character can be part of a word\r
+     */\r
+    //Added more intelligent character recognition (11 Feb '03)\r
+    protected boolean isWordChar(int posn) {\r
+        char curr = text.charAt(posn);\r
+\r
+        // No neighbour on one side: only plain letters and digits qualify.\r
+        if ((posn == 0) || (posn == text.length() - 1)) {\r
+            return Character.isLetterOrDigit(curr);\r
+        }\r
+\r
+        char prev = text.charAt(posn - 1);\r
+        char next = text.charAt(posn + 1);\r
+\r
+        boolean out;\r
+        switch (curr) {\r
+            case '\'':\r
+            case '@':\r
+            case '.':\r
+            case '_':\r
+                out = (Character.isLetterOrDigit(prev) && Character.isLetterOrDigit(next));\r
+                break;\r
+            default :\r
+                out = Character.isLetterOrDigit(curr);\r
+        }\r
+\r
+        return out;\r
+    }\r
+\r
+    /**\r
+     * Indicates if the specified character is acceptable as\r
+     * part of a word. To be acceptable, the character need to be a letter\r
+     * or a digit or a ' (an apostrophe).\r
+     * @param c The character to evaluate\r
+     * @return true if the character is a letter, digit or a ' (an apostrophe).\r
+     */\r
+    protected boolean isWordChar(char c) {\r
+        return Character.isLetterOrDigit(c) || (c == '\'');\r
+    }\r
+\r
+    /**\r
+     * Ignores or skip over text starting from the index position specified\r
+     * if it contains the <code>startIgnore</code>, and until the\r
+     * first non letter or digit character is encountered or end of text is\r
+     * detected.\r
+     * @param index The start position in text.\r
+     * @param startIgnore The character that should be at <code>index</code>\r
+     * position to start skipping through.\r
+     * @return The index position pointing after the skipped characters or the\r
+     * original index if the ignore condition could not be met.\r
+     */\r
+    protected int ignore(int index, char startIgnore) {\r
+        return ignore(index, Character.valueOf(startIgnore), null);\r
+    }\r
+\r
+    /**\r
+     * Ignores or skip over text starting from the index position specified\r
+     * if it contains the <code>startIgnore</code>, and until the\r
+     * <code>endIgnore</code> character is encountered or end of text is\r
+     * detected.\r
+     * @param index The start position in text.\r
+     * @param startIgnore The character that should be at <code>index</code>\r
+     * position to start skipping through.\r
+     * @param endIgnore The character which mark the end of skipping through.\r
+     * @return The index position pointing after the skipped characters\r
+     * (including the end character) or the original index if the ignore\r
+     * condition could not be met.\r
+     */\r
+    protected int ignore(int index, char startIgnore, char endIgnore) {\r
+        return ignore(index, Character.valueOf(startIgnore), Character.valueOf(endIgnore));\r
+    }\r
+\r
+    /**\r
+     * Ignores or skip over text starting from the index position specified\r
+     * if it contains the <code>startIgnore</code>, and until the\r
+     * <code>endIgnore</code> character is encountered or end of text is\r
+     * detected.\r
+     * @param index The start position in text.\r
+     * @param startIgnore The character that should be at <code>index</code>\r
+     * position to start skipping through.\r
+     * @param endIgnore The character which mark the end of skipping through. If\r
+     * the value of endIgnore is <code>null</code>, skipping characters stop\r
+     * at first non letter or digit character.\r
+     * @return The index position pointing after the skipped characters or the\r
+     * original index if the ignore condition could not be met.\r
+     */\r
+    protected int ignore(int index, Character startIgnore, Character endIgnore) {\r
+        int newIndex = index;\r
+\r
+        if (newIndex < text.length()) {\r
+            Character curChar = Character.valueOf(text.charAt(newIndex));\r
+\r
+            if (curChar.equals(startIgnore)) {\r
+                newIndex++;\r
+                while (newIndex < text.length()) {\r
+                    curChar = Character.valueOf(text.charAt(newIndex));\r
+                    if (endIgnore != null && curChar.equals(endIgnore)) {\r
+                        // The end character itself is skipped as well.\r
+                        newIndex++;\r
+                        break;\r
+                    } else if (endIgnore == null && !Character.isLetterOrDigit(curChar.charValue())) {\r
+                        break;\r
+                    }\r
+                    newIndex++;\r
+                }\r
+            }\r
+        }\r
+\r
+        return newIndex;\r
+    }\r
+\r
+    /**\r
+     * Ignores or skip over text starting from the index position specified\r
+     * if it contains the <code>startIgnore</code> string, and until the\r
+     * <code>endIgnore</code> string is encountered or end of text is\r
+     * detected.\r
+     * <p>\r
+     * The start string is only honoured when at least one character follows\r
+     * it in the text. When the end string never occurs, everything up to the\r
+     * end of the text is skipped.\r
+     * </p>\r
+     * @param index The start position in text.\r
+     * @param startIgnore The string that should be at <code>index</code>\r
+     * position to start skipping through.\r
+     * @param endIgnore The string which mark the end of skipping through.\r
+     * @return The index position pointing after the skipped characters or the\r
+     * original index if the ignore condition could not be met.\r
+     */\r
+    protected int ignore(int index, String startIgnore, String endIgnore) {\r
+        int newIndex = index;\r
+        int len = text.length();\r
+        int slen = startIgnore.length();\r
+        int elen = endIgnore.length();\r
+\r
+        if ((newIndex + slen) < len && text.startsWith(startIgnore, newIndex)) {\r
+            newIndex += slen;\r
+            // indexOf never reads past the end of the text, so a short tail after\r
+            // the start marker no longer raises StringIndexOutOfBoundsException,\r
+            // and an end marker flush with the end of the text is recognised.\r
+            int endPos = text.indexOf(endIgnore, newIndex);\r
+            newIndex = (endPos >= 0) ? endPos + elen : len;\r
+        }\r
+\r
+        return newIndex;\r
+    }\r
+\r
+    /**\r
+     * Initializes the sentenceIterator on the current text.\r
+     */\r
+    protected void init() {\r
+        sentenceIterator = BreakIterator.getSentenceInstance();\r
+        sentenceIterator.setText(text);\r
+    }\r
+\r
+    /**\r
+     * Defines the starting positions for text analysis: resets the current and\r
+     * next words, re-creates the sentence iterator and advances once so that\r
+     * nextWord refers to the first word of the text. When no word can be\r
+     * found, both words are set to null.\r
+     */\r
+    private void setup() {\r
+        currentWord = new Word("", 0);\r
+        nextWord = new Word("", 0);\r
+        startsSentence = true;\r
+\r
+        init();\r
+\r
+        try {\r
+            next();\r
+        } catch (WordNotFoundException e) {\r
+            currentWord = null;\r
+            nextWord = null;\r
+        }\r
+    }\r
+\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.text.BreakIterator;\r
+\r
+\r
+/**\r
+ * This class tokenizes a input string.\r
+ *\r
+ * <p>\r
+ * It also allows for the string to be mutated. The result after the spell\r
+ * checking is completed is available to the call to getFinalText\r
+ * </p>\r
+ *\r
+ * @author Jason Height(jheight@chariot.net.au)\r
+ * @author Anthony Roy (ajr@antroy.co.uk)\r
+ */\r
+public abstract class AbstractWordTokenizer implements WordTokenizer {\r
+\r
+    //~ Instance/static variables ...............................................\r
+\r
+    /** The word most recently returned by the finder; null until nextWord() is called */\r
+    protected Word currentWord;\r
+    /** The word finder that locates the words worth spell checking */\r
+    protected WordFinder finder;\r
+    /** An iterator to work through the sentence (not assigned by this class) */\r
+    protected BreakIterator sentenceIterator;\r
+\r
+    /** The cumulative count of words processed (only reported by this class) */\r
+    protected int wordCount = 0;\r
+\r
+    //~ Constructors ............................................................\r
+\r
+    /**\r
+     * Creates a tokenizer over the given text using a DefaultWordFinder.\r
+     *\r
+     * @param text the text to process.\r
+     */\r
+    public AbstractWordTokenizer(String text) {\r
+        this(new DefaultWordFinder(text));\r
+    }\r
+\r
+    /**\r
+     * Creates a tokenizer that delegates word searching to the given finder.\r
+     *\r
+     * @param wf the custom WordFinder to use in searching for words.\r
+     */\r
+    public AbstractWordTokenizer(WordFinder wf) {\r
+        finder = wf;\r
+    }\r
+\r
+    //~ Methods .................................................................\r
+\r
+    /**\r
+     * Reports how many words have been processed.\r
+     *\r
+     * @return number of words so far iterated.\r
+     */\r
+    public int getCurrentWordCount() {\r
+        return wordCount;\r
+    }\r
+\r
+    /**\r
+     * Reports where the current word ends in the text.\r
+     *\r
+     * @return index in string of the end of the current word.\r
+     * @throws WordNotFoundException current word has not yet been set.\r
+     */\r
+    public int getCurrentWordEnd() {\r
+        return requireCurrentWord().getEnd();\r
+    }\r
+\r
+    /**\r
+     * Reports where the current word starts in the text.\r
+     *\r
+     * @return index in string of the start of the current word.\r
+     * @throws WordNotFoundException current word has not yet been set.\r
+     */\r
+    public int getCurrentWordPosition() {\r
+        return requireCurrentWord().getStart();\r
+    }\r
+\r
+    /**\r
+     * Asks the finder whether further words are available.\r
+     *\r
+     * @return true if there are further words in the text.\r
+     */\r
+    public boolean hasMoreWords() {\r
+        return finder.hasNext();\r
+    }\r
+\r
+    /**\r
+     * Advances the finder to the next word, remembers it as the current word\r
+     * and returns its text.\r
+     *\r
+     * @return the string representing the current word.\r
+     * @throws WordNotFoundException search string contains no more words.\r
+     */\r
+    public String nextWord() {\r
+        Word found = finder.next();\r
+        currentWord = found;\r
+        return found.getText();\r
+    }\r
+\r
+    /**\r
+     * Replaces the current word token\r
+     *\r
+     * @param newWord replacement word.\r
+     * @throws WordNotFoundException current word has not yet been set.\r
+     */\r
+    public abstract void replaceWord(String newWord);\r
+\r
+    /**\r
+     * Returns the finder's string form, i.e. the text being tokenized\r
+     * (including any changes that have been made).\r
+     *\r
+     * @return the text being tokenized.\r
+     */\r
+    public String getContext() {\r
+        return finder.toString();\r
+    }\r
+\r
+    /**\r
+     * Asks the finder whether the current word is at the start of a sentence.\r
+     *\r
+     * @return true if the current word starts a sentence.\r
+     * @throws WordNotFoundException current word has not yet been set.\r
+     */\r
+    public boolean isNewSentence() {\r
+        return finder.startsSentence();\r
+    }\r
+\r
+    /**\r
+     * Shared guard for the accessors that need a current word.\r
+     *\r
+     * @return the current word, never null.\r
+     * @throws WordNotFoundException current word has not yet been set.\r
+     */\r
+    private Word requireCurrentWord() {\r
+        if (currentWord != null) {\r
+            return currentWord;\r
+        }\r
+        throw new WordNotFoundException("No Words in current String");\r
+    }\r
+}
\ No newline at end of file
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.util.List;\r
+\r
+/** This event is fired off by the SpellChecker and is passed to the\r
+ * registered SpellCheckListeners\r
+ *\r
+ * @author Jason Height (jheight@chariot.net.au)\r
+ */\r
+class BasicSpellCheckEvent implements SpellCheckEvent {\r
+\r
+ /**The list holding the suggested Word objects for the misspelt word*/\r
+ @SuppressWarnings("unchecked")\r
+private final List suggestions;\r
+ /**The misspelt word*/\r
+ private final String invalidWord;\r
+ /**The action to be done when the event returns*/\r
+ private short action = INITIAL;\r
+ /**Contains the word to be replaced if the action is REPLACE or REPLACEALL*/\r
+ private String replaceWord = null;\r
+\r
+ @SuppressWarnings("unused")\r
+private final String context;\r
+ private final int startPosition;\r
+\r
+\r
+ /**Constructs the SpellCheckEvent\r
+ * @param invalidWord The word that is misspelt\r
+ * @param suggestions A list of Word objects that are suggested to replace the currently misspelt word\r
+ * @param tokenizer The reference to the tokenizer that caused this\r
+ * event to fire.\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+public BasicSpellCheckEvent(String invalidWord, List suggestions, WordTokenizer tokenizer) {\r
+ this.invalidWord = invalidWord;\r
+ this.suggestions = suggestions;\r
+ this.context = tokenizer.getContext();\r
+ this.startPosition = tokenizer.getCurrentWordPosition();\r
+ }\r
+\r
+ /** Returns the list of suggested Word objects\r
+ * @return A list of words phonetically close to the misspelt word\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+public List getSuggestions() {\r
+ return suggestions;\r
+ }\r
+\r
+ /** Returns the currently misspelt word\r
+ * @return The text misspelt\r
+ */\r
+ public String getInvalidWord() {\r
+ return invalidWord;\r
+ }\r
+\r
+ /** Returns the context in which the misspelt word is used\r
+ * @return The text containing the context\r
+ */\r
+ public String getWordContext() {\r
+ //JMH TBD\r
+ return null;\r
+ }\r
+\r
+ /** Returns the start position of the misspelt word in the context\r
+ * @return The position of the word\r
+ */\r
+ public int getWordContextPosition() {\r
+ return startPosition;\r
+ }\r
+\r
+ /** Returns the action type the user has to handle\r
+ * @return The type of action the event is carrying\r
+ */\r
+ public short getAction() {\r
+ return action;\r
+ }\r
+\r
+ /** Returns the text to replace\r
+ * @return the text of the word to replace\r
+ */\r
+ public String getReplaceWord() {\r
+ return replaceWord;\r
+ }\r
+\r
+ /** Set the action to replace the currently misspelt word with the new word\r
+ * @param newWord The word to replace the currently misspelt word\r
+ * @param replaceAll If set to true, the SpellChecker will replace all\r
+ * further occurrences of the misspelt word without firing a SpellCheckEvent.\r
+ */\r
+ public void replaceWord(String newWord, boolean replaceAll) {\r
+ if (action != INITIAL)\r
+ throw new IllegalStateException("The action can can only be set once");\r
+ if (replaceAll)\r
+ action = REPLACEALL;\r
+ else\r
+ action = REPLACE;\r
+ replaceWord = newWord;\r
+ }\r
+\r
+ /**\r
+ * Set the action it ignore the currently misspelt word.\r
+ * @param ignoreAll If set to true, the SpellChecker will replace all\r
+ * further occurrences of the misspelt word without firing a SpellCheckEvent.\r
+ */\r
+ public void ignoreWord(boolean ignoreAll) {\r
+ if (action != INITIAL)\r
+ throw new IllegalStateException("The action can can only be set once");\r
+ if (ignoreAll)\r
+ action = IGNOREALL;\r
+ else\r
+ action = IGNORE;\r
+ }\r
+\r
+ /** Set the action to add a new word into the dictionary. This will also replace the\r
+ * currently misspelt word.\r
+ * @param newWord The new word to add to the dictionary.\r
+ */\r
+ public void addToDictionary(String newWord) {\r
+ if (action != INITIAL)\r
+ throw new IllegalStateException("The action can can only be set once");\r
+ action = ADDTODICT;\r
+ replaceWord = newWord;\r
+ }\r
+\r
+ /** Set the action to terminate processing of the spellchecker.\r
+ */\r
+ public void cancel() {\r
+ if (action != INITIAL)\r
+ throw new IllegalStateException("The action can can only be set once");\r
+ action = CANCEL;\r
+ }\r
+}
\ No newline at end of file
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+/**\r
+ * A basic word finder, which searches text for sequences of letters.\r
+ * \r
+ * @author Anthony Roy (ajr@antroy.co.uk)\r
+ */\r
+public class DefaultWordFinder extends AbstractWordFinder {\r
+\r
+    //~ Instance/static variables ...............................................\r
+\r
+    //~ Constructors ............................................................\r
+\r
+    /**\r
+     * Creates a finder that searches the given text.\r
+     *\r
+     * @param inText the String to search\r
+     */\r
+    public DefaultWordFinder(String inText) {\r
+        super(inText);\r
+    }\r
+\r
+    /**\r
+     * Creates a finder through the superclass default constructor.\r
+     */\r
+    public DefaultWordFinder() {\r
+        super();\r
+    }\r
+\r
+    //~ Methods .................................................................\r
+\r
+    /**\r
+     * Makes the pending next word the current one, then scans forward from its\r
+     * end to locate the word after it. When no further word exists, nextWord\r
+     * becomes null.\r
+     *\r
+     * @return the word that is now current.\r
+     * @throws WordNotFoundException search string contains no more words.\r
+     */\r
+    public Word next() {\r
+        if (nextWord == null) {\r
+            throw new WordNotFoundException("No more words found.");\r
+        }\r
+        currentWord.copy(nextWord);\r
+        setSentenceIterator(currentWord);\r
+\r
+        boolean found = false;\r
+        for (int pos = currentWord.getEnd(); pos < text.length(); pos++) {\r
+            if (!isWordChar(pos)) {\r
+                continue;\r
+            }\r
+            // First word character after the current word: record the new word.\r
+            nextWord.setStart(pos);\r
+            int wordEnd = getNextWordEnd(text, pos);\r
+            nextWord.setText(text.substring(pos, wordEnd));\r
+            found = true;\r
+            break;\r
+        }\r
+        if (!found) {\r
+            nextWord = null;\r
+        }\r
+\r
+        return currentWord;\r
+    }\r
+\r
+    /**\r
+     * Returns the position in the string <em>after</em> the end of the word\r
+     * starting at <code>startPos</code>.\r
+     * The result may equal <code>text.length()</code>, so check the range\r
+     * before using it as an index into the string.\r
+     */\r
+    private int getNextWordEnd(String text, int startPos) {\r
+        int length = text.length();\r
+\r
+        if (!SpellChecker.isINETWord(text.substring(startPos))) {\r
+            // Ordinary word: it ends at the first non-word character.\r
+            int pos = startPos;\r
+            while (pos < text.length() && isWordChar(pos)) {\r
+                pos++;\r
+            }\r
+            return pos;\r
+        }\r
+\r
+        // Possible 'internet word': it runs up to a line break or a space.\r
+        for (int pos = startPos; pos < length; pos++) {\r
+            char ch = text.charAt(pos);\r
+            if (Character.isLetterOrDigit(ch)) {\r
+                continue;\r
+            }\r
+            if (ch == '\r' || ch == '\n') {\r
+                return pos;\r
+            }\r
+            if (Character.isSpaceChar(ch)) {\r
+                // Chop off any character that might be enclosing the 'internet word'. eg ',",),]\r
+                boolean endsOnWordChar = pos > 0 && Character.isLetterOrDigit(text.charAt(pos - 1));\r
+                return endsOnWordChar ? pos : pos - 1;\r
+            }\r
+        }\r
+        return length;\r
+    }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+\r
+import java.text.BreakIterator;\r
+\r
+import javax.swing.text.AttributeSet;\r
+import javax.swing.text.BadLocationException;\r
+import javax.swing.text.Document;\r
+import javax.swing.text.Segment;\r
+import javax.swing.text.StyledDocument;\r
+\r
+\r
+/** This class tokenizes a swing document model. It also allows for the\r
+ * document model to be changed when corrections occur.\r
+ *\r
+ * @author Jason Height (jheight@chariot.net.au)\r
+ */\r
+public class DocumentWordTokenizer implements WordTokenizer {\r
+ /** Holds the start character position of the current word*/\r
+ private int currentWordPos = 0;\r
+ /** Holds the end character position of the current word*/\r
+ private int currentWordEnd = 0;\r
+ /** Holds the start character position of the next word*/\r
+ private int nextWordPos = -1;\r
+ /** The actual text that is being tokenized*/\r
+ private final Document document;\r
+ /** The character iterator over the document*/\r
+ private final Segment text;\r
+ /** The cumulative word count that have been processed*/\r
+ private int wordCount = 0;\r
+ /** Flag indicating if there are any more tokens (words) left*/\r
+ private boolean moreTokens = true;\r
+ /** Marks the special case where currentWordPos, currentWordEnd and\r
+ * nextWordPos have already been calculated. (see nextWord)\r
+ */\r
+ private boolean first = true;\r
+ private final BreakIterator sentenceIterator;\r
+ private boolean startsSentence = true;\r
+\r
+ /**\r
+  * Creates a tokenizer over the whole content of a document and locates the\r
+  * first word, its end and the start of the word after it.\r
+  * If the document holds no letter or digit, or its text cannot be read,\r
+  * the tokenizer reports that no more words are available.\r
+  * @param document The document to spell check\r
+  */\r
+ public DocumentWordTokenizer(Document document) {\r
+     this.document = document;\r
+     // A single text segment spanning the entire document\r
+     text = new Segment();\r
+     sentenceIterator = BreakIterator.getSentenceInstance();\r
+     try {\r
+         document.getText(0, document.getLength(), text);\r
+         sentenceIterator.setText(text);\r
+         currentWordPos = getNextWordStart(text, 0);\r
+         if (currentWordPos == -1) {\r
+             // -1 means no word start was found in the text\r
+             moreTokens = false;\r
+         } else {\r
+             currentWordEnd = getNextWordEnd(text, currentWordPos);\r
+             nextWordPos = getNextWordStart(text, currentWordEnd);\r
+         }\r
+     } catch (BadLocationException ex) {\r
+         moreTokens = false;\r
+     }\r
+ }\r
+\r
+ /** Finds where the next word starts: the index of the first letter or\r
+  * digit at or after the start position, or -1 when there is none (or when\r
+  * the start position lies beyond the end of the segment).\r
+  */\r
+ private static int getNextWordStart(Segment text, int startPos) {\r
+     if (startPos > text.getEndIndex()) {\r
+         return -1;\r
+     }\r
+     char ch = text.setIndex(startPos);\r
+     while (ch != Segment.DONE) {\r
+         if (Character.isLetterOrDigit(ch)) {\r
+             return text.getIndex();\r
+         }\r
+         ch = text.next();\r
+     }\r
+     return -1;\r
+ }\r
+\r
+ /**\r
+  * Scans forward from startPos for the end of the word starting there.\r
+  * A hyphen or apostrophe counts as part of the word when it is directly\r
+  * followed by a letter or digit.\r
+  *\r
+  * @param text the segment to scan; its iterator position is moved\r
+  * @param startPos index at which scanning begins\r
+  * @return the index of the first character that ends the word, or the\r
+  *         segment end index when the word runs to the end of the text\r
+  */\r
+ private static int getNextWordEnd(Segment text, int startPos) {\r
+     char ch = text.setIndex(startPos);\r
+     while (ch != Segment.DONE) {\r
+         if (!Character.isLetterOrDigit(ch)) {\r
+             boolean continuesWord = false;\r
+             if (ch == '-' || ch == '\'') {\r
+                 // Peek one character ahead, then step back to where we were.\r
+                 char following = text.next();\r
+                 text.previous();\r
+                 continuesWord = following != Segment.DONE && Character.isLetterOrDigit(following);\r
+             }\r
+             if (!continuesWord) {\r
+                 return text.getIndex();\r
+             }\r
+         }\r
+         ch = text.next();\r
+     }\r
+     return text.getEndIndex();\r
+ }\r
+\r
+ /**\r
+ * Indicates if there are more words left.\r
+ * Reports the moreTokens flag, which the constructor, nextWord(),\r
+ * replaceWord() and posStartFullWordFrom() maintain.\r
+ * @return true if more words can be found in the text.\r
+ */\r
+ public boolean hasMoreWords() {\r
+ return moreTokens;\r
+ }\r
+ \r
+ /**\r
+ * Sets the current word position at the start of the word containing\r
+ * the char at position pos, by scanning backwards from pos to the nearest\r
+ * character that is not part of a word. The word end and the start of the\r
+ * following word are recomputed and moreTokens is set to true.\r
+ * \r
+ * NOTE(review): nextWord() only returns the current word without advancing\r
+ * when first is true, and this method forces first to true only when the\r
+ * word starts at index 0 - confirm callers get the word they expect\r
+ * otherwise.\r
+ * \r
+ * @param pos position in the word we want to set as current; values past\r
+ * the end of the text are clamped to the end index.\r
+ */\r
+ public void posStartFullWordFrom(int pos){\r
+ // Default when the backward scan reaches the start without hitting a separator.\r
+ currentWordPos=text.getBeginIndex();\r
+ if(pos>text.getEndIndex())\r
+ pos=text.getEndIndex();\r
+ for (char ch = text.setIndex(pos); ch != Segment.DONE; ch = text.previous()) {\r
+ if (!Character.isLetterOrDigit(ch)) {\r
+ if (ch == '-' || ch == '\'') { // handle ' and - inside words\r
+ // Peek at the preceding character, then step forward again.\r
+ char ch2 = text.previous();\r
+ text.next();\r
+ if (ch2 != Segment.DONE && Character.isLetterOrDigit(ch2))\r
+ continue;\r
+ }\r
+ // The word starts right after the separator just found.\r
+ currentWordPos=text.getIndex()+1;\r
+ break;\r
+ }\r
+ }\r
+ //System.out.println("CurPos:"+currentWordPos);\r
+ if(currentWordPos==0)\r
+ first=true;\r
+ moreTokens=true;\r
+ currentWordEnd = getNextWordEnd(text, currentWordPos);\r
+ nextWordPos = getNextWordStart(text, currentWordEnd + 1);\r
+ }\r
+\r
+ /**\r
+ * Returns the index in the text at which the current word starts.\r
+ * @return start index of the current word (the currentWordPos field).\r
+ */\r
+ public int getCurrentWordPosition() {\r
+ return currentWordPos;\r
+ }\r
+\r
+ /**\r
+ * Returns an index representing the end location of the current word in the text.\r
+ * nextWord() extracts the word as the characters from the current word\r
+ * position up to, but not including, this index.\r
+ * @return index of the end of the current word in the text.\r
+ */\r
+ public int getCurrentWordEnd() {\r
+ return currentWordEnd;\r
+ }\r
+\r
+ /**\r
+ * Returns the next word in the iteration. When first is true the word\r
+ * boundaries were already computed (by the constructor, replaceWord() or\r
+ * posStartFullWordFrom()) and that word is returned as is; otherwise the\r
+ * tokenizer first steps to the word at nextWordPos. The call also updates\r
+ * the sentence-start flag, increments the word count and clears moreTokens\r
+ * when no following word exists.\r
+ * @return the next word in the iteration, or null when the document\r
+ * rejects the word's range with a BadLocationException.\r
+ */\r
+ public String nextWord() {\r
+ // Advance only when the current boundaries have already been consumed.\r
+ if (!first) {\r
+ currentWordPos = nextWordPos;\r
+ currentWordEnd = getNextWordEnd(text, currentWordPos);\r
+ nextWordPos = getNextWordStart(text, currentWordEnd + 1);\r
+ }\r
+ // The word starts a sentence when it begins exactly on the iterator's boundary.\r
+ int current = sentenceIterator.current();\r
+ if (current == currentWordPos)\r
+ startsSentence = true;\r
+ else {\r
+ startsSentence = false;\r
+ // Move the sentence iterator on once the word extends past its boundary.\r
+ if (currentWordEnd > current)\r
+ sentenceIterator.next();\r
+ }\r
+ //The nextWordPos has already been populated\r
+ String word = null;\r
+ try {\r
+ word = document.getText(currentWordPos, currentWordEnd - currentWordPos);\r
+ } catch (BadLocationException ex) {\r
+ // Stop the iteration; word stays null.\r
+ moreTokens = false;\r
+ }\r
+ wordCount++;\r
+ first = false;\r
+ if (nextWordPos == -1)\r
+ moreTokens = false;\r
+ return word;\r
+ }\r
+\r
+ /**\r
+ * Returns the number of word tokens that have been processed thus far.\r
+ * The counter is incremented once per call to nextWord().\r
+ * @return the number of words found so far.\r
+ */\r
+ public int getCurrentWordCount() {\r
+ return wordCount;\r
+ }\r
+\r
+ /**\r
+  * Replaces the current word in the document with newWord and repositions\r
+  * the tokenizer on the first word after the inserted text. Does nothing\r
+  * when there is no current word (currentWordPos is -1).\r
+  * <p>\r
+  * The replacement is inserted with a null attribute set, exactly as\r
+  * before; the previous, unused lookup of the character attributes has\r
+  * been removed.\r
+  *\r
+  * @param newWord The new word to replace the misspelt one\r
+  * @throws RuntimeException if the document rejects the edit; the\r
+  *         underlying BadLocationException is attached as the cause\r
+  */\r
+ public void replaceWord(String newWord) {\r
+     if (currentWordPos == -1) {\r
+         return;\r
+     }\r
+     try {\r
+         document.remove(currentWordPos, currentWordEnd - currentWordPos);\r
+         document.insertString(currentWordPos, newWord, null);\r
+         // The document changed, so the segment must be refilled.\r
+         document.getText(0, document.getLength(), text);\r
+     } catch (BadLocationException ex) {\r
+         // Keep the original exception as the cause so the failing offset is not lost.\r
+         throw new RuntimeException(ex.getMessage(), ex);\r
+     }\r
+     // Position after the newly replaced word(s); nextWord() must not advance again.\r
+     first = true;\r
+     currentWordPos = getNextWordStart(text, currentWordPos + newWord.length());\r
+     if (currentWordPos != -1) {\r
+         currentWordEnd = getNextWordEnd(text, currentWordPos);\r
+         nextWordPos = getNextWordStart(text, currentWordEnd);\r
+         sentenceIterator.setText(text);\r
+         sentenceIterator.following(currentWordPos);\r
+     } else {\r
+         moreTokens = false;\r
+     }\r
+ }\r
+\r
+ /** Returns the current text that is being tokenized (includes any changes\r
+ * that have been made). The value is the string form of the text segment,\r
+ * which replaceWord() refills after each replacement.\r
+ * @return The text, including changes.\r
+ */\r
+ public String getContext() {\r
+ return text.toString();\r
+ }\r
+\r
+ /**\r
+  * Tells whether the current word begins a sentence.\r
+  * <p>\r
+  * The sentence iterator's verdict is trusted when it is positive. Because\r
+  * BreakIterator does not detect a sentence whose first word is not\r
+  * capitalised, the two characters before the word are checked as a\r
+  * fallback: trimmed, they must equal a single full stop.\r
+  *\r
+  * @return true if the current word is at the start of a sentence\r
+  */\r
+ public boolean isNewSentence() {\r
+     if (startsSentence || currentWordPos < 2) {\r
+         return true;\r
+     }\r
+     final String preceding;\r
+     try {\r
+         preceding = document.getText(currentWordPos - 2, 2);\r
+     } catch (BadLocationException ex) {\r
+         return false;\r
+     }\r
+     return preceding != null && ".".equals(preceding.trim());\r
+ }\r
+}
\ No newline at end of file
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.io.BufferedReader;\r
+import java.io.File;\r
+import java.io.FileReader;\r
+import java.io.IOException;\r
+\r
+\r
+/**\r
+ * A word tokenizer whose text is the complete content of a file.\r
+ *\r
+ * <p>\r
+ * The file is read fully into memory when the tokenizer is constructed.\r
+ * Word replacement is not supported: {@link #replaceWord(String)} does\r
+ * nothing. The original author intended this class for measuring how fast\r
+ * the spell checker is.\r
+ * </p>\r
+ */\r
+public class FileWordTokenizer extends AbstractWordTokenizer {\r
+\r
+    //~ Constructors ............................................................\r
+\r
+    /**\r
+     * Creates a new FileWordTokenizer object.\r
+     *\r
+     * @param inputFile the file to work upon\r
+     */\r
+    public FileWordTokenizer(File inputFile) {\r
+        super(stringValue(inputFile));\r
+    }\r
+\r
+    /**\r
+     * Creates a new FileWordTokenizer object and associates a WordFinder with\r
+     * its processing. The file content is handed to the finder via setText.\r
+     *\r
+     * @param inputFile the file to work upon.\r
+     * @param finder the specialized processing for words.\r
+     */\r
+    public FileWordTokenizer(File inputFile, WordFinder finder) {\r
+        super(finder);\r
+        finder.setText(stringValue(inputFile));\r
+    }\r
+\r
+    //~ Methods .................................................................\r
+\r
+    /**\r
+     * Does nothing: this tokenizer never writes back to the file.\r
+     *\r
+     * @param s the new string (ignored)\r
+     */\r
+    @Override\r
+    public void replaceWord(String s) {\r
+    }\r
+\r
+    /**\r
+     * Reads the whole file into a string using the platform default charset.\r
+     * I/O failures are reported on System.err and whatever was read up to\r
+     * that point (possibly the empty string) is returned.\r
+     *\r
+     * @param inFile the file to read\r
+     * @return the characters read from the file\r
+     */\r
+    private static String stringValue(File inFile) {\r
+        StringBuilder out = new StringBuilder();\r
+\r
+        try {\r
+            BufferedReader in = new BufferedReader(new FileReader(inFile));\r
+            try {\r
+                char[] c = new char[100];\r
+                int count;\r
+                while ((count = in.read(c, 0, c.length)) != -1) {\r
+                    out.append(c, 0, count);\r
+                }\r
+            } finally {\r
+                // Close even when read() fails so the file handle is not leaked.\r
+                in.close();\r
+            }\r
+        } catch (IOException e) {\r
+            System.err.println("File input error trying to open " + inFile.toString() + " : " + e);\r
+        }\r
+        return out.toString();\r
+    }\r
+}
\ No newline at end of file
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+\r
+/**\r
+ * A word finder for Java source files. Words are only collected while the\r
+ * inComment flag is set: the flag is raised when a '*' character is seen and\r
+ * cleared again at the next newline, so only text following a '*' on the\r
+ * same line is searched.\r
+ *\r
+ * @author Anthony Roy (ajr@antroy.co.uk)\r
+ */\r
+public class JavaWordFinder extends AbstractWordFinder {\r
+\r
+ //~ Instance/static variables ...............................................\r
+\r
+ /** True while scanning text that follows a '*' on the current line. */\r
+ private boolean inComment;\r
+\r
+ //~ Constructors ............................................................\r
+\r
+ /**\r
+ * Creates a new JavaWordFinder object.\r
+ *\r
+ * @param inText the String to search\r
+ */\r
+ public JavaWordFinder(String inText) {\r
+ super(inText);\r
+ }\r
+\r
+ /**\r
+ * Creates a new JavaWordFinder object.\r
+ */\r
+ public JavaWordFinder() {\r
+ super();\r
+ }\r
+\r
+ //~ Methods .................................................................\r
+\r
+\r
+ /**\r
+ * This method scans the text from the end of the last word, and returns a\r
+ * new Word object corresponding to the next word. The word returned is the\r
+ * one previously stored in nextWord; the scan then looks ahead for the\r
+ * following word and stores it in nextWord, or sets nextWord to null when\r
+ * no further word is started.\r
+ *\r
+ * NOTE(review): when a word character sits at the very last index of the\r
+ * text while inComment is set, the inner loop condition\r
+ * (i &lt; text.length() - 1) is false and no branch of the outer loop\r
+ * appears to advance i - confirm against isWordChar/ignore in the\r
+ * superclass that this cannot loop forever.\r
+ *\r
+ * @return the next word.\r
+ * @throws WordNotFoundException search string contains no more words.\r
+ */\r
+ @Override\r
+public Word next() {\r
+\r
+ if (nextWord == null) {\r
+ throw new WordNotFoundException("No more words found.");\r
+ }\r
+\r
+ currentWord.copy(nextWord);\r
+\r
+ // The value read here is never used (hence the suppressed warning).\r
+ @SuppressWarnings("unused")\r
+ int current = sentenceIterator.current();\r
+ setSentenceIterator(currentWord);\r
+\r
+ int i = currentWord.getEnd();\r
+ boolean finished = false;\r
+ boolean started = false;\r
+\r
+ search:\r
+ while (i < text.length() && !finished) {\r
+\r
+ // ignore(...) is defined in the superclass; presumably each call skips\r
+ // the given marker or delimited span (tags, code sections) - TODO confirm.\r
+ i = ignore(i, '@');\r
+ i = ignore(i, "<code>", "</code>");\r
+ i = ignore(i, "<CODE>", "</CODE>");\r
+ i = ignore(i, '<', '>');\r
+\r
+ if (i >= text.length()) break search;\r
+\r
+ char currentLetter = text.charAt(i);\r
+ if (inComment) {\r
+ //Reset on new line.\r
+ if (currentLetter == '\n') {\r
+ inComment = false;\r
+ i++;\r
+ continue search;\r
+ } else if (!isWordChar(i)) {\r
+ i++;\r
+ continue search;\r
+ }\r
+ //Find words.\r
+ while (i < text.length() - 1) {\r
+ if (!started && isWordChar(i)) {\r
+ nextWord.setStart(i);\r
+ started = true;\r
+ } else if (started && !isWordChar(i)) {\r
+ // First non-word character after the start closes the word.\r
+ nextWord.setText(text.substring(nextWord.getStart(), i));\r
+ finished = true;\r
+ break search;\r
+ }\r
+\r
+ currentLetter = text.charAt(++i);\r
+ }\r
+ } else if (currentLetter == '*') {\r
+ // A '*' switches word collection on until the next newline.\r
+ inComment = true;\r
+ i++;\r
+ } else {\r
+ i++;\r
+ }\r
+ }\r
+\r
+ if (!started) {\r
+ nextWord = null;\r
+ } else if (!finished) {\r
+ // The scan ended inside a word: take the text from the word start up to i.\r
+ nextWord.setText(text.substring(nextWord.getStart(), i));\r
+ }\r
+\r
+ return currentWord;\r
+ }\r
+\r
+ /**\r
+ * Initializes this word finder: runs the superclass initialization and\r
+ * clears the inComment flag.\r
+ */\r
+\r
+ @Override\r
+protected void init() {\r
+// sentenceIterator = BreakIterator.getSentenceInstance();\r
+// sentenceIterator.setText(text);\r
+ super.init();\r
+ inComment = false;\r
+ }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.util.List;\r
+\r
+/**\r
+ * This event is fired off by the SpellChecker and is passed to the\r
+ * registered SpellCheckListeners\r
+ * <p/>\r
+ * As far as I know, we will only require one implementation of the SpellCheckEvent\r
+ * (BasicSpellCheckEvent) but I have defined this interface just in case. The\r
+ * BasicSpellCheckEvent implementation is currently package private.\r
+ *\r
+ * @author Jason Height (jheight@chariot.net.au)\r
+ */\r
+public interface SpellCheckEvent {\r
+ /** Field indicating that the incorrect word should be ignored*/\r
+ public static final short IGNORE = 0;\r
+ /** Field indicating that the incorrect word should be ignored forever*/\r
+ public static final short IGNOREALL = 1;\r
+ /** Field indicating that the incorrect word should be replaced*/\r
+ public static final short REPLACE = 2;\r
+ /** Field indicating that the incorrect word should be replaced always*/\r
+ public static final short REPLACEALL = 3;\r
+ /** Field indicating that the incorrect word should be added to the dictionary*/\r
+ public static final short ADDTODICT = 4;\r
+ /** Field indicating that the spell checking should be terminated*/\r
+ public static final short CANCEL = 5;\r
+ /** Initial case for the action */\r
+ public static final short INITIAL = -1;\r
+\r
+ /** Returns the list of suggested Word objects\r
+ * @return A list of words phonetically close to the misspelt word\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+public List getSuggestions();\r
+\r
+ /** Returns the currently misspelt word\r
+ * @return The text misspelt\r
+ */\r
+ public String getInvalidWord();\r
+\r
+ /** Returns the context in which the misspelt word is used\r
+ * @return The text containing the context\r
+ */\r
+ public String getWordContext();\r
+\r
+ /** Returns the start position of the misspelt word in the context\r
+ * @return The position of the word\r
+ */\r
+ public int getWordContextPosition();\r
+\r
+ /** Returns the action type the user has to handle\r
+ * @return The type of action the event is carrying\r
+ */\r
+ public short getAction();\r
+\r
+ /** Returns the text to replace\r
+ * @return the text of the word to replace\r
+ */\r
+ public String getReplaceWord();\r
+\r
+ /** Set the action to replace the currently misspelt word with the new word\r
+ * @param newWord The word to replace the currently misspelt word\r
+ * @param replaceAll If set to true, the SpellChecker will replace all\r
+ * further occurrences of the misspelt word without firing a SpellCheckEvent.\r
+ */\r
+ public void replaceWord(String newWord, boolean replaceAll);\r
+\r
+ /** Set the action it ignore the currently misspelt word.\r
+ * @param ignoreAll If set to true, the SpellChecker will replace all\r
+ * further occurrences of the misspelt word without firing a SpellCheckEvent.\r
+ */\r
+ public void ignoreWord(boolean ignoreAll);\r
+\r
+ /** Set the action to add a new word into the dictionary. This will also replace the\r
+ * currently misspelt word.\r
+ *@param newWord The new word to add\r
+ */\r
+ public void addToDictionary(String newWord);\r
+\r
+ /** Set the action to terminate processing of the spell checker.\r
+ */\r
+ public void cancel();\r
+}
\ No newline at end of file
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.util.EventListener;\r
+\r
+/**\r
+ * Listener interface through which the spell checker reports spelling\r
+ * errors to interested parties.\r
+ *\r
+ * @author Jason Height (jheight@chariot.net.au)\r
+ */\r
+public interface SpellCheckListener extends EventListener {\r
+\r
+    /**\r
+     * Called for each spelling error that is propagated to the listeners.\r
+     *\r
+     * @param event The event to handle\r
+     */\r
+    void spellingError(SpellCheckEvent event);\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+import java.io.IOException;\r
+import java.util.ArrayList;\r
+import java.util.Enumeration;\r
+import java.util.HashMap;\r
+import java.util.Hashtable;\r
+import java.util.Iterator;\r
+import java.util.List;\r
+import java.util.Map;\r
+import java.util.Vector;\r
+\r
+import com.swabunga.spell.engine.Configuration;\r
+import com.swabunga.spell.engine.SpellDictionary;\r
+import com.swabunga.spell.engine.SpellDictionaryHashMap;\r
+import com.swabunga.spell.engine.Word;\r
+import com.swabunga.util.VectorUtility;\r
+\r
+\r
+/**\r
+ * This is the main class for spell checking (using the new event based spell\r
+ * checking). \r
+ * <p/>\r
+ * By default, the class creates a user dictionary to accumulate added words.\r
+ * Since this user dictionary has no file assigned to persist added words, they\r
+ * are retained only for the lifetime of the spell checker instance.\r
+ * If you set a user dictionary such as\r
+ * {@link com.swabunga.spell.engine.SpellDictionaryHashMap SpellDictionaryHashMap}\r
+ * that persists the added words, the user dictionary can grow and remain\r
+ * available across different invocations of the spell checker.\r
+ *\r
+ * @author Jason Height (jheight@chariot.net.au)\r
+ * 19 June 2002\r
+ */\r
+public class SpellChecker {\r
+ /** Flag indicating that the Spell Check completed without any errors present*/\r
+ public static final int SPELLCHECK_OK = -1;\r
+ /** Flag indicating that the Spell Check completed due to user cancellation*/\r
+ public static final int SPELLCHECK_CANCEL = -2;\r
+\r
+ /** Registered SpellCheckListener instances, notified by fireSpellCheckEvent */\r
+ @SuppressWarnings("unchecked")\r
+private final Vector eventListeners = new Vector();\r
+ /** SpellDictionary instances consulted for lookups, in the order they were added */\r
+ @SuppressWarnings("unchecked")\r
+private final Vector dictionaries = new Vector();\r
+ /** Dictionary that receives words added by the user; consulted first by isCorrect */\r
+ private SpellDictionary userdictionary;\r
+\r
+ /** Engine configuration obtained from Configuration.getConfiguration() */\r
+ private final Configuration config = Configuration.getConfiguration();\r
+\r
+ /**This variable holds all of the words that are to be always ignored */\r
+ @SuppressWarnings("unchecked")\r
+private Vector ignoredWords = new Vector();\r
+ /** Maps a misspelt word to the replacement chosen with "replace all" */\r
+ @SuppressWarnings("unchecked")\r
+private Hashtable autoReplaceWords = new Hashtable();\r
+ \r
+ // added caching - bd\r
+ // For cached operation a separate user dictionary is required\r
+ /** Suggestion cache keyed by word; null while caching is disabled (see setCache) */\r
+ @SuppressWarnings("unchecked")\r
+private Map cache;\r
+ /** Threshold the cached suggestions were computed with; a change clears the cache */\r
+ private int threshold = 0;\r
+ /** Maximum number of entries put into the cache */\r
+ private int cacheSize = 0;\r
+ \r
+\r
+ /**\r
+  * Constructs the SpellChecker with an in-memory user dictionary and no\r
+  * lookup dictionaries.\r
+  *\r
+  * @throws RuntimeException if creating the user dictionary fails; this is\r
+  *         not expected because no phonetic file is involved. The\r
+  *         underlying IOException is attached as the cause.\r
+  */\r
+ public SpellChecker() {\r
+     try {\r
+         userdictionary = new SpellDictionaryHashMap();\r
+     } catch (IOException e) {\r
+         // Keep the IOException as the cause so an unexpected failure stays diagnosable.\r
+         throw new RuntimeException("this exception should never happen because we are using null phonetic file", e);\r
+     }\r
+ }\r
+\r
+ /**\r
+ * Constructs the SpellChecker with a default user dictionary and registers\r
+ * the given dictionary through addDictionary. The default threshold is used.\r
+ *\r
+ * @param dictionary The dictionary used for looking up words.\r
+ * @throws IllegalArgumentException if dictionary is null (raised by addDictionary).\r
+ */\r
+ public SpellChecker(SpellDictionary dictionary) {\r
+ this();\r
+ addDictionary(dictionary);\r
+ }\r
+\r
+\r
+ /**\r
+ * Constructs the SpellChecker with a threshold. The threshold is written to\r
+ * the configuration under Configuration.SPELL_THRESHOLD.\r
+ * NOTE(review): the configuration comes from Configuration.getConfiguration();\r
+ * whether that object is shared between SpellChecker instances cannot be\r
+ * seen here - confirm before relying on per-instance thresholds.\r
+ *\r
+ * @param dictionary the dictionary used for looking up words.\r
+ * @param threshold the cost value above which any suggestions are \r
+ * thrown away\r
+ */\r
+ public SpellChecker(SpellDictionary dictionary, int threshold) {\r
+ this(dictionary);\r
+ config.setInteger(Configuration.SPELL_THRESHOLD, threshold);\r
+ }\r
+\r
+ /**\r
+  * Appends a dictionary to the list of dictionaries used for looking up\r
+  * words. Dictionaries are consulted in the order they were added, which\r
+  * lets a base language dictionary be followed by more technical ones.\r
+  *\r
+  * @param dictionary the dictionary to add at the end of the dictionary list.\r
+  * @throws IllegalArgumentException if dictionary is null\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ public void addDictionary(SpellDictionary dictionary) {\r
+     if (dictionary != null) {\r
+         dictionaries.add(dictionary);\r
+         return;\r
+     }\r
+     throw new IllegalArgumentException("dictionary must be non-null");\r
+ }\r
+\r
+ /**\r
+ * Registers the user dictionary to which words are added, replacing the\r
+ * one currently in use. The argument is stored as is; no null check is\r
+ * performed here.\r
+ *\r
+ * @param dictionary the dictionary to use when the user specifies a new word\r
+ * to add.\r
+ */\r
+ public void setUserDictionary(SpellDictionary dictionary) {\r
+ userdictionary = dictionary;\r
+ }\r
+\r
+ /**\r
+ * Supplies the instance of the configuration holding the spell checking engine\r
+ * parameters. This is the object obtained from\r
+ * Configuration.getConfiguration() when the checker was created.\r
+ *\r
+ * @return Current Configuration\r
+ */\r
+ public Configuration getConfiguration() {\r
+ return config;\r
+ }\r
+\r
+ /**\r
+  * Registers a listener that will be told about spelling errors. The\r
+  * listener is appended to the listener list without further checks.\r
+  *\r
+  * @param listener The listener to append to the listeners list\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ public void addSpellCheckListener(SpellCheckListener listener) {\r
+     eventListeners.add(listener);\r
+ }\r
+\r
+\r
+ /**\r
+  * Unregisters a listener. Only the first occurrence of the listener in\r
+  * the listener list is removed; an unknown listener is ignored.\r
+  *\r
+  * @param listener The listener to be removed from the listeners list.\r
+  */\r
+ public void removeSpellCheckListener(SpellCheckListener listener) {\r
+     eventListeners.remove(listener);\r
+ }\r
+\r
+\r
+ /**\r
+  * Delivers a spell check event to every registered listener, starting\r
+  * with the most recently added one.\r
+  *\r
+  * @param event The event that needs to be processed by the spell checking\r
+  *        system.\r
+  */\r
+ protected void fireSpellCheckEvent(SpellCheckEvent event) {\r
+     int index = eventListeners.size();\r
+     while (--index >= 0) {\r
+         SpellCheckListener listener = (SpellCheckListener) eventListeners.elementAt(index);\r
+         listener.spellingError(event);\r
+     }\r
+ }\r
+\r
+\r
+ /**\r
+ * This method clears the words that are currently being remembered as\r
+ * <code>Ignore All</code> words and <code>Replace All</code> words.\r
+ * Both collections are replaced by new, empty instances rather than\r
+ * emptied in place.\r
+ */\r
+ @SuppressWarnings("unchecked")\r
+public void reset() {\r
+ ignoredWords = new Vector();\r
+ autoReplaceWords = new Hashtable();\r
+ }\r
+\r
+\r
+ /**\r
+ * Checks the text string by wrapping it in a StringWordTokenizer and\r
+ * running checkSpelling on it.\r
+ * <p>\r
+ * Returns the corrected string, i.e. the tokenizer's context after the\r
+ * check.\r
+ *\r
+ * @param text The text that needs to be spell checked\r
+ * @return The text after spell checking\r
+ * @deprecated use checkSpelling(WordTokenizer)\r
+ */\r
+ @Deprecated\r
+public String checkString(String text) {\r
+ StringWordTokenizer tokens = new StringWordTokenizer(text);\r
+ checkSpelling(tokens);\r
+ return tokens.getContext();\r
+ }\r
+\r
+\r
+ /**\r
+ * Verifies if the word that is being spell checked contains at least a\r
+ * digit.\r
+ * Returns true if this word contains a digit.\r
+ *\r
+ * @param word The word to analyze for digit.\r
+ * @return true if the word contains at least a digit.\r
+ */\r
+ private final static boolean isDigitWord(String word) {\r
+ for (int i = word.length() - 1; i >= 0; i--) {\r
+ if (Character.isDigit(word.charAt(i))) {\r
+ return true;\r
+ }\r
+ }\r
+ return false;\r
+ }\r
+\r
+\r
+ /**\r
+  * Tells whether the word being spell checked looks like an Internet\r
+  * address. The word is lower-cased and must start with one of:\r
+  * <ul>\r
+  * <li>http://</li>\r
+  * <li>www.</li>\r
+  * <li>ftp://</li>\r
+  * <li>https://</li>\r
+  * <li>ftps://</li>\r
+  * </ul>\r
+  *\r
+  * Email addresses are not recognized: the 'word' passed in may in fact\r
+  * contain the rest of the document to be checked, so scanning for the\r
+  * &#64; character is not (yet) a good idea.\r
+  *\r
+  * @param word The word to analyze for an Internet address.\r
+  * @return true if this word looks like an Internet address.\r
+  */\r
+ public final static boolean isINETWord(String word) {\r
+     final String candidate = word.toLowerCase();\r
+     final String[] prefixes = {"http://", "www.", "ftp://", "https://", "ftps://"};\r
+     for (int p = 0; p < prefixes.length; p++) {\r
+         if (candidate.startsWith(prefixes[p])) {\r
+             return true;\r
+         }\r
+     }\r
+     return false;\r
+ }\r
+\r
+\r
+ /**\r
+ * Verifies if the word that is being spell checked contains all\r
+ * uppercases characters.\r
+ *\r
+ * @param word The word to analyze for uppercases characters\r
+ * @return true if this word contains all upper case characters\r
+ */\r
+ private final static boolean isUpperCaseWord(String word) {\r
+ for (int i = word.length() - 1; i >= 0; i--) {\r
+ if (Character.isLowerCase(word.charAt(i))) {\r
+ return false;\r
+ }\r
+ }\r
+ return true;\r
+ }\r
+\r
+\r
+ /**\r
+  * Tells whether the word being spell checked mixes lower and upper case\r
+  * characters. The case of the first character sets the expectation; the\r
+  * word is mixed when any later character has the opposite case. A\r
+  * capitalised first letter at the start of a sentence is normal, so in\r
+  * that situation the second character sets the expectation instead.\r
+  * <p>\r
+  * An empty word is never mixed case (previously it raised a\r
+  * StringIndexOutOfBoundsException).\r
+  *\r
+  * @param word The word to analyze for mixed case characters\r
+  * @param startsSentence True if this word is at the start of a sentence\r
+  * @return true if this word contains mixed case characters\r
+  */\r
+ private final static boolean isMixedCaseWord(String word, boolean startsSentence) {\r
+     final int strLen = word.length();\r
+     if (strLen == 0) {\r
+         return false;\r
+     }\r
+     boolean isUpper = Character.isUpperCase(word.charAt(0));\r
+     // Ignore the first character if this word starts the sentence and the first\r
+     // character was upper cased, since this is normal behaviour.\r
+     if (startsSentence && isUpper && strLen > 1) {\r
+         isUpper = Character.isUpperCase(word.charAt(1));\r
+     }\r
+     // Index 0 is deliberately excluded from the scan.\r
+     for (int i = strLen - 1; i > 0; i--) {\r
+         char c = word.charAt(i);\r
+         boolean oppositeCase = isUpper ? Character.isLowerCase(c) : Character.isUpperCase(c);\r
+         if (oppositeCase) {\r
+             return true;\r
+         }\r
+     }\r
+     return false;\r
+ }\r
+\r
+\r
+ /**\r
+  * Fires the spell check event and then carries out the action the\r
+  * listeners selected on it.\r
+  *\r
+  * @param tokenizer the tokenizer whose current word is replaced when the\r
+  *        action asks for a replacement\r
+  * @param event The event to handle\r
+  * @return true if the event action is to cancel the current spell\r
+  *         checking, false if the spell checking should continue\r
+  * @throws IllegalArgumentException if the event carries an unknown action\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ protected boolean fireAndHandleEvent(WordTokenizer tokenizer, SpellCheckEvent event) {\r
+     fireSpellCheckEvent(event);\r
+     String word = event.getInvalidWord();\r
+     // Work out what to do in response to the event.\r
+     switch (event.getAction()) {\r
+         case SpellCheckEvent.INITIAL:\r
+         case SpellCheckEvent.IGNORE:\r
+             return false;\r
+         case SpellCheckEvent.IGNOREALL:\r
+             ignoreAll(word);\r
+             return false;\r
+         case SpellCheckEvent.REPLACE:\r
+             tokenizer.replaceWord(event.getReplaceWord());\r
+             return false;\r
+         case SpellCheckEvent.REPLACEALL: {\r
+             String replacement = event.getReplaceWord();\r
+             // The first replacement chosen for a word wins.\r
+             if (!autoReplaceWords.containsKey(word)) {\r
+                 autoReplaceWords.put(word, replacement);\r
+             }\r
+             tokenizer.replaceWord(replacement);\r
+             return false;\r
+         }\r
+         case SpellCheckEvent.ADDTODICT: {\r
+             String added = event.getReplaceWord();\r
+             if (!added.equals(word)) {\r
+                 tokenizer.replaceWord(added);\r
+             }\r
+             userdictionary.addWord(added);\r
+             return false;\r
+         }\r
+         case SpellCheckEvent.CANCEL:\r
+             return true;\r
+         default:\r
+             throw new IllegalArgumentException("Unhandled case.");\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Remembers a word as one to ignore. A word that is already on the list\r
+  * is not added a second time.\r
+  *\r
+  * @param word The text of the word to ignore\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ public void ignoreAll(String word) {\r
+     if (ignoredWords.contains(word)) {\r
+         return;\r
+     }\r
+     ignoredWords.addElement(word);\r
+ }\r
+ \r
+ /**\r
+ * Adds a word to the user dictionary\r
+ * @param word The text of the word to add\r
+ */\r
+ public void addToDictionary(String word) {\r
+ if (!userdictionary.isCorrect(word))\r
+ userdictionary.addWord(word);\r
+ }\r
+ \r
+ /**\r
+ * Indicates if a word is in the list of ignored words\r
+ * @param word The text of the word to check\r
+ * @return true if the word is currently on the ignore list\r
+ */\r
+ public boolean isIgnored(String word){\r
+ return ignoredWords.contains(word);\r
+ }\r
+ \r
+ /**\r
+  * Tells whether any dictionary knows the word. The lookup order is:\r
+  * <ul>\r
+  * <li>The default user dictionary or the one set through\r
+  * {@link SpellChecker#setUserDictionary}</li>\r
+  * <li>Every dictionary registered through\r
+  * {@link SpellChecker#addDictionary} (which includes one passed at\r
+  * construction time), in the order they were added</li>\r
+  * </ul>\r
+  * The search stops at the first dictionary that accepts the word.\r
+  *\r
+  * @param word The word whose spelling is to be verified.\r
+  * @return true if the word is in a dictionary.\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ public boolean isCorrect(String word) {\r
+     if (userdictionary.isCorrect(word)) {\r
+         return true;\r
+     }\r
+     Enumeration remaining = dictionaries.elements();\r
+     while (remaining.hasMoreElements()) {\r
+         SpellDictionary candidate = (SpellDictionary) remaining.nextElement();\r
+         if (candidate.isCorrect(word)) {\r
+             return true;\r
+         }\r
+     }\r
+     return false;\r
+ }\r
+\r
+ /**\r
+  * Produces a list of suggested words after looking for suggestions in the\r
+  * various dictionaries. The documented order of dictionary lookup is:\r
+  * <ul>\r
+  * <li>The default user dictionary or the one set through\r
+  * {@link SpellChecker#setUserDictionary}</li>\r
+  * <li>The dictionary specified at construction time, if any.</li>\r
+  * <li>Any dictionary in the order they were added through\r
+  * {@link SpellChecker#addDictionary}</li>\r
+  * </ul>\r
+  * <p>\r
+  * When a cache is active, only the suggestions of the non-user dictionaries\r
+  * are cached; the user dictionary is queried on every call. A threshold that\r
+  * differs from the previous one empties the cache first. The returned list is\r
+  * always a fresh copy, so callers may add to or remove from it without\r
+  * affecting the cache.\r
+  *\r
+  * @param word The word for which we want to gather suggestions\r
+  * @param threshold the cost value above which any suggestions are\r
+  * thrown away\r
+  * @return the list of words suggested\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ public List getSuggestions(String word, int threshold) {\r
+   if (this.threshold != threshold && cache != null) {\r
+     // Cached entries were computed for another threshold; drop them.\r
+     this.threshold = threshold;\r
+     cache.clear();\r
+   }\r
+\r
+   ArrayList shared = null;\r
+   if (cache != null) {\r
+     shared = (ArrayList) cache.get(word);\r
+   }\r
+\r
+   if (shared == null) {\r
+     shared = new ArrayList(50);\r
+\r
+     for (Enumeration e = dictionaries.elements(); e.hasMoreElements();) {\r
+       SpellDictionary dictionary = (SpellDictionary) e.nextElement();\r
+\r
+       // The user dictionary is handled separately below and never cached.\r
+       if (dictionary != userdictionary) {\r
+         VectorUtility.addAll(shared, dictionary.getSuggestions(word, threshold), false);\r
+       }\r
+     }\r
+     shared.trimToSize();\r
+\r
+     if (cache != null && cache.size() < cacheSize) {\r
+       cache.put(word, shared);\r
+     }\r
+   }\r
+\r
+   // Work on a copy: the previous code appended the user-dictionary\r
+   // suggestions to the very list stored in the cache and handed that same\r
+   // instance to the caller. The copy is shallow, so the list elements\r
+   // themselves are still shared with the cached list.\r
+   ArrayList suggestions = new ArrayList(shared);\r
+   VectorUtility.addAll(suggestions, userdictionary.getSuggestions(word, threshold), false);\r
+   suggestions.trimToSize();\r
+\r
+   return suggestions;\r
+ }\r
+\r
+ /**\r
+  * Activates the suggestion cache with a maximum of 300 entries.\r
+  */\r
+ public void setCache() {\r
+   final int defaultMaxEntries = 300;\r
+   setCache(defaultMaxEntries);\r
+ }\r
+\r
+ /**\r
+  * Activates a cache of the given size, or switches caching off.\r
+  * The size is remembered as the entry limit; a size of 0 discards the cache,\r
+  * any other value creates a new, empty map whose constructor argument is\r
+  * derived from the requested size.\r
+  *\r
+  * @param size - max. number of cache entries (0 to disable cache)\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ public void setCache(int size) {\r
+   cacheSize = size;\r
+   cache = (size == 0) ? null : new HashMap((size + 2) / 3 * 4);\r
+ }\r
+\r
+ /**\r
+  * Checks the spelling of every word handed out by the tokenizer.\r
+  * <p>\r
+  * A word rejected by the dictionaries is skipped when one of the configured\r
+  * ignore rules (mixed case, upper case, digits, internet addresses) applies\r
+  * or when it is in the ignored-words list. Otherwise it counts as an error\r
+  * and is either auto-replaced or reported to the listeners through a new\r
+  * SpellCheckEvent. A word accepted by the dictionaries is still reported\r
+  * when it should have been capitalized.\r
+  * </p>\r
+  *\r
+  * @param tokenizer The media containing the text to analyze.\r
+  * @return Either SPELLCHECK_OK, SPELLCHECK_CANCEL or the number of errors found. The number of errors are those that\r
+  * are found BEFORE any corrections are made.\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ public final int checkSpelling(WordTokenizer tokenizer) {\r
+   int errorCount = 0;\r
+   boolean cancelled = false;\r
+\r
+   while (tokenizer.hasMoreWords() && !cancelled) {\r
+     String word = tokenizer.nextWord();\r
+\r
+     if (isCorrect(word)) {\r
+       // Correctly spelt word: only the sentence capitalisation is left to check.\r
+       if (isSupposedToBeCapitalized(word, tokenizer)) {\r
+         errorCount++;\r
+         StringBuffer capitalized = new StringBuffer(word);\r
+         capitalized.setCharAt(0, Character.toUpperCase(word.charAt(0)));\r
+         Vector replacement = new Vector();\r
+         replacement.addElement(new Word(capitalized.toString(), 0));\r
+         SpellCheckEvent event = new BasicSpellCheckEvent(word, replacement, tokenizer);\r
+         cancelled = fireAndHandleEvent(tokenizer, event);\r
+       }\r
+       continue;\r
+     }\r
+\r
+     // Misspelt word: first see whether the configuration tells us to let it pass.\r
+     boolean skippedByConfiguration =\r
+         (config.getBoolean(Configuration.SPELL_IGNOREMIXEDCASE) && isMixedCaseWord(word, tokenizer.isNewSentence()))\r
+         || (config.getBoolean(Configuration.SPELL_IGNOREUPPERCASE) && isUpperCaseWord(word))\r
+         || (config.getBoolean(Configuration.SPELL_IGNOREDIGITWORDS) && isDigitWord(word))\r
+         || (config.getBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES) && isINETWord(word));\r
+     if (skippedByConfiguration || isIgnored(word)) {\r
+       continue;\r
+     }\r
+\r
+     errorCount++;\r
+\r
+     // Words with a registered automatic replacement are swapped without an event.\r
+     if (autoReplaceWords.containsKey(word)) {\r
+       tokenizer.replaceWord((String) autoReplaceWords.get(word));\r
+       continue;\r
+     }\r
+\r
+     // Everything else is reported to the listeners together with suggestions.\r
+     List suggestions = getSuggestions(word, config.getInteger(Configuration.SPELL_THRESHOLD));\r
+     if (capitalizeSuggestions(word, tokenizer)) {\r
+       suggestions = makeSuggestionsCapitalized(suggestions);\r
+     }\r
+     SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestions, tokenizer);\r
+     cancelled = fireAndHandleEvent(tokenizer, event);\r
+   }\r
+\r
+   if (cancelled) {\r
+     return SPELLCHECK_CANCEL;\r
+   }\r
+   return (errorCount == 0) ? SPELLCHECK_OK : errorCount;\r
+ }\r
+ \r
+ \r
+ /**\r
+  * Upper-cases the first character of every suggestion in the list.\r
+  * The Word elements are modified in place and the very same list is returned.\r
+  * NOTE(review): because the elements are mutated, any other holder of these\r
+  * Word objects sees the capitalized text as well - confirm this is intended.\r
+  *\r
+  * @param suggestions list of Word objects to capitalize\r
+  * @return the list that was passed in\r
+  */\r
+ @SuppressWarnings("unchecked")\r
+ private List makeSuggestionsCapitalized(List suggestions) {\r
+   for (Iterator entries = suggestions.iterator(); entries.hasNext();) {\r
+     Word entry = (Word) entries.next();\r
+     String text = entry.getWord();\r
+     String capitalized = Character.toUpperCase(text.charAt(0)) + text.substring(1);\r
+     entry.setWord(capitalized);\r
+   }\r
+   return suggestions;\r
+ }\r
+\r
+ \r
+ private boolean isSupposedToBeCapitalized(String word, WordTokenizer wordTokenizer) {\r
+   // Sentence capitalisation is not enforced when the configuration ignores it.\r
+   if (config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION)) {\r
+     return false;\r
+   }\r
+   // Only the first word of a sentence is expected to be capitalized.\r
+   if (!wordTokenizer.isNewSentence()) {\r
+     return false;\r
+   }\r
+   return Character.isLowerCase(word.charAt(0));\r
+ }\r
+\r
+ private boolean capitalizeSuggestions(String word, WordTokenizer wordTokenizer) {\r
+   boolean ignoreSentenceCapitalization = config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION);\r
+   boolean startsUpperCase = Character.isUpperCase(word.charAt(0));\r
+   if (ignoreSentenceCapitalization) {\r
+     // Capitalisation is not enforced: suggestions simply follow the case of the checked word.\r
+     return startsUpperCase;\r
+   }\r
+   // Capitalisation is enforced: capitalize suggestions only for the first word in a sentence.\r
+   return wordTokenizer.isNewSentence();\r
+ }\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+\r
+/**\r
+ * This class tokenizes an input string.\r
+ *\r
+ * <p>\r
+ * It also allows for the string to be altered by calls to replaceWord(). The result after the spell\r
+ * checking is completed is available to the call to getContext().\r
+ * </p>\r
+ *\r
+ * @author Jason Height (jheight@chariot.net.au)\r
+ * @author Anthony Roy (ajr@antroy.co.uk)\r
+ */\r
+public class StringWordTokenizer extends AbstractWordTokenizer {\r
+\r
+  //~ Constructors ............................................................\r
+\r
+  /**\r
+   * Creates a new StringWordTokenizer object.\r
+   *\r
+   * @param s the string to tokenize.\r
+   */\r
+  public StringWordTokenizer(String s) {\r
+    super(s);\r
+  }\r
+\r
+  /**\r
+   * Creates a new StringWordTokenizer object.\r
+   *\r
+   * @param wf the custom WordFinder to use in tokenizing. Note\r
+   * that the string to tokenize will be encapsulated within the WordFinder.\r
+   */\r
+  public StringWordTokenizer(WordFinder wf) {\r
+    super(wf);\r
+  }\r
+\r
+  /**\r
+   * Creates a new StringWordTokenizer object that uses a custom WordFinder\r
+   * and hands it the string to work on through setText().\r
+   *\r
+   * @param s the string to work on\r
+   * @param finder the custom WordFinder to use in tokenizing. Note\r
+   * that the string to tokenize will be encapsulated within the WordFinder.\r
+   */\r
+  public StringWordTokenizer(String s, WordFinder finder) {\r
+    super(finder);\r
+    finder.setText(s);\r
+  }\r
+\r
+\r
+  //~ Methods .................................................................\r
+\r
+  /**\r
+   * Returns the same value as getContext().\r
+   *\r
+   * @deprecated use getContext() instead as per the WordTokenizer\r
+   * interface specification.\r
+   * @return the final text.\r
+   */\r
+  @Deprecated\r
+  public String getFinalText() {\r
+    return getContext();\r
+  }\r
+\r
+  /**\r
+   * Replace the current word in the iteration with the String s.\r
+   * The work is delegated to the underlying WordFinder.\r
+   *\r
+   * @param s the String to replace the current word.\r
+   * @throws WordNotFoundException current word not yet set.\r
+   */\r
+  public void replaceWord(String s) {\r
+    finder.replace(s);\r
+  }\r
+}
\ No newline at end of file
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+//{{{ package and imports\r
+//:folding=explicit:\r
+package com.swabunga.spell.event;\r
+\r
+import java.util.Collection;\r
+import java.util.HashSet;\r
+\r
+/**\r
+ * A word finder for TeX and LaTeX documents, which searches text for\r
+ * sequences of letters, but ignores any commands and environments as well\r
+ * as Math environments.\r
+ *\r
+ * @author Anthony Roy (ajr@antroy.co.uk)\r
+ */\r
+\r
+//}}}\r
+\r
+public class TeXWordFinder extends AbstractWordFinder {\r
+\r
+//{{{ ~ Instance/static variables ...............................................\r
+\r
+  /**\r
+   * A type where string expressions are used to define expression to ignore\r
+   */\r
+  public static final int STRING_EXPR = 0;\r
+  /**\r
+   * A type where regular expressions are used to define expression to ignore\r
+   */\r
+  public static final int REG_EXPR = 1;\r
+//  public static final int GLOB_EXPR = 2;\r
+\r
+  /** When true, next() skips everything from a '%' up to the end of the line. */\r
+  private boolean ignoreComments = true;\r
+  /** Expressions collected by addUserDefinedIgnores(); stored but not consulted by next() yet. */\r
+  @SuppressWarnings("unchecked")\r
+  private final HashSet user_defined_ignores = new HashSet();\r
+  /** Expression type last given to addUserDefinedIgnores(); stored but not consulted yet. */\r
+  @SuppressWarnings("unused")\r
+  private int regex_user_defined_ignores = STRING_EXPR;\r
+//}}}\r
+//{{{ ~ Constructors ............................................................\r
+\r
+  /**\r
+   * Creates a new TeXWordFinder object.\r
+   *\r
+   * @param inText the text to search.\r
+   */\r
+  public TeXWordFinder(String inText) {\r
+    super(inText);\r
+  }\r
+\r
+  /**\r
+   * Creates a new TeXWordFinder object.\r
+   */\r
+  public TeXWordFinder() {\r
+    super();\r
+  }\r
+//}}}\r
+//{{{ ~ Methods .................................................................\r
+\r
+  /**\r
+   * This method scans the text from the end of the last word, and returns a\r
+   * new Word object corresponding to the next word. While scanning, comments\r
+   * (when comment ignoring is switched on), math sections, the arguments of a\r
+   * fixed set of commands, environment names and commands are skipped.\r
+   *\r
+   * @return the next word.\r
+   * @throws WordNotFoundException search string contains no more words.\r
+   */\r
+  @Override\r
+  public Word next() {\r
+\r
+    if (!hasNext()) {\r
+      throw new WordNotFoundException("No more words found.");\r
+    }\r
+\r
+    // The word found by the previous scan becomes the current one.\r
+    currentWord.copy(nextWord);\r
+    setSentenceIterator(currentWord);\r
+\r
+    int i = currentWord.getEnd();\r
+    boolean finished = false;\r
+    boolean started = false;\r
+\r
+    while (i < text.length() && !finished) {\r
+\r
+      // Find words.\r
+      if (!started && isWordChar(i)) {\r
+        nextWord.setStart(i++);\r
+        started = true;\r
+        continue;\r
+      } else if (started) {\r
+        if (isWordChar(i)) {\r
+          i++;\r
+          continue;\r
+        } else {\r
+          nextWord.setText(text.substring(nextWord.getStart(), i));\r
+          finished = true;\r
+          break;\r
+        }\r
+      }\r
+\r
+      // Ignores should be in order of importance and then specificity.\r
+      int j = i;\r
+\r
+      // Ignore comments, but only when requested through setIgnoreComments().\r
+      // Previously the flag was stored and never read, so comments were\r
+      // skipped unconditionally.\r
+      if (ignoreComments) {\r
+        j = ignore(j, '%', '\n');\r
+      }\r
+\r
+      // Ignore Maths:\r
+      j = ignore(j, "$$", "$$");\r
+      j = ignore(j, '$', '$');\r
+\r
+      // Ignore user defined.\r
+      j = ignoreUserDefined(j);\r
+\r
+      // Ignore certain command parameters.\r
+      j = ignore(j, "\\newcommand", "}");\r
+      j = ignore(j, "\\documentclass", "}");\r
+      j = ignore(j, "\\usepackage", "}");\r
+      j = ignore(j, "\\newcounter{", "}");\r
+      j = ignore(j, "\\setcounter{", "}");\r
+      j = ignore(j, "\\addtocounter{", "}");\r
+      j = ignore(j, "\\value{", "}");\r
+      j = ignore(j, "\\arabic{", "}");\r
+      j = ignore(j, "\\usecounter{", "}");\r
+      j = ignore(j, "\\newenvironment", "}");\r
+      j = ignore(j, "\\setlength", "}");\r
+      j = ignore(j, "\\setkeys", "}");\r
+\r
+      // Ignore environment names.\r
+      j = ignore(j, "\\begin{", "}");\r
+      j = ignore(j, "\\end{", "}");\r
+      if (i != j) {\r
+        i = j;\r
+        continue;\r
+      }\r
+\r
+      // Ignore commands.\r
+      j = ignore(j, '\\');\r
+\r
+      if (i != j) {\r
+        i = j;\r
+        continue;\r
+      }\r
+      i++;\r
+    }\r
+\r
+    if (!started) {\r
+      // Nothing left after the current word.\r
+      nextWord = null;\r
+    } else if (!finished) {\r
+      // The text ended while still inside a word.\r
+      nextWord.setText(text.substring(nextWord.getStart(), i));\r
+    }\r
+\r
+    return currentWord;\r
+  }\r
+\r
+  /**\r
+   * This method is used to import a user defined set of either strings or regular expressions to ignore.\r
+   * The expressions and the type are only stored; ignoreUserDefined() does not evaluate them yet.\r
+   *\r
+   * @param expressions a collection of Objects whose toString() value should be the expression. Typically String objects.\r
+   * @param regex is an integer specifying the type of expression to use. e.g. REG_EXPR, STRING_EXPR.\r
+   */\r
+  @SuppressWarnings("unchecked")\r
+  public void addUserDefinedIgnores(Collection expressions, int regex){\r
+    user_defined_ignores.addAll(expressions);\r
+    regex_user_defined_ignores = regex;\r
+  }\r
+\r
+  /**\r
+   * Placeholder for skipping user defined expressions; currently returns the\r
+   * index unchanged.\r
+   */\r
+  private int ignoreUserDefined(int i){\r
+    return i;\r
+  }\r
+\r
+  /**\r
+   * Define if comments contents are ignored during spell checking\r
+   * @param ignore an indication if comments content is to be ignored\r
+   */\r
+  public void setIgnoreComments(boolean ignore) {\r
+    ignoreComments = ignore;\r
+  }\r
+//}}}\r
+}\r
--- /dev/null
+/*\r
+Jazzy - a Java library for Spell Checking\r
+Copyright (C) 2001 Mindaugas Idzelis\r
+Full text of license can be found in LICENSE.txt\r
+\r
+This library is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU Lesser General Public\r
+License as published by the Free Software Foundation; either\r
+version 2.1 of the License, or (at your option) any later version.\r
+\r
+This library is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+Lesser General Public License for more details.\r
+\r
+You should have received a copy of the GNU Lesser General Public\r
+License along with this library; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+*/\r
+package com.swabunga.spell.event;\r
+\r
+/**\r
+ * Offers basic methods to manipulate a text string representing a word,\r
+ * together with the start and end index of that word. The end index is\r
+ * always recomputed as start plus the length of the text.\r
+ */\r
+public class Word {\r
+\r
+  //~ Instance/static variables ...............................................\r
+\r
+  private int end;\r
+  private int start;\r
+  private String text;\r
+\r
+  //~ Constructors ............................................................\r
+\r
+  /**\r
+   * Creates a new Word object.\r
+   *\r
+   * @param text the String representing the word.\r
+   * @param start the start index of the word.\r
+   */\r
+  public Word(String text, int start) {\r
+    this.text = text;\r
+    this.start = start;\r
+    setEnd();\r
+  }\r
+\r
+  /**\r
+   * Creates a new Word object by cloning an existing Word object.\r
+   *\r
+   * @param w the word object to clone.\r
+   */\r
+  public Word(Word w) {\r
+    // Fields are assigned directly instead of going through copy(), so that\r
+    // no overridable method runs while the object is still being constructed.\r
+    this.text = w.toString();\r
+    this.start = w.getStart();\r
+    setEnd();\r
+  }\r
+\r
+  //~ Methods .................................................................\r
+\r
+  /**\r
+   * Evaluate the end of word position.\r
+   * @return the end index of the word.\r
+   */\r
+  public int getEnd() {\r
+    return end;\r
+  }\r
+\r
+  /**\r
+   * Set the start index of the word; the end index is updated accordingly.\r
+   *\r
+   * @param s the start index.\r
+   */\r
+  public void setStart(int s) {\r
+    start = s;\r
+    setEnd();\r
+  }\r
+\r
+  /**\r
+   * Evaluate the start of word position.\r
+   * @return the start index.\r
+   */\r
+  public int getStart() {\r
+    return start;\r
+  }\r
+\r
+  /**\r
+   * Set the text to a new string value; the end index is updated accordingly.\r
+   *\r
+   * @param s the new text\r
+   */\r
+  public void setText(String s) {\r
+    text = s;\r
+    setEnd();\r
+  }\r
+\r
+  /**\r
+   * Supply the text string representing the word\r
+   * @return the String representing the word.\r
+   */\r
+  public String getText() {\r
+    return text;\r
+  }\r
+\r
+  /**\r
+   * Sets the value of this Word to be a copy of another.\r
+   * The text is taken from the other word's toString() value.\r
+   *\r
+   * @param w the Word to copy.\r
+   */\r
+  public void copy(Word w) {\r
+    text = w.toString();\r
+    start = w.getStart();\r
+    setEnd();\r
+  }\r
+\r
+  /**\r
+   * Evaluate the length of the word.\r
+   * @return the length of the word.\r
+   */\r
+  public int length() {\r
+    return text.length();\r
+  }\r
+\r
+  /**\r
+   * Supply the text representing the word.\r
+   * @return the text representing the word.\r
+   */\r
+  @Override\r
+  public String toString() {\r
+    return text;\r
+  }\r
+\r
+  /**\r
+   * Set the end index of the word to the start index plus the text length.\r
+   */\r
+  private void setEnd() {\r
+    end = start + text.length();\r
+  }\r
+}\r