4 * Nucleus: PHP/MySQL Weblog CMS (http://nucleuscms.org/)
5 * Copyright (C) 2003-2009 The Nucleus Group
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
11 * (see nucleus/documentation/index.html#license for more info)
14 * SEARCH(querystring) offers different functionality to create an
15 * SQL query to find certain items. (and comments)
17 * based on code by David Altherr:
18 * http://www.evolt.org/article/Boolean_Fulltext_Searching_with_PHP_and_MySQL/18/15665/
19 * http://davidaltherr.net/web/php_functions/boolean/funcs.mysql.boolean.txt
21 * @license http://nucleuscms.org/license.txt GNU General Public License
22 * @copyright Copyright (C) 2002-2009 The Nucleus Group
24 * @version $NucleusJP: SEARCH.php,v 1.7 2006/07/20 08:01:52 kimitake Exp $
/**
 * Prepares a search object for a raw query string.
 *
 * The query is converted to UTF-8 when the site charset (_CHARSET) is
 * something else, full-width spaces become ASCII spaces and a fixed set of
 * punctuation characters is removed. The cleaned query is then stored in
 * $this->querystring, pre-parsed into $this->jpmarked and $this->inclusive,
 * and $this->blogs is filled with the ids of all blogs flagged as
 * searchable.
 *
 * @param string $text raw query string, encoded in the site charset
 */
function __construct($text)
{
    /* * * for jp * * * * * * * * * * */
    $this->encoding = strtolower(preg_replace('|[^a-z0-9-_]|i', '', _CHARSET));
    if ($this->encoding != 'utf-8') {
        $text = mb_convert_encoding($text, "UTF-8", $this->encoding);
    }

    // "\xE3\x80\x80" is the UTF-8 encoding of U+3000 (ideographic space).
    $text = str_replace("\xE3\x80\x80", ' ', $text);

    // Drop characters that have no meaning in a query.
    // NOTE(review): after PHP string unescaping PCRE receives "\%", i.e. an
    // escaped percent sign, so a literal backslash is NOT removed here.
    $text = preg_replace("/[<>=?!#^()[\]:;\\%]/", "", $text);

    // Byte-level patterns for ASCII, two-byte and three-byte UTF-8 sequences;
    // they are interpolated into regular expressions by other methods.
    $this->ascii = '[\x00-\x7F]';
    $this->two   = '[\xC0-\xDF][\x80-\xBF]';
    $this->three = '[\xE0-\xEF][\x80-\xBF][\x80-\xBF]';

    $this->jpmarked = $this->boolean_mark_atoms_jp($text);
    /* * * * * * * * * * * * * * * * */

    $this->querystring = $text;
    $this->inclusive   = $this->boolean_inclusive_atoms($text);
    $this->blogs       = array();

    // Collect every blog that is flagged as included in searches.
    $res = sql_query('SELECT bnumber FROM ' . sql_table('blog') . ' WHERE bincludesearch=1 ');
    // Assumes sql_query() yields a falsy value on failure - confirm against
    // the database layer. In that case the list simply stays empty.
    if ($res) {
        while ($obj = sql_fetch_object($res)) {
            $this->blogs[] = intval($obj->bnumber);
        }
    }
}

/**
 * PHP 4 style constructor, kept so that existing explicit calls such as
 * $this->SEARCH($text) or parent::SEARCH($text) keep working. PHP 8 no longer
 * treats a method named after its class as a constructor, which is why the
 * real work lives in __construct().
 *
 * @param string $text raw query string, encoded in the site charset
 */
function SEARCH($text)
{
    $this->__construct($text);
}
/**
 * Builds the SQL expression that computes a relevance score per record.
 *
 * $this->inclusive is split on single spaces. Atoms of four or more bytes
 * are collected into one "match (...) against (...)" term; shorter atoms are
 * scored individually through boolean_sql_select_short(). All partial scores
 * are joined with "+".
 *
 * @param string $match comma separated list of column names to score
 * @return string|null  the score expression; null when there are no
 *                      inclusive atoms at all (callers test it for truthiness)
 */
function boolean_sql_select($match)
{
    if (strlen($this->inclusive) == 0) {
        return null;
    }

    $long_atoms  = '';
    $score_parts = array();

    /* build sql for determining score for each record */
    foreach (explode(' ', $this->inclusive) as $atom) {
        // Consecutive separators leave empty atoms behind; scoring one
        // would put a division by LENGTH('') into the query.
        if ($atom === '') {
            continue;
        }

        if (strlen($atom) >= 4) {
            $long_atoms .= " $atom ";
        } else {
            $score_parts[] = ' ' . $this->boolean_sql_select_short($atom, $match) . ' ';
        }
    }

    if (strlen($long_atoms) > 0) {
        $long_atoms    = addslashes($long_atoms);
        $score_parts[] = " match ($match) against ('$long_atoms') ";
    }

    return implode('+', $score_parts);
}
/**
 * Reduces a query string to a space separated list of its atoms.
 *
 * Word operators (not/and/or, case-insensitive) are first rewritten to the
 * short syntax (-, space, comma), parenthesised exclusive groups such as
 * "-(foo,bar)" are removed, and the remaining grouping characters are turned
 * into spaces. The result is converted back from UTF-8 to the site encoding.
 *
 * Reads $this->two, $this->three and $this->encoding.
 *
 * @param string $string query string in UTF-8
 * @return string        atoms separated by spaces, in the site encoding
 */
function boolean_inclusive_atoms($string)
{
    $result = trim($string);
    $result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

    /* convert normal boolean operators to shortened syntax */
    // preg_replace with /i replaces eregi_replace(), which no longer exists
    // as of PHP 7.0.
    $result = preg_replace('/ not /i', ' -', $result);
    $result = preg_replace('/ and /i', ' ', $result);
    $result = preg_replace('/ or /i', ', ', $result);

    /* drop unnecessary spaces */
    $result = str_replace(' ,', ',', $result);
    $result = str_replace(', ', ',', $result);
    $result = str_replace('- ', '-', $result);
    $result = str_replace('+', '', $result);

    /* strip exclusive atoms */
    // The outer "(" and ")" are the PCRE delimiters. Only the parenthesised
    // form "-( ... )" is stripped; a bare "-word" stays in the result.
    $result = preg_replace(
        "(\-\(([A-Za-z0-9]|$this->two|$this->three){1,}([A-Za-z0-9\-\.\_\,]|$this->two|$this->three){0,}\))",
        '',
        $result
    );

    $result = str_replace('(', ' ', $result);
    $result = str_replace(')', ' ', $result);
    $result = str_replace(',', ' ', $result);

    if ($this->encoding != 'utf-8') {
        $result = mb_convert_encoding($result, $this->encoding, "UTF-8");
    }

    return $result;
}
/**
 * Builds the SQL WHERE condition for the current query.
 *
 * The pre-parsed query in $this->jpmarked is translated into LIKE conditions
 * by boolean_sql_where_jp_short(); the resulting SQL is converted from UTF-8
 * to the site encoding when that differs.
 *
 * The former implementation based on the preg_replace() "e" modifier and
 * $this->marked is gone: its result was overwritten before being used, and
 * the "e" modifier is not available from PHP 7.0 on.
 *
 * @param string $match comma separated list of column names to search in
 * @return string       SQL condition in the site encoding
 */
function boolean_sql_where($match)
{
    $result = $this->boolean_sql_where_jp_short($this->jpmarked, $match); /* for jp */

    if ($this->encoding != 'utf-8') {
        $result = mb_convert_encoding($result, $this->encoding, "UTF-8");
    }

    return $result;
}
/**
 * Returns its argument unchanged.
 *
 * Helper for passing a captured value that contains backslashes through a
 * method call inside a preg_replace() replacement expression
 * (karma 2003-12-30).
 *
 * @param mixed $foo value to pass through
 * @return mixed     the same value
 */
function copyvalue($foo)
{
    return $foo;
}
/**
 * Marks every space separated word of a query and spells out the operators.
 *
 * Word operators (not/and/or, case-insensitive) are rewritten to the short
 * syntax, superfluous whitespace is removed, every word is wrapped as
 * foo[('word')]bar, and finally space, comma and " -" are replaced by
 * " AND ", " OR " and " NOT ".
 *
 * @param string $string query string
 * @return string        marked query
 */
function boolean_mark_atoms($string)
{
    $result = trim($string);
    $result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

    // convert normal boolean operators to shortened syntax
    // (preg_replace with /i replaces eregi_replace(), gone as of PHP 7.0)
    $result = preg_replace('/ not /i', ' -', $result);
    $result = preg_replace('/ and /i', ' ', $result);
    $result = preg_replace('/ or /i', ',', $result);

    // strip excessive whitespace
    $result = str_replace('( ', '(', $result);
    $result = str_replace(' )', ')', $result);
    $result = str_replace(', ', ',', $result);
    $result = str_replace(' ,', ',', $result);
    $result = str_replace('- ', '-', $result);
    $result = str_replace('+', '', $result);

    // remove double spaces (we might have introduced some new ones above)
    $result = trim($result);
    $result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

    // apply arbitrary function to all 'word' atoms
    $result_a = explode(" ", $result);
    foreach ($result_a as $index => $word) {
        $result_a[$index] = "foo[('" . $word . "')]bar";
    }
    $result = implode(" ", $result_a);

    // dispatch ' ' to ' AND '
    $result = str_replace(' ', ' AND ', $result);

    // dispatch ',' to ' OR '
    $result = str_replace(',', ' OR ', $result);

    // dispatch '-' to ' NOT '
    $result = str_replace(' -', ' NOT ', $result);

    return $result;
}
/**
 * Builds an OR-chain of LIKE conditions that look for a whole word.
 *
 * For every column named in $match one condition of the form
 * " column LIKE '% word %' " is produced, so the word only matches when it
 * is surrounded by spaces.
 *
 * $string is embedded as-is; it has to be SQL-escaped by the caller.
 *
 * @param string $string word to look for
 * @param string $match  comma separated list of column names
 * @return string        the conditions joined with " OR "
 */
function boolean_sql_where_short($string, $match)
{
    $conditions = array();

    foreach (explode(',', $match) as $column) {
        $conditions[] = " {$column} LIKE '% {$string} %' ";
    }

    return implode(" OR ", $conditions);
}
/**
 * Builds the SQL score expression for one short search word.
 *
 * For every column in $match the expression evaluates to
 *   0.2 * (bytes removed when the word is deleted from the column)
 *       / (bytes in the word)
 * which is 0.2 per case-insensitive occurrence of the word. The per-column
 * expressions are added together.
 *
 * @param string $string search word (escaped here with addslashes)
 * @param string $match  comma separated list of column names
 * @return string        SQL expression; '0' for an empty word, because the
 *                       formula would otherwise divide by LENGTH('')
 */
function boolean_sql_select_short($string, $match)
{
    if ((string) $string === '') {
        return '0';
    }

    // Written as a string so the decimal point cannot become locale-dependent
    // when the value is embedded in SQL.
    $score_unit_weight = '0.2';
    $needle            = addslashes($string);
    $score_a           = array();

    foreach (explode(',', $match) as $column) {
        $column    = addslashes($column);
        $score_a[] =
            ' ' . $score_unit_weight . '*('
            . ' LENGTH(' . $column . ') -'
            . ' LENGTH(REPLACE(LOWER(' . $column . "),LOWER('" . $needle . "'),'')))"
            . " /LENGTH('" . $needle . "') ";
    }

    return implode(" + ", $score_a);
}
231 /***********************************************
233 ***********************************************/
/**
 * Normalises a query string into the short operator syntax used by
 * boolean_sql_where_jp_short().
 *
 * Word operators (not/and/or, case-insensitive) become "-", " " and ",".
 * Whitespace around commas and after "-" is removed, "+" is dropped, and
 * every comma finally gets a space in front of it, so that an OR term
 * arrives as its own space separated token starting with ",".
 *
 * @param string $string query string
 * @return string        normalised query, tokens separated by single spaces
 */
function boolean_mark_atoms_jp($string)
{
    $result = trim($string);
    $result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

    /* convert normal boolean operators to shortened syntax */
    // preg_replace with /i replaces eregi_replace(), gone as of PHP 7.0.
    // No /u flag: matching stays byte-wise, as it was with the POSIX functions.
    $result = preg_replace('/ not /i', ' -', $result);
    $result = preg_replace('/ and /i', ' ', $result);
    $result = preg_replace('/ or /i', ',', $result);

    /* strip excessive whitespace */
    $result = str_replace(', ', ',', $result);
    $result = str_replace(' ,', ',', $result);
    $result = str_replace('- ', '-', $result);
    $result = str_replace('+', '', $result);

    /* make every OR term a separate token that starts with a comma */
    $result = str_replace(',', ' ,', $result);

    return $result;
}
/**
 * Translates a normalised query into a SQL condition made of LIKE tests.
 *
 * $string is split on single spaces. The first token is always a plain
 * search term. Every later token is combined according to its first
 * character:
 *   ",term"  ->  OR  (term found in any column)
 *   "-term"  ->  AND (term found in none of the columns)
 *   "term"   ->  AND (term found in any column)
 * Columns are prefixed with the table alias "i.". A token without any ASCII
 * letter is compared with LIKE BINARY.
 *
 * Later tokens whose term is empty (for example a lone "," or "-") are
 * skipped: they would otherwise become LIKE '%%', which turns an OR into
 * "match everything" and a NOT into "match nothing".
 *
 * @param string $string normalised query (see boolean_mark_atoms_jp)
 * @param string $match  comma separated list of column names
 * @return string        SQL condition wrapped in parentheses
 */
function boolean_sql_where_jp_short($string, $match)
{
    $match_a = explode(',', $match);
    $key_a   = explode(' ', $string);
    $like    = '';

    foreach ($key_a as $kn => $key) {
        $is_first = ($kn == 0);
        // substr() may return false on short input in older PHP versions.
        $marker = $is_first ? '' : (string) substr($key, 0, 1);

        if ($marker === ',') {
            $term      = (string) substr($key, 1);
            $connector = ' OR (';
            $open      = ' (';
            $glue      = ' or ';
        } elseif ($marker === '-') {
            $term      = (string) substr($key, 1);
            $connector = ' AND (';
            $open      = ' NOT(';
            $glue      = ' and ';
        } else {
            $term      = $key;
            $connector = $is_first ? '(' : ' AND (';
            $open      = $is_first ? '(' : ' (';
            $glue      = ' or ';
        }

        if (!$is_first && $term === '') {
            continue;
        }

        $binKey  = preg_match('/[a-zA-Z]/', $key) ? '' : 'BINARY';
        $pattern = " '%" . addslashes($term) . "%') ";

        $temp_a = array();
        foreach ($match_a as $column) {
            $temp_a[] = $open . 'i.' . $column . ' LIKE ' . $binKey . $pattern;
        }

        $like .= $connector . implode($glue, $temp_a) . ')';
    }

    $like = '(' . $like . ')';

    return $like;
}
294 /***********************************************/