4 * Nucleus: PHP/MySQL Weblog CMS (http://nucleuscms.org/)
5 * Copyright (C) 2003-2009 The Nucleus Group
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
11 * (see nucleus/documentation/index.html#license for more info)
14 * SEARCH(querystring) offers different functionality to create an
15 * SQL query to find certain items. (and comments)
17 * based on code by David Altherr:
18 * http://www.evolt.org/article/Boolean_Fulltext_Searching_with_PHP_and_MySQL/18/15665/
19 * http://davidaltherr.net/web/php_functions/boolean/funcs.mysql.boolean.txt
21 * @license http://nucleuscms.org/license.txt GNU General Public License
22 * @copyright Copyright (C) 2002-2009 The Nucleus Group
24 * @version $NucleusJP: SEARCH.php,v 1.7 2006/07/20 08:01:52 kimitake Exp $
// Constructor-style initialiser: normalises the raw query text and caches the
// derived values on the object (encoding, jpmarked, querystring, inclusive, blogs).
// Presumably the PHP 4 style constructor of a class named SEARCH; the class
// header is not visible in this listing, so confirm.
// NOTE(review): the embedded line numbers skip values (37, 39, 42, ...), so some
// original lines, closing braces among them, are absent from this listing.
//
// @param string $text raw search query, expected in the site character set (_CHARSET)
37 function SEARCH($text) {
39 // $text = preg_replace ("/[<,>,=,?,!,#,^,(,),[,\],:,;,\\\,%]/","",$text);
42 /* * * for jp * * * * * * * * * * */
// Reduce _CHARSET to letters, digits, hyphen and underscore, lowercased, so it
// can be compared against utf-8 below.
43 $this->encoding = strtolower(preg_replace('|[^a-z0-9-_]|i', '', _CHARSET));
// All further processing works on UTF-8; other site encodings are converted first.
44 if ($this->encoding != 'utf-8') {
45 $text = mb_convert_encoding($text, "UTF-8", $this->encoding);
// E3 80 80 is the UTF-8 encoding of U+3000 (ideographic space); it is turned
// into a plain space so that it separates terms like any other blank.
47 $text = str_replace ("\xE3\x80\x80",' ',$text);
// Remove punctuation from the query before it is parsed.
// NOTE(review): inside a double-quoted PHP string the trailing \\% reaches PCRE
// as an escaped percent sign, so a literal backslash is not removed here, unlike
// in the commented-out pattern above. Confirm whether that is intended.
48 $text = preg_replace ("/[<>=?!#^()[\]:;\\%]/","",$text);
// Byte-level regex fragments for one-, two- and three-byte UTF-8 sequences;
// two and three are interpolated into the pattern in boolean_inclusive_atoms().
50 $this->ascii = '[\x00-\x7F]';
51 $this->two = '[\xC0-\xDF][\x80-\xBF]';
52 $this->three = '[\xE0-\xEF][\x80-\xBF][\x80-\xBF]';
// Normalised query string consumed later by boolean_sql_where().
54 $this->jpmarked = $this->boolean_mark_atoms_jp($text);
55 /* * * * * * * * * * * * * * * * */
// querystring keeps the cleaned UTF-8 text; inclusive holds the result of
// boolean_inclusive_atoms() on it and is read by boolean_sql_select().
57 $this->querystring = $text;
58 // $this->marked = $this->boolean_mark_atoms($text);
59 $this->inclusive = $this->boolean_inclusive_atoms($text);
60 $this->blogs = array();
62 // get all public searchable blogs, no matter what, include the current blog always.
// NOTE(review): only the bincludesearch=1 lookup is visible here; the step that
// adds the current blog is not part of this listing.
// NOTE(review): mysql_fetch_object() belongs to the ext/mysql API, which was
// removed in PHP 7.0.
63 $res = sql_query('SELECT bnumber FROM '.sql_table('blog').' WHERE bincludesearch=1 ');
64 while ($obj = mysql_fetch_object($res))
65 $this->blogs[] = intval($obj->bnumber);
// Builds the SQL expression used to score each row for relevance.
// Terms taken from $this->inclusive that are at least 4 bytes long are collected
// into a single MATCH ... AGAINST expression; the other terms get a per-term
// score from boolean_sql_select_short(). The pieces are joined with a plus sign.
// NOTE(review): $stringsum, $stringsum_long and $stringsum_a are not initialised
// in the visible lines, and the return statement is not part of this listing.
//
// @param string $match comma-separated column list, interpolated into the SQL as is
68 function boolean_sql_select($match){
69 if (strlen($this->inclusive) > 0) {
70 /* build sql for determining score for each record */
71 $result=explode(" ",$this->inclusive);
72 for($cth=0;$cth<count($result);$cth++){
// strlen() counts bytes, so this threshold is not a character count for
// multi-byte terms.
73 if(strlen($result[$cth])>=4){
74 $stringsum_long .= " $result[$cth] ";
// NOTE(review): original line 75 is absent; presumably the next statement is the
// else branch for shorter terms - confirm.
76 $stringsum_a[] = ' '.$this->boolean_sql_select_short($result[$cth],$match).' ';
// addslashes() is the only escaping applied before the long terms enter the SQL text.
80 if(strlen($stringsum_long)>0){
81 $stringsum_long = addslashes($stringsum_long);
82 $stringsum_a[] = " match ($match) against ('$stringsum_long') ";
// Sum of all partial scores.
85 $stringsum .= implode("+",$stringsum_a);
// Reduces a query string to its space-separated terms: boolean keywords are
// folded into the short operator syntax, a pattern for negated parenthesised
// groups is applied, and the remaining grouping characters become blanks.
// The result is converted back to the site encoding when that is not utf-8.
// NOTE(review): eregi_replace() was deprecated in PHP 5.3 and removed in PHP 7.0.
// NOTE(review): the trailing arguments of the preg_replace() call and the end of
// the block comment that follows it are not part of this listing, and neither is
// the return statement.
//
// @param string $string query text, expected to be UTF-8 at this point
93 function boolean_inclusive_atoms($string){
// Trim and collapse every run of two or more whitespace characters into one blank.
94 $result=trim($string);
95 $result=preg_replace("/([[:space:]]{2,})/",' ',$result);
97 /* convert normal boolean operators to shortened syntax */
98 $result=eregi_replace(' not ',' -',$result);
99 $result=eregi_replace(' and ',' ',$result);
100 $result=eregi_replace(' or ',',',$result);
102 /* drop unnecessary spaces */
103 $result=str_replace(' ,',',',$result);
104 $result=str_replace(', ',',',$result);
105 $result=str_replace('- ','-',$result);
106 $result=str_replace('+','',$result);
108 /* strip exclusive atoms */
109 $result=preg_replace(
110 // "(\-\([A-Za-z0-9]{1,}[A-Za-z0-9\-\.\_\,]{0,}\))",
111 "(\-\(([A-Za-z0-9]|$this->two|$this->three){1,}([A-Za-z0-9\-\.\_\,]|$this->two|$this->three){0,}\))",
115 /* $result=preg_replace(
116 // "(\-[A-Za-z0-9]{1,}[A-Za-z0-9\-\.\_]{0,})",
117 "(\-([A-Za-z0-9]|$this->two|$this->three){1,}([A-Za-z0-9\-\.\_\,]|$this->two|$this->three){0,})",
121 $result=str_replace('(',' ',$result);
122 $result=str_replace(')',' ',$result);
123 $result=str_replace(',',' ',$result);
124 if ($this->encoding != 'utf-8') {
125 $result = mb_convert_encoding($result, $this->encoding, "UTF-8");
// Builds the WHERE clause for the search.
// In the visible lines $result is first derived from $this->marked through two
// preg_replace() calls and is then overwritten from $this->jpmarked, so only the
// output of boolean_sql_where_jp_short() survives.
// NOTE(review): the assignment to $this->marked is commented out in the
// constructor, and several lines of this method are absent from the listing
// (the first part may sit inside a block comment) - confirm against the full file.
// NOTE(review): the /e modifier used by both patterns was deprecated in PHP 5.5
// and removed in PHP 7.0.
//
// @param string $match comma-separated column list
130 function boolean_sql_where($match){
132 $result = $this->marked;
// Marked atoms of four or more characters: rewritten to a MATCH ... AGAINST test.
133 $result = preg_replace(
134 "/foo\[\(\'([^\)]{4,})\'\)\]bar/e",
135 " 'match ('.\$match.') against (\''.\$this->copyvalue(\"$1\").'\') > 0 ' ",
// Marked atoms of one to three characters: rewritten via boolean_sql_where_short().
144 $result = preg_replace(
145 "/foo\[\(\'([^\)]{1,3})\'\)\]bar/e",
146 " '('.\$this->boolean_sql_where_short(\"$1\",\"$match\").')' ",
// jp path: restart from the string prepared in the constructor.
154 $result = $this->jpmarked; /* for jp */
155 $result = $this->boolean_sql_where_jp_short($result,$match);/* for jp */
// Hand the clause back in the site encoding when that is not utf-8.
156 if ($this->encoding != 'utf-8') {
157 $result = mb_convert_encoding($result, $this->encoding, "UTF-8");
165 // there must be a simple way to simply copy a value with backslashes in it through
166 // the preg_replace, but I cannot currently find it (karma 2003-12-30)
// Called from the replacement expression of the first preg_replace() in
// boolean_sql_where() with the captured atom as argument.
// NOTE(review): the body of this method is not part of this listing, so its exact
// behaviour cannot be documented from here.
167 function copyvalue($foo) {
// Normalises a query string and wraps every space-separated word in a
// foo[(...)]bar marker, then spells the short operators out as AND, OR and NOT.
// The markers are what the two /e patterns in boolean_sql_where() look for.
// NOTE(review): the only visible call to this method, in the constructor, is
// commented out.
// NOTE(review): eregi_replace() was deprecated in PHP 5.3 and removed in PHP 7.0.
// The return statement is not part of this listing.
//
// @param string $string query text
183 function boolean_mark_atoms($string){
184 $result=trim($string);
185 $result=preg_replace("/([[:space:]]{2,})/",' ',$result);
187 // convert normal boolean operators to shortened syntax
188 $result=eregi_replace(' not ',' -',$result);
189 $result=eregi_replace(' and ',' ',$result);
190 $result=eregi_replace(' or ',',',$result);
193 // strip excessive whitespace
194 $result=str_replace('( ','(',$result);
195 $result=str_replace(' )',')',$result);
196 $result=str_replace(', ',',',$result);
197 $result=str_replace(' ,',',',$result);
198 $result=str_replace('- ','-',$result);
199 $result=str_replace('+','',$result);
201 // remove double spaces (we might have introduced some new ones above)
202 $result=trim($result);
203 $result=preg_replace("/([[:space:]]{2,})/",' ',$result);
205 // apply arbitrary function to all 'word' atoms
// Each word, including any comma or minus it still contains, ends up inside one marker.
207 $result_a = explode(" ",$result);
208 for($word=0;$word<count($result_a);$word++){
209 $result_a[$word] = "foo[('".$result_a[$word]."')]bar";
211 $result = implode(" ",$result_a);
213 // dispatch ' ' to ' AND '
214 $result=str_replace(' ',' AND ',$result);
216 // dispatch ',' to ' OR '
217 $result=str_replace(',',' OR ',$result);
219 // dispatch '-' to ' NOT '
220 $result=str_replace(' -',' NOT ',$result);
// Builds one LIKE test per column for a short term and joins the tests with OR.
// The LIKE pattern puts a blank on each side of the term, so the term only
// matches where it is surrounded by spaces in the column value.
// NOTE(review): $string is interpolated into the SQL text without escaping in
// this method - confirm that callers sanitise it.
// The return statement is not part of this listing.
//
// @param string $string the search term
// @param string $match  comma-separated column list
224 function boolean_sql_where_short($string,$match){
225 $match_a = explode(',',$match);
226 for($ith=0;$ith<count($match_a);$ith++){
227 $like_a[$ith] = " $match_a[$ith] LIKE '% $string %' ";
229 $like = implode(" OR ",$like_a);
// Builds the SQL score expression for a short term.
// For every column the expression is
//   weight * (LENGTH(col) - LENGTH(REPLACE(LOWER(col), LOWER(term), ''))) / LENGTH(term)
// that is, the number of bytes removed by deleting the term, divided by the term
// length, times a weight of .2. The per-column expressions are joined with a plus sign.
// NOTE(review): original line 239 is absent; the implode() below suggests the
// string is assigned to $score_a[$ith] - confirm. The return statement is not
// part of this listing either.
//
// @param string $string the search term
// @param string $match  comma-separated column list
235 function boolean_sql_select_short($string,$match){
236 $match_a = explode(',',$match);
237 $score_unit_weight = .2;
238 for($ith=0;$ith<count($match_a);$ith++){
240 " $score_unit_weight*(
241 LENGTH(" . addslashes($match_a[$ith]) . ") -
242 LENGTH(REPLACE(LOWER(" . addslashes($match_a[$ith]) . "),LOWER('" . addslashes($string) . "'),'')))
243 /LENGTH('" . addslashes($string) . "') ";
245 $score = implode(" + ",$score_a);
250 /***********************************************
252 ***********************************************/
// Normalises a query string into space-separated tokens for
// boolean_sql_where_jp_short(): the keywords not, and, or become the short
// operators, stray blanks around operators are removed, and every comma is
// finally given a leading blank so that an OR term becomes its own token that
// starts with a comma.
// NOTE(review): eregi_replace() was deprecated in PHP 5.3 and removed in PHP 7.0.
// The return statement is not part of this listing.
//
// @param string $string query text
254 function boolean_mark_atoms_jp($string){
256 $result=trim($string);
257 $result=preg_replace("/([[:space:]]{2,})/",' ',$result);
259 /* convert normal boolean operators to shortened syntax */
260 $result=eregi_replace(' not ',' -',$result);
261 $result=eregi_replace(' and ',' ',$result);
262 $result=eregi_replace(' or ',',',$result);
264 /* strip excessive whitespace */
265 $result=str_replace(', ',',',$result);
266 $result=str_replace(' ,',',',$result);
267 $result=str_replace('- ','-',$result);
268 $result=str_replace('+','',$result);
// Split OR terms off as separate tokens: a,b becomes a ,b
270 $result=str_replace(',',' ,',$result);
// Builds the WHERE clause from the token string made by boolean_mark_atoms_jp().
// The first token starts the clause; each later token is appended according to
// its first character: a comma adds an OR group, a minus adds a negated group
// joined with AND, anything else adds a plain AND group. Within a group there is
// one LIKE test per column, with the column name prefixed by the alias i.
// NOTE(review): in each assignment below the concatenation operator binds tighter
// than the ternary operator, so the whole string to the left of the question mark
// becomes the condition rather than the preg_match() result alone. The two arms
// of each ternary are absent from this listing; verify the intended grouping
// against the full file.
// NOTE(review): addslashes() is the only escaping applied to the terms, and LIKE
// wildcards inside a term are not escaped.
// The return statement is not part of this listing.
//
// @param string $string space-separated tokens
// @param string $match  comma-separated column list
275 function boolean_sql_where_jp_short($string,$match){
276 $match_a = explode(',',$match);
277 $key_a = explode(' ',$string);
// First token: one LIKE test per column, joined with or.
279 for($ith=0;$ith<count($match_a);$ith++){
280 // $temp_a[$ith] = "(i.$match_a[$ith] LIKE '%" . addslashes($key_a[0]) . "%') ";
281 $temp_a[$ith] = "(i.$match_a[$ith] LIKE "
282 . preg_match('/[a-zA-Z]/', $key_a[0]) ?
285 . " '%" . addslashes($key_a[0]) . "%') ";
287 $like = '('.implode(' or ',$temp_a).')';
// Remaining tokens, dispatched on their first character.
289 for($kn=1; $kn<count($key_a); $kn++){
// Comma prefix: OR group; the comma itself is dropped from the term.
290 if(substr($key_a[$kn],0,1) == ","){
291 for($ith=0;$ith<count($match_a);$ith++){
292 // $temp_a[$ith] = " (i.$match_a[$ith] LIKE '%" . addslashes(substr($key_a[$kn],1)) . "%') ";
293 $temp_a[$ith] = " (i.$match_a[$ith] LIKE "
294 . preg_match('/[a-zA-Z]/',$key_a[$kn]) ?
297 . " '%" . addslashes(substr($key_a[$kn],1)) . "%') ";
299 $like .=' OR ('. implode(' or ',$temp_a).')';
// No minus prefix: AND group for a required term.
300 }elseif(substr($key_a[$kn],0,1) != '-'){
301 for($ith=0;$ith<count($match_a);$ith++){
302 // $temp_a[$ith] = " (i.$match_a[$ith] LIKE '%" . addslashes($key_a[$kn]) . "%') ";
303 $temp_a[$ith] = " (i.$match_a[$ith] LIKE "
304 . preg_match('/[a-zA-Z]/', $key_a[$kn]) ?
307 . " '%" . addslashes($key_a[$kn]) . "%') ";
309 $like .=' AND ('. implode(' or ',$temp_a).')';
// NOTE(review): original line 310 is absent; presumably the else branch for
// tokens that start with a minus begins here. The term must be missing from
// every column, hence the and between the per-column tests.
311 for($ith=0;$ith<count($match_a);$ith++){
312 // $temp_a[$ith] = " NOT(i.$match_a[$ith] LIKE '%" . addslashes(substr($key_a[$kn],1)) . "%') ";
313 $temp_a[$ith] = " NOT(i.$match_a[$ith] LIKE "
314 . preg_match('/[a-zA-Z]/', $key_a[$kn]) ?
317 . " '%" . addslashes(substr($key_a[$kn],1)) . "%') ";
319 $like .=' AND ('. implode(' and ',$temp_a).')';
// Wrap the complete condition in one pair of parentheses.
323 $like = '('.$like.')';
327 /***********************************************/