4 * Nucleus: PHP/MySQL Weblog CMS (http://nucleuscms.org/)
\r
5 * Copyright (C) 2003-2004 The Nucleus Group
\r
7 * This program is free software; you can redistribute it and/or
\r
8 * modify it under the terms of the GNU General Public License
\r
9 * as published by the Free Software Foundation; either version 2
\r
10 * of the License, or (at your option) any later version.
\r
11 * (see nucleus/documentation/index.html#license for more info)
\r
13 * SEARCH(querystring) offers different functionality to create an
\r
14 * SQL query to find certain items (and comments).
\r
16 * based on code by David Altherr:
\r
17 * http://www.evolt.org/article/Boolean_Fulltext_Searching_with_PHP_and_MySQL/18/15665/
\r
18 * http://davidaltherr.net/web/php_functions/boolean/funcs.mysql.boolean.txt
\r
31 function SEARCH($text) {
\r
34 // $text = preg_replace ("/[<,>,=,?,!,#,^,(,),[,\],:,;,\\\,%]/","",$text);
\r
36 /* * * for jp * * * * * * * * * * */
\r
37 $text = str_replace ("\xE3\x80\x80",' ',$text);
\r
38 $text = preg_replace ("/[<>=?!#^()[\]:;\\%]/","",$text);
\r
40 $this->ascii = '[\x00-\x7F]';
\r
41 $this->two = '[\xC0-\xDF][\x80-\xBF]';
\r
42 $this->three = '[\xE0-\xEF][\x80-\xBF][\x80-\xBF]';
\r
44 $this->jpmarked = $this->boolean_mark_atoms_jp($text);
\r
45 /* * * * * * * * * * * * * * * * */
\r
47 $this->querystring = $text;
\r
48 // $this->marked = $this->boolean_mark_atoms($text);
\r
49 $this->inclusive = $this->boolean_inclusive_atoms($text);
\r
50 $this->blogs = array();
\r
52 // get all public searchable blogs; no matter what, always include the current blog.
\r
53 $res = sql_query('SELECT bnumber FROM '.sql_table('blog').' WHERE bincludesearch=1 ');
\r
54 while ($obj = mysql_fetch_object($res))
\r
55 $this->blogs[] = intval($obj->bnumber);
\r
58 function boolean_sql_select($match){
\r
59 // $string = $this->inclusive;
\r
60 // if (strlen($string) > 0) {
\r
61 /* build sql for determining score for each record */
\r
64 "([A-Za-z0-9]{1,}[A-Za-z0-9\-\.\_]{0,})",
\r
67 $result = $result[0];
\r
68 for($cth=0;$cth<count($result);$cth++){
\r
69 if(strlen($result[$cth])>=4){
\r
70 $stringsum_long .= " $result[$cth] ";
\r
72 $stringsum_a[] = ' '.$this->boolean_sql_select_short($result[$cth],$match).' ';
\r
75 if(strlen($stringsum_long)>0){
\r
76 $stringsum_long = addslashes($stringsum_long);
\r
77 $stringsum_a[] = " match ($match) against ('$stringsum_long') ";
\r
79 $stringsum .= implode("+",$stringsum_a);
\r
87 function boolean_inclusive_atoms($string){
\r
88 $result=trim($string);
\r
89 $result=preg_replace("/([[:space:]]{2,})/",' ',$result);
\r
91 /* convert normal boolean operators to shortened syntax */
\r
92 $result=eregi_replace(' not ',' -',$result);
\r
93 $result=eregi_replace(' and ',' ',$result);
\r
94 $result=eregi_replace(' or ',',',$result);
\r
96 /* drop unnecessary spaces */
\r
97 $result=str_replace(' ,',',',$result);
\r
98 $result=str_replace(', ',',',$result);
\r
99 $result=str_replace('- ','-',$result);
\r
100 $result=str_replace('+','',$result);
\r
102 /* strip exclusive atoms */
\r
103 $result=preg_replace(
\r
104 // "(\-\([A-Za-z0-9]{1,}[A-Za-z0-9\-\.\_\,]{0,}\))",
\r
105 "(\-\(([A-Za-z0-9]|$this->two|$this->three){1,}([A-Za-z0-9\-\.\_\,]|$this->two|$this->three){0,}\))",
\r
109 $result=preg_replace(
\r
110 // "(\-[A-Za-z0-9]{1,}[A-Za-z0-9\-\.\_]{0,})",
\r
111 "(\-([A-Za-z0-9]|$this->two|$this->three){1,}([A-Za-z0-9\-\.\_\,]|$this->two|$this->three){0,})",
\r
115 $result=str_replace('(',' ',$result);
\r
116 $result=str_replace(')',' ',$result);
\r
117 $result=str_replace(',',' ',$result);
\r
122 function boolean_sql_where($match){
\r
124 $result = $this->marked;
\r
125 $result = preg_replace(
\r
126 "/foo\[\(\'([^\)]{4,})\'\)\]bar/e",
\r
127 " 'match ('.\$match.') against (\''.\$this->copyvalue(\"$1\").'\') > 0 ' ",
\r
130 $result = preg_replace(
\r
131 "/foo\[\(\'([^\)]{1,3})\'\)\]bar/e",
\r
132 " '('.\$this->boolean_sql_where_short(\"$1\",\"$match\").')' ",
\r
135 $result = $this->jpmarked; /* for jp */
\r
136 $result = $this->boolean_sql_where_jp_short($result,$match);/* for jp */
\r
140 // there must be a simple way to simply copy a value with backslashes in it through
\r
141 // the preg_replace, but I cannot currently find it (karma 2003-12-30)
\r
142 function copyvalue($foo) {
\r
146 /***********************************************
\r
148 ***********************************************/
\r
150 function boolean_mark_atoms_jp($string){
\r
152 $result=trim($string);
\r
153 $result=preg_replace("/([[:space:]]{2,})/",' ',$result);
\r
155 /* convert normal boolean operators to shortened syntax */
\r
156 $result=eregi_replace(' not ',' -',$result);
\r
157 $result=eregi_replace(' and ',' ',$result);
\r
158 $result=eregi_replace(' or ',',',$result);
\r
160 /* strip excessive whitespace */
\r
161 $result=str_replace(', ',',',$result);
\r
162 $result=str_replace(' ,',',',$result);
\r
163 $result=str_replace('- ','-',$result);
\r
164 $result=str_replace('+','',$result);
\r
166 $result=str_replace(',',' ,',$result);
\r
172 function boolean_sql_where_jp_short($string,$match){
\r
173 $match_a = explode(',',$match);
\r
174 $key_a = explode(' ',$string);
\r
176 for($ith=0;$ith<count($match_a);$ith++){
\r
177 $temp_a[$ith] = "(i.$match_a[$ith] LIKE '%" . addslashes($key_a[0]) . "%') ";
\r
179 $like = '('.implode(' or ',$temp_a).')';
\r
181 for($kn=1; $kn<count($key_a); $kn++){
\r
182 if(substr($key_a[$kn],0,1) == ","){
\r
183 for($ith=0;$ith<count($match_a);$ith++){
\r
184 $temp_a[$ith] = " (i.$match_a[$ith] LIKE '%" . addslashes(substr($key_a[$kn],1)) . "%') ";
\r
186 $like .=' OR ('. implode(' or ',$temp_a).')';
\r
187 }elseif(substr($key_a[$kn],0,1) != '-'){
\r
188 for($ith=0;$ith<count($match_a);$ith++){
\r
189 $temp_a[$ith] = " (i.$match_a[$ith] LIKE '%" . addslashes($key_a[$kn]) . "%') ";
\r
191 $like .=' AND ('. implode(' or ',$temp_a).')';
\r
193 for($ith=0;$ith<count($match_a);$ith++){
\r
194 $temp_a[$ith] = " NOT(i.$match_a[$ith] LIKE '%" . addslashes(substr($key_a[$kn],1)) . "%') ";
\r
196 $like .=' AND ('. implode(' and ',$temp_a).')';
\r
200 $like = '('.$like.')';
\r
204 /***********************************************/
\r
208 function boolean_mark_atoms($string){
\r
209 $result=trim($string);
\r
210 $result=preg_replace("/([[:space:]]{2,})/",' ',$result);
\r
212 //convert normal boolean operators to shortened syntax
\r
213 $result=eregi_replace(' not ',' -',$result);
\r
214 $result=eregi_replace(' and ',' ',$result);
\r
215 $result=eregi_replace(' or ',',',$result);
\r
218 //strip excessive whitespace
\r
219 $result=str_replace('( ','(',$result);
\r
220 $result=str_replace(' )',')',$result);
\r
221 $result=str_replace(', ',',',$result);
\r
222 $result=str_replace(' ,',',',$result);
\r
223 $result=str_replace('- ','-',$result);
\r
224 $result=str_replace('+','',$result);
\r
225 // apply arbitrary function to all 'word' atoms
\r
227 $result_a = explode(" ",$result);
\r
228 for($word=0;$word<count($result_a);$word++){
\r
229 $result_a[$word] = "foo[('".$result_a[$word]."')]bar";
\r
231 $result = implode(" ",$result_a);
\r
233 // dispatch ' ' to ' AND '
\r
234 $result=str_replace(' ',' AND ',$result);
\r
236 // dispatch ',' to ' OR '
\r
237 $result=str_replace(',',' OR ',$result);
\r
239 // dispatch '-' to ' NOT '
\r
240 $result=str_replace(' -',' NOT ',$result);
\r
244 function boolean_sql_where_short($string,$match){
\r
245 $match_a = explode(',',$match);
\r
246 for($ith=0;$ith<count($match_a);$ith++){
\r
247 $like_a[$ith] = " $match_a[$ith] LIKE '% $string %' ";
\r
249 $like = implode(" OR ",$like_a);
\r
253 function boolean_sql_select_short($string,$match){
\r
254 $match_a = explode(',',$match);
\r
255 $score_unit_weight = .2;
\r
256 for($ith=0;$ith<count($match_a);$ith++){
\r
258 " $score_unit_weight*(
\r
259 LENGTH(" . addslashes($match_a[$ith]) . ") -
\r
260 LENGTH(REPLACE(LOWER(" . addslashes($match_a[$ith]) . "),LOWER('" . addslashes($string) . "'),'')))
\r
261 /LENGTH('" . addslashes($string) . "') ";
\r
263 $score = implode(" + ",$score_a);
\r