4 * Nucleus: PHP/MySQL Weblog CMS (http://nucleuscms.org/)
\r
5 * Copyright (C) 2003-2005 The Nucleus Group
\r
7 * This program is free software; you can redistribute it and/or
\r
8 * modify it under the terms of the GNU General Public License
\r
9 * as published by the Free Software Foundation; either version 2
\r
10 * of the License, or (at your option) any later version.
\r
11 * (see nucleus/documentation/index.html#license for more info)
\r
13 * SEARCH(querystring) offers different functionality to create an
\r
14 * SQL query to find certain items. (and comments)
\r
16 * based on code by David Altherr:
\r
17 * http://www.evolt.org/article/Boolean_Fulltext_Searching_with_PHP_and_MySQL/18/15665/
\r
18 * http://davidaltherr.net/web/php_functions/boolean/funcs.mysql.boolean.txt
\r
21 * $Id: SEARCH.php,v 1.5 2005-03-16 08:10:35 kimitake Exp $
\r
22 $ $NucleusJP: SEARCH.php,v 1.4 2005/03/12 06:19:05 kimitake Exp $
\r
// Prepares a search for the query string $text: normalizes the text, derives the
// atom lists used by the SQL builders and collects the ids of the searchable blogs.
// NOTE(review): presumably the PHP 4 style constructor of a class named SEARCH (the
// class header is not visible in this extract); such constructors are no longer
// invoked automatically by PHP 8 - confirm.
34 function SEARCH($text) {
\r
37 // $text = preg_replace ("/[<,>,=,?,!,#,^,(,),[,\],:,;,\\\,%]/","",$text);
\r
39 /* * * for jp * * * * * * * * * * */
\r
// Turn full-width spaces into ASCII spaces before the query is split into atoms.
40 $text = $this->zenspace_replace($text);
\r
// Remove the characters < > = ? ! # ^ ( ) [ ] : ; and % from the query.
// NOTE(review): inside this double-quoted string "\\%" reaches PCRE as an escaped
// percent sign, so a backslash itself is not removed (the commented-out pattern
// above did remove it) - confirm this is intended.
41 $text = preg_replace ("/[<>=?!#^()[\]:;\\%]/","",$text);
\r
// Regex fragments for one character of 1, 2 or 3 bytes; two and three are
// interpolated into the patterns of boolean_inclusive_atoms().
// NOTE(review): the byte ranges look like EUC-JP - confirm the expected encoding.
43 $this->ascii = '[\x00-\x7F]';
\r
44 $this->two = '[\x8E\xA1-\xFE][\xA1-\xFE]';
\r
45 $this->three = '\x8F[\xA1-\xFE][\xA1-\xFE]';
\r
// Operator-normalized atom string, consumed later by boolean_sql_where().
47 $this->jpmarked = $this->boolean_mark_atoms_jp($text);
\r
48 /* * * * * * * * * * * * * * * * */
\r
50 $this->querystring = $text;
\r
// $this->marked is no longer filled in; only the jp atom list above is prepared.
51 // $this->marked = $this->boolean_mark_atoms($text);
\r
52 $this->inclusive = $this->boolean_inclusive_atoms($text);
\r
53 $this->blogs = array();
\r
55 // collect the number of every blog that is flagged as searchable (bincludesearch=1).
\r
56 $res = sql_query('SELECT bnumber FROM '.sql_table('blog').' WHERE bincludesearch=1 ');
\r
// NOTE(review): mysql_fetch_object() belongs to the mysql extension that was removed
// in PHP 7; what sql_query() returns is not visible here - confirm before porting.
57 while ($obj = mysql_fetch_object($res))
\r
58 $this->blogs[] = intval($obj->bnumber);
\r
60 /***********************************************
\r
61 zenkaku space to space
\r
62 ***********************************************/
\r
/**
 * Replace every full-width (zenkaku) space, i.e. the byte pair 0xA1 0xA1,
 * with a single ASCII space.
 *
 * The text is walked one character at a time so that only a pair sitting on
 * a character boundary is replaced; a 0xA1 0xA1 sequence that merely spans
 * two neighbouring characters is left alone. The width of a character is
 * decided from its lead byte:
 *   - below 0x8E          : 1 byte
 *   - 0x8E or above 0xA0  : 2 bytes
 *   - 0x8F .. 0xA0        : 3 bytes
 * NOTE(review): these ranges look like EUC-JP - confirm the expected encoding.
 *
 * A multi-byte character that is cut off by the end of the string is copied
 * through unchanged (reading past the end used to append NUL bytes).
 *
 * @param string $text raw query text
 * @return string text with full-width spaces turned into ASCII spaces
 */
function zenspace_replace($text){
    $text = (string) $text;
    $len = strlen($text);
    $out = '';
    $pos = 0;

    while ($pos < $len) {
        $lead = ord($text[$pos]);

        if ($lead < 0x8e) {
            $width = 1;
        } elseif ($lead > 0xa0 || $lead < 0x8f) {
            $width = 2;
        } else {
            $width = 3;
        }

        // substr() stops at the end of the string, so a truncated trailing
        // sequence yields only the bytes that really exist.
        $char = substr($text, $pos, $width);
        if ($char === "\xA1\xA1") {
            $char = "\x20";
        }

        $out .= $char;
        $pos += $width;
    }

    return $out;
}
\r
85 /***********************************************/
\r
// Builds the SQL expression used to score a record for the current query:
// words of 4 or more characters are collected into one
// "match ($match) against ('...')" term, shorter words each get a term from
// boolean_sql_select_short(), and all terms are joined with "+".
// $match is inserted into the SQL as given.
87 function boolean_sql_select($match){
\r
88 // $string = $this->inclusive;
\r
89 // if (strlen($string) > 0) {
\r
90 /* build sql for determining score for each record */
\r
// Pattern for one word: an ASCII letter or digit followed by letters, digits, "-", "." or "_".
// NOTE(review): the call this pattern belongs to and its subject string are not
// visible in this extract - presumably preg_match_all() filling $result; confirm.
93 "([A-Za-z0-9]{1,}[A-Za-z0-9\-\.\_]{0,})",
\r
// Keep only the list of full matches.
96 $result = $result[0];
\r
97 for($cth=0;$cth<count($result);$cth++){
\r
98 if(strlen($result[$cth])>=4){
\r
// NOTE(review): $stringsum_long is appended to without a visible initialization.
99 $stringsum_long .= " $result[$cth] ";
\r
101 $stringsum_a[] = ' '.$this->boolean_sql_select_short($result[$cth],$match).' ';
\r
104 if(strlen($stringsum_long)>0){
\r
105 $stringsum_long = addslashes($stringsum_long);
\r
106 $stringsum_a[] = " match ($match) against ('$stringsum_long') ";
\r
// NOTE(review): when no word was found $stringsum_a is never created, so implode()
// receives an undefined variable here; $stringsum itself is also not initialized
// in the visible lines - confirm and guard.
108 $stringsum .= implode("+",$stringsum_a);
\r
/**
 * Reduce a query string to its inclusive atoms, i.e. the words that should
 * be present, separated by spaces.
 *
 * Steps:
 *   1. trim the text and collapse runs of whitespace;
 *   2. rewrite the word operators " not ", " and ", " or " (any letter case)
 *      to the short forms " -", " " and ",";
 *   3. drop spaces around "," and after "-", and remove "+";
 *   4. strip every excluded atom ("-word" and "-(word,word)");
 *   5. turn the remaining "(", ")" and "," into spaces.
 *
 * An atom starts with an ASCII letter/digit or a multi-byte character
 * described by $this->two / $this->three, which must already be set.
 *
 * The operator rewriting uses preg_replace() with the "i" modifier because
 * eregi_replace() no longer exists as of PHP 7.
 *
 * @param string $string query text
 * @return string space separated inclusive atoms
 */
function boolean_inclusive_atoms($string){
    $result = trim($string);
    $result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

    /* convert normal boolean operators to shortened syntax */
    $result = preg_replace('/ not /i', ' -', $result);
    $result = preg_replace('/ and /i', ' ', $result);
    $result = preg_replace('/ or /i', ',', $result);

    /* drop unnecessary spaces (pairs are applied one after another, in order) */
    $result = str_replace(
        array(' ,', ', ', '- ', '+'),
        array(',', ',', '-', ''),
        $result
    );

    /* strip exclusive atoms */
    // first character of an atom, then any number of follow-up characters
    $atom_head = "([A-Za-z0-9]|$this->two|$this->three){1,}";
    $atom_tail = "([A-Za-z0-9\-\.\_\,]|$this->two|$this->three){0,}";

    // "-(...)" groups first, then plain "-atom"; the outer parentheses of
    // each pattern are the PCRE delimiters.
    $result = preg_replace("(\-\(" . $atom_head . $atom_tail . "\))", '', $result);
    $result = preg_replace("(\-" . $atom_head . $atom_tail . ")", '', $result);

    $result = str_replace(array('(', ')', ','), ' ', $result);

    return $result;
}
\r
/**
 * Build the SQL WHERE condition for the current query.
 *
 * The condition is generated by boolean_sql_where_jp_short() from the atom
 * string stored in $this->jpmarked, using LIKE matching on the columns named
 * in $match.
 *
 * The older fulltext variant that rewrote $this->marked through
 * preg_replace() with the "e" modifier is gone: its result was overwritten
 * before it could be used, the assignment of $this->marked is commented out
 * in the initializer, and the "e" modifier no longer exists as of PHP 7.
 *
 * @param string $match comma separated list of columns to search
 * @return string SQL boolean expression
 */
function boolean_sql_where($match){
    return $this->boolean_sql_where_jp_short($this->jpmarked, $match);
}
\r
169 // there must be a simple way to simply copy a value with backslashes in it through
\r
170 // the preg_replace, but I cannot currently find it (karma 2003-12-30)
\r
// NOTE(review): the body of copyvalue() is not visible in this extract; going by the
// comment above it presumably hands $foo back unchanged - confirm.
171 function copyvalue($foo) {
\r
175 /***********************************************
\r
177 ***********************************************/
\r
/**
 * Normalize a query string into the compact atom syntax consumed by
 * boolean_sql_where_jp_short().
 *
 * In the result the atoms are separated by single spaces; an atom that is
 * OR-ed starts with "," and an excluded atom starts with "-", for example
 * "foo or bar not baz" becomes "foo ,bar -baz".
 *
 * The word operators " not ", " and ", " or " are recognized in any letter
 * case. preg_replace() with the "i" modifier is used for that because
 * eregi_replace() no longer exists as of PHP 7.
 *
 * @param string $string query text
 * @return string normalized atom string
 */
function boolean_mark_atoms_jp($string){
    $result = trim($string);
    $result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

    /* convert normal boolean operators to shortened syntax */
    $result = preg_replace('/ not /i', ' -', $result);
    $result = preg_replace('/ and /i', ' ', $result);
    $result = preg_replace('/ or /i', ',', $result);

    /* strip excessive whitespace (pairs are applied one after another, in order) */
    $result = str_replace(
        array(', ', ' ,', '- ', '+'),
        array(',', ',', '-', ''),
        $result
    );

    /* put a space in front of every "," so OR-ed atoms become separate words */
    $result = str_replace(',', ' ,', $result);

    return $result;
}
\r
/**
 * Turn a normalized atom string into an SQL condition built from LIKE tests.
 *
 * $string is split on single spaces. The first atom always opens the
 * expression. Every following atom is appended according to its prefix:
 *   - ",atom" : OR  (any column LIKE '%atom%')
 *   - "-atom" : AND (no column LIKE '%atom%')
 *   - "atom"  : AND (any column LIKE '%atom%')
 * The pieces are concatenated without extra grouping, so the usual SQL
 * precedence applies (AND binds tighter than OR).
 *
 * Atoms that are empty, or that consist of an operator only, are skipped.
 * They used to produce a LIKE '%%' test, which matches every row (or, with
 * NOT, excludes every row). The first atom is still emitted even when it is
 * empty so that the result is always a valid expression.
 *
 * Atom text is escaped with addslashes(); column names are inserted as
 * given, prefixed with the table alias "i.".
 *
 * @param string $string atom string, e.g. from boolean_mark_atoms_jp()
 * @param string $match  comma separated list of column names
 * @return string SQL boolean expression wrapped in parentheses
 */
function boolean_sql_where_jp_short($string,$match){
    $columns = explode(',', $match);
    $atoms = explode(' ', $string);

    $tests = array();
    foreach ($columns as $column) {
        $tests[] = "(i.$column LIKE '%" . addslashes($atoms[0]) . "%') ";
    }
    $like = '(' . implode(' or ', $tests) . ')';

    $total = count($atoms);
    for ($n = 1; $n < $total; $n++) {
        $atom = $atoms[$n];
        if ($atom === '') {
            continue;
        }

        // split a leading "," (OR) or "-" (NOT) off the search term
        $operator = $atom[0];
        if ($operator === ',' || $operator === '-') {
            // the cast covers PHP versions where substr() yields false here
            $term = (string) substr($atom, 1);
        } else {
            $operator = '';
            $term = $atom;
        }
        if ($term === '') {
            continue;
        }
        $term = addslashes($term);

        $tests = array();
        if ($operator === '-') {
            foreach ($columns as $column) {
                $tests[] = " NOT(i.$column LIKE '%" . $term . "%') ";
            }
            $like .= ' AND (' . implode(' and ', $tests) . ')';
        } else {
            foreach ($columns as $column) {
                $tests[] = " (i.$column LIKE '%" . $term . "%') ";
            }
            $glue = ($operator === ',') ? ' OR (' : ' AND (';
            $like .= $glue . implode(' or ', $tests) . ')';
        }
    }

    return '(' . $like . ')';
}
\r
233 /***********************************************/
\r
/**
 * Mark every word of a query string so that it can later be replaced by an
 * SQL condition.
 *
 * After normalizing whitespace and operators, each space separated word is
 * wrapped as foo[('word')]bar. The separators are then spelled out: " "
 * becomes " AND ", "," becomes " OR " and " -" becomes " NOT ".
 *
 * The word operators " not ", " and ", " or " are recognized in any letter
 * case. preg_replace() with the "i" modifier is used for that because
 * eregi_replace() no longer exists as of PHP 7.
 *
 * @param string $string query text
 * @return string marked query
 */
function boolean_mark_atoms($string){
    $result = trim($string);
    $result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

    // convert normal boolean operators to shortened syntax
    $result = preg_replace('/ not /i', ' -', $result);
    $result = preg_replace('/ and /i', ' ', $result);
    $result = preg_replace('/ or /i', ',', $result);

    // strip excessive whitespace (pairs are applied one after another, in order)
    $result = str_replace(
        array('( ', ' )', ', ', ' ,', '- ', '+'),
        array('(', ')', ',', ',', '-', ''),
        $result
    );

    // remove double spaces (we might have introduced some new ones above)
    $result = trim($result);
    $result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

    // wrap every space separated word in the foo[('...')]bar marker
    $words = explode(" ", $result);
    foreach ($words as $index => $word) {
        $words[$index] = "foo[('" . $word . "')]bar";
    }
    $result = implode(" ", $words);

    // dispatch ' ' to ' AND ', then ',' to ' OR ', then ' -' to ' NOT '
    $result = str_replace(' ', ' AND ', $result);
    $result = str_replace(',', ' OR ', $result);
    $result = str_replace(' -', ' NOT ', $result);

    return $result;
}
\r
/**
 * Build a LIKE condition that looks for $string as a separate word, i.e.
 * surrounded by spaces, in each column named in $match.
 *
 * $string and the column names are inserted into the SQL exactly as given;
 * any escaping has to be done by the caller.
 *
 * @param string $string word to look for
 * @param string $match  comma separated list of column names
 * @return string the per-column LIKE tests joined with " OR "
 */
function boolean_sql_where_short($string,$match){
    $conditions = array();

    foreach (explode(',', $match) as $column) {
        $conditions[] = ' ' . $column . " LIKE '% " . $string . " %' ";
    }

    return implode(' OR ', $conditions);
}
\r
// Builds the SQL score expression for one short word: for every column named in the
// comma separated list $match it emits
//   weight * (LENGTH(col) - LENGTH(REPLACE(LOWER(col), LOWER('word'), ''))) / LENGTH('word')
// that is, the weight times the number of case-insensitive occurrences of the word in
// that column. The per-column expressions are joined with " + ".
// Column names and the word are passed through addslashes() before being inserted.
287 function boolean_sql_select_short($string,$match){
\r
288 $match_a = explode(',',$match);
\r
// Weight of a single occurrence of the word.
289 $score_unit_weight = .2;
\r
290 for($ith=0;$ith<count($match_a);$ith++){
\r
// NOTE(review): the start of this statement is not visible in this extract; the string
// below is presumably assigned to $score_a[$ith], which is joined further down - confirm.
292 " $score_unit_weight*(
\r
293 LENGTH(" . addslashes($match_a[$ith]) . ") -
\r
294 LENGTH(REPLACE(LOWER(" . addslashes($match_a[$ith]) . "),LOWER('" . addslashes($string) . "'),'')))
\r
295 /LENGTH('" . addslashes($string) . "') ";
\r
// NOTE(review): an empty $string would make the SQL divide by LENGTH('') - confirm that
// callers never pass one.
297 $score = implode(" + ",$score_a);
\r