4 * Nucleus: PHP/MySQL Weblog CMS (http://nucleuscms.org/)
\r
5 * Copyright (C) 2003-2004 The Nucleus Group
\r
7 * This program is free software; you can redistribute it and/or
\r
8 * modify it under the terms of the GNU General Public License
\r
9 * as published by the Free Software Foundation; either version 2
\r
10 * of the License, or (at your option) any later version.
\r
11 * (see nucleus/documentation/index.html#license for more info)
\r
13 * SEARCH(querystring) offers different functionality to create an
\r
14 * SQL query to find certain items. (and comments)
\r
16 * based on code by David Altherr:
\r
17 * http://www.evolt.org/article/Boolean_Fulltext_Searching_with_PHP_and_MySQL/18/15665/
\r
18 * http://davidaltherr.net/web/php_functions/boolean/funcs.mysql.boolean.txt
\r
// Initialiser for a search object (presumably the PHP4-style constructor of the
// SEARCH class; the class header is not visible here - confirm).
// Normalises the raw query text and caches derived values on $this:
//   ascii / two / three : byte-range regex fragments used by the atom parsers
//   jpmarked            : result of boolean_mark_atoms_jp() on the cleaned text
//   querystring         : the cleaned text itself
//   inclusive           : result of boolean_inclusive_atoms() on the cleaned text
//   blogs               : integer bnumber of every blog row with bincludesearch=1
// NOTE(review): this listing omits some original lines (blank lines, closing
// braces, possibly more), so only the statements visible here are described.
// NOTE(review): mysql_fetch_object() belongs to the legacy mysql extension that was
// removed in PHP 7.0; this loop needs porting before it can run on current PHP.
31 function SEARCH($text) {
\r
33 // $text = preg_replace ("/[<,>,=,?,!,#,^,(,),[,\],:,;,\\\,%]/","",$text);
\r
35 /* * * for jp * * * * * * * * * * */
\r
// Map full-width (zenkaku) spaces to ASCII spaces, then strip a fixed set of
// punctuation characters from the query.
36 $text = $this->zenspace_replace($text);
\r
37 $text = preg_replace ("/[<>=?!#^()[\]:;\\%]/","",$text);
\r
// Regex fragments: one 7-bit byte, a two-byte sequence, and a three-byte sequence
// led by 0x8F (the byte ranges look like EUC-JP - TODO confirm the site encoding).
39 $this->ascii = '[\x00-\x7F]';
\r
40 $this->two = '[\x8E\xA1-\xFE][\xA1-\xFE]';
\r
41 $this->three = '\x8F[\xA1-\xFE][\xA1-\xFE]';
\r
43 $this->jpmarked = $this->boolean_mark_atoms_jp($text);
\r
44 /* * * * * * * * * * * * * * * * */
\r
46 $this->querystring = $text;
\r
47 // $this->marked = $this->boolean_mark_atoms($text);
\r
48 $this->inclusive = $this->boolean_inclusive_atoms($text);
\r
49 $this->blogs = array();
\r
51 // get all public searchable blogs, no matter what, include the current blog always.
\r
52 $res = sql_query('SELECT bnumber FROM '.sql_table('blog').' WHERE bincludesearch=1 ');
\r
53 while ($obj = mysql_fetch_object($res))
\r
54 $this->blogs[] = intval($obj->bnumber);
\r
56 /***********************************************
\r
57 zenkaku space to space
\r
58 ***********************************************/
\r
// Walks $text byte by byte and rebuilds it, replacing the two-byte sequence
// A1 A1 (full-width space) with a single ASCII space (0x20).
//   - unpack with the C* format yields unsigned byte values in an array whose
//     first index is 1, which is why the loop starts at $i=1.
//   - A byte >= 0x8E is treated as the lead byte of a multi-byte character.
//     If that lead byte is 0x8E or above 0xA0, the character is read as two bytes.
//   - The three-byte append and the single-byte append visible below presumably sit
//     in the matching else branches (those lines are not shown in this listing).
// NOTE(review): the initialisation of $len and $temp, the index advance after a
// multi-byte character, and the return statement are not visible here; verify
// against the full file before changing this routine.
// NOTE(review): $ta[$i+1] and $ta[$i+2] are read without a bounds check, so a
// truncated multi-byte sequence at the end of the input would read past the array.
60 function zenspace_replace($text){
\r
61 $ta = unpack("C*",$text);
\r
64 for($i=1; $i<=$len; $i++){
\r
65 if($ta[$i]>= 0x8e){ // ja
\r
66 if($ta[$i]>0xa0 || $ta[$i]<0x8f){ // 2byte
\r
67 $char = chr($ta[$i]).chr($ta[$i+1]);
\r
68 if($char == "\xA1\xA1") $char = "\x20";
\r
72 $temp .= chr($ta[$i]).chr($ta[$i+1]).chr($ta[$i+2]);
\r
76 $temp .= chr($ta[$i]);
\r
81 /***********************************************/
\r
// Takes $match (used below as a column list inside a MATCH ... AGAINST clause).
// NOTE(review): every statement visible after the signature is commented out. The
// disabled code split $this->inclusive on spaces, routed words of 4+ bytes into a
// fulltext MATCH ... AGAINST term and shorter words through
// boolean_sql_select_short(), then joined the pieces with +.
// The end of that block comment, any live statements, and the return value are not
// visible in this listing - confirm against the full file.
83 function boolean_sql_select($match){
\r
84 // if (strlen($this->inclusive) > 0) {
\r
85 /* build sql for determining score for each record */
\r
86 /* $result=explode(" ",$this->inclusive);
\r
87 $result = $result[0];
\r
88 for($cth=0;$cth<count($result);$cth++){
\r
89 if(strlen($result[$cth])>=4){
\r
90 $stringsum_long .= " $result[$cth] ";
\r
92 $stringsum_a[] = ' '.$this->boolean_sql_select_short($result[$cth],$match).' ';
\r
95 if(strlen($stringsum_long)>0){
\r
96 $stringsum_long = addslashes($stringsum_long);
\r
97 $stringsum_a[] = " match ($match) against ('$stringsum_long') ";
\r
99 $stringsum .= implode("+",$stringsum_a);
\r
// Reduces a boolean query string to its inclusive (non-negated) words.
// Visible steps, in order:
//   1. trim and collapse runs of whitespace to one space
//   2. rewrite the words not / and / or (space-delimited, case-insensitive) to the
//      short operators -, space and comma
//   3. tidy spaces around commas and minus signs, drop plus signs
//   4. run two preg_replace passes whose patterns match a minus sign followed by a
//      parenthesised group or by a bare word (ASCII alphanumerics plus the
//      multi-byte fragments $this->two and $this->three set in SEARCH())
//   5. turn remaining parentheses and commas into spaces
// NOTE(review): the replacement and subject arguments of both preg_replace calls,
// and the return statement, are not visible in this listing.
// NOTE(review): eregi_replace() was deprecated in PHP 5.3 and removed in PHP 7.0;
// it has to be replaced (e.g. by preg_replace with the i modifier) to run today.
105 function boolean_inclusive_atoms($string){
\r
106 $result=trim($string);
\r
107 $result=preg_replace("/([[:space:]]{2,})/",' ',$result);
\r
109 /* convert normal boolean operators to shortened syntax */
\r
110 $result=eregi_replace(' not ',' -',$result);
\r
111 $result=eregi_replace(' and ',' ',$result);
\r
112 $result=eregi_replace(' or ',',',$result);
\r
114 /* drop unnecessary spaces */
\r
115 $result=str_replace(' ,',',',$result);
\r
116 $result=str_replace(', ',',',$result);
\r
117 $result=str_replace('- ','-',$result);
\r
118 $result=str_replace('+','',$result);
\r
120 /* strip exclusive atoms */
\r
121 $result=preg_replace(
\r
122 // "(\-\([A-Za-z0-9]{1,}[A-Za-z0-9\-\.\_\,]{0,}\))",
\r
123 "(\-\(([A-Za-z0-9]|$this->two|$this->three){1,}([A-Za-z0-9\-\.\_\,]|$this->two|$this->three){0,}\))",
\r
126 $result=preg_replace(
\r
127 // "(\-[A-Za-z0-9]{1,}[A-Za-z0-9\-\.\_]{0,})",
\r
128 "(\-([A-Za-z0-9]|$this->two|$this->three){1,}([A-Za-z0-9\-\.\_\,]|$this->two|$this->three){0,})",
\r
// Flatten what is left of the grouping and OR punctuation into plain spaces.
131 $result=str_replace('(',' ',$result);
\r
132 $result=str_replace(')',' ',$result);
\r
133 $result=str_replace(',',' ',$result);
\r
// Builds the WHERE fragment for the current query against the columns in $match.
// The live path takes the atom string prepared in SEARCH() ($this->jpmarked) and
// hands it, together with $match, to boolean_sql_where_jp_short().
// The earlier implementation (kept below inside a block comment) rewrote marked
// atoms with two preg_replace calls using the e modifier: words of 4+ characters
// into MATCH ... AGAINST terms and shorter ones via boolean_sql_where_short().
// NOTE(review): the return statement is not visible in this listing; presumably
// $result is returned - confirm.
138 function boolean_sql_where($match){
\r
139 /* $result = $this->marked;
\r
140 $result = preg_replace(
\r
141 "/foo\[\(\'([^\)]{4,})\'\)\]bar/e",
\r
142 " 'match ('.\$match.') against (\''.\$this->copyvalue(\"$1\").'\') > 0 ' ",
\r
145 $result = preg_replace(
\r
146 "/foo\[\(\'([^\)]{1,3})\'\)\]bar/e",
\r
147 " '('.\$this->boolean_sql_where_short(\"$1\",\"$match\").')' ",
\r
149 $result = $this->jpmarked; /* for jp */
\r
150 $result = $this->boolean_sql_where_jp_short($result,$match);/* for jp */
\r
154 // there must be a simple way to simply copy a value with backslashes in it through
\r
155 // the preg_replace, but I cannot currently find it (karma 2003-12-30)
\r
// NOTE(review): the body of copyvalue() is not visible in this listing. The only
// call seen here is inside the commented-out replacement string in
// boolean_sql_where(), so this helper may be unused on the live path - confirm.
156 function copyvalue($foo) {
\r
160 /***********************************************
\r
162 ***********************************************/
\r
// Normalises a boolean query into a space-separated list of atoms for
// boolean_sql_where_jp_short(). After this routine:
//   - a plain atom means AND
//   - an atom with a leading comma means OR
//   - an atom with a leading minus sign means NOT
// Visible steps: trim, collapse whitespace, rewrite the words not / and / or to the
// short operators, tidy spaces around commas and minus signs, drop plus signs.
// NOTE(review): the return statement is not visible in this listing; SEARCH()
// stores the result in $this->jpmarked.
// NOTE(review): eregi_replace() was deprecated in PHP 5.3 and removed in PHP 7.0.
164 function boolean_mark_atoms_jp($string){
\r
166 $result=trim($string);
\r
167 $result=preg_replace("/([[:space:]]{2,})/",' ',$result);
\r
169 /* convert normal boolean operators to shortened syntax */
\r
170 $result=eregi_replace(' not ',' -',$result);
\r
171 $result=eregi_replace(' and ',' ',$result);
\r
172 $result=eregi_replace(' or ',',',$result);
\r
174 /* strip excessive whitespace */
\r
175 $result=str_replace(', ',',',$result);
\r
176 $result=str_replace(' ,',',',$result);
\r
177 $result=str_replace('- ','-',$result);
\r
178 $result=str_replace('+','',$result);
\r
// Put a space in front of every comma so each OR atom becomes its own
// space-separated token that still starts with the comma marker.
180 $result=str_replace(',',' ,',$result);
\r
// Builds a LIKE-based boolean WHERE fragment.
//   $string : space-separated atoms (see boolean_mark_atoms_jp); a leading comma
//             marks an OR atom, a leading minus sign marks a NOT atom
//   $match  : comma-separated column names; each is referenced through the table
//             alias i
// For every atom one group is built that tests the atom against each column:
//   first atom      -> (col1 LIKE ... or col2 LIKE ...)
//   comma atom      -> appended with OR, columns joined with or
//   plain atom      -> appended with AND, columns joined with or
//   remaining case  -> appended with AND, each column wrapped in NOT(...) and the
//                      columns joined with and (presumably the else branch for
//                      minus atoms; the else line itself is not visible here)
// The whole expression is finally wrapped in one more pair of parentheses.
// NOTE(review): the return statement and any initialisation of $temp_a are not
// visible in this listing.
// NOTE(review): values are escaped with addslashes(), which is not aware of the
// connection character set, and the LIKE wildcards % and _ inside a search term are
// not escaped. Prefer the database driver escaping or bound parameters when porting.
// NOTE(review): the first atom is always emitted as a positive LIKE, even when it
// starts with a minus sign or is empty - confirm whether that is intended.
186 function boolean_sql_where_jp_short($string,$match){
\r
187 $match_a = explode(',',$match);
\r
188 $key_a = explode(' ',$string);
\r
190 for($ith=0;$ith<count($match_a);$ith++){
\r
191 $temp_a[$ith] = "(i.$match_a[$ith] LIKE '%" . addslashes($key_a[0]) . "%') ";
\r
193 $like = '('.implode(' or ',$temp_a).')';
\r
195 for($kn=1; $kn<count($key_a); $kn++){
\r
// OR atom: strip the leading comma before matching.
196 if(substr($key_a[$kn],0,1) == ","){
\r
197 for($ith=0;$ith<count($match_a);$ith++){
\r
198 $temp_a[$ith] = " (i.$match_a[$ith] LIKE '%" . addslashes(substr($key_a[$kn],1)) . "%') ";
\r
200 $like .=' OR ('. implode(' or ',$temp_a).')';
\r
// AND atom: no operator prefix.
201 }elseif(substr($key_a[$kn],0,1) != '-'){
\r
202 for($ith=0;$ith<count($match_a);$ith++){
\r
203 $temp_a[$ith] = " (i.$match_a[$ith] LIKE '%" . addslashes($key_a[$kn]) . "%') ";
\r
205 $like .=' AND ('. implode(' or ',$temp_a).')';
\r
// NOT atom: strip the leading minus sign; no column may contain the term.
207 for($ith=0;$ith<count($match_a);$ith++){
\r
208 $temp_a[$ith] = " NOT(i.$match_a[$ith] LIKE '%" . addslashes(substr($key_a[$kn],1)) . "%') ";
\r
210 $like .=' AND ('. implode(' and ',$temp_a).')';
\r
214 $like = '('.$like.')';
\r
218 /***********************************************/
\r
220 /* function boolean_mark_atoms($string){
\r
221 $result=trim($string);
\r
222 $result=preg_replace("/([[:space:]]{2,})/",' ',$result);
\r
224 // convert normal boolean operators to shortened syntax
\r
225 $result=eregi_replace(' not ',' -',$result);
\r
226 $result=eregi_replace(' and ',' ',$result);
\r
227 $result=eregi_replace(' or ',',',$result);
\r
229 // strip excessive whitespace
\r
230 $result=str_replace('( ','(',$result);
\r
231 $result=str_replace(' )',')',$result);
\r
232 $result=str_replace(', ',',',$result);
\r
233 $result=str_replace(' ,',',',$result);
\r
234 $result=str_replace('- ','-',$result);
\r
235 $result=str_replace('+','',$result);
\r
237 // remove double spaces (we might have introduced some new ones above)
\r
238 $result=trim($result);
\r
239 $result=preg_replace("/([[:space:]]{2,})/",' ',$result);
\r
241 // apply arbitrary function to all 'word' atoms
\r
243 $result_a = explode(" ",$result);
\r
244 for($word=0;$word<count($result_a);$word++){
\r
245 $result_a[$word] = "foo[('".$result_a[$word]."')]bar";
\r
247 $result = implode(" ",$result_a);
\r
249 // dispatch ' ' to ' AND '
\r
250 $result=str_replace(' ',' AND ',$result);
\r
252 // dispatch ',' to ' OR '
\r
253 $result=str_replace(',',' OR ',$result);
\r
255 // dispatch '-' to ' NOT '
\r
256 $result=str_replace(' -',' NOT ',$result);
\r
// Builds an or-joined list of LIKE tests, one per column in the comma-separated
// $match, each looking for $string surrounded by single spaces.
// NOTE(review): $string is interpolated into the SQL text without any escaping.
// NOTE(review): the return statement is not visible in this listing, and the only
// call seen here sits in commented-out code in boolean_sql_where(). It is also
// unclear from this listing whether the block comment opened before
// boolean_mark_atoms has been closed by this point - confirm whether this method
// is live.
260 function boolean_sql_where_short($string,$match){
\r
261 $match_a = explode(',',$match);
\r
262 for($ith=0;$ith<count($match_a);$ith++){
\r
263 $like_a[$ith] = " $match_a[$ith] LIKE '% $string %' ";
\r
265 $like = implode(" or ",$like_a);
\r
// Builds a relevance-score SQL expression for one short search term.
// For each column in the comma-separated $match the expression is
//   weight * (LENGTH(col) - LENGTH(col with every case-insensitive occurrence of
//   the term removed)) / LENGTH(term)
// which works out to weight times the number of occurrences, with weight fixed at
// 0.2. The per-column expressions are joined with + signs.
// Column names and the term are passed through addslashes() before interpolation.
// NOTE(review): the assignment that opens the multi-line string (original line 273)
// and the return statement are not visible in this listing; presumably each string
// is stored in $score_a[$ith] - confirm.
// NOTE(review): an empty $string makes the generated SQL divide by LENGTH of an
// empty string, i.e. by zero - callers presumably never pass one; verify.
// NOTE(review): the only call seen here is in commented-out code in
// boolean_sql_select(); confirm whether this method is still reachable.
269 function boolean_sql_select_short($string,$match){
\r
270 $match_a = explode(',',$match);
\r
271 $score_unit_weight = .2;
\r
272 for($ith=0;$ith<count($match_a);$ith++){
\r
274 " $score_unit_weight*(
\r
275 LENGTH(" . addslashes($match_a[$ith]) . ") -
\r
276 LENGTH(REPLACE(LOWER(" . addslashes($match_a[$ith]) . "),LOWER('" . addslashes($string) . "'),'')))
\r
277 /LENGTH('" . addslashes($string) . "') ";
\r
279 $score = implode(" + ",$score_a);
\r