4 * Nucleus: PHP/MySQL Weblog CMS (http://nucleuscms.org/)
5 * Copyright (C) 2003-2009 The Nucleus Group
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
11 * (see nucleus/documentation/index.html#license for more info)
14 * SEARCH(querystring) offers different functionality to create an
15 * SQL query to find certain items. (and comments)
17 * based on code by David Altherr:
18 * http://www.evolt.org/article/Boolean_Fulltext_Searching_with_PHP_and_MySQL/18/15665/
19 * http://davidaltherr.net/web/php_functions/boolean/funcs.mysql.boolean.txt
21 * @license http://nucleuscms.org/license.txt GNU General Public License
22 * @copyright Copyright (C) 2002-2011 The Nucleus Group
24 * @version $NucleusJP: SEARCH.php,v 1.7 2006/07/20 08:01:52 kimitake Exp $
/**
 * Prepares a search for the given query string.
 *
 * The text is converted to UTF-8 when the site charset differs, a fixed set
 * of punctuation characters is stripped, and the result is parsed into the
 * representations used by the SQL builders: $this->jpmarked and
 * $this->inclusive. The ids of all blogs flagged as searchable are collected
 * in $this->blogs.
 *
 * NOTE(review): this is presumably the PHP4-style constructor of the
 * enclosing class (the class declaration is not visible here). PHP 8 no
 * longer treats same-named methods as constructors -- confirm whether a
 * __construct() wrapper is required.
 *
 * @param string $text raw query string, encoded in the site charset (_CHARSET)
 */
function SEARCH($text) {
	// Normalised charset name: lower case, only [a-z0-9-_] kept.
	$this->encoding = strtolower(preg_replace('|[^a-z0-9-_]|i', '', _CHARSET));

	// Byte patterns for ASCII, two-byte and three-byte UTF-8 sequences.
	// They have to exist before boolean_inclusive_atoms() is called below,
	// because that method interpolates them into its regular expression.
	$this->ascii = '[\x00-\x7F]';
	$this->two   = '[\xC0-\xDF][\x80-\xBF]';
	$this->three = '[\xE0-\xEF][\x80-\xBF][\x80-\xBF]';

	// All parsing below is done on UTF-8 text.
	if ($this->encoding !== 'utf-8') {
		$text = mb_convert_encoding($text, "UTF-8", $this->encoding);
	}

	// "\xE3\x80\x80" is U+3000 (ideographic space); treat it as a plain space.
	$text = str_replace("\xE3\x80\x80", ' ', $text);
	// Strip the characters < > = ? ! # ^ ( ) [ ] : ; % from the query.
	$text = preg_replace("/[<>=?!#^()[\]:;\\%]/", "", $text);

	$this->jpmarked = $this->boolean_mark_atoms_jp($text);

	$this->querystring = $text;
	// $this->marked (the output of boolean_mark_atoms()) is deliberately
	// not computed here.
	$this->inclusive = $this->boolean_inclusive_atoms($text);
	$this->blogs = array();

	// Collect every blog that is flagged as searchable (bincludesearch=1).
	$res = sql_query('SELECT bnumber FROM ' . sql_table('blog') . ' WHERE bincludesearch=1 ');
	if ($res) {
		// Only iterate when the query produced a result handle.
		while ($obj = sql_fetch_object($res)) {
			$this->blogs[] = intval($obj->bnumber);
		}
	}
}
/**
 * Builds the SQL expression that scores a row against the inclusive terms.
 *
 * Terms whose strlen() is 4 or more are scored together through a single
 * "match (...) against (...)" clause; shorter terms are scored one by one
 * with boolean_sql_select_short(). The partial scores are joined with "+".
 *
 * @param string $match comma-separated list of column names to score
 * @return string SQL score expression, or '' when there is nothing to score
 */
function boolean_sql_select($match) {
	$inclusive = (string) $this->inclusive;
	if (strlen($inclusive) == 0) {
		return '';
	}

	$long_terms  = '';
	$score_parts = array();

	foreach (explode(' ', $inclusive) as $term) {
		// Consecutive spaces in the atom list produce empty strings here;
		// they carry no search term and must not be scored.
		if ($term === '') {
			continue;
		}

		if (strlen($term) >= 4) {
			$long_terms .= " $term ";
		} else {
			$score_parts[] = ' ' . $this->boolean_sql_select_short($term, $match) . ' ';
		}
	}

	if (strlen($long_terms) > 0) {
		$long_terms    = sql_real_escape_string($long_terms);
		$score_parts[] = " match ($match) against ('$long_terms') ";
	}

	return implode("+", $score_parts);
}
/**
 * Reduces a search query to the list of its inclusive atoms.
 *
 * The boolean words "not", "and" and "or" are rewritten to the short
 * operators "-", " " and ",", exclusive groups written as -(...) are
 * removed, and the remaining grouping characters and commas are replaced by
 * spaces.
 *
 * Reads $this->two and $this->three (UTF-8 byte patterns) and
 * $this->encoding.
 *
 * @param string $string query string in UTF-8
 * @return string atoms separated by single spaces, converted to the site charset
 */
function boolean_inclusive_atoms($string) {
	$result = trim($string);
	$result = preg_replace("#([[:space:]]{2,})#", ' ', $result);

	/* convert normal boolean operators to shortened syntax */
	$result = preg_replace('# not #i', ' -', $result);
	$result = preg_replace('# and #i', ' ', $result);
	$result = preg_replace('# or #i', ',', $result);

	/* drop unnecessary spaces */
	$result = str_replace(' ,', ',', $result);
	$result = str_replace(', ', ',', $result);
	$result = str_replace('- ', '-', $result);
	$result = str_replace('+', '', $result);

	/* strip exclusive atoms: a "-" followed by a parenthesised group */
	$result = preg_replace(
		"#\-\(([A-Za-z0-9]|$this->two|$this->three){1,}([A-Za-z0-9\-\.\_\,]|$this->two|$this->three){0,}\)#",
		'',
		$result
	);

	$result = str_replace('(', ' ', $result);
	$result = str_replace(')', ' ', $result);
	$result = str_replace(',', ' ', $result);

	// The replacements above can leave leading/trailing spaces and runs of
	// spaces. Callers split the list on single spaces, so squeeze them to
	// avoid empty atoms. Only plain spaces are touched, which is safe on
	// multi-byte text.
	$result = trim(preg_replace('# {2,}#', ' ', $result));

	if ($this->encoding !== 'utf-8') {
		$result = mb_convert_encoding($result, $this->encoding, "UTF-8");
	}

	return $result;
}
/**
 * Builds the SQL WHERE condition for the current query.
 *
 * The condition is produced by boolean_sql_where_jp_short() from
 * $this->jpmarked and converted from UTF-8 back to the site charset.
 *
 * The former implementation based on $this->marked and the preg_replace()
 * "/e" modifier is gone: its result was always overwritten by the code
 * below, and the "/e" modifier is no longer supported by PHP 7 and later.
 *
 * @param string $match comma-separated list of column names to search
 * @return string SQL condition in the site charset
 */
function boolean_sql_where($match) {
	$result = $this->boolean_sql_where_jp_short($this->jpmarked, $match);

	if ($this->encoding !== 'utf-8') {
		$result = mb_convert_encoding($result, $this->encoding, "UTF-8");
	}

	return $result;
}
148 // there must be a simple way to simply copy a value with backslashes in it through
149 // the preg_replace, but I cannot currently find it (karma 2003-12-30)
// NOTE(review): the body of copyvalue() is not visible in this excerpt. Judging
// by its name and the remark above, it presumably returns $foo unchanged so a
// captured value can be passed through a preg_replace() replacement
// expression -- confirm against the full file before relying on this.
150 function copyvalue($foo) {
/**
 * Rewrites a search query into a boolean expression of marked atoms.
 *
 * Every space-separated word is wrapped as foo[('word')]bar, after which
 * spaces become " AND ", commas become " OR " and " -" becomes " NOT ".
 *
 * NOTE(review): the word split is done on spaces only, so a comma-joined
 * pair such as "a,b" is wrapped as one atom and the " OR " ends up inside
 * the marker; likewise a leading "-" stays inside the marker. This is the
 * existing behaviour and is kept as is.
 *
 * @param string $string query string
 * @return string expression of marked atoms joined by AND / OR / NOT
 */
function boolean_mark_atoms($string){
	$result = trim($string);
	$result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

	// convert the boolean words to the shortened operator syntax
	$result = preg_replace('# not #i', ' -', $result);
	$result = preg_replace('# and #i', ' ', $result);
	$result = preg_replace('# or #i', ',', $result);

	// strip excessive whitespace
	$result = str_replace('( ', '(', $result);
	$result = str_replace(' )', ')', $result);
	$result = str_replace(', ', ',', $result);
	$result = str_replace(' ,', ',', $result);
	$result = str_replace('- ', '-', $result);
	$result = str_replace('+', '', $result);

	// remove double spaces (we might have introduced some new ones above)
	$result = trim($result);
	$result = preg_replace("#([[:space:]]{2,})#", ' ', $result);

	// wrap every 'word' atom in the foo[('...')]bar marker
	$atoms = explode(' ', $result);
	foreach ($atoms as $idx => $atom) {
		$atoms[$idx] = "foo[('" . $atom . "')]bar";
	}
	$result = implode(" ", $atoms);

	// dispatch ' ' to ' AND '
	$result = str_replace(' ', ' AND ', $result);

	// dispatch ',' to ' OR '
	$result = str_replace(',', ' OR ', $result);

	// dispatch '-' to ' NOT '
	$result = str_replace(' -', ' NOT ', $result);

	return $result;
}
/**
 * Builds a LIKE condition that looks for a term in any of the given columns.
 *
 * The pattern is '% term %', i.e. the term has to be surrounded by spaces
 * inside the column value. The per-column conditions are joined with OR.
 *
 * @param string $string search term (unescaped)
 * @param string $match  comma-separated list of column names
 * @return string SQL condition
 */
function boolean_sql_where_short($string,$match){
	// Escape the term once; it is embedded in a quoted SQL string below,
	// the same way boolean_sql_select_short() treats its term.
	$needle = sql_real_escape_string($string);

	$like_a = array();
	foreach (explode(',', $match) as $column) {
		$like_a[] = " $column LIKE '% $needle %' ";
	}

	return implode(" OR ", $like_a);
}
/**
 * Builds the SQL score expression for one short search term.
 *
 * For every column the expression computes
 *   0.2 * (LENGTH(col) - LENGTH(col with the term removed)) / LENGTH(term)
 * which is 0.2 times the number of case-insensitive occurrences of the term
 * in that column. The per-column scores are joined with " + ".
 *
 * @param string $string search term (unescaped)
 * @param string $match  comma-separated list of column names
 * @return string SQL expression; '0' when the term is empty
 */
function boolean_sql_select_short($string, $match) {
	// An empty term would make the expression divide by LENGTH('') = 0.
	if ((string) $string === '') {
		return '0';
	}

	// Kept as a string literal so the generated SQL never depends on how
	// PHP formats floats.
	$score_unit_weight = '0.2';

	// The term is the same for every column: escape it once.
	$term = sql_real_escape_string($string);

	$score_a = array();
	foreach (explode(',', $match) as $name) {
		$column = sql_real_escape_string($name);
		$score_a[] = " $score_unit_weight*("
			. "LENGTH($column) - "
			. "LENGTH(REPLACE(LOWER($column),LOWER('$term'),'')))"
			. "/LENGTH('$term') ";
	}

	return implode(" + ", $score_a);
}
223 /***********************************************
225 ***********************************************/
/**
 * Normalises a search query into a list of space-separated terms.
 *
 * The boolean words "not", "and" and "or" are rewritten to the short
 * operators "-", " " and ",". In the result a term that starts with ","
 * is an OR term, a term that starts with "-" is an excluded term, and any
 * other term is an AND term.
 *
 * @param string $string query string
 * @return string normalised query, terms separated by single spaces
 */
function boolean_mark_atoms_jp($string) {
	$result = trim($string);
	$result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

	/* convert normal boolean operators to shortened syntax */
	$result = preg_replace('# not #i', ' -', $result);
	$result = preg_replace('# and #i', ' ', $result);
	$result = preg_replace('# or #i', ',', $result);

	// Remove '+' before tidying up the spacing: dropping it can leave
	// leading spaces, double spaces or ", " sequences, which would end up
	// as empty terms once the result is split on spaces.
	$result = str_replace('+', '', $result);
	$result = trim($result);
	$result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

	/* strip excessive whitespace */
	$result = str_replace(', ', ',', $result);
	$result = str_replace(' ,', ',', $result);
	$result = str_replace('- ', '-', $result);

	// Put a space in front of every comma so that an OR term becomes a
	// separate space-delimited term that starts with ','.
	$result = str_replace(',', ' ,', $result);

	return $result;
}
/**
 * Builds a SQL condition of LIKE '%term%' tests from a normalised query.
 *
 * $string is split on single spaces into terms and $match on commas into
 * column names; every column is referenced through the "i." prefix.
 * The first term opens the expression. Each later term is appended as
 *   - " OR (...)"  when it starts with ',' (the comma is removed),
 *   - " AND (...)" when it does not start with '-',
 *   - " AND (NOT ... and NOT ...)" in the remaining loop (the '-' is removed).
 * BINARY is added to the LIKE when the term contains no ASCII letter.
 *
 * NOTE(review): the end of this function (return statement, closing braces)
 * is not visible in this excerpt; presumably $like is returned -- confirm.
 * NOTE(review): an empty term (for example from two consecutive spaces or a
 * lone ',') yields LIKE '%%', which looks like it would match any non-NULL
 * value -- verify how callers normalise $string.
 */
246 function boolean_sql_where_jp_short($string, $match) {
247 $match_a = explode(',', $match);
248 $key_a = explode(' ', $string);
// First term: one LIKE test per column, joined with "or".
250 for ($ith=0; $ith<count($match_a); $ith++) {
251 // $temp_a[$ith] = "(i.$match_a[$ith] LIKE '%" . sql_real_escape_string($key_a[0]) . "%') ";
// $binKey is 'BINARY' only when the term has no ASCII letter.
252 $binKey = preg_match('/[a-zA-Z]/', $key_a[0]) ? '' : 'BINARY';
253 $temp_a[$ith] = "(i.$match_a[$ith] LIKE " . $binKey . " '%" . sql_real_escape_string($key_a[0]) . "%') ";
255 $like = '('.implode(' or ',$temp_a).')';
// Remaining terms: the leading character of each term selects the operator.
257 for ($kn = 1; $kn < count($key_a); $kn++) {
258 $binKey = preg_match('/[a-zA-Z]/', $key_a[$kn]) ? '' : 'BINARY';
// ',' prefix: OR term; the comma is dropped with substr(..., 1).
259 if (substr($key_a[$kn], 0, 1) == ",") {
260 for($ith = 0; $ith < count($match_a); $ith++) {
261 // $temp_a[$ith] = " (i.$match_a[$ith] LIKE '%" . sql_real_escape_string(substr($key_a[$kn],1)) . "%') ";
262 $temp_a[$ith] = " (i.$match_a[$ith] LIKE " . $binKey . " '%" . sql_real_escape_string(substr($key_a[$kn], 1)) . "%') ";
264 $like .=' OR ('. implode(' or ', $temp_a).')';
// No '-' prefix: AND term; any column may contain it.
265 }elseif(substr($key_a[$kn],0,1) != '-'){
266 for($ith=0;$ith<count($match_a);$ith++){
267 // $temp_a[$ith] = " (i.$match_a[$ith] LIKE '%" . sql_real_escape_string($key_a[$kn]) . "%') ";
268 $temp_a[$ith] = " (i.$match_a[$ith] LIKE " . $binKey . " '%" . sql_real_escape_string($key_a[$kn]) . "%') ";
270 $like .=' AND ('. implode(' or ', $temp_a).')';
// NOTE(review): the branch header for the loop below is not visible here;
// presumably this is the else branch for terms that start with '-'.
// The per-column NOT tests are joined with "and", so no column may match.
272 for($ith=0;$ith<count($match_a);$ith++){
273 // $temp_a[$ith] = " NOT(i.$match_a[$ith] LIKE '%" . sql_real_escape_string(substr($key_a[$kn],1)) . "%') ";
274 $temp_a[$ith] = " NOT(i.$match_a[$ith] LIKE " . $binKey . " '%" . sql_real_escape_string(substr($key_a[$kn], 1)) . "%') ";
276 $like .=' AND ('. implode(' and ', $temp_a).')';
// Wrap the whole condition in parentheses.
280 $like = '('.$like.')';