OSDN Git Service

# FIXED: atom.phpとxml-rss2.phpで_CHARSETをチェックして文字コード変換(Nucleus3.41 へアップグレード後 xml-rss2...
[nucleus-jp/nucleus-jp-ancient.git] / utf8 / nucleus / libs / SEARCH.php
1 <?php
2
3 /*
4  * Nucleus: PHP/MySQL Weblog CMS (http://nucleuscms.org/)
5  * Copyright (C) 2003-2009 The Nucleus Group
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version 2
10  * of the License, or (at your option) any later version.
11  * (see nucleus/documentation/index.html#license for more info)
12  */
13 /**
14  * SEARCH(querystring) offers different functionality to create an
15  * SQL query to find certain items. (and comments)
16  *
17  * based on code by David Altherr:
18  * http://www.evolt.org/article/Boolean_Fulltext_Searching_with_PHP_and_MySQL/18/15665/
19  * http://davidaltherr.net/web/php_functions/boolean/funcs.mysql.boolean.txt
20  *
21  * @license http://nucleuscms.org/license.txt GNU General Public License
22  * @copyright Copyright (C) 2002-2009 The Nucleus Group
23  * @version $Id$
24  * @version $NucleusJP: SEARCH.php,v 1.7 2006/07/20 08:01:52 kimitake Exp $
25  */
26
27
28
class SEARCH {

        /** Sanitized search text as stored by the constructor (after conversion to UTF-8). */
        var $querystring;
        /** Declared for compatibility; this class never assigns it. */
        var $marked;
        /** Space separated atoms produced by boolean_inclusive_atoms(), in the site encoding. */
        var $inclusive;
        /** Blog numbers (integers) of every blog whose bincludesearch flag is 1. */
        var $blogs;
        /** Lower-cased site charset derived from _CHARSET (for example 'utf-8'). */
        var $encoding;
        /** PCRE fragment matching one ASCII byte. */
        var $ascii = '[\x00-\x7F]';
        /** PCRE fragment matching a two byte UTF-8 sequence. */
        var $two = '[\xC0-\xDF][\x80-\xBF]';
        /** PCRE fragment matching a three byte UTF-8 sequence. */
        var $three = '[\xE0-\xEF][\x80-\xBF][\x80-\xBF]';
        /** Query with boolean operators shortened by boolean_mark_atoms_jp() (UTF-8). */
        var $jpmarked;

        /**
         * Prepares a search for the given query text.
         *
         * The text is converted to UTF-8 when the site charset differs, the
         * ideographic space (U+3000) is turned into a plain space and a set of
         * punctuation characters is removed before the query is analysed.
         *
         * @param string $text raw query text in the site encoding (_CHARSET)
         */
        function __construct($text) {
                $this->encoding = strtolower(preg_replace('|[^a-z0-9-_]|i', '', _CHARSET));
                if ($this->encoding != 'utf-8') {
                        $text = mb_convert_encoding($text, "UTF-8", $this->encoding);
                }

                // U+3000 IDEOGRAPHIC SPACE separates keywords just like an ASCII space
                $text = str_replace("\xE3\x80\x80", ' ', $text);
                // NOTE(review): after PHP string unescaping the class ends in "\%",
                // so a literal backslash is NOT removed here - confirm this is intended.
                $text = preg_replace("/[<>=?!#^()[\]:;\\%]/", "", $text);

                $this->jpmarked    = $this->boolean_mark_atoms_jp($text);
                $this->querystring = $text;
                $this->inclusive   = $this->boolean_inclusive_atoms($text);
                $this->blogs       = array();

                // collect every blog that is flagged as searchable
                // NOTE(review): mysql_fetch_object() was removed in PHP 7; switch to the
                // project's database wrapper if one exists for fetching rows.
                $res = sql_query('SELECT bnumber FROM '.sql_table('blog').' WHERE bincludesearch=1 ');
                while ($obj = mysql_fetch_object($res)) {
                        $this->blogs[] = intval($obj->bnumber);
                }
        }

        /**
         * PHP4-style constructor, kept so that code calling SEARCH() explicitly
         * keeps working. PHP 8 no longer treats it as a constructor, which is
         * why the real work lives in __construct().
         *
         * @param string $text raw query text in the site encoding (_CHARSET)
         */
        function SEARCH($text) {
                $this->__construct($text);
        }

        /**
         * Builds the SQL expression used to compute a relevance score.
         *
         * Atoms of four or more bytes are combined into one MATCH ... AGAINST
         * term; shorter atoms are scored with boolean_sql_select_short().
         *
         * @param string $match comma separated list of column names
         * @return string|null SQL expression, or null when there are no inclusive atoms
         */
        function boolean_sql_select($match){
                if (strlen((string) $this->inclusive) == 0) {
                        return null;
                }

                $long_atoms  = '';
                $score_parts = array();

                foreach (explode(' ', $this->inclusive) as $atom) {
                        if ($atom === '') {
                                // an empty atom would end up as "/LENGTH('')" (division by zero) in SQL
                                continue;
                        }
                        if (strlen($atom) >= 4) {
                                $long_atoms .= " $atom ";
                        } else {
                                $score_parts[] = ' ' . $this->boolean_sql_select_short($atom, $match) . ' ';
                        }
                }

                if (strlen($long_atoms) > 0) {
                        $long_atoms    = addslashes($long_atoms);
                        $score_parts[] = " match ($match) against ('$long_atoms') ";
                }

                return implode('+', $score_parts);
        }

        /**
         * Reduces a query to the list of atoms used for scoring.
         *
         * Boolean operators are shortened, parenthesised exclusive groups
         * ("-(...)") are dropped and the remaining separators become spaces.
         *
         * @param string $string query text in UTF-8
         * @return string space separated atoms, converted back to the site encoding
         */
        function boolean_inclusive_atoms($string){
                $result = $this->_shorten_boolean_operators($string);

                /* drop unnecessary spaces */
                $result = str_replace(' ,', ',', $result);
                $result = str_replace(', ', ',', $result);
                $result = str_replace('- ', '-', $result);
                $result = str_replace('+', '', $result);

                /* strip exclusive atoms (the pattern uses "(" and ")" as delimiters) */
                $result = preg_replace(
                        "(\-\(([A-Za-z0-9]|$this->two|$this->three){1,}([A-Za-z0-9\-\.\_\,]|$this->two|$this->three){0,}\))",
                        '',
                        $result);

                $result = str_replace(array('(', ')', ','), ' ', $result);

                if ($this->encoding != 'utf-8') {
                        $result = mb_convert_encoding($result, $this->encoding, "UTF-8");
                }
                return $result;
        }

        /**
         * Builds the SQL WHERE fragment for the query given to the constructor.
         *
         * NOTE(review): keywords are escaped before the fragment is converted
         * back to the site encoding; confirm this is safe for multi-byte
         * encodings such as Shift_JIS.
         *
         * @param string $match comma separated list of column names
         * @return string SQL condition in the site encoding
         */
        function boolean_sql_where($match){
                $result = $this->boolean_sql_where_jp_short($this->jpmarked, $match);
                if ($this->encoding != 'utf-8') {
                        $result = mb_convert_encoding($result, $this->encoding, "UTF-8");
                }
                return $result;
        }

        /**
         * Returns its argument unchanged. Nothing in this class calls it any
         * more; it is kept so existing external callers do not break.
         *
         * @param mixed $foo
         * @return mixed
         */
        function copyvalue($foo) {
                return $foo;
        }

        /**
         * Builds a score expression for an atom too short for MATCH ... AGAINST.
         *
         * For each column the expression is 0.2 times the number of bytes removed
         * when the atom is deleted from the lower-cased column, divided by the
         * byte length of the atom, i.e. 0.2 per occurrence.
         *
         * @param string $string the atom
         * @param string $match  comma separated list of column names
         * @return string SQL expression summing the score over all columns
         */
        function boolean_sql_select_short($string,$match){
                $score_unit_weight = .2;
                $needle  = addslashes($string);
                $score_a = array();

                foreach (explode(',', $match) as $column) {
                        $column    = addslashes($column);
                        $score_a[] = " $score_unit_weight*("
                                   . "LENGTH($column) - "
                                   . "LENGTH(REPLACE(LOWER($column),LOWER('$needle'),'')))"
                                   . "/LENGTH('$needle') ";
                }

                return implode(" + ", $score_a);
        }

/***********************************************
        Make "WHERE" (jp)
***********************************************/

        /**
         * Rewrites a query into the shortened operator syntax consumed by
         * boolean_sql_where_jp_short(): atoms are separated by single spaces,
         * an OR-ed atom is prefixed with "," and an excluded atom with "-".
         *
         * @param string $string query text
         * @return string normalised query
         */
        function boolean_mark_atoms_jp($string){
                $result = $this->_shorten_boolean_operators($string);

                /* strip excessive whitespace */
                $result = str_replace(', ', ',', $result);
                $result = str_replace(' ,', ',', $result);
                $result = str_replace('- ', '-', $result);
                $result = str_replace('+', '', $result);

                /* make every "," start its own space separated atom */
                $result = str_replace(',', ' ,', $result);

                return $result;
        }

        /**
         * Turns a normalised query into a LIKE based SQL condition.
         *
         * The first atom is always a plain match. Each following atom is
         * combined with OR when it starts with ",", with AND NOT when it starts
         * with "-", and with AND otherwise.
         *
         * @param string $string query as produced by boolean_mark_atoms_jp()
         * @param string $match  comma separated list of column names (without the "i." prefix)
         * @return string SQL condition wrapped in parentheses
         */
        function boolean_sql_where_jp_short($string,$match){
                $columns = explode(',', $match);
                $keys    = explode(' ', $string);

                $like = '(' . implode(' or ', $this->_jp_like_clauses($columns, $keys[0], false)) . ')';

                for ($kn = 1; $kn < count($keys); $kn++) {
                        $key    = $keys[$kn];
                        $prefix = substr($key, 0, 1);

                        if ($prefix == ',') {
                                $clauses = $this->_jp_like_clauses($columns, (string) substr($key, 1), false);
                                $like   .= ' OR (' . implode(' or ', $clauses) . ')';
                        } elseif ($prefix != '-') {
                                $clauses = $this->_jp_like_clauses($columns, $key, false);
                                $like   .= ' AND (' . implode(' or ', $clauses) . ')';
                        } else {
                                // excluded atom: none of the columns may contain it
                                $clauses = $this->_jp_like_clauses($columns, (string) substr($key, 1), true);
                                $like   .= ' AND (' . implode(' and ', $clauses) . ')';
                        }
                }

                return '(' . $like . ')';
        }

        /**
         * Helper: builds one "(i.column LIKE '%keyword%')" clause per column.
         *
         * Keywords without any ASCII letter are compared with LIKE BINARY.
         * The modifier is selected with an explicit ternary; the former inline
         * form ("..." . preg_match() ? '' : 'BINARY' . "...") was evaluated as
         * ("..." . preg_match()) ? ... because "." binds tighter than "?:",
         * which made every clause an empty string.
         *
         * @param array  $columns column names (without the "i." prefix)
         * @param string $keyword unescaped keyword
         * @param bool   $negate  true to produce NOT(...) clauses
         * @return array list of SQL clauses, one per column
         */
        function _jp_like_clauses($columns, $keyword, $negate) {
                $binary  = preg_match('/[a-zA-Z]/', $keyword) ? '' : 'BINARY ';
                $pattern = "'%" . addslashes($keyword) . "%'";
                $not     = $negate ? 'NOT' : '';

                $clauses = array();
                foreach ($columns as $column) {
                        $clauses[] = "$not(i.$column LIKE $binary$pattern)";
                }
                return $clauses;
        }

        /**
         * Helper: trims the query, collapses runs of whitespace and converts
         * the words NOT / AND / OR (any case, surrounded by spaces) into the
         * short forms " -", " " and ",".
         *
         * Uses preg_replace() because eregi_replace() was removed in PHP 7.
         *
         * @param string $string query text
         * @return string query with shortened operators
         */
        function _shorten_boolean_operators($string) {
                $result = trim($string);
                $result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

                $result = preg_replace('/ not /i', ' -', $result);
                $result = preg_replace('/ and /i', ' ', $result);
                $result = preg_replace('/ or /i', ',', $result);

                return $result;
        }

/***********************************************/
}
329 ?>