OSDN Git Service

fix copyright year.
[bbk/bchan.git] / src / retriever.c
1 /*
2  * retriever.c
3  *
4  * Copyright (c) 2009-2010 project bchan
5  *
6  * This software is provided 'as-is', without any express or implied
7  * warranty. In no event will the authors be held liable for any damages
8  * arising from the use of this software.
9  *
10  * Permission is granted to anyone to use this software for any purpose,
11  * including commercial applications, and to alter it and redistribute it
12  * freely, subject to the following restrictions:
13  *
14  * 1. The origin of this software must not be misrepresented; you must not
15  *    claim that you wrote the original software. If you use this software
16  *    in a product, an acknowledgment in the product documentation would be
17  *    appreciated but is not required.
18  *
19  * 2. Altered source versions must be plainly marked as such, and must not be
20  *    misrepresented as being the original software.
21  *
22  * 3. This notice may not be removed or altered from any source
23  *    distribution.
24  *
25  */
26
27 #include        <basic.h>
28 #include        <bstdlib.h>
29 #include        <bstdio.h>
30 #include        <bstring.h>
31 #include        <errcode.h>
32 #include        <btron/btron.h>
33 #include        <btron/bsocket.h>
34 #include        <util/zlib.h>
35
36 #include    "retriever.h"
37 #include    "http.h"
38
/*
 * Debug output helpers; active only when BCHAN_CONFIG_DEBUG is defined.
 *
 *   DP(arg)         arg is a complete, parenthesized printf argument list,
 *                   e.g. DP(("status %d\n", status));
 *   DP_ER(msg, err) prints msg, then the bits of err above bit 15 in decimal
 *                   and the whole of err in hexadecimal.
 *
 * Without BCHAN_CONFIG_DEBUG both macros expand to nothing.
 * The DP_ER arguments are parenthesized so that expression arguments
 * (e.g. "a | b") are shifted and printed as a whole.
 */
#ifdef BCHAN_CONFIG_DEBUG
# define DP(arg) printf arg
# define DP_ER(msg, err) printf("%s (%d/%x)\n", (msg), (err)>>16, (err))
#else
# define DP(arg) /**/
# define DP_ER(msg, err) /**/
#endif
46
47 struct datretriever_t_ {
48         datcache_t *cache;
49         B *server;
50         B *board;
51         B *thread;
52         W server_len;
53         W board_len;
54         W thread_len;
55         http_t *http;
56 };
57
58 LOCAL VOID itoa(UB *str, UW i)
59 {
60         UB conv[] = "0123456789";
61         W j=0;
62
63         if (i > 1000000000) {
64                 str[j++] = conv[(i/1000000000)%10];
65         }
66         if (i > 100000000) {
67                 str[j++] = conv[(i/100000000)%10];
68         }
69         if (i > 10000000) {
70                 str[j++] = conv[(i/10000000)%10];
71         }
72         if (i > 1000000) {
73                 str[j++] = conv[(i/1000000)%10];
74         }
75         if (i > 100000) {
76                 str[j++] = conv[(i/100000)%10];
77         }
78         if (i > 10000) {
79                 str[j++] = conv[(i/10000)%10];
80         }
81         if (i > 1000) {
82                 str[j++] = conv[(i/1000)%10];
83         }
84         if (i > 100) {
85                 str[j++] = conv[(i/100)%10];
86         }
87         if (i > 10) {
88                 str[j++] = conv[(i/10)%10];
89         }
90         str[j++] = conv[i%10];
91         str[j++] = '\0';
92 }
93
94 LOCAL W datretriever_checkcacheheader_LastModified(datretriever_t *retriever, UB **str, W *len)
95 {
96         UB *header,*p,*start,*end;
97         W header_len,i;
98
99         datcache_getlatestheader(retriever->cache, &header, &header_len);
100         if (header == NULL) {
101                 return -1;
102         }
103         p = strstr(header, "Last-Modified:");
104         if (p == NULL) {
105                 return -1;
106         }
107         p = strchr(p, ':');
108         for (i=1; p[i] != '\r'; i++) {
109                 if (p[i] != ' ') {
110                         break;
111                 }
112         }
113         start = p+i;
114         end = strchr(start, '\r');
115         if (end == NULL) {
116                 return -1;
117         }
118
119         *str = start;
120         *len = end - start;
121
122         return 0;
123 }
124
125 LOCAL W datretriever_checkcacheheader_Etag(datretriever_t *retriever, UB **str, W *len)
126 {
127         UB *header,*p,*start,*end;
128         W header_len,i;
129
130         datcache_getlatestheader(retriever->cache, &header, &header_len);
131         if (header == NULL) {
132                 return -1;
133         }
134         p = strstr(header, "ETag:");
135         if (p == NULL) {
136                 return -1;
137         }
138         p = strchr(p, ':');
139         for (i=1; p[i] != '\r'; i++) {
140                 if (p[i] != ' ') {
141                         break;
142                 }
143         }
144         start = p+i;
145         end = strchr(start+1, '\"');
146         if (end == NULL) {
147                 return -1;
148         }
149
150         *str = start;
151         *len = end - start + 1;
152
153         return 0;
154 }
155
156 EXPORT W datretriever_check_rangerequest(datretriever_t *retriever, UB **last_modified, W *last_modified_len, UB **etag, W *etag_len, W *size)
157 {
158         W err;
159         W lm_len,et_len;
160         UB *lm, *et;
161
162         err = datretriever_checkcacheheader_LastModified(retriever, &lm, &lm_len);
163         if (err < 0) {
164                 return err;
165         }
166         err = datretriever_checkcacheheader_Etag(retriever, &et, &et_len);
167         if (err < 0) {
168                 return err;
169         }
170
171         *last_modified = lm;
172         *last_modified_len = lm_len;
173         *etag = et;
174         *etag_len = et_len;
175         *size = datcache_datasize(retriever->cache);
176
177         return 0;
178 }
179
/*
 * HTTP_ERR_SEND_LEN(http, str, len)
 *   Send len bytes of str through http_send(); on a negative result, return
 *   that result from the ENCLOSING function.  The enclosing function must
 *   declare a variable named "err" that can hold the result.
 *
 * HTTP_ERR_SEND(http, str)
 *   Same, for a NUL-terminated string.  str is evaluated twice (once for
 *   the data, once for strlen), so it must not have side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, e.g. inside an unbraced if/else.
 */
#define HTTP_ERR_SEND_LEN(http, str, len) \
   do { \
     err = http_send((http), (str), (len)); \
     if(err < 0){ \
       return err; \
     } \
   } while (0)

#define HTTP_ERR_SEND(http, str) HTTP_ERR_SEND_LEN((http), (str), strlen((str)))
187
188 LOCAL W datretriever_http_sendheder(datretriever_t *retriever)
189 {
190         W err;
191
192         HTTP_ERR_SEND(retriever->http, "GET /");
193         HTTP_ERR_SEND(retriever->http, retriever->board);
194         HTTP_ERR_SEND(retriever->http, "/dat/");
195         HTTP_ERR_SEND(retriever->http, retriever->thread);
196         HTTP_ERR_SEND(retriever->http, ".dat HTTP/1.1\r\n");
197         HTTP_ERR_SEND(retriever->http, "Accept-Encoding: gzip\r\n");
198         HTTP_ERR_SEND(retriever->http, "HOST: ");
199         HTTP_ERR_SEND(retriever->http, retriever->server);
200         HTTP_ERR_SEND(retriever->http, "\r\n");
201         HTTP_ERR_SEND(retriever->http, "Accept: */*\r\n");
202         HTTP_ERR_SEND(retriever->http, "Referer: http://");
203         HTTP_ERR_SEND(retriever->http, retriever->server);
204         HTTP_ERR_SEND(retriever->http, "/test/read.cgi/");
205         HTTP_ERR_SEND(retriever->http, retriever->board);
206         HTTP_ERR_SEND(retriever->http, "/");
207         HTTP_ERR_SEND(retriever->http, retriever->thread);
208         HTTP_ERR_SEND(retriever->http, "/\r\n");
209         HTTP_ERR_SEND(retriever->http, "Accept-Language: ja\r\nUser-Agent: Monazilla/1.00 (bchan/0.201)\r\nConnection: close\r\n\r\n");
210
211         return 0;
212 }
213
214 LOCAL W datretriever_http_sendheader_partial(datretriever_t *retriever, UB *etag, W etag_len, UB *lastmodified, W lastmodified_len, W rangebytes)
215 {
216         W err;
217         UB numstr[11];
218
219         HTTP_ERR_SEND(retriever->http, "GET /");
220         HTTP_ERR_SEND(retriever->http, retriever->board);
221         HTTP_ERR_SEND(retriever->http, "/dat/");
222         HTTP_ERR_SEND(retriever->http, retriever->thread);
223         HTTP_ERR_SEND(retriever->http, ".dat HTTP/1.1\r\n");
224         HTTP_ERR_SEND(retriever->http, "HOST: ");
225         HTTP_ERR_SEND(retriever->http, retriever->server);
226         HTTP_ERR_SEND(retriever->http, "\r\n");
227         HTTP_ERR_SEND(retriever->http, "Accept: */*\r\n");
228         HTTP_ERR_SEND(retriever->http, "Referer: http://");
229         HTTP_ERR_SEND(retriever->http, retriever->server);
230         HTTP_ERR_SEND(retriever->http, "/test/read.cgi/");
231         HTTP_ERR_SEND(retriever->http, retriever->board);
232         HTTP_ERR_SEND(retriever->http, "/");
233         HTTP_ERR_SEND(retriever->http, retriever->thread);
234         HTTP_ERR_SEND(retriever->http, "/\r\n");
235
236         HTTP_ERR_SEND(retriever->http, "If-Modified-Since: ");
237         HTTP_ERR_SEND_LEN(retriever->http, lastmodified, lastmodified_len);
238         HTTP_ERR_SEND(retriever->http, "\r\n");
239         HTTP_ERR_SEND(retriever->http, "If-None-Match: ");
240         HTTP_ERR_SEND_LEN(retriever->http, etag, etag_len);
241         HTTP_ERR_SEND(retriever->http, "\r\n");
242         HTTP_ERR_SEND(retriever->http, "Range: bytes=");
243         itoa(numstr, rangebytes);
244         HTTP_ERR_SEND(retriever->http, numstr);
245         HTTP_ERR_SEND(retriever->http, "-\r\n");
246         HTTP_ERR_SEND(retriever->http, "\r\n");
247
248         HTTP_ERR_SEND(retriever->http, "Accept-Language: ja\r\nUser-Agent: Monazilla/1.00 (bchan/0.201)\r\nConnection: close\r\n\r\n");
249
250         return 0;
251 }
252
253 /* from http://www.monazilla.org/index.php?e=198 */
254 #if 0
255 "
GET /[board name]/dat/[thread number].dat HTTP/1.1
Accept-Encoding: gzip
Host: [server]
Accept: */*
Referer: http://[server]/test/read.cgi/[board name]/[thread number]/
Accept-Language: ja
User-Agent: Monazilla/1.00 (browser name/version)
263 Connection: close
264 "
265 #endif
266
267 EXPORT W datretriever_request(datretriever_t *retriever)
268 {
269         W err, ret = -1, len, status, lm_len, et_len, data_len;
270         UB *bin, *lm, *et;
271         http_responsecontext_t *ctx;
272
273         err = http_connect(retriever->http, retriever->server, retriever->server_len);
274         if (err < 0) {
275                 return err;
276         }
277
278         err = datretriever_check_rangerequest(retriever, &lm, &lm_len, &et, &et_len, &data_len);
279         if (err >= 0) {
280                 err = datretriever_http_sendheader_partial(retriever, et, et_len, lm, lm_len, data_len - 1);
281                 if (err < 0) {
282                         DP_ER("datretriever_http_sendheader_partial:", err);
283                         http_close(retriever->http);
284                         return err;
285                 }
286                 err = http_waitresponseheader(retriever->http);
287                 if (err < 0) {
288                         DP_ER("http_waitresponseheader:", err);
289                         http_close(retriever->http);
290                         return err;
291                 }
292
293                 status = http_getstatus(retriever->http);
294                 if (status < 0) {
295                         DP_ER("http_getstatus error:", status);
296                         http_close(retriever->http);
297                         return status;
298                 }
299                 DP(("HTTP/1.1 %d\n", status));
300                 if (status == 206) {
301                         ctx = http_startresponseread(retriever->http);
302                         if (ctx == NULL) {
303                                 DP(("http_startresponseread error\n"));
304                                 http_close(retriever->http);
305                                 return -1; /* TODO */
306                         }
307
308                         err = http_responsecontext_nextdata(ctx, &bin, &len);
309                         if (err < 0) {
310                                 DP_ER("http_responsecontext_nextdata error", err);
311                                 http_endresponseread(retriever->http, ctx);
312                                 return -1;
313                         }
314                         if (bin != NULL) {
315                                 if (bin[0] != '\n') {
316                                         /* todo all reloading. */
317                                         DP(("todo all reloading\n"));
318                                         http_endresponseread(retriever->http, ctx);
319                                         return -1;
320                                 }
321                                 datcache_appenddata(retriever->cache, bin+1, len-1);
322                         }
323
324                         for (;;) {
325                                 err = http_responsecontext_nextdata(ctx, &bin, &len);
326                                 if (err < 0) {
327                                         DP_ER("http_responsecontext_nextdata error", err);
328                                         http_endresponseread(retriever->http, ctx);
329                                         return -1;
330                                 }
331                                 if (bin == NULL) {
332                                         break;
333                                 }
334
335                                 datcache_appenddata(retriever->cache, bin, len);
336                         }
337
338                         http_endresponseread(retriever->http, ctx);
339
340                         bin = http_getheader(retriever->http);
341                         len = http_getheaderlength(retriever->http);
342                         err = datcache_updatelatestheader(retriever->cache, bin, len);
343                         if (err < 0) {
344                                 DP_ER("datcache_updatelatestheader error", err);
345                                 return err;
346                         }
347
348                         DP(("dat append\n"));
349                         ret = DATRETRIEVER_REQUEST_PARTIAL_CONTENT;
350                 } else if (status == 304) {
351                         DP(("not modified\n"));
352                         http_close(retriever->http);
353                         ret = DATRETRIEVER_REQUEST_NOT_MODIFIED;
354                 } else if (status == 203) {
355                         DP(("non-authoritative\n"));
356                         http_close(retriever->http);
357                         ret = DATRETRIEVER_REQUEST_NON_AUTHORITATIVE;
358                 } else if (status == 404) { /* for vip2ch.com */
359                         DP(("not found\n"));
360                         http_close(retriever->http);
361                         ret = DATRETRIEVER_REQUEST_NON_AUTHORITATIVE;
362                 } else if (status == 416) {
363                         /* todo all reloading. */
364                         http_close(retriever->http);
365                         ret = DATRETRIEVER_REQUEST_ALLRELOAD;
366                 } else {
367                         DP(("another status = %d\n", status));
368                         http_close(retriever->http);
369                 }
370         } else {
371                 err = datretriever_http_sendheder(retriever);
372                 if (err < 0) {
373                         http_close(retriever->http);
374                         return err;
375                 }
376
377                 err = http_waitresponseheader(retriever->http);
378                 if (err < 0) {
379                         http_close(retriever->http);
380                         return err;
381                 }
382
383                 status = http_getstatus(retriever->http);
384                 if (status < 0) {
385                         http_close(retriever->http);
386                         return status;
387                 }
388                 DP(("HTTP/1.1 %d\n", status));
389                 if (status == 203) {
390                         http_close(retriever->http);
391                         DP(("non-authoritative\n"));
392                         return DATRETRIEVER_REQUEST_NON_AUTHORITATIVE;
393                 } else if (status == 302) {
394                         /* TODO: check Location header */
395                         /*  Location:http://qb6.2ch.net/_403/ */
396                         /*  Location:http://www2.2ch.net/403/ */
397                         http_close(retriever->http);
398                         DP(("Found\n"));
399                         return DATRETRIEVER_REQUEST_NOT_FOUND;
400                 } else if (status == 404) {
401                         http_close(retriever->http);
402                         DP(("not-found\n"));
403                         return DATRETRIEVER_REQUEST_NOT_FOUND;
404                 } else if (status != 200) {
405                         DP(("another status = %d\n", status));
406                         http_close(retriever->http);
407                         return DATRETRIEVER_REQUEST_UNEXPECTED;
408                 }
409
410                 ctx = http_startresponseread(retriever->http);
411                 if (ctx == NULL) {
412                         return -1; /* TODO */
413                 }
414
415                 datcache_cleardata(retriever->cache);
416
417                 for (;;) {
418                         err = http_responsecontext_nextdata(ctx, &bin, &len);
419                         if (err < 0) {
420                                 DP_ER("error http_responsecontext_nextdata", err);
421                                 http_endresponseread(retriever->http, ctx);
422                                 return -1;
423                         }
424                         if (bin == NULL) {
425                                 break;
426                         }
427
428                         datcache_appenddata(retriever->cache, bin, len);
429                 }
430
431                 http_endresponseread(retriever->http, ctx);
432
433                 bin = http_getheader(retriever->http);
434                 len = http_getheaderlength(retriever->http);
435                 err = datcache_updatelatestheader(retriever->cache, bin, len);
436                 if (err < 0) {
437                         return err;
438                 }
439
440                 ret = DATRETRIEVER_REQUEST_ALLRELOAD;
441         }
442
443         return ret;
444 }
445
446 EXPORT Bool datretriever_isenablenetwork(datretriever_t *retriever)
447 {
448         if ((retriever->server != NULL)
449                 &&(retriever->board != NULL)
450                 &&(retriever->thread != NULL)) {
451                 return True;
452         }
453         return False;
454 }
455
#ifdef BCHAN_CONFIG_DEBUG
/*
 * Debug dump (built only with BCHAN_CONFIG_DEBUG).
 *
 * Prints the header held by the http object, then the Last-Modified and
 * ETag values found in the cache's stored header (each only when the lookup
 * succeeds), then two empty lines.  Prints nothing at all when the http
 * object has no header.
 */
EXPORT VOID DATRETRIEVER_DP(datretriever_t *retriever)
{
        UB *resp, *value;
        W value_len, pos, err;

        resp = http_getheader(retriever->http);
        if (resp == NULL) {
                return;
        }

        printf("%s\n\n", resp);

        /* the values are not NUL-terminated, so emit them byte by byte */
        err = datretriever_checkcacheheader_LastModified(retriever, &value, &value_len);
        if (err >= 0) {
                printf("Last-Modified: ");
                pos = 0;
                while (pos < value_len) {
                        putchar(value[pos]);
                        pos++;
                }
                printf("\n");
        }

        err = datretriever_checkcacheheader_Etag(retriever, &value, &value_len);
        if (err >= 0) {
                printf("Etag: ");
                pos = 0;
                while (pos < value_len) {
                        putchar(value[pos]);
                        pos++;
                }
                printf("\n");
        }

        printf("\n");
        printf("\n");
}
#endif
496
497 LOCAL W datretriever_new_prepareinfo(datretriever_t *retriever, datcache_t *cache)
498 {
499         UB *str;
500         W len, ret = -1;
501
502         datcache_gethost(cache, &str, &len);
503         if (str == NULL) {
504                 ret = 0;
505                 goto error_server;
506         }
507         retriever->server = malloc(sizeof(UB)*(len+1));
508         if (retriever->server == NULL) {
509                 ret = -1; /* TODO */
510                 goto error_server;
511         }
512         memcpy(retriever->server, str, len);
513         retriever->server[len] = '\0';
514         retriever->server_len = len;
515
516         datcache_getborad(cache, &str, &len);
517         if (str == NULL) {
518                 ret = 0;
519                 goto error_board;
520         }
521         retriever->board = malloc(sizeof(UB)*(len+1));
522         if (retriever->board == NULL) {
523                 ret = -1; /* TODO */
524                 goto error_board;
525         }
526         memcpy(retriever->board, str, len);
527         retriever->board[len] = '\0';
528         retriever->board_len = len;
529
530         datcache_getthread(cache, &str, &len);
531         if (str == NULL) {
532                 ret = 0;
533                 goto error_thread;
534         }
535         retriever->thread = malloc(sizeof(UB)*(len+1));
536         if (retriever->thread == NULL) {
537                 ret = -1; /* TODO */
538                 goto error_thread;
539         }
540         memcpy(retriever->thread, str, len);
541         retriever->thread[len] = '\0';
542         retriever->thread_len = len;
543
544         return 1;
545
546 error_thread:
547         free(retriever->board);
548 error_board:
549         free(retriever->server);
550 error_server:
551         retriever->thread = NULL;
552         retriever->board = NULL;
553         retriever->server = NULL;
554         return ret;
555 }
556
557 EXPORT datretriever_t* datretriever_new(datcache_t *cache)
558 {
559         datretriever_t *retriever;
560         W err;
561
562         retriever = malloc(sizeof(datretriever_t));
563         if (retriever == NULL) {
564                 goto error_fetch;
565         }
566         retriever->cache = cache;
567
568         retriever->http = http_new();
569         if (retriever->http == NULL) {
570                 goto error_http;
571         }
572
573         err = datretriever_new_prepareinfo(retriever, cache);
574         if (err < 0) {
575                 goto error_info;
576         }
577
578         return retriever;
579
580 error_info:
581         http_delete(retriever->http);
582 error_http:
583         free(retriever);
584 error_fetch:
585         return NULL;
586 }
587
588 EXPORT VOID datretriever_delete(datretriever_t *retriever)
589 {
590         if (retriever->http != NULL) {
591                 http_delete(retriever->http);
592         }
593
594         if (retriever->thread != NULL) {
595                 free(retriever->thread);
596         }
597         if (retriever->board != NULL) {
598                 free(retriever->board);
599         }
600         if (retriever->server != NULL) {
601                 free(retriever->server);
602         }
603         free(retriever);
604 }