4 * Copyright (c) 2009-2010 project bchan
6 * This software is provided 'as-is', without any express or implied
7 * warranty. In no event will the authors be held liable for any damages
8 * arising from the use of this software.
10 * Permission is granted to anyone to use this software for any purpose,
11 * including commercial applications, and to alter it and redistribute it
12 * freely, subject to the following restrictions:
14 * 1. The origin of this software must not be misrepresented; you must not
15 * claim that you wrote the original software. If you use this software
16 * in a product, an acknowledgment in the product documentation would be
17 * appreciated but is not required.
19 * 2. Altered source versions must be plainly marked as such, and must not be
20 * misrepresented as being the original software.
22 * 3. This notice may not be removed or altered from any source
32 #include <btron/btron.h>
33 #include <btron/bsocket.h>
34 #include <util/zlib.h>
36 #include "retriever.h"
/*
 * Debug output helpers.
 * When BCHAN_CONFIG_DEBUG is defined, DP(arg) expands to `printf arg`, so
 * callers pass a fully parenthesised argument list: DP(("fmt", ...)).
 * DP_ER(msg, err) prints msg followed by `err>>16` in decimal and `err`
 * in hex.
 */
39 #ifdef BCHAN_CONFIG_DEBUG
40 # define DP(arg) printf arg
/* NOTE(review): `err` is expanded without parentheses; pass a simple expression. */
41 # define DP_ER(msg, err) printf("%s (%d/%x)\n", msg, err>>16, err)
/* Variant that expands to nothing; presumably the non-debug branch (the #else is not visible here). */
44 # define DP_ER(msg, err) /**/
/*
 * Retriever state (presumably the struct behind datretriever_t -- confirm).
 * Code in this file accesses the members cache, http, server/server_len,
 * board/board_len and thread/thread_len.
 */
47 struct datretriever_t_ {
/*
 * Write the decimal representation of i into str.
 *
 * One digit is emitted per decimal place, from the 10^9 place down to the
 * ones place, using the lookup string `conv`. Up to 10 digits can be
 * written, so str must have room for them plus any terminator.
 *
 * NOTE(review): the conditions guarding each digit and the termination of
 * str are not visible here. The caller in this file passes the result to
 * HTTP_ERR_SEND (which uses strlen), so str is presumably NUL-terminated
 * and leading zeros are presumably skipped -- confirm.
 */
58 LOCAL VOID itoa(UB *str, UW i)
/* Digit lookup table indexed by a value 0-9. */
60 UB conv[] = "0123456789";
64 str[j++] = conv[(i/1000000000)%10];
67 str[j++] = conv[(i/100000000)%10];
70 str[j++] = conv[(i/10000000)%10];
73 str[j++] = conv[(i/1000000)%10];
76 str[j++] = conv[(i/100000)%10];
79 str[j++] = conv[(i/10000)%10];
82 str[j++] = conv[(i/1000)%10];
85 str[j++] = conv[(i/100)%10];
88 str[j++] = conv[(i/10)%10];
/* Ones place. */
90 str[j++] = conv[i%10];
/*
 * Locate the value of the "Last-Modified:" field inside the response
 * header most recently stored in the cache.
 *
 * retriever: retriever whose cache is queried.
 * str, len:  output parameters; presumably receive the start and length of
 *            the field value (the assignments are not visible here).
 */
94 LOCAL W datretriever_checkcacheheader_LastModified(datretriever_t *retriever, UB **str, W *len)
96 UB *header,*p,*start,*end;
/* Fetch the cached header text; header stays NULL when none is stored. */
99 datcache_getlatestheader(retriever->cache, &header, &header_len);
100 if (header == NULL) {
/* NOTE(review): strstr needs a NUL-terminated buffer; header_len is not used for the search -- confirm. */
103 p = strstr(header, "Last-Modified:");
/* Scan the bytes after the match; the loop condition stops only at '\r'. */
/* NOTE(review): no NUL check is visible, so a field without a CR would be read past -- confirm. */
108 for (i=1; p[i] != '\r'; i++) {
/* The value ends at the next CR after start. */
114 end = strchr(start, '\r');
/*
 * Locate the value of the "ETag:" field inside the response header most
 * recently stored in the cache.
 *
 * retriever: retriever whose cache is queried.
 * str, len:  output parameters; *len is set to the number of bytes from
 *            start through end inclusive. *str is presumably set to start
 *            (that assignment is not visible here).
 */
125 LOCAL W datretriever_checkcacheheader_Etag(datretriever_t *retriever, UB **str, W *len)
127 UB *header,*p,*start,*end;
/* Fetch the cached header text; header stays NULL when none is stored. */
130 datcache_getlatestheader(retriever->cache, &header, &header_len);
131 if (header == NULL) {
/* NOTE(review): the match is case-sensitive and strstr needs a NUL-terminated buffer -- confirm. */
134 p = strstr(header, "ETag:");
/* Scan the bytes after the match; the loop condition stops only at '\r'. */
/* NOTE(review): no NUL check is visible, so a field without a CR would be read past -- confirm. */
139 for (i=1; p[i] != '\r'; i++) {
/* Search for a double quote beginning one byte after start; start presumably points at the opening quote. */
145 end = strchr(start+1, '\"');
/* Length covers both the start and end bytes. */
151 *len = end - start + 1;
/*
 * Gather what a conditional range request needs from the cache: the
 * Last-Modified value, the ETag value and the size of the cached data.
 *
 * retriever:                         retriever whose cache is inspected.
 * last_modified, last_modified_len:  output; Last-Modified value and length.
 * etag, etag_len:                    output; ETag value and length.
 * size:                              output; result of datcache_datasize().
 *
 * The return value and the handling of err after each lookup are not
 * visible here; the caller in this file stores the result in err.
 */
156 EXPORT W datretriever_check_rangerequest(datretriever_t *retriever, UB **last_modified, W *last_modified_len, UB **etag, W *etag_len, W *size)
162 err = datretriever_checkcacheheader_LastModified(retriever, &lm, &lm_len);
166 err = datretriever_checkcacheheader_Etag(retriever, &et, &et_len);
172 *last_modified_len = lm_len;
/* Current number of bytes held in the cache. */
175 *size = datcache_datasize(retriever->cache);
/*
 * Send helpers used while building a request.
 *
 * HTTP_ERR_SEND_LEN(http, str, len) calls http_send() and stores the result
 * in a variable named `err`, which must exist in the calling scope. The
 * rest of the macro body is not visible here (presumably it returns on
 * error -- confirm).
 *
 * HTTP_ERR_SEND(http, str) is the same with len = strlen(str), so str must
 * be NUL-terminated; note that str is evaluated twice.
 */
180 #define HTTP_ERR_SEND_LEN(http, str, len) \
181 err = http_send((http), (str), (len)); \
186 #define HTTP_ERR_SEND(http, str) HTTP_ERR_SEND_LEN((http), (str), strlen((str)))
/*
 * Send the request header for fetching the whole .dat file of the thread.
 *
 * The request is assembled piece by piece with HTTP_ERR_SEND from the
 * retriever's board, thread and server strings:
 *   GET /<board>/dat/<thread>.dat HTTP/1.1
 * followed by Accept-Encoding, HOST, Accept, Referer, Accept-Language,
 * User-Agent and Connection fields and the terminating empty line.
 */
188 LOCAL W datretriever_http_sendheder(datretriever_t *retriever)
/* Request line. */
192 HTTP_ERR_SEND(retriever->http, "GET /");
193 HTTP_ERR_SEND(retriever->http, retriever->board);
194 HTTP_ERR_SEND(retriever->http, "/dat/");
195 HTTP_ERR_SEND(retriever->http, retriever->thread);
196 HTTP_ERR_SEND(retriever->http, ".dat HTTP/1.1\r\n");
/* Header fields. */
197 HTTP_ERR_SEND(retriever->http, "Accept-Encoding: gzip\r\n");
198 HTTP_ERR_SEND(retriever->http, "HOST: ");
199 HTTP_ERR_SEND(retriever->http, retriever->server);
200 HTTP_ERR_SEND(retriever->http, "\r\n");
201 HTTP_ERR_SEND(retriever->http, "Accept: */*\r\n");
/* Referer: http://<server>/test/read.cgi/<board>/<thread>/ */
202 HTTP_ERR_SEND(retriever->http, "Referer: http://");
203 HTTP_ERR_SEND(retriever->http, retriever->server);
204 HTTP_ERR_SEND(retriever->http, "/test/read.cgi/");
205 HTTP_ERR_SEND(retriever->http, retriever->board);
206 HTTP_ERR_SEND(retriever->http, "/");
207 HTTP_ERR_SEND(retriever->http, retriever->thread);
208 HTTP_ERR_SEND(retriever->http, "/\r\n");
/* Remaining fields; the trailing empty line ends the header section. */
209 HTTP_ERR_SEND(retriever->http, "Accept-Language: ja\r\nUser-Agent: Monazilla/1.00 (bchan/0.201)\r\nConnection: close\r\n\r\n");
/*
 * Send the request header for a conditional, partial fetch of the thread's
 * .dat file.
 *
 * retriever:                      supplies the connection and the board,
 *                                 thread and server strings.
 * etag, etag_len:                 value sent in If-None-Match.
 * lastmodified, lastmodified_len: value sent in If-Modified-Since.
 * rangebytes:                     first byte offset, sent as
 *                                 "Range: bytes=<rangebytes>-".
 *
 * Unlike datretriever_http_sendheder, no Accept-Encoding field is sent.
 */
214 LOCAL W datretriever_http_sendheader_partial(datretriever_t *retriever, UB *etag, W etag_len, UB *lastmodified, W lastmodified_len, W rangebytes)
/* Request line. */
219 HTTP_ERR_SEND(retriever->http, "GET /");
220 HTTP_ERR_SEND(retriever->http, retriever->board);
221 HTTP_ERR_SEND(retriever->http, "/dat/");
222 HTTP_ERR_SEND(retriever->http, retriever->thread);
223 HTTP_ERR_SEND(retriever->http, ".dat HTTP/1.1\r\n");
/* Header fields. */
224 HTTP_ERR_SEND(retriever->http, "HOST: ");
225 HTTP_ERR_SEND(retriever->http, retriever->server);
226 HTTP_ERR_SEND(retriever->http, "\r\n");
227 HTTP_ERR_SEND(retriever->http, "Accept: */*\r\n");
/* Referer: http://<server>/test/read.cgi/<board>/<thread>/ */
228 HTTP_ERR_SEND(retriever->http, "Referer: http://");
229 HTTP_ERR_SEND(retriever->http, retriever->server);
230 HTTP_ERR_SEND(retriever->http, "/test/read.cgi/");
231 HTTP_ERR_SEND(retriever->http, retriever->board);
232 HTTP_ERR_SEND(retriever->http, "/");
233 HTTP_ERR_SEND(retriever->http, retriever->thread);
234 HTTP_ERR_SEND(retriever->http, "/\r\n");
/* Validators: these are length-delimited, so they go through HTTP_ERR_SEND_LEN. */
236 HTTP_ERR_SEND(retriever->http, "If-Modified-Since: ");
237 HTTP_ERR_SEND_LEN(retriever->http, lastmodified, lastmodified_len);
238 HTTP_ERR_SEND(retriever->http, "\r\n");
239 HTTP_ERR_SEND(retriever->http, "If-None-Match: ");
240 HTTP_ERR_SEND_LEN(retriever->http, etag, etag_len);
241 HTTP_ERR_SEND(retriever->http, "\r\n");
/* Open-ended byte range starting at rangebytes. */
242 HTTP_ERR_SEND(retriever->http, "Range: bytes=");
243 itoa(numstr, rangebytes);
244 HTTP_ERR_SEND(retriever->http, numstr);
245 HTTP_ERR_SEND(retriever->http, "-\r\n");
/*
 * NOTE(review): the Range line was already terminated by "-\r\n" above, so
 * this bare "\r\n" is an empty line, which ends the HTTP header section.
 * The Accept-Language / User-Agent / Connection fields sent next would then
 * arrive after the end of the header -- confirm and drop this line if so.
 */
246 HTTP_ERR_SEND(retriever->http, "\r\n");
248 HTTP_ERR_SEND(retriever->http, "Accept-Language: ja\r\nUser-Agent: Monazilla/1.00 (bchan/0.201)\r\nConnection: close\r\n\r\n");
253 /* from http://www.monazilla.org/index.php?e=198 */
256 GET /[board name]/dat/[thread number].dat HTTP/1.1
257 Accept-Encoding: gzip
260 Referer: http://[server]/test/read.cgi/[board name]/[thread number]/
262 User-Agent: Monazilla/1.00 (browser name/version)
/*
 * Fetch the thread's .dat data from the server and update the cache.
 *
 * After connecting, the cached header is inspected with
 * datretriever_check_rangerequest(). Two request paths are visible:
 *  - a range request (datretriever_http_sendheader_partial) whose body is
 *    appended to the cached data, and
 *  - a full request (datretriever_http_sendheder) that clears the cache and
 *    stores the whole body.
 * The conditions selecting between the paths are not visible here.
 *
 * Results visible in this function: DATRETRIEVER_REQUEST_PARTIAL_CONTENT,
 * DATRETRIEVER_REQUEST_NOT_MODIFIED, DATRETRIEVER_REQUEST_NON_AUTHORITATIVE,
 * DATRETRIEVER_REQUEST_NOT_FOUND, DATRETRIEVER_REQUEST_UNEXPECTED,
 * DATRETRIEVER_REQUEST_ALLRELOAD, or -1 (the initial value of ret and the
 * value returned when http_startresponseread fails).
 */
267 EXPORT W datretriever_request(datretriever_t *retriever)
269 W err, ret = -1, len, status, lm_len, et_len, data_len;
271 http_responsecontext_t *ctx;
273 err = http_connect(retriever->http, retriever->server, retriever->server_len);
/* Look up validators and the cached size needed for a range request. */
278 err = datretriever_check_rangerequest(retriever, &lm, &lm_len, &et, &et_len, &data_len);
/*
 * ---- Range-request path ----
 * The range starts at data_len - 1, i.e. one byte before the end of the
 * cached data, so the first byte of the reply overlaps the last cached byte.
 */
280 err = datretriever_http_sendheader_partial(retriever, et, et_len, lm, lm_len, data_len - 1);
282 DP_ER("datretriever_http_sendheader_partial:", err);
283 http_close(retriever->http);
286 err = http_waitresponseheader(retriever->http);
288 DP_ER("http_waitresponseheader:", err);
289 http_close(retriever->http);
293 status = http_getstatus(retriever->http);
295 DP_ER("http_getstatus error:", status);
296 http_close(retriever->http);
299 DP(("HTTP/1.1 %d\n", status));
/* Start reading the response body (presumably under a status == 206 check -- the condition is not visible). */
301 ctx = http_startresponseread(retriever->http);
303 DP(("http_startresponseread error\n"));
304 http_close(retriever->http);
305 return -1; /* TODO */
/* First chunk of the body. */
308 err = http_responsecontext_nextdata(ctx, &bin, &len);
310 DP_ER("http_responsecontext_nextdata error", err);
311 http_endresponseread(retriever->http, ctx);
/*
 * The overlapping first byte is expected to be '\n'. Anything else
 * presumably means the cached data no longer lines up with the server's
 * copy, so appending would be wrong.
 */
315 if (bin[0] != '\n') {
316 /* todo all reloading. */
317 DP(("todo all reloading\n"));
318 http_endresponseread(retriever->http, ctx);
/* Skip the overlapping byte and append the rest of the first chunk. */
321 datcache_appenddata(retriever->cache, bin+1, len-1);
/* Remaining chunks are appended unchanged. */
325 err = http_responsecontext_nextdata(ctx, &bin, &len);
327 DP_ER("http_responsecontext_nextdata error", err);
328 http_endresponseread(retriever->http, ctx);
335 datcache_appenddata(retriever->cache, bin, len);
338 http_endresponseread(retriever->http, ctx);
/* Store this response's header in the cache as the latest header. */
340 bin = http_getheader(retriever->http);
341 len = http_getheaderlength(retriever->http);
342 err = datcache_updatelatestheader(retriever->cache, bin, len);
344 DP_ER("datcache_updatelatestheader error", err);
348 DP(("dat append\n"));
349 ret = DATRETRIEVER_REQUEST_PARTIAL_CONTENT;
350 } else if (status == 304) {
351 DP(("not modified\n"));
352 http_close(retriever->http);
353 ret = DATRETRIEVER_REQUEST_NOT_MODIFIED;
354 } else if (status == 203) {
355 DP(("non-authoritative\n"));
356 http_close(retriever->http);
357 ret = DATRETRIEVER_REQUEST_NON_AUTHORITATIVE;
/* A 404 on this path is reported the same way as a 203. */
358 } else if (status == 404) { /* for vip2ch.com */
360 http_close(retriever->http);
361 ret = DATRETRIEVER_REQUEST_NON_AUTHORITATIVE;
/* 416: only the result code is set here; no reload is performed in the visible lines. */
362 } else if (status == 416) {
363 /* todo all reloading. */
364 http_close(retriever->http);
365 ret = DATRETRIEVER_REQUEST_ALLRELOAD;
367 DP(("another status = %d\n", status));
368 http_close(retriever->http);
/* ---- Full-download path ---- */
371 err = datretriever_http_sendheder(retriever);
373 http_close(retriever->http);
377 err = http_waitresponseheader(retriever->http);
379 http_close(retriever->http);
383 status = http_getstatus(retriever->http);
385 http_close(retriever->http);
388 DP(("HTTP/1.1 %d\n", status));
/* Status check whose condition is not visible (presumably 203, given the message below). */
390 http_close(retriever->http);
391 DP(("non-authoritative\n"));
392 return DATRETRIEVER_REQUEST_NON_AUTHORITATIVE;
/* A redirect is reported as "not found"; the Location header is not examined. */
393 } else if (status == 302) {
394 /* TODO: check Location header */
395 /* Location:http://qb6.2ch.net/_403/ */
396 /* Location:http://www2.2ch.net/403/ */
397 http_close(retriever->http);
399 return DATRETRIEVER_REQUEST_NOT_FOUND;
400 } else if (status == 404) {
401 http_close(retriever->http);
403 return DATRETRIEVER_REQUEST_NOT_FOUND;
404 } else if (status != 200) {
405 DP(("another status = %d\n", status));
406 http_close(retriever->http);
407 return DATRETRIEVER_REQUEST_UNEXPECTED;
410 ctx = http_startresponseread(retriever->http);
/*
 * NOTE(review): no http_close() is visible before this return, unlike the
 * same failure in the range-request path -- confirm the connection is not
 * left open.
 */
412 return -1; /* TODO */
/* Discard the cached data before storing the full body. */
415 datcache_cleardata(retriever->cache);
418 err = http_responsecontext_nextdata(ctx, &bin, &len);
420 DP_ER("error http_responsecontext_nextdata", err);
421 http_endresponseread(retriever->http, ctx);
428 datcache_appenddata(retriever->cache, bin, len);
431 http_endresponseread(retriever->http, ctx);
/* Store this response's header in the cache as the latest header. */
433 bin = http_getheader(retriever->http);
434 len = http_getheaderlength(retriever->http);
435 err = datcache_updatelatestheader(retriever->cache, bin, len);
440 ret = DATRETRIEVER_REQUEST_ALLRELOAD;
/*
 * Check whether the retriever has the server, board and thread strings set
 * (all three non-NULL). The returned values are not visible here;
 * presumably true when all three are set -- confirm.
 */
446 EXPORT Bool datretriever_isenablenetwork(datretriever_t *retriever)
448 if ((retriever->server != NULL)
449 &&(retriever->board != NULL)
450 &&(retriever->thread != NULL)) {
/*
 * Debug-only dump (compiled when BCHAN_CONFIG_DEBUG is defined).
 * Prints the header currently held by the HTTP object, then looks up the
 * Last-Modified and ETag values from the cached header and iterates over
 * each value by its length.
 */
456 #ifdef BCHAN_CONFIG_DEBUG
457 EXPORT VOID DATRETRIEVER_DP(datretriever_t *retriever)
461 header = http_getheader(retriever->http);
462 if (header == NULL) {
/* Printed with %s, so header is assumed to be NUL-terminated. */
466 printf("%s\n\n", header);
470 err = datretriever_checkcacheheader_LastModified(retriever, &str, &len);
472 printf("Last-Modified: ");
/* The value is length-delimited, so it is walked byte by byte. */
473 for (i=0;i<len;i++) {
482 err = datretriever_checkcacheheader_Etag(retriever, &str, &len);
485 for (i=0;i<len;i++) {
/*
 * Copy the host, board and thread strings out of the cache into the
 * retriever.
 *
 * Each string is obtained from the cache as a pointer/length pair, copied
 * into a newly malloc'ed buffer of len+1 bytes and NUL-terminated; its
 * length is stored in the matching *_len member.
 *
 * The trailing free()/NULL lines look like the failure path that undoes
 * earlier allocations; the labels and return values are not visible here.
 */
497 LOCAL W datretriever_new_prepareinfo(datretriever_t *retriever, datcache_t *cache)
/* Server (host) name. */
502 datcache_gethost(cache, &str, &len);
507 retriever->server = malloc(sizeof(UB)*(len+1));
508 if (retriever->server == NULL) {
512 memcpy(retriever->server, str, len);
513 retriever->server[len] = '\0';
514 retriever->server_len = len;
/* Board name ("getborad" is the spelling of the datcache function as called here). */
516 datcache_getborad(cache, &str, &len);
521 retriever->board = malloc(sizeof(UB)*(len+1));
522 if (retriever->board == NULL) {
526 memcpy(retriever->board, str, len);
527 retriever->board[len] = '\0';
528 retriever->board_len = len;
/* Thread identifier. */
530 datcache_getthread(cache, &str, &len);
535 retriever->thread = malloc(sizeof(UB)*(len+1));
536 if (retriever->thread == NULL) {
540 memcpy(retriever->thread, str, len);
541 retriever->thread[len] = '\0';
542 retriever->thread_len = len;
/* Release the buffers allocated so far and reset all three pointers. */
547 free(retriever->board);
549 free(retriever->server);
551 retriever->thread = NULL;
552 retriever->board = NULL;
553 retriever->server = NULL;
/*
 * Create a retriever bound to the given cache.
 *
 * Allocates the retriever, stores the cache pointer, creates the HTTP
 * object with http_new() and fills in the server/board/thread strings via
 * datretriever_new_prepareinfo().
 *
 * The return statements are not visible here; presumably NULL is returned
 * on failure -- confirm.
 */
557 EXPORT datretriever_t* datretriever_new(datcache_t *cache)
559 datretriever_t *retriever;
562 retriever = malloc(sizeof(datretriever_t));
563 if (retriever == NULL) {
/* The cache pointer is stored as-is. */
566 retriever->cache = cache;
568 retriever->http = http_new();
569 if (retriever->http == NULL) {
573 err = datretriever_new_prepareinfo(retriever, cache);
/* Release the HTTP object (presumably on the prepareinfo failure path). */
581 http_delete(retriever->http);
588 EXPORT VOID datretriever_delete(datretriever_t *retriever)
590 if (retriever->http != NULL) {
591 http_delete(retriever->http);
594 if (retriever->thread != NULL) {
595 free(retriever->thread);
597 if (retriever->board != NULL) {
598 free(retriever->board);
600 if (retriever->server != NULL) {
601 free(retriever->server);