2 # -*- coding: euc-jp -*-
5 # Convert ~/howm/ to HTML or other formats.
6 # Apart from HTML, only RD format is supported unless you send me patches. :p
11 name = File::basename $0
13 #{name}: howm ¥á¥â¤ò¥Õ¥©¡¼¥Þ¥Ã¥È
14 ¡¦¤ä¤Ð¤¤Ê¸»ú¤ò¥¨¥¹¥±¡¼¥×
15 ¡¦¥¡¼¥ï¡¼¥É¤ò¥ê¥ó¥¯¤ËÊÑ´¹
16 ¡¦¥Ø¥Ã¥À¤È¥Õ¥Ã¥¿¤ò¤Ä¤±¤ë
18 #{name} ~/howm/ ~/converted/
19 #{name} -type=rd ~/howm/ ~/converted/
20 ls ~/howm/*/*/*7-*.txt | #{name} -list ~/converted/
21 grep -rl '¤Û¤²' ~/howm/ | #{name} -list ~/converted/
23 -type=rd ¤Þ¤¿¤Ï -t=rd ¢ª ¥Õ¥©¡¼¥Þ¥Ã¥È¤Î¼ïÎà¤ò»ØÄê
25 rd ¡Ä see http://www2.pos.to/~tosh/ruby/rdtool/ja/
26 rdbody ¡Ä rd ¤Î =begin ¤È =end ¤¬¤Ê¤¤¤â¤Î¤òÆþÎÏ. ¤³¤ì¤é¤òÊä¤Ã¤ÆÀ¸À®.
27 -list ¢ª ¥á¥â¥Õ¥¡¥¤¥ë¤Î¥ê¥¹¥È¤òɸ½àÆþÎϤ«¤éÆɤà
28 -exclude='^[.]|CVS' ¢ª Âоݳ°¤Î¥Õ¥¡¥¤¥ë¤òÀµµ¬É½¸½¤Ç»ØÄê
29 -r ¢ª ¥á¥â°ìÍ÷¤ò¿·¤·¤¤½ç¤Ëʤ٤ë
30 -i ¢ª <<< ¤ÇÂçʸ»ú¾®Ê¸»ú¤ò¶èÊ̤·¤Ê¤¤
31 -title='Index' ¢ª index ¥Ú¡¼¥¸¤ÎÂê̾
32 -silent ¤Þ¤¿¤Ï -s ¢ª ¿ÊĽɽ¼¨¤ò¤·¤Ê¤¤
33 -goto='>>>' ¢ª goto link ¤Î½ñ¼°
34 -comefrom='<<<' ¢ª come-from link ¤Î½ñ¼°
35 -no_alias ¢ª come-from ¥¡¼¥ï¡¼¥É¤Î alias ¤ò̵»ë
36 -help ¤Þ¤¿¤Ï -h ¢ª ¤³¤Î¥á¥Ã¥»¡¼¥¸¤òɽ¼¨
37 (-debug ¢ª ¥Ç¥Ð¥Ã¥°ÍѽÐÎÏ)
41 argv_len = $list ? 1 : 2
42 if ($help || $h || ARGV.length != argv_len)
47 #####################################
49 $type ||= $t || 'html'
50 $exclude ||= "^[.\#]|CVS|~$"
57 $url_regexp = %r!((http|file)://\S+)!
59 def come_go_match(str)
61 when /#$comefrom|#$goto/
64 while s =~ /((#$comefrom|#$goto) *(.+?)) *($|(#$comefrom|#$goto).*)/
66 type = ($2 == $comefrom) ? :comefrom : :goto
69 r.push [type, key, raw]
85 #####################################
# Formatter callback factory: builds a lambda that accepts any number of
# arguments, ignores all of them, and evaluates to an empty string.
def empty
  lambda do |*_unused|
    ""
  end
end
# Formatter callback factory: builds a lambda that accepts any number of
# arguments, ignores them, and always evaluates to the given +str+ object.
def constant(str)
  lambda do |*_unused|
    str
  end
end
# Formatter callback factory: builds a one-argument lambda that returns its
# argument with +str+ concatenated onto the end (via +), leaving the
# argument itself unmodified.
def appender(str)
  lambda do |name|
    name + str
  end
end
# Formatter callback factory: builds a lambda that hands back its first
# argument untouched (nil when called with no arguments); any further
# arguments are ignored.
def no_change
  lambda do |*args|
    args.first
  end
end
92 $formatter = Hash::new
94 $formatter['html'] = {
95 :escaper => lambda{|str| CGI::escapeHTML str},
96 :unescaper => lambda{|str| CGI::unescapeHTML str},
98 :namer => appender('.b.html'),
99 :header => lambda{|file|
100 %!<HTML><TITLE>#{file}</TITLE><BODY><H1>#{file}</A></H1><HR><PRE>\n!
102 :come_tag => lambda{|a|
104 %!<A NAME="#{a[:occur][0][:anch]}"></A><A HREF="#{a[:rpath]}\##{a[:anch_n]}" NAME="#{a[:anch]}">#{a[:orig]}</A>!
106 :come_jump => lambda{|a|
107 %!<A HREF="#{a[:path]}\##{a[:anch]}">#{a[:orig]}</A>!
109 :come_anchor => lambda{|a|
110 %!<A NAME="#{a[:occur][0][:anch]}"></A>!
112 :go_tag => lambda{|a|
113 %!<A NAME="#{a[:occur][0][:anch]}"></A><A HREF="#{a[:rpath]}\##{a[:anch]}" NAME="#{a[:anch]}">#{a[:orig]}</A>!
115 :go_anchor => lambda{|a|
116 %!<A NAME="#{a[:occur][0][:anch]}"></A>!
117 # %!<A NAME="#{a[:occur][0][:anch]}">#{a[:key]}</A>!
120 %!<A HREF="#{a[:url]}">#{CGI::unescapeHTML a[:url]}</A>!
122 :footer => lambda{|file|
123 %!</PRE><HR><A HREF="#{to_index file}">index</A></BODY></HTML>\n!
126 :ref_namer => appender('.r.html'),
127 :ref_header => lambda{|file|
128 "<HTML><TITLE>#{file}</TITLE><BODY><H1>References: #{file}</H1>\n"
130 :ref_itemer => lambda{|a|
132 url = go ? "file://#{a[:goto_file]}" : "#{a[:path]}\##{a[:anch]}"
134 %!<A HREF="#{url}" NAME="#{a[:anch]}"><H2>#{a[:key]} (#{ocs.length})</H2></A>\n<OL>\n! +
136 %!<LI><A HREF="#{oc[:path]}\##{oc[:anch]}">#{oc[:file]}</A> #{oc[:text]}\n!
140 :ref_footer => constant("</BODY></HTML>\n"),
142 :index_namer => constant('index.html'),
143 :index_header => constant("<HTML><TITLE>#{$title}</TITLE><BODY><H1>#{$title}</H1>\n"),
144 :index_keyworder => lambda{|as|
145 "<H2>Keywords (#{as.length})</H2>\n" +
146 as.map{|a| %!<A HREF="#{a[:dest]}\##{a[:anch]}">#{a[:key]}</A>!}.join(" /\n") +
149 :index_filer => lambda{|as|
150 "<H2>Files (#{as.length})</H2>\n<OL>\n" +
151 as.map{|a| %!<LI><A HREF="#{a[:dest]}">#{a[:file]}</A>: #{a[:title]}\n!}.join +
154 :index_footer => constant("</BODY></HTML>\n"),
157 $formatter['rd'] = { # RD doesn't have anchor?
158 :escaper => no_change,
159 :unescaper => no_change,
161 :namer => appender('.b.rd'),
163 :come_tag => lambda{|a| %!((<"#{a[:orig]}"|URL:#{a[:rpath]}>))!},
164 :come_jump => lambda{|a| %!((<"#{a[:orig]}"|URL:#{a[:path]}>))!},
165 :come_anchor => constant(''),
167 :go_tag => lambda{|a| %!((<"#{a[:orig]}"|URL:#{a[:rpath]}>))!},
168 :go_anchor => constant(''),
169 :url => lambda{|a| %!((<"#{a[:url]}"|URL:#{a[:url]}>))!},
171 :ref_namer => appender('.r.rd'),
172 :ref_header => lambda{|file| "=begin\n= References: #{file}\n"},
173 :ref_itemer => lambda{|a|
175 url = go ? "file://#{go}" : "#{a[:path]}"
176 %!== ((<"#{a[:key]}"|URL:#{url}>))\n! +
178 %!* ((<"#{oc[:file]}"|URL:#{oc[:path]}>)) #{oc[:text]}\n!
182 :ref_footer => constant("=end\n"),
184 :index_namer => constant('index.rd'),
185 :index_header => constant("=begin\n= #{$title}\n"),
186 :index_keyworder => lambda{|as|
187 "== Keywords (#{as.length})\n" +
188 as.map{|a| %!((<"#{a[:key]}"|URL:#{a[:dest]}>))!}.join(" /\n") +
191 :index_filer => lambda{|as|
192 "== Files (#{as.length})\n" +
193 as.map{|a| %!* ((<"#{a[:file]}"|URL:#{a[:dest]}>)): #{a[:title]}\n!}.join
195 :index_footer => constant("=end\n"),
# 'rdbody' formatter: a shallow copy of the 'rd' formatter in which the page
# header and footer callbacks are replaced by ones emitting the bare
# "=begin" / "=end" lines.
$formatter['rdbody'] = $formatter['rd'].merge(
  :header => constant("=begin\n"),
  :footer => constant("=end\n")
)
203 #####################################
209 while (i = index substr, pos)
210 a.push(substr.is_a?(Regexp) ? [i, $&] : i)
217 class HashList < Hash
219 self[key] ||= Array::new
226 Dir::open(dir){|d| d.each{|f| a.push f}} # map doesn't work??
229 next if f =~ /#$exclude/
230 path = File::expand_path f, dir
231 b.push f if FileTest::file? path
232 b += ls_R(path).map{|g| "#{f}/#{g}"} if FileTest::directory? path
238 def bundle(file_list)
239 fs = file_list.map{|f| File::expand_path f}
240 ds = fs.map{|f| File::dirname f}
244 if common.length <= d.length
247 common = File::dirname common
251 rs = fs.map{|f| f[(common.length + 1)..-1]} # +1 for '/'
257 parent = File::dirname path
258 return true if parent == path # root dir
260 if !FileTest::exist? path
266 def relative_path(target, origin)
267 return target if origin == '.'
270 root = origin.split(sep).map{|any| parent}.join(sep)
271 return root + sep + target
275 relative_path $formatter[$type][:index_namer].call, File::dirname(origin)
278 $unique_id = ':000000'
279 def unique_name(base)
280 base + $unique_id.succ!.dup
283 #####################################
285 $titles_in_file = HashList::new # dirty!
286 def come_go_master(files, dir, formatter)
287 h = HashList::new # key => master files
290 open(File::expand_path(f, dir)){|io|
292 if (t = title_match line)
293 $titles_in_file.cons f, t
295 if (found = come_go_match line)
300 s = formatter[:escaper].call raw
301 k = formatter[:escaper].call key
302 g = formatter[:namer].call f
303 r = formatter[:ref_namer].call f
309 :occur => Array::new,
317 h.cons s, [:come_tag, arg]
318 h.cons k, [:come_jump, arg]
319 h.cons k, [:come_anchor, arg]
323 h.cons s, [:go_tag, arg]
324 h.cons k, [:go_anchor, arg]
327 if equiv_key.length > 1
328 aliases += [equiv_key, equiv_raw]
337 def format_line(line, prog)
338 match = HashList::new # pos => key
340 regexp, func, greedy = [:regexp, :func, :greedy].map{|k| rule[k]}
341 line.indices(regexp).each{|r|
343 match.cons i, [k, func, greedy]
349 match.keys.sort.each{|i|
350 skipping = (i < cursor)
352 done += line[cursor..(i - 1)] if i > 0 # 'foobar'[0..-1] is 'foobar'
356 key, func, greedy = com
357 next if greedy && skipping
358 done += func.call(key, line)
360 cursor = i + key.length
365 if (cursor <= (len = line.length))
366 done += line[cursor..len]
371 def format_io(input, output, prog_src, compiler, escaper)
372 a = input.readlines.map{|s| escaper.call s}
374 matched_rules = prog_src.select{|rule|
375 whole =~ rule[:regexp]
377 prog = matched_rules.map{|r| compiler.call r}
379 output.print format_line(line, prog)
383 #####################################
386 STDERR.print str if !$silent
390 dest_dir = ARGV.shift
391 src_dir, files = bundle(STDIN.readlines.map{|s| s.chomp})
393 src_dir, dest_dir = ARGV
396 notice "#{files.length} files "
397 fmt = $formatter[$type]
398 k2m, aliases = come_go_master files, src_dir, fmt
399 aliases = [] if $no_alias
400 notice "(#{k2m.length} entries)\n"
405 type0, arg0 = k2m[key0][0]
409 [:occur, :file, :dest, :ref].each{|x|
412 arg[:anch_alias] = arg0[:anch] if type == :come_tag
417 notice 'body pages: '
419 nongreedy = Array::new
423 r = /#{Regexp::escape k}/
424 g = [:come_tag, :come_jump, :go_tag].member?(type)
425 z = g ? greedy : nongreedy
426 h = {:raw => k, :regexp => r, :type => type, :arg => arg, :greedy => g}
430 greedy.sort!{|x, y| x[:raw].length <=> y[:raw].length}
433 p nongreedy if $debug
434 u = {:regexp => $url_regexp, :type => :url, :arg => Hash::new, :greedy => true}
435 prog_src = nongreedy + [u] + greedy
438 g = fmt[:namer].call f
439 r = fmt[:ref_namer].call f
440 spath = File::expand_path f, src_dir
441 dpath, rpath = [g, r].map{|x| File::expand_path x, dest_dir}
442 mkdir_p File::dirname(dpath)
443 compiler = lambda{|h|
450 dir = File::dirname(f)
451 arg[:path] = relative_path arg[:dest], dir
452 arg[:rpath] = relative_path arg[:ref], dir
453 a = unique_name arg[:anch]
454 path = relative_path(g, dir)
456 :file => f, :path => path, :text => s.chop,
457 :anch => a, :type => type,
459 arg[:occur].unshift occur
461 arg[:anch_n] = arg[:anch_alias] || arg[:anch]
466 ignore_case = [:come_tag, :come_jump, :come_anchor].member?(h[:type]) && $i
467 reg = /#{reg.source}/i if ignore_case
468 {:regexp => reg, :func => func, :greedy => h[:greedy]}
471 open(dpath, 'w'){|output|
472 output.print fmt[:header].call(f)
473 format_io input, output, prog_src, compiler, fmt[:escaper]
474 output.print fmt[:footer].call(f)
480 notice 'reference pages: '
485 next if arg[:anch_alias]
486 m2a.cons arg[:file], arg if [:come_anchor, :go_anchor].member? type
491 body = fmt[:namer].call f
492 ref = fmt[:ref_namer].call f
493 rpath = File::expand_path ref, dest_dir
494 mkdir_p File::dirname(rpath)
495 open(rpath, 'w'){|output|
496 output.print fmt[:ref_header].call(f)
498 g = fmt[:unescaper].call arg[:key]
499 arg[:goto_file] = g if arg[:type] == :goto && FileTest::exist?(g)
500 arg[:occur].reject!{|oc| ![:come_anchor, :go_anchor].member? oc[:type]}
501 arg[:occur].sort!{|a,b| - (a[:file] <=> b[:file])}
502 output.print fmt[:ref_itemer].call(arg)
504 output.print fmt[:ref_footer].call(f)
509 notice 'index page: '
510 path = File::expand_path fmt[:index_namer].call(), dest_dir
511 open(path, 'w'){|output|
512 output.print fmt[:index_header].call
513 output.print fmt[:index_keyworder].call(k2m.keys.sort.map{|k|
516 [:come_anchor].member?(type) ? arg : nil
519 # alphabet files precede numerical files
520 z = files.sort{|f, g|
521 a, b = [f, g].map{|h| (h =~ /^[0-9]/ ? 'z' : 'a') + h}
525 output.print fmt[:index_filer].call(z.map{|f|
526 g = fmt[:namer].call f
527 # ts = $titles_in_file[f].reject{|t| t =~ /^\s*$/} || []
528 ts = $titles_in_file[f] || []
529 {:file => f, :dest => g, :title => ts.join(' / ')}
531 output.print fmt[:index_footer].call