ext/howm2

   1 #!/usr/bin/ruby -s
   2 # -*- coding: euc-jp -*-
   3 # -*- Ruby -*-
   4
   5 # Convert ~/howm/ to HTML or other formats.
   6 # Only RD format is supported unless you will give me patches. :p
   7
   8 require 'cgi'
   9
# Print the command-line help text to standard output.
# The help text is a heredoc written in the file's own encoding (the file
# header declares euc-jp); the program name shown in the examples is the
# basename of $0.
def usage
  name = File::basename $0
  print <<EOU
#{name}: howm ¥á¥â¤ò¥Õ¥©¡¼¥Þ¥Ã¥È
¡¦¤ä¤Ð¤¤Ê¸»ú¤ò¥¨¥¹¥±¡¼¥×
¡¦¥¡¼¥ï¡¼¥É¤ò¥ê¥ó¥¯¤ËÊÑ´¹
¡¦¥Ø¥Ã¥À¤È¥Õ¥Ã¥¿¤ò¤Ä¤±¤ë
(Îã)
  #{name} ~/howm/ ~/converted/
  #{name} -type=rd ~/howm/ ~/converted/
  ls ~/howm/*/*/*7-*.txt | #{name} -list ~/converted/
  grep -rl '¤Û¤²' ~/howm/ | #{name} -list ~/converted/
(¥ª¥×¥·¥ç¥óÎã)
  -type=rd ¤Þ¤¿¤Ï -t=rd ¢ª ¥Õ¥©¡¼¥Þ¥Ã¥È¤Î¼ïÎà¤ò»ØÄê
    html   ¡Ä ¥Ç¥Õ¥©¥ë¥È
    rd     ¡Ä see http://www2.pos.to/~tosh/ruby/rdtool/ja/
    rdbody ¡Ä rd ¤Î =begin ¤È =end ¤¬¤Ê¤¤¤â¤Î¤òÆþÎÏ. ¤³¤ì¤é¤òÊä¤Ã¤ÆÀ¸À®.
  -list                     ¢ª ¥á¥â¥Õ¥¡¥¤¥ë¤Î¥ê¥¹¥È¤òÉ¸½àÆþÎÏ¤«¤éÆÉ¤à
  -exclude='^[.]|CVS'       ¢ª ÂÐ¾Ý³°¤Î¥Õ¥¡¥¤¥ë¤òÀµµ¬É½¸½¤Ç»ØÄê
  -r                        ¢ª ¥á¥â°ìÍ÷¤ò¿·¤·¤¤½ç¤ËÊÂ¤Ù¤ë
  -i                        ¢ª <<< ¤ÇÂçÊ¸»ú¾®Ê¸»ú¤ò¶èÊÌ¤·¤Ê¤¤
  -title='Index'            ¢ª index ¥Ú¡¼¥¸¤ÎÂêÌ¾
  -silent ¤Þ¤¿¤Ï -s         ¢ª ¿ÊÄ½É½¼¨¤ò¤·¤Ê¤¤
  -goto='>>>'               ¢ª goto link ¤Î½ñ¼°
  -comefrom='<<<'           ¢ª come-from link ¤Î½ñ¼°
  -no_alias                 ¢ª come-from ¥¡¼¥ï¡¼¥É¤Î alias ¤òÌµ»ë
  -help ¤Þ¤¿¤Ï -h           ¢ª ¤³¤Î¥á¥Ã¥»¡¼¥¸¤òÉ½¼¨
  (-debug                   ¢ª ¥Ç¥Ð¥Ã¥°ÍÑ½ÐÎÏ)
EOU
end
  40
  41 argv_len = $list ? 1 : 2
  42 if ($help || $h || ARGV.length != argv_len)
  43   usage
  44   exit 0
  45 end
  46
  47 #####################################
  48
  49 $type ||= $t || 'html'
  50 $exclude ||= "^[.\#]|CVS|~$"
  51 $silent ||= $s
  52 $title ||= 'Index'
  53 #$r_text_width = 40
  54 $progress = '.'
  55 $goto = '>>>'
  56 $comefrom = '<<<'
  57 $url_regexp = %r!((http|file)://\S+)!
  58
  59 def come_go_match(str)
  60   case str
  61   when /#$comefrom|#$goto/
  62     s = str
  63     r = []
  64     while s =~ /((#$comefrom|#$goto) *(.+?)) *($|(#$comefrom|#$goto).*)/
  65       raw = $1
  66       type = ($2 == $comefrom) ? :comefrom : :goto
  67       key = $3
  68       s = $4
  69       r.push [type, key, raw]
  70     end
  71     return r
  72   else
  73     return false
  74   end
  75 end
  76
  77 def title_match(str)
  78   if str =~ /^= +(.+)$/
  79     return $1
  80   else
  81     return false
  82   end
  83 end
  84
  85 #####################################
  86
  87 def empty(); lambda{|*dummy| ""}; end
  88 def constant(str); lambda{|*dummy| str}; end
  89 def appender(str); lambda{|x| x + str}; end
  90 def no_change(); lambda{|*x| x[0]}; end
  91
  92 $formatter = Hash::new
  93
  94 $formatter['html'] = {
  95   :escaper => lambda{|str| CGI::escapeHTML str},
  96   :unescaper => lambda{|str| CGI::unescapeHTML str},
  97   # body page
  98   :namer => appender('.b.html'),
  99   :header => lambda{|file|
 100     %!<HTML><TITLE>#{file}</TITLE><BODY><H1>#{file}</A></H1><HR><PRE>\n!
 101   },
 102   :come_tag => lambda{|a|
 103     # Fix me.
 104     %!<A NAME="#{a[:occur][0][:anch]}"></A><A HREF="#{a[:rpath]}\##{a[:anch_n]}" NAME="#{a[:anch]}">#{a[:orig]}</A>!
 105   },
 106   :come_jump => lambda{|a|
 107     %!<A HREF="#{a[:path]}\##{a[:anch]}">#{a[:orig]}</A>!
 108   },
 109   :come_anchor => lambda{|a|
 110     %!<A NAME="#{a[:occur][0][:anch]}"></A>!
 111   },
 112   :go_tag => lambda{|a|
 113     %!<A NAME="#{a[:occur][0][:anch]}"></A><A HREF="#{a[:rpath]}\##{a[:anch]}" NAME="#{a[:anch]}">#{a[:orig]}</A>!
 114   },
 115   :go_anchor => lambda{|a|
 116     %!<A NAME="#{a[:occur][0][:anch]}"></A>!
 117 #     %!<A NAME="#{a[:occur][0][:anch]}">#{a[:key]}</A>!
 118   },
 119   :url => lambda{|a|
 120     %!<A HREF="#{a[:url]}">#{CGI::unescapeHTML a[:url]}</A>!
 121   },
 122   :footer => lambda{|file|
 123     %!</PRE><HR><A HREF="#{to_index file}">index</A></BODY></HTML>\n!
 124   },
 125   # reference page
 126   :ref_namer => appender('.r.html'),
 127   :ref_header => lambda{|file|
 128     "<HTML><TITLE>#{file}</TITLE><BODY><H1>References: #{file}</H1>\n"
 129   },
 130   :ref_itemer => lambda{|a|
 131     go = a[:goto_file]
 132     url = go ? "file://#{a[:goto_file]}" : "#{a[:path]}\##{a[:anch]}"
 133     ocs = a[:occur]
 134     %!<A HREF="#{url}" NAME="#{a[:anch]}"><H2>#{a[:key]} (#{ocs.length})</H2></A>\n<OL>\n! +
 135     ocs.map{|oc|
 136       %!<LI><A HREF="#{oc[:path]}\##{oc[:anch]}">#{oc[:file]}</A> #{oc[:text]}\n!
 137     }.join +
 138     "</OL>\n"
 139   },
 140   :ref_footer => constant("</BODY></HTML>\n"),
 141   # index page
 142   :index_namer => constant('index.html'),
 143   :index_header => constant("<HTML><TITLE>#{$title}</TITLE><BODY><H1>#{$title}</H1>\n"),
 144   :index_keyworder => lambda{|as|
 145     "<H2>Keywords (#{as.length})</H2>\n" +
 146     as.map{|a| %!<A HREF="#{a[:dest]}\##{a[:anch]}">#{a[:key]}</A>!}.join(" /\n") +
 147     "\n"
 148   },
 149   :index_filer => lambda{|as|
 150     "<H2>Files (#{as.length})</H2>\n<OL>\n" +
 151     as.map{|a| %!<LI><A HREF="#{a[:dest]}">#{a[:file]}</A>: #{a[:title]}\n!}.join +
 152     "</OL>\n"
 153   },
 154   :index_footer => constant("</BODY></HTML>\n"),
 155 }
 156
 157 $formatter['rd'] = {  # RD doesn't have anchor?
 158   :escaper => no_change,
 159   :unescaper => no_change,
 160   # body page
 161   :namer => appender('.b.rd'),
 162   :header => empty,
 163   :come_tag => lambda{|a| %!((<"#{a[:orig]}"|URL:#{a[:rpath]}>))!},
 164   :come_jump => lambda{|a| %!((<"#{a[:orig]}"|URL:#{a[:path]}>))!},
 165   :come_anchor => constant(''),
 166   :footer => empty,
 167   :go_tag => lambda{|a| %!((<"#{a[:orig]}"|URL:#{a[:rpath]}>))!},
 168   :go_anchor => constant(''),
 169   :url => lambda{|a| %!((<"#{a[:url]}"|URL:#{a[:url]}>))!},
 170   # reference page
 171   :ref_namer => appender('.r.rd'),
 172   :ref_header => lambda{|file| "=begin\n= References: #{file}\n"},
 173   :ref_itemer => lambda{|a|
 174     go = a[:goto_file]
 175     url = go ? "file://#{go}" : "#{a[:path]}"
 176     %!== ((<"#{a[:key]}"|URL:#{url}>))\n! +
 177     a[:occur].map{|oc|
 178       %!* ((<"#{oc[:file]}"|URL:#{oc[:path]}>)) #{oc[:text]}\n!
 179     }.join +
 180     "\n"
 181   },
 182   :ref_footer => constant("=end\n"),
 183   # index page
 184   :index_namer => constant('index.rd'),
 185   :index_header => constant("=begin\n= #{$title}\n"),
 186   :index_keyworder => lambda{|as|
 187     "== Keywords (#{as.length})\n" +
 188     as.map{|a| %!((<"#{a[:key]}"|URL:#{a[:dest]}>))!}.join(" /\n") +
 189     "\n"
 190   },
 191   :index_filer => lambda{|as|
 192     "== Files (#{as.length})\n" +
 193     as.map{|a| %!* ((<"#{a[:file]}"|URL:#{a[:dest]}>)): #{a[:title]}\n!}.join
 194   },
 195   :index_footer => constant("=end\n"),
 196 }
 197
 198 b = $formatter['rd'].dup
 199 b[:header] = constant "=begin\n"
 200 b[:footer] = constant "=end\n"
 201 $formatter['rdbody'] = b
 202
 203 #####################################
 204
 205 class String
 206   def indices(substr)
 207     a = Array::new
 208     pos = 0
 209     while (i = index substr, pos)
 210       a.push(substr.is_a?(Regexp) ? [i, $&] : i)
 211       pos = i + 1
 212     end
 213     return a
 214   end
 215 end
 216
 217 class HashList < Hash
 218   def cons(key, val)
 219     self[key] ||= Array::new
 220     self[key].push val
 221   end
 222 end
 223
 224 def ls_R(dir)
 225   a = Array::new
 226   Dir::open(dir){|d| d.each{|f| a.push f}}  # map doesn't work??
 227   b = Array::new
 228   a.each{|f|
 229     next if f =~ /#$exclude/
 230     path = File::expand_path f, dir
 231     b.push f if FileTest::file? path
 232     b += ls_R(path).map{|g| "#{f}/#{g}"} if FileTest::directory? path
 233   }
 234   return b
 235 end
 236
 237 # FixMe :-(
 238 def bundle(file_list)
 239   fs = file_list.map{|f| File::expand_path f}
 240   ds = fs.map{|f| File::dirname f}
 241   common = ds[0] || ''
 242   ds.each{|d|
 243     while common != d
 244       if common.length <= d.length
 245         d = File::dirname d
 246       else
 247         common = File::dirname common
 248       end
 249     end
 250   }
 251   rs = fs.map{|f| f[(common.length + 1)..-1]}  # +1 for '/'
 252   return [common, rs]
 253 end
 254
 255 # Fixme :-(
 256 def mkdir_p(path)
 257   parent = File::dirname path
 258   return true if parent == path  # root dir
 259   mkdir_p parent
 260   if !FileTest::exist? path
 261     Dir::mkdir path
 262   end
 263 end
 264
 265 # Fixme :-(
 266 def relative_path(target, origin)
 267   return target if origin == '.'
 268   sep = '/'
 269   parent = '..'
 270   root = origin.split(sep).map{|any| parent}.join(sep)
 271   return root + sep + target
 272 end
 273
 274 def to_index(origin)
 275   relative_path $formatter[$type][:index_namer].call, File::dirname(origin)
 276 end
 277
 278 $unique_id = ':000000'
 279 def unique_name(base)
 280   base + $unique_id.succ!.dup
 281 end
 282
 283 #####################################
 284
 285 $titles_in_file = HashList::new  # dirty!
 286 def come_go_master(files, dir, formatter)
 287   h = HashList::new  # key => master files
 288   aliases = []
 289   files.each{|f|
 290     open(File::expand_path(f, dir)){|io|
 291       io.each_line{|line|
 292         if (t = title_match line)
 293           $titles_in_file.cons f, t
 294         end
 295         if (found = come_go_match line)
 296           equiv_key = []
 297           equiv_raw = []
 298           found.each{|m|
 299             type, key, raw = m
 300             s = formatter[:escaper].call raw
 301             k = formatter[:escaper].call key
 302             g = formatter[:namer].call f
 303             r = formatter[:ref_namer].call f
 304             a = CGI::escape(k)
 305             arg = {
 306               :type => type,
 307               :raw => s,
 308               :key => k,
 309               :occur => Array::new,
 310               :file => f,
 311               :dest => g,
 312               :ref => r,
 313               :anch => a,
 314             }
 315             case type
 316             when :comefrom
 317               h.cons s, [:come_tag, arg]
 318               h.cons k, [:come_jump, arg]
 319               h.cons k, [:come_anchor, arg]
 320               equiv_key.push k
 321               equiv_raw.push s
 322             when :goto
 323               h.cons s, [:go_tag, arg]
 324               h.cons k, [:go_anchor, arg]
 325             end
 326           }
 327            if equiv_key.length > 1
 328              aliases += [equiv_key, equiv_raw]
 329            end
 330         end
 331       }
 332     }
 333   }
 334   return h, aliases
 335 end
 336
 337 def format_line(line, prog)
 338   match = HashList::new  # pos => key
 339   prog.each{|rule|
 340     regexp, func, greedy = [:regexp, :func, :greedy].map{|k| rule[k]}
 341     line.indices(regexp).each{|r|
 342       i, k = r
 343       match.cons i, [k, func, greedy]
 344     }
 345   }
 346   p match if $debug
 347   cursor = 0
 348   done = ""
 349   match.keys.sort.each{|i|
 350     skipping = (i < cursor)
 351     if !skipping
 352       done += line[cursor..(i - 1)] if i > 0 # 'foobar'[0..-1] is 'foobar'
 353       cursor = i
 354     end
 355     match[i].each{|com|
 356       key, func, greedy = com
 357       next if greedy && skipping
 358       done += func.call(key, line)
 359       if greedy
 360         cursor = i + key.length
 361         break
 362       end
 363     }
 364   }
 365   if (cursor <= (len = line.length))
 366     done += line[cursor..len]
 367   end
 368   return done
 369 end
 370
 371 def format_io(input, output, prog_src, compiler, escaper)
 372   a = input.readlines.map{|s| escaper.call s}
 373   whole = a.join
 374   matched_rules = prog_src.select{|rule|
 375     whole =~ rule[:regexp]
 376   }
 377   prog = matched_rules.map{|r| compiler.call r}
 378   a.each{|line|
 379     output.print format_line(line, prog)
 380   }
 381 end
 382
 383 #####################################
 384
 385 def notice(str)
 386   STDERR.print str if !$silent
 387 end
 388
 389 if $list
 390   dest_dir = ARGV.shift
 391   src_dir, files = bundle(STDIN.readlines.map{|s| s.chomp})
 392 else
 393   src_dir, dest_dir = ARGV
 394   files = ls_R src_dir
 395 end
 396 notice "#{files.length} files "
 397 fmt = $formatter[$type]
 398 k2m, aliases = come_go_master files, src_dir, fmt
 399 aliases = [] if $no_alias
 400 notice "(#{k2m.length} entries)\n"
 401 p k2m if $debug
 402
 403 aliases.each{|equiv|
 404   key0 = equiv.shift
 405   type0, arg0 = k2m[key0][0]
 406   equiv.each{|key|
 407     k2m[key].each{|m|
 408       type, arg = m
 409       [:occur, :file, :dest, :ref].each{|x|
 410         arg[x] = arg0[x]
 411       }
 412       arg[:anch_alias] = arg0[:anch] if type == :come_tag
 413     }
 414   }
 415 }
 416
 417 notice 'body pages: '
 418 greedy = Array::new
 419 nongreedy = Array::new
 420 k2m.each_pair{|k, v|
 421   v.each{|m|
 422     type, arg = m
 423     r = /#{Regexp::escape k}/
 424     g = [:come_tag, :come_jump, :go_tag].member?(type)
 425     z = g ? greedy : nongreedy
 426     h = {:raw => k, :regexp => r, :type => type, :arg => arg, :greedy => g}
 427     z.push h
 428   }
 429 }
 430 greedy.sort!{|x, y| x[:raw].length <=> y[:raw].length}
 431 greedy.reverse!
 432 p greedy if $debug
 433 p nongreedy if $debug
 434 u = {:regexp => $url_regexp, :type => :url, :arg => Hash::new, :greedy => true}
 435 prog_src = nongreedy + [u] + greedy
 436 files.each{|f|
 437   notice $progress
 438   g = fmt[:namer].call f
 439   r = fmt[:ref_namer].call f
 440   spath = File::expand_path f, src_dir
 441   dpath, rpath = [g, r].map{|x| File::expand_path x, dest_dir}
 442   mkdir_p File::dirname(dpath)
 443   compiler = lambda{|h|
 444     func = lambda{|k, s|
 445       type = h[:type]
 446       arg = h[:arg]
 447       if type == :url
 448         arg[:url] = k
 449       else
 450         dir = File::dirname(f)
 451         arg[:path] = relative_path arg[:dest], dir
 452         arg[:rpath] = relative_path arg[:ref], dir
 453         a = unique_name arg[:anch]
 454         path = relative_path(g, dir)
 455         occur = {
 456           :file => f, :path => path, :text => s.chop,
 457           :anch => a, :type => type,
 458         }
 459         arg[:occur].unshift occur
 460         arg[:orig] = k
 461         arg[:anch_n] = arg[:anch_alias] || arg[:anch]
 462       end
 463       fmt[type].call arg
 464     }
 465     reg = h[:regexp]
 466     ignore_case = [:come_tag, :come_jump, :come_anchor].member?(h[:type]) && $i
 467     reg = /#{reg.source}/i if ignore_case
 468     {:regexp => reg, :func => func, :greedy => h[:greedy]}
 469   }
 470   open(spath){|input|
 471     open(dpath, 'w'){|output|
 472       output.print fmt[:header].call(f)
 473       format_io input, output, prog_src, compiler, fmt[:escaper]
 474       output.print fmt[:footer].call(f)
 475     }
 476   }
 477 }
 478 notice "\n"
 479
 480 notice 'reference pages: '
 481 m2a = HashList::new
 482 k2m.each_pair{|k, v|
 483   v.each{|z|
 484     type, arg = z
 485     next if arg[:anch_alias]
 486     m2a.cons arg[:file], arg if [:come_anchor, :go_anchor].member? type
 487   }
 488 }
 489 m2a.each_pair{|f, v|
 490   notice $progress
 491   body = fmt[:namer].call f
 492   ref = fmt[:ref_namer].call f
 493   rpath = File::expand_path ref, dest_dir
 494   mkdir_p File::dirname(rpath)
 495   open(rpath, 'w'){|output|
 496     output.print fmt[:ref_header].call(f)
 497     v.each{|arg|
 498       g = fmt[:unescaper].call arg[:key]
 499       arg[:goto_file] = g if arg[:type] == :goto && FileTest::exist?(g)
 500       arg[:occur].reject!{|oc| ![:come_anchor, :go_anchor].member? oc[:type]}
 501       arg[:occur].sort!{|a,b| - (a[:file] <=> b[:file])}
 502       output.print fmt[:ref_itemer].call(arg)
 503     }
 504     output.print fmt[:ref_footer].call(f)
 505   }
 506 }
 507 notice "\n"
 508
 509 notice 'index page: '
 510 path = File::expand_path fmt[:index_namer].call(), dest_dir
 511 open(path, 'w'){|output|
 512   output.print fmt[:index_header].call
 513   output.print fmt[:index_keyworder].call(k2m.keys.sort.map{|k|
 514     k2m[k].map{|m|
 515       type, arg = m
 516       [:come_anchor].member?(type) ? arg : nil
 517     }.select{|a| a}
 518   }.flatten)
 519   # alphabet files precede numerical files
 520   z = files.sort{|f, g|
 521     a, b = [f, g].map{|h| (h =~ /^[0-9]/ ? 'z' : 'a') + h}
 522     a <=> b
 523   }
 524   z.reverse! if $r
 525   output.print fmt[:index_filer].call(z.map{|f|
 526     g = fmt[:namer].call f
 527 #    ts = $titles_in_file[f].reject{|t| t =~ /^\s*$/} || []
 528     ts = $titles_in_file[f] || []
 529     {:file => f, :dest => g, :title => ts.join(' / ')}
 530   })
 531   output.print fmt[:index_footer].call
 532 }
 533 notice ".\n"