lib/ldblogwriter/parser.rb

   1 # -*- coding: utf-8 -*-
   2 require 'open-uri'
   3 require 'pp'
   4 require 'ldblogwriter/entry_manager.rb'
   5 require 'yaml'
   6
   7 # parserは、pukiwikiparser.rbを参考にしています。
   8 # http://jp.rubyist.net/magazine/?0010-CodeReview
   9
  10 # pluginの形式は
  11 # #プラグイン名
  12 # #プラグイン名(arg1, arg2...)
  13
  14 module LDBlogWriter
  15
  16   attr_accessor :entry
  17
  18   class Parser
  19     def initialize(conf, plugin, service = nil)
  20       @conf = conf
  21       @plugin = plugin
  22       @service = service
  23     end
  24
  25     def get_entry(src_text)
  26       lines = src_text.rstrip.split(/\r?\n/)
  27       first_line = lines.shift
  28       category = nil
  29       first_line.gsub!(/^<(.*)>\s+/) do |str|
  30         category = $1
  31         str.replace("")
  32       end
  33       title = first_line
  34       src_text = lines.join("\n")
  35       @entry = BlogEntry.new(title, category)
  36       if @conf.convert_to_html == true
  37 #        src_text = check_image_file(filename, src_text)
  38         content = to_html(src_text)
  39 #        if @conf.html_directory != nil
  40 #          save_html_file(@conf.html_directory, File.basename(filename), content)
  41 #        end
  42       else
  43         content = src_text
  44       end
  45       @entry.content = content
  46       return @entry
  47     end
  48
  49     def escape_html(str)
  50       str.gsub!(/&/, '&amp;')
  51       str.gsub!(/"/, '&quot;')
  52       str.gsub!(/</, '&lt;')
  53       str.gsub!(/>/, '&gt;')
  54       return str
  55     end
  56
  57     def to_html(src, entry = nil)
  58       if entry != nil
  59         @entry = entry
  60       end
  61       buf = []
  62       lines = src.rstrip.split(/\r?\n/).map {|line| line.chomp}
  63
  64       while lines.first
  65         case lines.first
  66         when ''
  67           lines.shift
  68         when /\A----/
  69           lines.shift
  70           buf.push '<hr />'
  71         when /\A#trackback\(.*\)/
  72           buf.push parse_trackback(lines.shift)
  73         when /\A#img\(.*\)/
  74           buf.push parse_img(lines.shift)
  75         when /\A\s/
  76           buf.concat parse_pre(take_block(lines, /\A\s/))
  77         when /\A>/
  78           buf.concat parse_quote(take_block(lines, /\A>/))
  79         when /\A-/
  80           buf.concat parse_list('ul', take_block(lines, /\A-/))
  81         when /\A\+/
  82           buf.concat parse_list('ol', take_block(lines, /\A\+/))
  83         when /\A#.*/
  84             buf.push parse_plugin(lines.shift)
  85         else
  86           buf.push '<p>'
  87           buf.concat parse_p(take_block(lines, /\A(?![*\s>:\-\+]|----|\z)/))
  88           buf.push '</p>'
  89         end
  90       end
  91       buf.join("\n")
  92     end
  93
  94 #private
  95
  96     def take_block(lines, marker)
  97       buf = []
  98       until lines.empty?
  99         break unless marker =~ lines.first
 100         buf.push lines.shift.sub(marker, '')
 101       end
 102       buf
 103     end
 104
 105     def syntax_highlight(lines, lang)
 106       require 'syntax/convertors/html'
 107       convertor = Syntax::Convertors::HTML.for_syntax lang
 108       ["<div class=\"ruby\">" + convertor.convert(lines.join("\n")) + "</div>\n"]
 109     end
 110
 111     def parse_pre(lines)
 112       # コードのハイライト対応
 113       if lines.first =~ /\Ahighlight\((.*)\)/
 114         lines.shift
 115         syntax_highlight(lines, $1)
 116       else
 117         ["<pre>", lines.map {|line| escape_html(line) }.join("\n"),
 118         '</pre>']
 119       end
 120     end
 121
 122     def parse_quote(lines)
 123        [ "<blockquote><p>", lines.join("\n"), "</p></blockquote>"]
 124     end
 125
 126     def parse_list(type, lines)
 127       marker = ((type == 'ul') ? /\A-/ : /\A\+/)
 128       parse_list0(type, lines, marker)
 129     end
 130
 131     def parse_list0(type, lines, marker)
 132       buf = ["<#{type}>"]
 133       closeli = nil
 134       until lines.empty?
 135         if marker =~ lines.first
 136           buf.concat parse_list0(type, take_block(lines, marker), marker)
 137         else
 138           buf.push closeli if closeli;  closeli = '</li>'
 139           buf.push "<li>#{parse_inline(lines.shift)}"
 140         end
 141       end
 142       buf.push closeli if closeli;  closeli = '</li>'
 143       buf.push "</#{type}>"
 144       buf
 145     end
 146
 147     def parse_plugin(line)
 148       eval_string = line.gsub(/\A#/, "")
 149       # regist post process action
 150       post_method_name = eval_string[/\A\w+/]+"_post"
 151       if @plugin.respond_to?(post_method_name)
 152         @plugin.post_process_list.push(eval_string.gsub(/\A(\w+)/) { $1+"_post" })
 153       end
 154
 155       @plugin.eval_src(eval_string)
 156     end
 157
 158     def get_small_img_uri(img_uri)
 159       if $DEBUG
 160         puts img_uri
 161       end
 162       uri = URI.parse(img_uri)
 163       new_path = uri.path.gsub(/\.(\w+)$/, '-s.\1')
 164       uri.path = new_path
 165       return uri.to_s
 166     end
 167
 168     def get_img_html(img_uri, title)
 169       result = ""
 170       small_img_uri = get_small_img_uri(img_uri)
 171       result += "<a href=\"#{img_uri}\" target=\"_blank\">"
 172       result += "<img src=\"#{small_img_uri}\" alt=\"#{title}\" "
 173       result += "hspace=\"5\" class=\"pict\" align=\"left\" />"
 174       result += "</a>"
 175       return result
 176     end
 177
 178     def parse_trackback(line)
 179       buf = []
 180       line.scan(/\#trackback\((.*)\).*/) do |url|
 181         @entry.trackback_url_array += url
 182       end
 183       return buf
 184     end
 185
 186     # TODO: plugin化
 187     def parse_img(line)
 188       buf = []
 189       img_str = ""
 190       if line =~ /\A#img\((.*)\).*/
 191         img_str = $1
 192         img_str.gsub!(/\s+/, "")
 193       end
 194       (img_path, img_title) = img_str.split(",")
 195       if img_title == nil
 196         img_title = File.basename(img_path)
 197       end
 198       img_manager = LDBlogWriter::EntryManager.new(@conf.upload_uri_file)
 199       if img_manager.has_entry?(File.basename(img_path)) == false
 200         # 新規アップロード
 201         img_uri = @service.post_image(img_path, img_title)
 202         if img_uri == false
 203           return buf
 204         end
 205         img_manager.save_edit_uri(File.basename(img_path), img_uri)
 206       else
 207         img_uri = img_manager.get_edit_uri(File.basename(img_path))
 208       end
 209       buf.push(get_img_html(img_uri, img_title))
 210       # アップロードデータ保存
 211       return buf
 212     end
 213
 214     def parse_p(lines)
 215       lines.map {|line| parse_inline(line) }
 216     end
 217
 218     def a_href(uri, label, cssclass)
 219       if @conf.auto_trackback == true
 220         open(uri) do |f|
 221           if f.content_type =~ /^image/
 222             return get_img_html(uri, label)
 223           elsif f.content_type != "text/html"
 224             break
 225           end
 226           contents = f.read
 227           trackback_ping = []
 228           contents.scan(%r|<rdf:Description\s+([^>]+)>|) do |attr|
 229             attr[0].scan(%r|\s+([^=]+)="([^\"]+)"|) do |key, value|
 230               trackback_ping << value if key == 'trackback:ping'
 231             end
 232           end
 233           if @entry != nil
 234             @entry.trackback_url_array += trackback_ping
 235           end
 236         end
 237       end
 238       %Q[<a class="#{cssclass}" href="#{escape_html(uri)}">#{escape_html(label)}</a>]
 239     end
 240
 241     def parse_inline(str)
 242       @inline_re ||= %r<
 243         ([&<>"])                             # $1: HTML escape characters
 244       | \[\[(.+?):\s*(https?://\S+)\s*\]\]   # $2: label, $3: URI
 245       | (#{URI.regexp('http')})              # $5...: URI autolink
 246       >x
 247       str.gsub(@inline_re) {
 248         case
 249         when htmlchar = $1 then escape_html(htmlchar)
 250         when bracket  = $2 then a_href($3, bracket, 'outlink')
 251 #        when pagename = $4 then "not support $3" #a_href(page_uri(pagename), pagename, 'pagelink')
 252         when uri      = $4 then a_href(uri, uri, 'outlink')
 253         else
 254           raise 'must not happen'
 255         end
 256       }
 257     end
 258   end
 259
 260 end
 261