doc/xml2po-modes/docbook.py

   1 # -*- coding: utf-8 -*-\r
   2 # Copyright (c) 2004 Danilo Segan <danilo@kvota.net>.\r
   3 #\r
   4 # This file is part of xml2po.\r
   5 #\r
   6 # xml2po is free software; you can redistribute it and/or modify\r
   7 # it under the terms of the GNU General Public License as published by\r
   8 # the Free Software Foundation; either version 2 of the License, or\r
   9 # (at your option) any later version.\r
  10 #\r
  11 # xml2po is distributed in the hope that it will be useful,\r
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of\r
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
  14 # GNU General Public License for more details.\r
  15 #\r
  16 # You should have received a copy of the GNU General Public License\r
  17 # along with xml2po; if not, write to the Free Software Foundation, Inc.,\r
  18 # 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA\r
  19 #\r
  20 \r
  21 # This implements special instructions for handling DocBook XML documents\r
  22 # in a better way.\r
  23 #\r
  24 #  This means:\r
  25 #   — better handling of nested complicated tags (i.e. definitions of\r
  26 #     ignored-tags and final-tags)\r
  27 #   — support for merging translator-credits back into DocBook articles\r
  28 #   — support for setting a language\r
  29 #\r
  30 \r
# NOTE: the class defined below is named "docbookXmlMode".
#  -- the original remark here said that the class name "currentXmlMode" is used
#     for all modes because it makes loading easier, and that a per-mode name is
#     not necessary until strings are extracted from more than one document type
#     at the same time; that remark no longer matches the code in this file
#     (TODO: confirm which class name the mode loader expects)
  35 #\r
  36 import re\r
  37 import libxml2\r
  38 import os\r
  39 import md5\r
  40 import sys\r
  41 \r
  42 class docbookXmlMode:\r
  43     """Class for special handling of DocBook document types.\r
  44 \r
  45     It sets lang attribute on article elements, and adds translators\r
  46     to articleinfo/copyright."""\r
  47     def __init__(self):\r
  48         self.lists = ['itemizedlist', 'orderedlist', 'variablelist',\r
  49                       'segmentedlist', 'simplelist', 'calloutlist', 'varlistentry' ]\r
  50         self.objects = [ 'table', 'figure', 'textobject', 'imageobject', 'mediaobject',\r
  51                          'screenshot' ]\r
  52         \r
  53     def getIgnoredTags(self):\r
  54         "Returns array of tags to be ignored."\r
  55         return  self.objects + self.lists\r
  56 \r
  57     def getFinalTags(self):\r
  58         "Returns array of tags to be considered 'final'."\r
  59         return ['para', 'formalpara', 'simpara',\r
  60                 'releaseinfo', 'revnumber', 'title',\r
  61                 'date', 'term', 'programlisting'] + self.objects + self.lists\r
  62 \r
  63     def getSpacePreserveTags(self):\r
  64         "Returns array of tags in which spaces are to be preserved."\r
  65         return [\r
  66             'classsynopsisinfo',\r
  67             'computeroutput',\r
  68             'funcsynopsisinfo',\r
  69             'literallayout',\r
  70             'programlisting',\r
  71             'screen',\r
  72             'synopsis',\r
  73             'userinput'\r
  74             ]\r
  75 \r
  76     def getStringForTranslators(self):\r
  77         """Returns string which will be used to credit translators."""\r
  78         return "translator-credits"\r
  79 \r
  80     def getCommentForTranslators(self):\r
  81         """Returns a comment to be added next to string for crediting translators."""\r
  82         return """Put one translator per line, in the form of NAME <EMAIL>."""\r
  83 \r
  84     def getStringForTranslation(self):\r
  85         """Returns translation of 'translation'."""\r
  86         return "translator-translation"\r
  87 \r
  88     def getCommentForTranslation(self):\r
  89         """Returns a string that explains how 'translation' is to be translated."""\r
  90         return """Place the translation of 'translation' here."""\r
  91 \r
  92     def _find_articleinfo(self, node):\r
  93         if node.name == 'articleinfo' or node.name == 'bookinfo':\r
  94             return node\r
  95         child = node.children\r
  96         while child:\r
  97             ret = self._find_articleinfo(child)\r
  98             if ret:\r
  99                 return ret\r
 100             child = child.next\r
 101         return None\r
 102 \r
 103     def _find_lastcopyright(self, node):\r
 104         if not node.children:\r
 105             return None\r
 106         last = node.lastChild()\r
 107         tmp = last\r
 108         while tmp:\r
 109             if tmp.name == "copyright":\r
 110                 last = tmp\r
 111                 break\r
 112             tmp = tmp.prev\r
 113         return last\r
 114 \r
 115     def _md5_for_file(self, filename):\r
 116         hash = md5.new()\r
 117         input = open(filename, "rb")\r
 118         read = input.read(4096)\r
 119         while read:\r
 120             hash.update(read)\r
 121             read = input.read(4096)\r
 122         input.close()\r
 123         return hash.hexdigest()\r
 124 \r
 125     def _output_images(self, node, msg):\r
 126         if node and node.type=='element' and node.name=='imagedata':\r
 127             # Use .fileref to construct new message\r
 128             attr = node.prop("fileref")\r
 129             if attr:\r
 130                 dir = os.path.dirname(msg.filename)\r
 131                 fullpath = os.path.join(dir, attr)\r
 132                 if os.path.exists(fullpath):\r
 133                     hash = self._md5_for_file(fullpath)\r
 134                 else:\r
 135                     hash = "THIS FILE DOESN'T EXIST"\r
 136                     print >>sys.stderr, "Warning: image file '%s' not found." % fullpath\r
 137                     \r
 138                 msg.outputMessage("@@image: '%s'; md5=%s" % (attr, hash), node.lineNo(),\r
 139                                   "When image changes, this message will be marked fuzzy or untranslated for you.\n"+\r
 140                                   "It doesn't matter what you translate it to: it's not used at all.")\r
 141         elif node and node.children:\r
 142             child = node.children\r
 143             while child:\r
 144                 self._output_images(child,msg)\r
 145                 child = child.next\r
 146 \r
 147 \r
 148     def preProcessXml(self, doc, msg):\r
 149         """Add additional messages of interest here."""\r
 150         root = doc.getRootElement()\r
 151         self._output_images(root,msg)\r
 152 \r
 153     def postProcessXmlTranslation(self, doc, language, translators, translation):\r
 154         """Sets a language and translators in "doc" tree.\r
 155         \r
 156         "translators" is a string consisted of "Name <email>" pairs\r
 157         of each translator, separated by newlines."""\r
 158 \r
 159         root = doc.getRootElement()\r
 160         # DocBook documents can be something other than article, handle that as well in the future\r
 161         while root and root.name != 'article' and root.name != 'book':\r
 162             root = root.next\r
 163         if root and (root.name == 'article' or root.name == 'book'):\r
 164             root.setProp('lang', language)\r
 165         else:\r
 166             return\r
 167         \r
 168         if translators == self.getStringForTranslators():\r
 169             return\r
 170         else:\r
 171             # Now, lets find 'articleinfo' (it can be something else, but this goes along with 'article')\r
 172             ai = self._find_articleinfo(root)\r
 173             if not ai:\r
 174                 return\r
 175 \r
 176             # Now, lets do one translator at a time\r
 177             transgroup = libxml2.newNode("authorgroup")\r
 178             lines = translators.split("\n")\r
 179             for line in lines:\r
 180                 line = line.strip()\r
 181                 match = re.match(r"^([^<,]+)\s*(?:<([^>,]+)>)?$", line)\r
 182                 if match:\r
 183                     last = self._find_lastcopyright(ai)\r
 184                     copy = libxml2.newNode("othercredit")\r
 185                     if last:\r
 186                         copy = last.addNextSibling(copy)\r
 187                     else:\r
 188                         transgroup.addChild(copy)\r
 189                         ai.addChild(transgroup)\r
 190                     copy.newChild(None, "contrib", translation.encode('utf-8'))\r
 191                     if match.group(1) and match.group(2):\r
 192                         holder = match.group(1)+"(%s)" % match.group(2)\r
 193                     elif match.group(1):\r
 194                         holder = match.group(1)\r
 195                     elif match.group(2):\r
 196                         holder = match.group(2)\r
 197                     else:\r
 198                         holder = "???"\r
 199                     copy.newChild(None, "othername", holder.encode('utf-8'))\r
 200 \r
 201 # Perform some tests when ran standalone\r
 202 if __name__ == '__main__':\r
 203     test = docbookXmlMode()\r
 204     print "Ignored tags       : " + repr(test.getIgnoredTags())\r
 205     print "Final tags         : " + repr(test.getFinalTags())\r
 206     print "Space-preserve tags: " + repr(test.getSpacePreserveTags())\r
 207 \r
 208     print "Credits from string: '%s'" % test.getStringForTranslators()\r
 209     print "Explanation for credits:\n\t'%s'" % test.getCommentForTranslators()\r
 210     \r
 211     print "String for translation: '%s'" % test.getStringForTranslation()\r
 212     print "Explanation for translation:\n\t'%s'" % test.getCommentForTranslation()\r
 213     \r