1 # -*- coding: utf-8 -*-
\r
2 # Copyright (c) 2004 Danilo Segan <danilo@kvota.net>.
\r
4 # This file is part of xml2po.
\r
6 # xml2po is free software; you can redistribute it and/or modify
\r
7 # it under the terms of the GNU General Public License as published by
\r
8 # the Free Software Foundation; either version 2 of the License, or
\r
9 # (at your option) any later version.
\r
11 # xml2po is distributed in the hope that it will be useful,
\r
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
14 # GNU General Public License for more details.
\r
16 # You should have received a copy of the GNU General Public License
\r
17 # along with xml2po; if not, write to the Free Software Foundation, Inc.,
\r
18 # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
\r
# This implements special instructions for handling DocBook XML documents:
#   — better handling of nested complicated tags (i.e. definitions of
#     ignored-tags and final-tags)
#   — support for merging translator-credits back into DocBook articles
#   — support for setting a language
\r
31 # We use "currentXmlMode" class name for all modes
\r
32 # -- it might be better to have it named docbookXmlMode, but it will make loading harder;
\r
33 # it is also not necessary until we start supporting extracting strings from more
\r
34 # than one document type at the same time
\r
class docbookXmlMode:
    """Class for special handling of DocBook document types.

    It sets lang attribute on article elements, and adds translators
    to articleinfo/copyright."""

    def __init__(self):
        # DocBook list elements.
        self.lists = ['itemizedlist', 'orderedlist', 'variablelist',
                      'segmentedlist', 'simplelist', 'calloutlist',
                      'varlistentry']
        # NOTE(review): the tail of this list was lost in the damaged source;
        # 'screenshot' is restored from the upstream xml2po layout -- confirm.
        self.objects = ['table', 'figure', 'textobject', 'imageobject',
                        'mediaobject', 'screenshot']

    def getIgnoredTags(self):
        "Returns array of tags to be ignored."
        return self.objects + self.lists

    def getFinalTags(self):
        "Returns array of tags to be considered 'final'."
        return ['para', 'formalpara', 'simpara',
                'releaseinfo', 'revnumber', 'title',
                'date', 'term', 'programlisting'] + self.objects + self.lists

    def getSpacePreserveTags(self):
        "Returns array of tags in which spaces are to be preserved."
        # NOTE(review): only 'classsynopsisinfo' survived in the damaged
        # source; the remaining entries are restored from the upstream
        # xml2po layout -- confirm.
        return [
            'classsynopsisinfo',
            'computeroutput',
            'funcsynopsisinfo',
            'literallayout',
            'programlisting',
            'screen',
            'synopsis',
            'userinput',
        ]

    def getStringForTranslators(self):
        """Returns string which will be used to credit translators."""
        return "translator-credits"

    def getCommentForTranslators(self):
        """Returns a comment to be added next to string for crediting translators."""
        return """Put one translator per line, in the form of NAME <EMAIL>."""

    def getStringForTranslation(self):
        """Returns translation of 'translation'."""
        return "translator-translation"

    def getCommentForTranslation(self):
        """Returns a string that explains how 'translation' is to be translated."""
        return """Place the translation of 'translation' here."""

    def _find_articleinfo(self, node):
        """Return the first 'articleinfo' or 'bookinfo' node found depth-first.

        The search starts at "node" itself and descends through its
        children; None is returned when no such node exists.
        """
        if node.name == 'articleinfo' or node.name == 'bookinfo':
            return node
        child = node.children
        while child:
            found = self._find_articleinfo(child)
            if found:
                return found
            child = child.next
        return None

    def _find_lastcopyright(self, node):
        """Return the last 'copyright' child of "node".

        Children are scanned backwards from the last one.  When no child is
        named 'copyright' the last child itself is returned; when "node"
        has no children the result is None.
        """
        if not node.children:
            return None
        last = node.lastChild()
        candidate = last
        while candidate:
            if candidate.name == "copyright":
                return candidate
            candidate = candidate.prev
        return last

    def _md5_for_file(self, filename):
        """Return the hexadecimal MD5 digest of the file "filename".

        The file is read in binary mode in 4096-byte chunks.  The digest is
        only used to detect content changes, not for security purposes.
        """
        # Local import keeps the method self-contained.
        import hashlib

        digest = hashlib.md5()
        stream = open(filename, "rb")
        try:
            chunk = stream.read(4096)
            while chunk:
                digest.update(chunk)
                chunk = stream.read(4096)
        finally:
            # Close the handle even when reading fails part-way through.
            stream.close()
        return digest.hexdigest()

    def _output_images(self, node, msg):
        """Emit one '@@image' message for every 'imagedata' element under "node".

        The image path is the element's 'fileref' attribute resolved against
        the directory of msg.filename.  The message carries the MD5 digest of
        the image file, or a fixed placeholder (plus a warning on stderr)
        when the file does not exist.  Children of an 'imagedata' element
        are not visited.
        """
        if node and node.type == 'element' and node.name == 'imagedata':
            # Use .fileref to construct new message
            fileref = node.prop("fileref")
            if fileref:
                base_dir = os.path.dirname(msg.filename)
                fullpath = os.path.join(base_dir, fileref)
                if os.path.exists(fullpath):
                    digest = self._md5_for_file(fullpath)
                else:
                    digest = "THIS FILE DOESN'T EXIST"
                    sys.stderr.write(
                        ("Warning: image file '%s' not found." % fullpath) + "\n")

                msg.outputMessage("@@image: '%s'; md5=%s" % (fileref, digest), node.lineNo(),
                                  "When image changes, this message will be marked fuzzy or untranslated for you.\n"+
                                  "It doesn't matter what you translate it to: it's not used at all.")
        elif node and node.children:
            child = node.children
            while child:
                self._output_images(child, msg)
                child = child.next

    def preProcessXml(self, doc, msg):
        """Add additional messages of interest here."""
        root = doc.getRootElement()
        self._output_images(root, msg)

    def postProcessXmlTranslation(self, doc, language, translators, translation):
        """Sets a language and translators in "doc" tree.

        "translators" is a string consisting of "Name <email>" pairs
        of each translator, separated by newlines.  "translation" is the
        text stored in the 'contrib' child of every credit that is added.

        Nothing is changed when the document has no 'article' or 'book'
        element.  Credits are skipped when "translators" is empty, is still
        the untranslated credits string, or no articleinfo/bookinfo exists.
        """
        root = doc.getRootElement()
        # DocBook documents can be something other than article, handle that as well in the future
        while root and root.name != 'article' and root.name != 'book':
            root = root.next
        if not root:
            return
        root.setProp('lang', language)

        if not translators or translators == self.getStringForTranslators():
            return

        # Now, lets find 'articleinfo' (it can be something else, but this goes along with 'article')
        ai = self._find_articleinfo(root)
        if not ai:
            return

        # Now, lets do one translator at a time
        transgroup = libxml2.newNode("authorgroup")
        for line in translators.split("\n"):
            match = re.match(r"^([^<,]+)\s*(?:<([^>,]+)>)?$", line.strip())
            if not match:
                continue
            last = self._find_lastcopyright(ai)
            credit = libxml2.newNode("othercredit")
            if last:
                credit = last.addNextSibling(credit)
            else:
                # articleinfo has no children yet: hang the credit off a new
                # authorgroup instead.
                transgroup.addChild(credit)
                ai.addChild(transgroup)
            # NOTE(review): .encode('utf-8') presumably targets Python 2
            # unicode input -- confirm against the libxml2 bindings in use.
            credit.newChild(None, "contrib", translation.encode('utf-8'))
            # Group 1 is never empty on a match, so only the optional e-mail
            # (group 2) decides the form of the holder text.
            name, email = match.group(1), match.group(2)
            if email:
                holder = name + "(%s)" % email
            else:
                holder = name
            credit.newChild(None, "othername", holder.encode('utf-8'))
\r
# Perform some tests when run standalone: print every static value the mode
# exposes so they can be checked by eye.
if __name__ == '__main__':
    test = docbookXmlMode()
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Ignored tags : " + repr(test.getIgnoredTags()))
    print("Final tags : " + repr(test.getFinalTags()))
    print("Space-preserve tags: " + repr(test.getSpacePreserveTags()))

    print("Credits from string: '%s'" % test.getStringForTranslators())
    print("Explanation for credits:\n\t'%s'" % test.getCommentForTranslators())

    print("String for translation: '%s'" % test.getStringForTranslation())
    print("Explanation for translation:\n\t'%s'" % test.getCommentForTranslation())
\r