libjava/contrib/aotcompile.py.in

   1 # -*- python -*-
   2
   3 ## Copyright (C) 2005, 2006, 2008 Free Software Foundation
   4 ## Written by Gary Benson <gbenson@redhat.com>
   5 ##
   6 ## This program is free software; you can redistribute it and/or modify
   7 ## it under the terms of the GNU General Public License as published by
   8 ## the Free Software Foundation; either version 2 of the License, or
   9 ## (at your option) any later version.
  10 ##
  11 ## This program is distributed in the hope that it will be useful,
  12 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 ## GNU General Public License for more details.
  15
  16 import classfile
  17 import copy
  18 # The md5 module is deprecated in Python 2.5
  19 try:
  20     from hashlib import md5
  21 except ImportError:
  22     from md5 import md5
  23 import operator
  24 import os
  25 import sys
  26 import cStringIO as StringIO
  27 import zipfile
  28
  29 PATHS = {"make":   "@MAKE@",
  30          "gcj":    "@prefix@/bin/gcj@gcc_suffix@",
  31          "dbtool": "@prefix@/bin/gcj-dbtool@gcc_suffix@"}
  32
  33 MAKEFLAGS = []
  34 GCJFLAGS = ["-fPIC", "-findirect-dispatch", "-fjni"]
  35 LDFLAGS = ["-Wl,-Bsymbolic"]
  36
  37 MAX_CLASSES_PER_JAR = 1024
  38 MAX_BYTES_PER_JAR = 1048576
  39
  40 MAKEFILE = "Makefile"
  41
  42 MAKEFILE_HEADER = '''\
  43 GCJ = %(gcj)s
  44 DBTOOL = %(dbtool)s
  45 GCJFLAGS = %(gcjflags)s
  46 LDFLAGS = %(ldflags)s
  47
  48 %%.o: %%.jar
  49         $(GCJ) -c $(GCJFLAGS) $< -o $@
  50
  51 TARGETS = \\
  52 %(targets)s
  53
  54 all: $(TARGETS)'''
  55
  56 MAKEFILE_JOB = '''
  57 %(base)s_SOURCES = \\
  58 %(jars)s
  59
  60 %(base)s_OBJECTS = \\
  61 $(%(base)s_SOURCES:.jar=.o)
  62
  63 %(dso)s: $(%(base)s_OBJECTS)
  64         $(GCJ) -shared $(GCJFLAGS) $(LDFLAGS) $^ -o $@
  65
  66 %(db)s: $(%(base)s_SOURCES)
  67         $(DBTOOL) -n $@ 64
  68         for jar in $^; do \\
  69             $(DBTOOL) -f $@ $$jar \\
  70                 %(libdir)s/%(dso)s; \\
  71         done'''
  72
  73 ZIPMAGIC, CLASSMAGIC = "PK\x03\x04", "\xca\xfe\xba\xbe"
  74
  75 class Error(Exception):
  76     pass
  77
  78 class Compiler:
  79     def __init__(self, srcdir, libdir, prefix = None):
  80         self.srcdir = os.path.abspath(srcdir)
  81         self.libdir = os.path.abspath(libdir)
  82         if prefix is None:
  83             self.dstdir = self.libdir
  84         else:
  85             self.dstdir = os.path.join(prefix, self.libdir.lstrip(os.sep))
  86
  87         # Calling code may modify these parameters
  88         self.gcjflags = copy.copy(GCJFLAGS)
  89         self.ldflags = copy.copy(LDFLAGS)
  90         self.makeflags = copy.copy(MAKEFLAGS)
  91         self.exclusions = []
  92
  93     def compile(self):
  94         """Search srcdir for classes and jarfiles, then generate
  95         solibs and mappings databases for them all in libdir."""
  96         if not os.path.isdir(self.dstdir):
  97             os.makedirs(self.dstdir)
  98         oldcwd = os.getcwd()
  99         os.chdir(self.dstdir)
 100         try:
 101             jobs = self.getJobList()
 102             if not jobs:
 103                 raise Error, "nothing to do"
 104             self.writeMakefile(MAKEFILE, jobs)
 105             for job in jobs:
 106                 job.writeJars()
 107             system([PATHS["make"]] + self.makeflags)
 108             for job in jobs:
 109                 job.clean()
 110             os.unlink(MAKEFILE)
 111         finally:
 112             os.chdir(oldcwd)
 113
 114     def getJobList(self):
 115         """Return all jarfiles and class collections in srcdir."""
 116         jobs = weed_jobs(find_jobs(self.srcdir, self.exclusions))
 117         set_basenames(jobs)
 118         return jobs
 119
 120     def writeMakefile(self, path, jobs):
 121         """Generate a makefile to build the solibs and mappings
 122         databases for the specified list of jobs."""
 123         fp = open(path, "w")
 124         print >>fp, MAKEFILE_HEADER % {
 125             "gcj": PATHS["gcj"],
 126             "dbtool": PATHS["dbtool"],
 127             "gcjflags": " ".join(self.gcjflags),
 128             "ldflags": " ".join(self.ldflags),
 129             "targets": " \\\n".join(reduce(operator.add, [
 130                 (job.dsoName(), job.dbName()) for job in jobs]))}
 131         for job in jobs:
 132             values = job.ruleArguments()
 133             values["libdir"] = self.libdir
 134             print >>fp, MAKEFILE_JOB % values
 135         fp.close()
 136
 137 def find_jobs(dir, exclusions = ()):
 138     """Scan a directory and find things to compile: jarfiles (zips,
 139     wars, ears, rars, etc: we go by magic rather than file extension)
 140     and directories of classes."""
 141     def visit((classes, zips), dir, items):
 142         for item in items:
 143             path = os.path.join(dir, item)
 144             if os.path.islink(path) or not os.path.isfile(path):
 145                 continue
 146             magic = open(path, "r").read(4)
 147             if magic == ZIPMAGIC:
 148                 zips.append(path)
 149             elif magic == CLASSMAGIC:
 150                 classes.append(path)
 151     classes, paths = [], []
 152     os.path.walk(dir, visit, (classes, paths))
 153     # Convert the list of classes into a list of directories
 154     while classes:
 155         # XXX this requires the class to be correctly located in its heirachy.
 156         path = classes[0][:-len(os.sep + classname(classes[0]) + ".class")]
 157         paths.append(path)
 158         classes = [cls for cls in classes if not cls.startswith(path)]
 159     # Handle exclusions.  We're really strict about them because the
 160     # option is temporary in aot-compile-rpm and dead options left in
 161     # specfiles will hinder its removal.
 162     for path in exclusions:
 163         if path in paths:
 164             paths.remove(path)
 165         else:
 166             raise Error, "%s: path does not exist or is not a job" % path
 167     # Build the list of jobs
 168     jobs = []
 169     paths.sort()
 170     for path in paths:
 171         if os.path.isfile(path):
 172             job = JarJob(path)
 173         else:
 174             job = DirJob(path)
 175         if len(job.classes):
 176             jobs.append(job)
 177     return jobs
 178
 179 class Job:
 180     """A collection of classes that will be compiled as a unit."""
 181
 182     def __init__(self, path):
 183         self.path, self.classes, self.blocks = path, {}, None
 184         self.classnames = {}
 185
 186     def addClass(self, bytes, name):
 187         """Subclasses call this from their __init__ method for
 188         every class they find."""
 189         digest = md5(bytes).digest()
 190         self.classes[digest] = bytes
 191         self.classnames[digest] = name
 192
 193     def __makeBlocks(self):
 194         """Split self.classes into chunks that can be compiled to
 195         native code by gcj.  In the majority of cases this is not
 196         necessary -- the job will have come from a jarfile which will
 197         be equivalent to the one we generate -- but this only happens
 198         _if_ the job was a jarfile and _if_ the jarfile isn't too big
 199         and _if_ the jarfile has the correct extension and _if_ all
 200         classes are correctly named and _if_ the jarfile has no
 201         embedded jarfiles.  Fitting a special case around all these
 202         conditions is tricky to say the least.
 203
 204         Note that this could be called at the end of each subclass's
 205         __init__ method.  The reason this is not done is because we
 206         need to parse every class file.  This is slow, and unnecessary
 207         if the job is subsetted."""
 208         names = {}
 209         for hash, bytes in self.classes.items():
 210             try:
 211                 name = classname(bytes)
 212             except:
 213                 warn("job %s: class %s malformed or not a valid class file" \
 214                      % (self.path, self.classnames[hash]))
 215                 raise
 216             if not names.has_key(name):
 217                 names[name] = []
 218             names[name].append(hash)
 219         names = names.items()
 220         # We have to sort somehow, or the jars we generate
 221         # We sort by name in a simplistic attempt to keep related
 222         # classes together so inter-class optimisation can happen.
 223         names.sort()
 224         self.blocks, bytes = [[]], 0
 225         for name, hashes in names:
 226             for hash in hashes:
 227                 if len(self.blocks[-1]) >= MAX_CLASSES_PER_JAR \
 228                    or bytes >= MAX_BYTES_PER_JAR:
 229                     self.blocks.append([])
 230                     bytes = 0
 231                 self.blocks[-1].append((name, hash))
 232                 bytes += len(self.classes[hash])
 233
 234     # From Archit Shah:
 235     #   The implementation and the documentation don't seem to match.
 236     #
 237     #    [a, b].isSubsetOf([a]) => True
 238     #
 239     #   Identical copies of all classes this collection do not exist
 240     #   in the other. I think the method should be named isSupersetOf
 241     #   and the documentation should swap uses of "this" and "other"
 242     #
 243     # XXX think about this when I've had more sleep...
 244     def isSubsetOf(self, other):
 245         """Returns True if identical copies of all classes in this
 246         collection exist in the other."""
 247         for item in other.classes.keys():
 248             if not self.classes.has_key(item):
 249                 return False
 250         return True
 251
 252     def __targetName(self, ext):
 253         return self.basename + ext
 254
 255     def tempJarName(self, num):
 256         return self.__targetName(".%d.jar" % (num + 1))
 257
 258     def tempObjName(self, num):
 259         return self.__targetName(".%d.o" % (num + 1))
 260
 261     def dsoName(self):
 262         """Return the filename of the shared library that will be
 263         built from this job."""
 264         return self.__targetName(".so")
 265
 266     def dbName(self):
 267         """Return the filename of the mapping database that will be
 268         built from this job."""
 269         return self.__targetName(".db")
 270
 271     def ruleArguments(self):
 272         """Return a dictionary of values that when substituted
 273         into MAKEFILE_JOB will create the rules required to build
 274         the shared library and mapping database for this job."""
 275         if self.blocks is None:
 276             self.__makeBlocks()
 277         return {
 278             "base": "".join(
 279                 [c.isalnum() and c or "_" for c in self.dsoName()]),
 280             "jars": " \\\n".join(
 281                 [self.tempJarName(i) for i in xrange(len(self.blocks))]),
 282             "dso": self.dsoName(),
 283             "db": self.dbName()}
 284
 285     def writeJars(self):
 286         """Generate jarfiles that can be native compiled by gcj."""
 287         if self.blocks is None:
 288             self.__makeBlocks()
 289         for block, i in zip(self.blocks, xrange(len(self.blocks))):
 290             jar = zipfile.ZipFile(self.tempJarName(i), "w", zipfile.ZIP_STORED)
 291             for name, hash in block:
 292                 jar.writestr(
 293                     zipfile.ZipInfo("%s.class" % name), self.classes[hash])
 294             jar.close()
 295
 296     def clean(self):
 297         """Delete all temporary files created during this job's build."""
 298         if self.blocks is None:
 299             self.__makeBlocks()
 300         for i in xrange(len(self.blocks)):
 301             os.unlink(self.tempJarName(i))
 302             os.unlink(self.tempObjName(i))
 303
 304 class JarJob(Job):
 305     """A Job whose origin was a jarfile."""
 306
 307     def __init__(self, path):
 308         Job.__init__(self, path)
 309         self._walk(zipfile.ZipFile(path, "r"))
 310
 311     def _walk(self, zf):
 312         for name in zf.namelist():
 313             bytes = zf.read(name)
 314             if bytes.startswith(ZIPMAGIC):
 315                 self._walk(zipfile.ZipFile(StringIO.StringIO(bytes)))
 316             elif bytes.startswith(CLASSMAGIC):
 317                 self.addClass(bytes, name)
 318
 319 class DirJob(Job):
 320     """A Job whose origin was a directory of classfiles."""
 321
 322     def __init__(self, path):
 323         Job.__init__(self, path)
 324         os.path.walk(path, DirJob._visit, self)
 325
 326     def _visit(self, dir, items):
 327         for item in items:
 328             path = os.path.join(dir, item)
 329             if os.path.islink(path) or not os.path.isfile(path):
 330                 continue
 331             fp = open(path, "r")
 332             magic = fp.read(4)
 333             if magic == CLASSMAGIC:
 334                 self.addClass(magic + fp.read(), name)
 335
 336 def weed_jobs(jobs):
 337     """Remove any jarfiles that are completely contained within
 338     another.  This is more common than you'd think, and we only
 339     need one nativified copy of each class after all."""
 340     jobs = copy.copy(jobs)
 341     while True:
 342         for job1 in jobs:
 343             for job2 in jobs:
 344                 if job1 is job2:
 345                     continue
 346                 if job1.isSubsetOf(job2):
 347                     msg = "subsetted %s" % job2.path
 348                     if job2.isSubsetOf(job1):
 349                         if (isinstance(job1, DirJob) and
 350                             isinstance(job2, JarJob)):
 351                             # In the braindead case where a package
 352                             # contains an expanded copy of a jarfile
 353                             # the jarfile takes precedence.
 354                             continue
 355                         msg += " (identical)"
 356                     warn(msg)
 357                     jobs.remove(job2)
 358                     break
 359             else:
 360                 continue
 361             break
 362         else:
 363             break
 364         continue
 365     return jobs
 366
def set_basenames(jobs):
    """Ensure that each jarfile has a different basename.

    Every job in jobs gets a basename attribute.  A job whose path
    has a unique last component simply uses that component.  Jobs
    that share a last component are given "<directory>_<component>"
    instead, where <directory> comes from the nearest level of their
    paths at which they are not all the same; a warning is printed
    for each job renamed this way.

    Raises Error if two jobs still end up with the same basename."""
    # Group the jobs by the last component of their paths.
    names = {}
    for job in jobs:
        name = os.path.basename(job.path)
        if not names.has_key(name):
            names[name] = []
        names[name].append(job)
    for name, set in names.items():
        if len(set) == 1:
            set[0].basename = name
            continue
        # prefix the jar filenames to make them unique
        # XXX will not work in most cases -- needs generalising
        # Split each path into components and keep the same number of
        # trailing components for every job in the group.
        set = [(job.path.split(os.sep), job) for job in set]
        minlen = min([len(bits) for bits, job in set])
        set = [(bits[-minlen:], job) for bits, job in set]
        # Transpose: bits[i] now holds component i of every job, so
        # bits[-1] is the shared filename and bits[-2] the parents.
        bits = apply(zip, [bits for bits, job in set])
        # Throw away parent levels for as long as every job has the
        # same name there.  A level is kept as soon as any one entry
        # differs from the first, which does not guarantee that all of
        # them are distinct.
        # NOTE(review): bits[-2] raises IndexError once fewer than two
        # rows are left, i.e. if no retained level differs -- confirm
        # whether that can happen for real inputs.
        while True:
            row = bits[-2]
            for bit in row[1:]:
                if bit != row[0]:
                    break
            else:
                del bits[-2]
                continue
            break
        # Transpose the last two rows back into one pair per job and
        # join each pair with an underscore.
        set = zip(
            ["_".join(name) for name in apply(zip, bits[-2:])],
            [job for bits, job in set])
        for name, job in set:
            warn("building %s as %s" % (job.path, name))
            job.basename = name
    # XXX keep this check until we're properly general
    names = {}
    for job in jobs:
        name = job.basename
        if names.has_key(name):
            raise Error, "%s: duplicate jobname" % name
        names[name] = 1
 407
 408 def system(command):
 409     """Execute a command."""
 410     status = os.spawnv(os.P_WAIT, command[0], command)
 411     if status > 0:
 412         raise Error, "%s exited with code %d" % (command[0], status)
 413     elif status < 0:
 414         raise Error, "%s killed by signal %d" % (command[0], -status)
 415
 416 def warn(msg):
 417     """Print a warning message."""
 418     print >>sys.stderr, "%s: warning: %s" % (
 419         os.path.basename(sys.argv[0]), msg)
 420
 421 def classname(bytes):
 422     """Extract the class name from the bytes of a class file."""
 423     klass = classfile.Class(bytes)
 424     return klass.constants[klass.constants[klass.name][1]][1]