// verify.cc - verify bytecode
-/* Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation
+/* Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation
This file is part of libgcj.
#include <config.h>
+#include <string.h>
+
#include <jvm.h>
#include <gcj/cni.h>
#include <java-insns.h>
#include <java-interp.h>
+// On Solaris 10/x86, <signal.h> indirectly includes <ia32/sys/reg.h>, which
+// defines PC since g++ predefines __EXTENSIONS__. Undef here to avoid clash
+// with PC member of class _Jv_BytecodeVerifier below.
+#undef PC
+
#ifdef INTERPRETER
#include <java/lang/Class.h>
#include <java/lang/Throwable.h>
#include <java/lang/reflect/Modifier.h>
#include <java/lang/StringBuffer.h>
+#include <java/lang/NoClassDefFoundError.h>
#ifdef VERIFY_DEBUG
#include <stdio.h>
__attribute__ ((format (printf, 1, 2)));
static inline void
-debug_print (const char *fmt, ...)
+debug_print (MAYBE_UNUSED const char *fmt, ...)
{
#ifdef VERIFY_DEBUG
va_list ap;
// subroutine is exited via `goto' or `athrow' and not `ret'.
//
// In some other areas the JVM specification is (mildly) incorrect,
-// but we still implement what is specified. For instance, you cannot
+// so we diverge. For instance, you cannot
// violate type safety by allocating an object with `new' and then
// failing to initialize it, no matter how one branches or where one
// stores the uninitialized reference. See "Improving the official
// specification of Java bytecode verification" by Alessandro Coglio.
-// Similarly, there's no real point in enforcing that padding bytes or
-// the mystery byte of invokeinterface must be 0, but we do that too.
+//
+// Note that there's no real point in enforcing that padding bytes or
+// the mystery byte of invokeinterface must be 0, but we do that
+// regardless.
//
// The verifier is currently neither completely lazy nor eager when it
// comes to loading classes. It tries to represent types by name when
// This method.
_Jv_InterpMethod *current_method;
- // A linked list of utf8 objects we allocate. This is really ugly,
- // but without this our utf8 objects would be collected.
+ // A linked list of utf8 objects we allocate.
linked<_Jv_Utf8Const> *utf8_list;
// A linked list of all ref_intersection objects we allocate.
ref_intersection *isect_list;
// Create a new Utf-8 constant and return it. We do this to avoid
- // having our Utf-8 constants prematurely collected. FIXME this is
- // ugly.
+ // having our Utf-8 constants prematurely collected.
_Jv_Utf8Const *make_utf8_const (char *s, int len)
{
- _Jv_Utf8Const *val = _Jv_makeUtf8Const (s, len);
- _Jv_Utf8Const *r = (_Jv_Utf8Const *) _Jv_Malloc (sizeof (_Jv_Utf8Const)
- + val->length
- + 1);
- r->length = val->length;
- r->hash = val->hash;
- memcpy (r->data, val->data, val->length + 1);
-
- linked<_Jv_Utf8Const> *lu
- = (linked<_Jv_Utf8Const> *) _Jv_Malloc (sizeof (linked<_Jv_Utf8Const>));
+ linked<_Jv_Utf8Const> *lu = (linked<_Jv_Utf8Const> *)
+ _Jv_Malloc (sizeof (linked<_Jv_Utf8Const>)
+ + _Jv_Utf8Const::space_needed(s, len));
+ _Jv_Utf8Const *r = (_Jv_Utf8Const *) (lu + 1);
+ r->init(s, len);
lu->val = r;
lu->next = utf8_list;
utf8_list = lu;
return r;
}
- __attribute__ ((__noreturn__)) void verify_fail (char *s, jint pc = -1)
+ __attribute__ ((__noreturn__)) void verify_fail (const char *s, jint pc = -1)
{
using namespace java::lang;
StringBuffer *buf = new StringBuffer ();
buf->append (JvNewStringLatin1 (" in "));
buf->append (current_class->getName());
buf->append ((jchar) ':');
- buf->append (JvNewStringUTF (method->get_method()->name->data));
+ buf->append (method->get_method()->name->toString());
buf->append ((jchar) '(');
- buf->append (JvNewStringUTF (method->get_method()->signature->data));
+ buf->append (method->get_method()->signature->toString());
buf->append ((jchar) ')');
buf->append (JvNewStringLatin1 (": "));
bool equals (ref_intersection *other, _Jv_BytecodeVerifier *verifier)
{
if (! is_resolved && ! other->is_resolved
- && _Jv_equalUtf8Consts (data.name, other->data.name))
+ && _Jv_equalUtf8Classnames (data.name, other->data.name))
return true;
if (! is_resolved)
resolve (verifier);
if (is_resolved)
return;
+ // This is useful if you want to see which classes have to be resolved
+ // while doing the class verification.
+ debug_print("resolving class: %s\n", data.name->chars());
+
using namespace java::lang;
java::lang::ClassLoader *loader
= verifier->current_class->getClassLoaderInternal();
- // We might see either kind of name. Sigh.
- if (data.name->data[0] == 'L'
- && data.name->data[data.name->length - 1] == ';')
- data.klass = _Jv_FindClassFromSignature (data.name->data, loader);
+
+ // Due to special handling in to_array(), array classes will always
+ // be of the "L ... ;" kind. The separator char ('.' or '/') may vary,
+ // however.
+ if (data.name->limit()[-1] == ';')
+ {
+ data.klass = _Jv_FindClassFromSignature (data.name->chars(), loader);
+ if (data.klass == NULL)
+ throw new java::lang::NoClassDefFoundError(data.name->toString());
+ }
else
data.klass = Class::forName (_Jv_NewStringUtf8Const (data.name),
false, loader);
// Avoid resolving if possible.
if (! self->is_resolved
&& ! other_iter->is_resolved
- && _Jv_equalUtf8Consts (self->data.name,
- other_iter->data.name))
+ && _Jv_equalUtf8Classnames (self->data.name,
+ other_iter->data.name))
continue;
if (! self->is_resolved)
self->resolve(verifier);
+
+ // If the LHS of the expression is of type
+ // java.lang.Object, assignment will succeed, no matter
+ // what the type of the RHS is. Using this short-cut we
+ // don't need to resolve the class of the RHS at
+ // verification time.
+ if (self->data.klass == &java::lang::Object::class$)
+ continue;
+
if (! other_iter->is_resolved)
other_iter->resolve(verifier);
if (is_resolved)
return data.klass->isArray ();
else
- return data.name->data[0] == '[';
+ return data.name->first() == '[';
}
bool isinterface (_Jv_BytecodeVerifier *verifier)
}
else
{
- char *p = data.name->data;
+ char *p = data.name->chars();
while (*p++ == '[')
++ndims;
}
{
// We use a recursive call because we also need to
// check superinterfaces.
- if (is_assignable_from_slow (target, source->interfaces[i]))
+ if (is_assignable_from_slow (target, source->getInterface (i)))
return true;
}
}
//
// First, when constructing a new object, it is the PC of the
// `new' instruction which created the object. We use the special
- // value UNINIT to mean that this is uninitialized, and the
- // special value SELF for the case where the current method is
- // itself the <init> method.
+ // value UNINIT to mean that this is uninitialized. The special
+ // value SELF is used for the case where the current method is
+ // itself the <init> method. The special value EITHER is used
+ // when we may optionally allow either an uninitialized or
+ // initialized reference to match.
//
// Second, when the key is return_address_type, this holds the PC
// of the instruction following the `jsr'.
static const int UNINIT = -2;
static const int SELF = -1;
+ static const int EITHER = -3;
// Basic constructor.
type ()
if (k.klass == NULL)
verifier->verify_fail ("programmer error in type::compatible");
- // An initialized type and an uninitialized type are not
- // compatible.
- if (isinitialized () != k.isinitialized ())
- return false;
-
- // Two uninitialized objects are compatible if either:
- // * The PCs are identical, or
- // * One PC is UNINIT.
- if (! isinitialized ())
+ // Handle the special 'EITHER' case, which is only used in a
+ // special case of 'putfield'. Note that we only need to handle
+ // this on the LHS of a check.
+ if (! isinitialized () && pc == EITHER)
{
- if (pc != k.pc && pc != UNINIT && k.pc != UNINIT)
+ // If the RHS is uninitialized, it must be an uninitialized
+ // 'this'.
+ if (! k.isinitialized () && k.pc != SELF)
return false;
}
+ else if (isinitialized () != k.isinitialized ())
+ {
+ // An initialized type and an uninitialized type are not
+ // otherwise compatible.
+ return false;
+ }
+ else
+ {
+ // Two uninitialized objects are compatible if either:
+ // * The PCs are identical, or
+ // * One PC is UNINIT.
+ if (! isinitialized ())
+ {
+ if (pc != k.pc && pc != UNINIT && k.pc != UNINIT)
+ return false;
+ }
+ }
return klass->compatible(k.klass, verifier);
}
+ bool equals (const type &other, _Jv_BytecodeVerifier *vfy)
+ {
+ // Only works for reference types.
+ if ((key != reference_type
+ && key != uninitialized_reference_type)
+ || (other.key != reference_type
+ && other.key != uninitialized_reference_type))
+ return false;
+ // Only for single-valued types.
+ if (klass->ref_next || other.klass->ref_next)
+ return false;
+ return klass->equals (other.klass, vfy);
+ }
+
bool isvoid () const
{
return key == void_type;
if (key != reference_type)
verifier->verify_fail ("internal error in type::to_array()");
- jclass k = klass->getclass (verifier);
- return type (_Jv_GetArrayClass (k, k->getClassLoaderInternal()),
- verifier);
+ // If the class is already resolved we can simply ask the runtime
+ // to give us the array version.
+ // If it is not resolved we prepend "[" to the classname to make the
+ // array usage verification lazier. In other words, this lets
+ // `new Foo[300]' pass the verifier even if Foo.class is missing.
+ if (klass->is_resolved)
+ {
+ jclass k = klass->getclass (verifier);
+
+ return type (_Jv_GetArrayClass (k, k->getClassLoaderInternal()),
+ verifier);
+ }
+ else
+ {
+ int len = klass->data.name->len();
+
+ // If the classname is given in the Lp1/p2/cn; format we only need
+ // to add a leading '['. The same procedure has to be done for
+ // primitive arrays (i.e. given "[I", the result should be "[[I").
+ // If the classname is given as p1.p2.cn we have to embed it into
+ // "[L" and ';'.
+ if (klass->data.name->limit()[-1] == ';' ||
+ _Jv_isPrimitiveOrDerived(klass->data.name))
+ {
+ // Reserves space for the leading '[' and the trailing '\0'.
+ char arrayName[len + 2];
+
+ arrayName[0] = '[';
+ strcpy(&arrayName[1], klass->data.name->chars());
+
+#ifdef VERIFY_DEBUG
+ // This is only needed when we want to print the string to the
+ // screen while debugging.
+ arrayName[len + 1] = '\0';
+
+ debug_print("len: %d - old: '%s' - new: '%s'\n", len, klass->data.name->chars(), arrayName);
+#endif
+
+ return type (verifier->make_utf8_const( arrayName, len + 1 ),
+ verifier);
+ }
+ else
+ {
+ // Reserves space for the leading "[L" and the trailing ';' and '\0'.
+ char arrayName[len + 4];
+
+ arrayName[0] = '[';
+ arrayName[1] = 'L';
+ strcpy(&arrayName[2], klass->data.name->chars());
+ arrayName[len + 2] = ';';
+
+#ifdef VERIFY_DEBUG
+ // This is only needed when we want to print the string to the
+ // screen while debugging.
+ arrayName[len + 3] = '\0';
+
+ debug_print("len: %d - old: '%s' - new: '%s'\n", len, klass->data.name->chars(), arrayName);
+#endif
+
+ return type (verifier->make_utf8_const( arrayName, len + 3 ),
+ verifier);
+ }
+ }
+
}
bool isreference () const
return changed;
}
- // Throw an exception if there is an uninitialized object on the
- // stack or in a local variable. EXCEPTION_SEMANTICS controls
- // whether we're using backwards-branch or exception-handing
- // semantics.
- void check_no_uninitialized_objects (int max_locals,
- _Jv_BytecodeVerifier *verifier,
- bool exception_semantics = false)
- {
- if (! exception_semantics)
- {
- for (int i = 0; i < stacktop; ++i)
- if (stack[i].isreference () && ! stack[i].isinitialized ())
- verifier->verify_fail ("uninitialized object on stack");
- }
-
- for (int i = 0; i < max_locals; ++i)
- if (locals[i].isreference () && ! locals[i].isinitialized ())
- verifier->verify_fail ("uninitialized object in local variable");
-
- check_this_initialized (verifier);
- }
-
// Ensure that `this' has been initialized.
void check_this_initialized (_Jv_BytecodeVerifier *verifier)
{
void push_jump (int offset)
{
int npc = compute_jump (offset);
- if (npc < PC)
- current_state->check_no_uninitialized_objects (current_method->max_locals, this);
+ // According to the JVM Spec, we need to check for uninitialized
+ // objects here. However, this does not actually affect type
+ // safety, and the Eclipse java compiler generates code that
+ // violates this constraint.
merge_into (npc, current_state);
}
void push_exception_jump (type t, int pc)
{
- current_state->check_no_uninitialized_objects (current_method->max_locals,
- this, true);
+ // According to the JVM Spec, we need to check for uninitialized
+ // objects here. However, this does not actually affect type
+ // safety, and the Eclipse java compiler generates code that
+ // violates this constraint.
state s (current_state, current_method->max_stack,
current_method->max_locals);
if (current_method->max_stack < 1)
if (npc >= current_method->code_length)
verify_fail ("fell off end");
- if (npc < PC)
- current_state->check_no_uninitialized_objects (current_method->max_locals,
- this);
+ // According to the JVM Spec, we need to check for uninitialized
+ // objects here. However, this does not actually affect type
+ // safety, and the Eclipse java compiler generates code that
+ // violates this constraint.
merge_into (npc, current_state);
invalidate_pc ();
}
{
int npc = compute_jump (offset);
- if (npc < PC)
- current_state->check_no_uninitialized_objects (current_method->max_locals, this);
+ // According to the JVM Spec, we need to check for uninitialized
+ // objects here. However, this does not actually affect type
+ // safety, and the Eclipse java compiler generates code that
+ // violates this constraint.
// Modify our state as appropriate for entry into a subroutine.
type ret_addr (return_address_type);
case op_getstatic_4:
case op_getstatic_8:
case op_getstatic_a:
+ case op_breakpoint:
default:
verify_fail ("unrecognized instruction in branch_prepass",
start_PC);
{
check_pool_index (index);
_Jv_Constants *pool = ¤t_class->constants;
- if (pool->tags[index] == JV_CONSTANT_ResolvedString
- || pool->tags[index] == JV_CONSTANT_String)
+ int tag = pool->tags[index];
+ if (tag == JV_CONSTANT_ResolvedString || tag == JV_CONSTANT_String)
return type (&java::lang::String::class$, this);
- else if (pool->tags[index] == JV_CONSTANT_Integer)
+ else if (tag == JV_CONSTANT_Integer)
return type (int_type);
- else if (pool->tags[index] == JV_CONSTANT_Float)
+ else if (tag == JV_CONSTANT_Float)
return type (float_type);
+ else if (current_method->is_15
+ && (tag == JV_CONSTANT_ResolvedClass || tag == JV_CONSTANT_Class))
+ return type (&java::lang::Class::class$, this);
verify_fail ("String, int, or float constant expected", start_PC);
}
}
// Return field's type, compute class' type if requested.
- type check_field_constant (int index, type *class_type = NULL)
+ // If PUTFIELD is true, use the special 'putfield' semantics.
+ type check_field_constant (int index, type *class_type = NULL,
+ bool putfield = false)
{
_Jv_Utf8Const *name, *field_type;
type ct = handle_field_or_method (index,
&name, &field_type);
if (class_type)
*class_type = ct;
- if (field_type->data[0] == '[' || field_type->data[0] == 'L')
- return type (field_type, this);
- return get_type_val_for_signature (field_type->data[0]);
+ type result;
+ if (field_type->first() == '[' || field_type->first() == 'L')
+ result = type (field_type, this);
+ else
+ result = get_type_val_for_signature (field_type->first());
+
+ // We have an obscure special case here: we can use `putfield' on
+ // a field declared in this class, even if `this' has not yet been
+ // initialized.
+ if (putfield
+ && ! current_state->this_type.isinitialized ()
+ && current_state->this_type.pc == type::SELF
+ && current_state->this_type.equals (ct, this)
+ // We don't look at the signature, figuring that if it is
+ // wrong we will fail during linking. FIXME?
+ && _Jv_Linker::has_field_p (current_class, name))
+ // Note that we don't actually know whether we're going to match
+ // against 'this' or some other object of the same type. So,
+ // here we set things up so that it doesn't matter. This relies
+ // on knowing what our caller is up to.
+ class_type->set_uninitialized (type::EITHER, this);
+
+ return result;
}
type check_method_constant (int index, bool is_interface,
void compute_argument_types (_Jv_Utf8Const *signature,
type *types)
{
- char *p = signature->data;
+ char *p = signature->chars();
+
// Skip `('.
++p;
type compute_return_type (_Jv_Utf8Const *signature)
{
- char *p = signature->data;
+ char *p = signature->chars();
while (*p != ')')
++p;
++p;
// We only have to do this checking in the situation where
// control flow falls through from the previous
// instruction. Otherwise merging is done at the time we
- // push the branch.
- if (states[PC] != NULL)
+ // push the branch. Note that we'll catch the
+ // off-the-end problem just below.
+ if (PC < current_method->code_length && states[PC] != NULL)
{
// We've already visited this instruction. So merge
// the states together. It is simplest, but not most
case op_putfield:
{
type klass;
- type field = check_field_constant (get_ushort (), &klass);
+ type field = check_field_constant (get_ushort (), &klass, true);
pop_type (field);
-
- // We have an obscure special case here: we can use
- // `putfield' on a field declared in this class, even if
- // `this' has not yet been initialized.
- if (! current_state->this_type.isinitialized ()
- && current_state->this_type.pc == type::SELF)
- klass.set_uninitialized (type::SELF, this);
pop_type (klass);
}
break;
if (opcode != op_invokespecial)
verify_fail ("can't invoke <init>");
}
- else if (method_name->data[0] == '<')
+ else if (method_name->first() == '<')
verify_fail ("can't invoke method starting with `<'");
// Pop arguments and check types.
case op_new:
{
type t = check_class_constant (get_ushort ());
- if (t.isarray () || t.isinterface (this) || t.isabstract (this))
- verify_fail ("type is array, interface, or abstract");
+ if (t.isarray ())
+ verify_fail ("type is array");
t.set_uninitialized (start_PC, this);
push_type (t);
}
case op_getstatic_4:
case op_getstatic_8:
case op_getstatic_a:
+ case op_breakpoint:
default:
// Unrecognized opcode.
verify_fail ("unrecognized instruction in verify_instructions_0",
// We just print the text as utf-8. This is just for debugging
// anyway.
debug_print ("--------------------------------\n");
- debug_print ("-- Verifying method `%s'\n", m->self->name->data);
+ debug_print ("-- Verifying method `%s'\n", m->self->name->chars());
current_method = m;
bytecode = m->bytecode ();
while (utf8_list != NULL)
{
linked<_Jv_Utf8Const> *n = utf8_list->next;
- _Jv_Free (utf8_list->val);
_Jv_Free (utf8_list);
utf8_list = n;
}