// Written by Tom Tromey <tromey@redhat.com>
+// Define VERIFY_DEBUG to enable debugging output.
+
#include <config.h>
#include <jvm.h>
#include <java/lang/reflect/Modifier.h>
#include <java/lang/StringBuffer.h>
+#ifdef VERIFY_DEBUG
+#include <stdarg.h>
+#include <stdio.h>
+#endif /* VERIFY_DEBUG */
// TO DO
// * read more about when classes must be loaded
static void verify_fail (char *msg, jint pc = -1)
__attribute__ ((__noreturn__));
+// Print a printf-style debugging message to stderr.  Output is only
+// produced when VERIFY_DEBUG is defined; otherwise the body is empty.
+static void debug_print (const char *fmt, ...)
+  __attribute__ ((format (printf, 1, 2)));
+
+// FMT is marked as possibly unused because nothing refers to it when
+// VERIFY_DEBUG is not defined.
+static inline void
+debug_print (const char *fmt __attribute__ ((__unused__)), ...)
+{
+#ifdef VERIFY_DEBUG
+  va_list ap;
+  va_start (ap, fmt);
+  vfprintf (stderr, fmt, ap);
+  va_end (ap);
+#endif /* VERIFY_DEBUG */
+}
+
class _Jv_BytecodeVerifier
{
private:
static const int FLAG_INSN_START = 1;
static const int FLAG_BRANCH_TARGET = 2;
- static const int FLAG_JSR_TARGET = 4;
struct state;
struct type;
struct subr_info;
+ struct linked_utf8;
// The current PC.
int PC;
// This method.
_Jv_InterpMethod *current_method;
+ // A linked list of utf8 objects we allocate. This is really ugly,
+ // but without this our utf8 objects would be collected.
+ linked_utf8 *utf8_list;
+
+ struct linked_utf8
+ {
+ _Jv_Utf8Const *val; // The utf8 constant recorded by this node.
+ linked_utf8 *next; // The following node in the list.
+ };
+
+  // Build a utf8 constant from the LEN bytes at S and return a copy
+  // of it allocated with _Jv_Malloc.  The copy is also pushed onto
+  // the front of `utf8_list'.
+  _Jv_Utf8Const *make_utf8_const (char *s, int len)
+  {
+    _Jv_Utf8Const *made = _Jv_makeUtf8Const (s, len);
+
+    // Duplicate the constant, including the one byte that follows
+    // its `length' bytes of data.
+    _Jv_Utf8Const *dupe
+      = (_Jv_Utf8Const *) _Jv_Malloc (sizeof (_Jv_Utf8Const)
+                                      + made->length
+                                      + 1);
+    memcpy (dupe->data, made->data, made->length + 1);
+    dupe->hash = made->hash;
+    dupe->length = made->length;
+
+    // Link a new node for the copy at the head of the list.
+    linked_utf8 *node = (linked_utf8 *) _Jv_Malloc (sizeof (linked_utf8));
+    node->next = utf8_list;
+    node->val = dupe;
+    utf8_list = node;
+
+    return dupe;
+  }
+
// This enum holds a list of tags for all the different types we
// need to handle. Reference types are treated specially by the
// type class.
return_address_type,
continuation_type,
+ // There is an obscure special case which requires us to note when
+ // a local variable has not been used by a subroutine. See
+ // push_jump_merge for more information.
+ unused_by_subroutine_type,
+
// Everything after `reference_type' must be a reference type.
reference_type,
null_type,
if (target->isPrimitive () || source->isPrimitive ())
return false;
- // _Jv_IsAssignableFrom can handle a target which is an
- // interface even if it hasn't been prepared.
- if ((target->state > JV_STATE_LINKED || target->isInterface ())
- && source->state > JV_STATE_LINKED)
- return _Jv_IsAssignableFrom (target, source);
-
+ // Check array case first because we can have an array whose
+ // component type is not prepared; _Jv_IsAssignableFrom
+ // doesn't handle this correctly.
if (target->isArray ())
{
if (! source->isArray ())
target = target->getComponentType ();
source = source->getComponentType ();
}
+ // _Jv_IsAssignableFrom can handle a target which is an
+ // interface even if it hasn't been prepared.
+ else if ((target->state > JV_STATE_LINKED || target->isInterface ())
+ && source->state > JV_STATE_LINKED)
+ return _Jv_IsAssignableFrom (target, source);
else if (target->isInterface ())
{
for (int i = 0; i < source->interface_count; ++i)
{
if (local_semantics)
{
- key = unsuitable_type;
- changed = true;
+ // If we're merging into an "unused" slot, then we
+ // simply accept whatever we're merging from.
+ if (key == unused_by_subroutine_type)
+ {
+ *this = old_type;
+ changed = true;
+ }
+ else if (old_type.key == unused_by_subroutine_type)
+ {
+ // Do nothing.
+ }
+ // If we already have an `unsuitable' type, then we
+ // don't need to change again.
+ else if (key != unsuitable_type)
+ {
+ key = unsuitable_type;
+ changed = true;
+ }
}
else
verify_fail ("unmergeable type");
}
return changed;
}
+
+#ifdef VERIFY_DEBUG
+  // Write a single character describing this type's key through
+  // debug_print.  A key with no entry below is shown as `?'.
+  void print (void) const
+  {
+    char code = '?';
+    switch (key)
+      {
+      case boolean_type:                            code = 'Z'; break;
+      case byte_type:                               code = 'B'; break;
+      case char_type:                               code = 'C'; break;
+      case short_type:                              code = 'S'; break;
+      case int_type:                                code = 'I'; break;
+      case long_type:                               code = 'J'; break;
+      case float_type:                              code = 'F'; break;
+      case double_type:                             code = 'D'; break;
+      case void_type:                               code = 'V'; break;
+      case unsuitable_type:                         code = '-'; break;
+      case return_address_type:                     code = 'r'; break;
+      case continuation_type:                       code = '+'; break;
+      case unused_by_subroutine_type:               code = '_'; break;
+      case reference_type:                          code = 'L'; break;
+      case null_type:                               code = '@'; break;
+      case unresolved_reference_type:               code = 'l'; break;
+      case uninitialized_reference_type:            code = 'U'; break;
+      case uninitialized_unresolved_reference_type: code = 'u'; break;
+      }
+    debug_print ("%c", code);
+  }
+#endif /* VERIFY_DEBUG */
};
// This class holds all the state information we need for a given
// This is used to keep a linked list of all the states which
// require re-verification. We use the PC to keep track.
int next;
+ // We keep track of the type of `this' specially. This is used to
+ // ensure that an instance initializer invokes another initializer
+ // on `this' before returning. We must keep track of this
+ // specially because otherwise we might be confused by code which
+ // assigns to locals[0] (overwriting `this') and then returns
+ // without really initializing.
+ type this_type;
// INVALID marks a state which is not on the linked list of states
// requiring reverification.
static const int NO_NEXT = -2;
state ()
+ : this_type ()
{
stack = NULL;
locals = NULL;
}
state (int max_stack, int max_locals)
+ : this_type ()
{
stacktop = 0;
stackdepth = 0;
subroutine = 0;
}
- state (const state *orig, int max_stack, int max_locals)
+ state (const state *orig, int max_stack, int max_locals,
+ bool ret_semantics = false) // Build a new state from ORIG.
{
stack = new type[max_stack];
locals = new type[max_locals];
local_changed = (bool *) _Jv_Malloc (sizeof (bool) * max_locals);
- copy (orig, max_stack, max_locals);
+ copy (orig, max_stack, max_locals, ret_semantics); // Fill the fresh arrays from ORIG; RET_SEMANTICS is passed straight through.
next = INVALID; // Start out off the reverification list.
}
_Jv_Free (mem);
}
- void copy (const state *copy, int max_stack, int max_locals)
+ void copy (const state *copy, int max_stack, int max_locals,
+ bool ret_semantics = false)
{
stacktop = copy->stacktop;
stackdepth = copy->stackdepth;
stack[i] = copy->stack[i];
for (int i = 0; i < max_locals; ++i)
{
- locals[i] = copy->locals[i];
+ // See push_jump_merge to understand this case.
+ if (ret_semantics)
+ locals[i] = type (copy->local_changed[i]
+ ? unsuitable_type
+ : unused_by_subroutine_type);
+ else
+ locals[i] = copy->locals[i];
local_changed[i] = copy->local_changed[i];
}
+ this_type = copy->this_type;
// Don't modify `next'.
}
// FIXME: subroutine handling?
}
- // Merge STATE into this state. Destructively modifies this state.
- // Returns true if the new state was in fact changed. Will throw an
- // exception if the states are not mergeable.
+ // Merge STATE_OLD into this state. Destructively modifies this
+ // state. Returns true if the new state was in fact changed.
+ // Will throw an exception if the states are not mergeable.
bool merge (state *state_old, bool ret_semantics,
int max_locals)
{
bool changed = false;
+ // Special handling for `this'. If one or the other is
+ // uninitialized, then the merge is uninitialized.
+ if (this_type.isinitialized ())
+ this_type = state_old->this_type;
+
// Merge subroutine states. *THIS and *STATE_OLD must be in the
// same subroutine. Also, recursive subroutine calls must be
// avoided.
for (int i = 0; i < max_locals; ++i)
if (locals[i].isreference () && ! locals[i].isinitialized ())
verify_fail ("uninitialized object in local variable");
+
+ check_this_initialized ();
}
- // Note that a local variable was accessed or modified.
+  // Fail verification if `this' is a reference type which has not
+  // been initialized.
+  void check_this_initialized ()
+  {
+    if (! this_type.isreference ())
+      return;
+    if (this_type.isinitialized ())
+      return;
+    verify_fail ("`this' is uninitialized");
+  }
+
+  // Record K as the type of `this' for this state.
+  void set_this_type (const type &k)
+  {
+    this->this_type = k;
+  }
+
+ // Note that a local variable was modified.
void note_variable (int index)
{
if (subroutine > 0)
stack[i].set_initialized (pc);
for (int i = 0; i < max_locals; ++i)
locals[i].set_initialized (pc);
+ this_type.set_initialized (pc);
+ }
+
+  // Return true if this state is the unmerged result of a `ret',
+  // which is detected by finding `unused_by_subroutine_type' in any
+  // of the first MAX_LOCALS local variable slots.
+  bool is_unmerged_ret_state (int max_locals) const
+  {
+    bool found = false;
+    for (int slot = 0; ! found && slot < max_locals; ++slot)
+      found = locals[slot].key == unused_by_subroutine_type;
+    return found;
  }
+
+#ifdef VERIFY_DEBUG
+  // Dump this state on one line through debug_print: LEADER and PC,
+  // then the `stacktop' occupied stack slots padded with `.' up to
+  // MAX_STACK, then the MAX_LOCALS local variable slots, and finally
+  // the address of this state object.
+  void print (const char *leader, int pc,
+              int max_stack, int max_locals) const
+  {
+    debug_print ("%s [%4d]: [stack] ", leader, pc);
+    int i;
+    for (i = 0; i < stacktop; ++i)
+      stack[i].print ();
+    for (; i < max_stack; ++i)
+      debug_print (".");
+    debug_print (" [local] ");
+    for (i = 0; i < max_locals; ++i)
+      locals[i].print ();
+    // The `%p' conversion expects a pointer to void.
+    debug_print (" | %p\n", (const void *) this);
+  }
+#else
+  // Without VERIFY_DEBUG printing a state does nothing.
+  inline void print (const char *, int, int, int) const
+  {
+  }
+#endif /* VERIFY_DEBUG */
};
type pop_raw ()
if (! current_state->locals[index + 1].compatible (t))
verify_fail ("invalid local variable", start_PC);
}
- current_state->note_variable (index);
return current_state->locals[index];
}
bool changed = true;
if (states[npc] == NULL)
{
- // FIXME: what if we reach this code from a `ret'?
-
+ // There's a weird situation here. If we are examining the
+ // branch that results from a `ret', and there is not yet a
+ // state available at the branch target (the instruction just
+ // after the `jsr'), then we have to construct a special kind
+ // of state at that point for future merging. This special
+ // state has the type `unused_by_subroutine_type' in each slot
+ // which was not modified by the subroutine.
states[npc] = new state (nstate, current_method->max_stack,
- current_method->max_locals);
+ current_method->max_locals, ret_semantics);
+ debug_print ("== New state in push_jump_merge\n");
+ states[npc]->print ("New", npc, current_method->max_stack,
+ current_method->max_locals);
}
else
- changed = states[npc]->merge (nstate, ret_semantics,
- current_method->max_locals);
+ {
+ debug_print ("== Merge states in push_jump_merge\n");
+ nstate->print ("Frm", start_PC, current_method->max_stack,
+ current_method->max_locals);
+ states[npc]->print (" To", npc, current_method->max_stack,
+ current_method->max_locals);
+ changed = states[npc]->merge (nstate, ret_semantics,
+ current_method->max_locals);
+ states[npc]->print ("New", npc, current_method->max_stack,
+ current_method->max_locals);
+ }
if (changed && states[npc]->next == state::INVALID)
{
int pop_jump () // Unlink and return the first pending PC that is ready to verify, or state::NO_NEXT if none is pending.
{
+ int *prev_loc = &next_verify_pc; // The link to rewrite when a PC is unlinked.
int npc = next_verify_pc;
- if (npc != state::NO_NEXT)
+ bool skipped = false; // Set once any entry has been passed over.
+
+ while (npc != state::NO_NEXT)
{
- next_verify_pc = states[npc]->next;
- states[npc]->next = state::INVALID;
+ // If the next available PC is an unmerged `ret' state, then
+ // we aren't yet ready to handle it. That's because we would
+ // need all kinds of special cases to do so. So instead we
+ // defer this jump until after we've processed it via a
+ // fall-through. This has to happen because the instruction
+ // before this one must be a `jsr'.
+ if (! states[npc]->is_unmerged_ret_state (current_method->max_locals))
+ {
+ *prev_loc = states[npc]->next;
+ states[npc]->next = state::INVALID;
+ return npc;
+ }
+
+ skipped = true;
+ prev_loc = &states[npc]->next;
+ npc = states[npc]->next;
}
- return npc;
+
+ // If we've skipped states and there is nothing else, that's a
+ // bug.
+ if (skipped)
+ verify_fail ("pop_jump: can't happen");
+ return state::NO_NEXT;
}
void invalidate_pc ()
void note_branch_target (int pc, bool is_jsr_target = false)
{
- if (pc <= PC && ! (flags[pc] & FLAG_INSN_START))
- verify_fail ("branch not to instruction start");
+ // Don't check `pc <= PC', because we've advanced PC after
+ // fetching the target and we haven't yet checked the next
+ // instruction.
+ if (pc < PC && ! (flags[pc] & FLAG_INSN_START))
+ verify_fail ("branch not to instruction start", start_PC);
flags[pc] |= FLAG_BRANCH_TARGET;
if (is_jsr_target)
{
info->pc = PC;
info->next = jsr_ptrs[pc];
jsr_ptrs[pc] = info;
- flags[pc] |= FLAG_JSR_TARGET;
}
}
PC = 0;
while (PC < current_method->code_length)
{
+ // Set `start_PC' early so that error checking can have the
+ // correct value.
+ start_PC = PC;
flags[PC] |= FLAG_INSN_START;
// If the previous instruction was a jsr, then the next
note_branch_target (PC);
last_was_jsr = false;
- start_PC = PC;
java_opcode opcode = (java_opcode) bytecode[PC++];
switch (opcode)
{
while (*p != ';')
++p;
++p;
- // FIXME! This will get collected!
- _Jv_Utf8Const *name = _Jv_makeUtf8Const (start, p - start);
+ _Jv_Utf8Const *name = make_utf8_const (start, p - start);
return type (name);
}
verify_fail ("incompatible return type", start_PC);
}
+  // Set up the local variables of `current_state' for the start of
+  // the method: `this' for a non-static method, followed by the
+  // declared arguments.  Returns true if this method is an instance
+  // initializer.
+  bool initialize_stack ()
+  {
+    using namespace java::lang::reflect;
+
+    bool is_init = false;
+    int slot = 0;
+
+    if (! Modifier::isStatic (current_method->self->accflags))
+      {
+        type receiver (current_class);
+        if (_Jv_equalUtf8Consts (current_method->self->name, gcj::init_name))
+          {
+            // In an instance initializer `this' starts out
+            // uninitialized.
+            receiver.set_uninitialized (type::SELF);
+            is_init = true;
+          }
+        set_variable (0, receiver);
+        current_state->set_this_type (receiver);
+        slot = 1;
+      }
+
+    // A wide argument occupies two consecutive slots.
+    int arg_count = _Jv_count_arguments (current_method->self->signature);
+    type arg_types[arg_count];
+    compute_argument_types (current_method->self->signature, arg_types);
+    for (int i = 0; i < arg_count; ++i)
+      {
+        set_variable (slot, arg_types[i]);
+        slot += arg_types[i].iswide () ? 2 : 1;
+      }
+
+    return is_init;
+  }
+
void verify_instructions_0 ()
{
current_state = new state (current_method->max_stack,
PC = 0;
start_PC = 0;
- {
- int var = 0;
-
- using namespace java::lang::reflect;
- if (! Modifier::isStatic (current_method->self->accflags))
- {
- type kurr (current_class);
- if (_Jv_equalUtf8Consts (current_method->self->name, gcj::init_name))
- kurr.set_uninitialized (type::SELF);
- set_variable (0, kurr);
- ++var;
- }
-
- // We have to handle wide arguments specially here.
- int arg_count = _Jv_count_arguments (current_method->self->signature);
- type arg_types[arg_count];
- compute_argument_types (current_method->self->signature, arg_types);
- for (int i = 0; i < arg_count; ++i)
- {
- set_variable (var, arg_types[i]);
- ++var;
- if (arg_types[i].iswide ())
- ++var;
- }
- }
+ // True if we are verifying an instance initializer.
+ bool this_is_init = initialize_stack ();
states = (state **) _Jv_Malloc (sizeof (state *)
* current_method->code_length);
{
// We've already visited this instruction. So merge
// the states together. If this yields no change then
- // we don't have to re-verify.
+ // we don't have to re-verify. However, if the new
+ // state is the result of an unmerged `ret', we
+ // must continue through it.
+ debug_print ("== Fall through merge\n");
+ states[PC]->print ("Old", PC, current_method->max_stack,
+ current_method->max_locals);
+ current_state->print ("Cur", PC, current_method->max_stack,
+ current_method->max_locals);
if (! current_state->merge (states[PC], false,
- current_method->max_locals))
+ current_method->max_locals)
+ && ! states[PC]->is_unmerged_ret_state (current_method->max_locals))
{
+ debug_print ("== Fall through optimization\n");
invalidate_pc ();
continue;
}
// Save a copy of it for later.
states[PC]->copy (current_state, current_method->max_stack,
current_method->max_locals);
+ current_state->print ("New", PC, current_method->max_stack,
+ current_method->max_locals);
}
}
current_method->max_locals);
}
+ // Set this before handling exceptions so that debug output is
+ // sane.
+ start_PC = PC;
+
// Update states for all active exception handlers. Ordinarily
// there are not many exception handlers. So we simply run
// through them all.
}
}
- start_PC = PC;
+ current_state->print (" ", PC, current_method->max_stack,
+ current_method->max_locals);
java_opcode opcode = (java_opcode) bytecode[PC++];
switch (opcode)
{
invalidate_pc ();
break;
case op_return:
+ // We only need to check this when the return type is
+ // void, because all instance initializers return void.
+ if (this_is_init)
+ current_state->check_this_initialized ();
check_return_type (void_type);
invalidate_pc ();
break;
type klass;
type field = check_field_constant (get_ushort (), &klass);
pop_type (field);
+
+ // We have an obscure special case here: we can use
+ // `putfield' on a field declared in this class, even if
+ // `this' has not yet been initialized.
+ if (! current_state->this_type.isinitialized ()
+ && current_state->this_type.pc == type::SELF)
+ klass.set_uninitialized (type::SELF);
pop_type (klass);
}
break;
_Jv_BytecodeVerifier (_Jv_InterpMethod *m)
{
+ // We just print the text as utf-8. This is just for debugging
+ // anyway.
+ debug_print ("--------------------------------\n");
+ debug_print ("-- Verifying method `%s'\n", m->self->name->data);
+
current_method = m;
bytecode = m->bytecode ();
exception = m->exceptions ();
states = NULL;
flags = NULL;
jsr_ptrs = NULL;
+ utf8_list = NULL;
}
~_Jv_BytecodeVerifier ()
_Jv_Free (flags);
if (jsr_ptrs)
_Jv_Free (jsr_ptrs);
+ while (utf8_list != NULL)
+ {
+ linked_utf8 *n = utf8_list->next;
+ _Jv_Free (utf8_list->val);
+ _Jv_Free (utf8_list);
+ utf8_list = n;
+ }
}
};