runtime/leb128.h

   1 /*
   2  * Copyright (C) 2011 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #ifndef ART_RUNTIME_LEB128_H_
  18 #define ART_RUNTIME_LEB128_H_
  19
  20 #include <vector>
  21
  22 #include "base/bit_utils.h"
  23 #include "base/logging.h"
  24 #include "globals.h"
  25
  26 namespace art {
  27
  28 // Reads an unsigned LEB128 value, updating the given pointer to point
  29 // just past the end of the read value. This function tolerates
  30 // non-zero high-order bits in the fifth encoded byte.
  31 static inline uint32_t DecodeUnsignedLeb128(const uint8_t** data) {
  32   const uint8_t* ptr = *data;
  33   int result = *(ptr++);
  34   if (UNLIKELY(result > 0x7f)) {
  35     int cur = *(ptr++);
  36     result = (result & 0x7f) | ((cur & 0x7f) << 7);
  37     if (cur > 0x7f) {
  38       cur = *(ptr++);
  39       result |= (cur & 0x7f) << 14;
  40       if (cur > 0x7f) {
  41         cur = *(ptr++);
  42         result |= (cur & 0x7f) << 21;
  43         if (cur > 0x7f) {
  44           // Note: We don't check to see if cur is out of range here,
  45           // meaning we tolerate garbage in the four high-order bits.
  46           cur = *(ptr++);
  47           result |= cur << 28;
  48         }
  49       }
  50     }
  51   }
  52   *data = ptr;
  53   return static_cast<uint32_t>(result);
  54 }
  55
  56 static inline bool DecodeUnsignedLeb128Checked(const uint8_t** data,
  57                                                const void* end,
  58                                                uint32_t* out) {
  59   const uint8_t* ptr = *data;
  60   if (ptr >= end) {
  61     return false;
  62   }
  63   int result = *(ptr++);
  64   if (UNLIKELY(result > 0x7f)) {
  65     if (ptr >= end) {
  66       return false;
  67     }
  68     int cur = *(ptr++);
  69     result = (result & 0x7f) | ((cur & 0x7f) << 7);
  70     if (cur > 0x7f) {
  71       if (ptr >= end) {
  72         return false;
  73       }
  74       cur = *(ptr++);
  75       result |= (cur & 0x7f) << 14;
  76       if (cur > 0x7f) {
  77         if (ptr >= end) {
  78           return false;
  79         }
  80         cur = *(ptr++);
  81         result |= (cur & 0x7f) << 21;
  82         if (cur > 0x7f) {
  83           if (ptr >= end) {
  84             return false;
  85           }
  86           // Note: We don't check to see if cur is out of range here,
  87           // meaning we tolerate garbage in the four high-order bits.
  88           cur = *(ptr++);
  89           result |= cur << 28;
  90         }
  91       }
  92     }
  93   }
  94   *data = ptr;
  95   *out = static_cast<uint32_t>(result);
  96   return true;
  97 }
  98
  99 // Reads an unsigned LEB128 + 1 value. updating the given pointer to point
 100 // just past the end of the read value. This function tolerates
 101 // non-zero high-order bits in the fifth encoded byte.
 102 // It is possible for this function to return -1.
 103 static inline int32_t DecodeUnsignedLeb128P1(const uint8_t** data) {
 104   return DecodeUnsignedLeb128(data) - 1;
 105 }
 106
 107 // Reads a signed LEB128 value, updating the given pointer to point
 108 // just past the end of the read value. This function tolerates
 109 // non-zero high-order bits in the fifth encoded byte.
 110 static inline int32_t DecodeSignedLeb128(const uint8_t** data) {
 111   const uint8_t* ptr = *data;
 112   int32_t result = *(ptr++);
 113   if (result <= 0x7f) {
 114     result = (result << 25) >> 25;
 115   } else {
 116     int cur = *(ptr++);
 117     result = (result & 0x7f) | ((cur & 0x7f) << 7);
 118     if (cur <= 0x7f) {
 119       result = (result << 18) >> 18;
 120     } else {
 121       cur = *(ptr++);
 122       result |= (cur & 0x7f) << 14;
 123       if (cur <= 0x7f) {
 124         result = (result << 11) >> 11;
 125       } else {
 126         cur = *(ptr++);
 127         result |= (cur & 0x7f) << 21;
 128         if (cur <= 0x7f) {
 129           result = (result << 4) >> 4;
 130         } else {
 131           // Note: We don't check to see if cur is out of range here,
 132           // meaning we tolerate garbage in the four high-order bits.
 133           cur = *(ptr++);
 134           result |= cur << 28;
 135         }
 136       }
 137     }
 138   }
 139   *data = ptr;
 140   return result;
 141 }
 142
 143 static inline bool DecodeSignedLeb128Checked(const uint8_t** data,
 144                                              const void* end,
 145                                              int32_t* out) {
 146   const uint8_t* ptr = *data;
 147   if (ptr >= end) {
 148     return false;
 149   }
 150   int32_t result = *(ptr++);
 151   if (result <= 0x7f) {
 152     result = (result << 25) >> 25;
 153   } else {
 154     if (ptr >= end) {
 155       return false;
 156     }
 157     int cur = *(ptr++);
 158     result = (result & 0x7f) | ((cur & 0x7f) << 7);
 159     if (cur <= 0x7f) {
 160       result = (result << 18) >> 18;
 161     } else {
 162       if (ptr >= end) {
 163         return false;
 164       }
 165       cur = *(ptr++);
 166       result |= (cur & 0x7f) << 14;
 167       if (cur <= 0x7f) {
 168         result = (result << 11) >> 11;
 169       } else {
 170         if (ptr >= end) {
 171           return false;
 172         }
 173         cur = *(ptr++);
 174         result |= (cur & 0x7f) << 21;
 175         if (cur <= 0x7f) {
 176           result = (result << 4) >> 4;
 177         } else {
 178           if (ptr >= end) {
 179             return false;
 180           }
 181           // Note: We don't check to see if cur is out of range here,
 182           // meaning we tolerate garbage in the four high-order bits.
 183           cur = *(ptr++);
 184           result |= cur << 28;
 185         }
 186       }
 187     }
 188   }
 189   *data = ptr;
 190   *out = static_cast<uint32_t>(result);
 191   return true;
 192 }
 193
 194 // Returns the number of bytes needed to encode the value in unsigned LEB128.
 195 static inline uint32_t UnsignedLeb128Size(uint32_t data) {
 196   // bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1  // 32 - CLZ(data | 1)
 197   // bytes = ceil(bits_to_encode / 7.0);             // (6 + bits_to_encode) / 7
 198   uint32_t x = 6 + 32 - CLZ(data | 1U);
 199   // Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
 200   // This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
 201   return (x * 37) >> 8;
 202 }
 203
 204 // Returns the number of bytes needed to encode the value in unsigned LEB128.
 205 static inline uint32_t SignedLeb128Size(int32_t data) {
 206   // Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
 207   data = data ^ (data >> 31);
 208   uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1U);
 209   return (x * 37) >> 8;
 210 }
 211
 212 static inline uint8_t* EncodeUnsignedLeb128(uint8_t* dest, uint32_t value) {
 213   uint8_t out = value & 0x7f;
 214   value >>= 7;
 215   while (value != 0) {
 216     *dest++ = out | 0x80;
 217     out = value & 0x7f;
 218     value >>= 7;
 219   }
 220   *dest++ = out;
 221   return dest;
 222 }
 223
 224 template <typename Vector>
 225 static inline void EncodeUnsignedLeb128(Vector* dest, uint32_t value) {
 226   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
 227   uint8_t out = value & 0x7f;
 228   value >>= 7;
 229   while (value != 0) {
 230     dest->push_back(out | 0x80);
 231     out = value & 0x7f;
 232     value >>= 7;
 233   }
 234   dest->push_back(out);
 235 }
 236
 237 // Overwrite encoded Leb128 with a new value. The new value must be less than
 238 // or equal to the old value to ensure that it fits the allocated space.
 239 static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) {
 240   const uint8_t* old_end = dest;
 241   uint32_t old_value = DecodeUnsignedLeb128(&old_end);
 242   DCHECK_LE(value, old_value);
 243   for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) {
 244     // Use longer encoding than necessary to fill the allocated space.
 245     end[-1] |= 0x80;
 246     end[0] = 0;
 247   }
 248 }
 249
 250 static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int32_t value) {
 251   uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
 252   uint8_t out = value & 0x7f;
 253   while (extra_bits != 0u) {
 254     *dest++ = out | 0x80;
 255     value >>= 7;
 256     out = value & 0x7f;
 257     extra_bits >>= 7;
 258   }
 259   *dest++ = out;
 260   return dest;
 261 }
 262
 263 template<typename Vector>
 264 static inline void EncodeSignedLeb128(Vector* dest, int32_t value) {
 265   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
 266   uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
 267   uint8_t out = value & 0x7f;
 268   while (extra_bits != 0u) {
 269     dest->push_back(out | 0x80);
 270     value >>= 7;
 271     out = value & 0x7f;
 272     extra_bits >>= 7;
 273   }
 274   dest->push_back(out);
 275 }
 276
 277 // An encoder that pushes int32_t/uint32_t data onto the given std::vector.
 278 template <typename Vector = std::vector<uint8_t>>
 279 class Leb128Encoder {
 280   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
 281
 282  public:
 283   explicit Leb128Encoder(Vector* data) : data_(data) {
 284     DCHECK(data != nullptr);
 285   }
 286
 287   void Reserve(uint32_t size) {
 288     data_->reserve(size);
 289   }
 290
 291   void PushBackUnsigned(uint32_t value) {
 292     EncodeUnsignedLeb128(data_, value);
 293   }
 294
 295   template<typename It>
 296   void InsertBackUnsigned(It cur, It end) {
 297     for (; cur != end; ++cur) {
 298       PushBackUnsigned(*cur);
 299     }
 300   }
 301
 302   void PushBackSigned(int32_t value) {
 303     EncodeSignedLeb128(data_, value);
 304   }
 305
 306   template<typename It>
 307   void InsertBackSigned(It cur, It end) {
 308     for (; cur != end; ++cur) {
 309       PushBackSigned(*cur);
 310     }
 311   }
 312
 313   const Vector& GetData() const {
 314     return *data_;
 315   }
 316
 317  protected:
 318   Vector* const data_;
 319
 320  private:
 321   DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
 322 };
 323
 324 // An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
 325 template <typename Vector = std::vector<uint8_t>>
 326 class Leb128EncodingVector FINAL : private Vector,
 327                                    public Leb128Encoder<Vector> {
 328   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
 329
 330  public:
 331   Leb128EncodingVector() : Leb128Encoder<Vector>(this) { }
 332
 333   explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc)
 334     : Vector(alloc),
 335       Leb128Encoder<Vector>(this) { }
 336
 337  private:
 338   DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
 339 };
 340
 341 }  // namespace art
 342
 343 #endif  // ART_RUNTIME_LEB128_H_