1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Represents JSON data structure using native Go types: booleans, floats,
6 // strings, arrays, and maps.
22 // Unmarshal parses the JSON-encoded data and stores the result
23 // in the value pointed to by v.
// The argument v must be a non-nil pointer (see InvalidUnmarshalError).
25 // Unmarshal traverses the value v recursively.
26 // If an encountered value implements the Unmarshaler interface,
27 // Unmarshal calls its UnmarshalJSON method with the well-formed
// JSON text of that value.
30 // Otherwise, Unmarshal uses the inverse of the encodings that
31 // Marshal uses, allocating maps, slices, and pointers as necessary,
32 // with the following additional rules:
34 // When unmarshaling a JSON value into a nil interface value, the
35 // type stored in the interface value is one of:
37 // bool, for JSON booleans
38 // float64, for JSON numbers
39 // string, for JSON strings
40 // []interface{}, for JSON arrays
41 // map[string]interface{}, for JSON objects
44 // If a JSON value is not appropriate for a given target type,
45 // or if a JSON number overflows the target type, Unmarshal
46 // skips that field and completes the unmarshalling as best it can.
47 // If no more serious errors are encountered, Unmarshal returns
48 // an UnmarshalTypeError describing the earliest such error.
50 func Unmarshal(data []byte, v interface{}) os.Error {
// d carries the scanner (d.scan) that checkValid runs over data below.
51 d := new(decodeState).init(data)
53 // Quick check for well-formedness.
54 // Avoids filling out half a data structure
55 // before discovering a JSON syntax error.
56 err := checkValid(data, &d.scan)
64 // Unmarshaler is the interface implemented by objects
65 // that can unmarshal a JSON description of themselves.
66 // The input can be assumed to be a valid encoding of a JSON
67 // value: an array, an object, or a literal. UnmarshalJSON must copy the JSON data
68 // if it wishes to retain the data after returning.
69 type Unmarshaler interface {
// UnmarshalJSON receives the raw JSON text of the value being decoded.
70 UnmarshalJSON([]byte) os.Error
73 // An UnmarshalTypeError describes a JSON value that was
74 // not appropriate for a value of a specific Go type.
// The decoder usually records these through saveError and keeps decoding.
75 type UnmarshalTypeError struct {
76 Value string // description of JSON value - "bool", "array", "number -5"
77 Type reflect.Type // type of Go value it could not be assigned to
// String returns the error text, combining the JSON value description
// with the name of the Go type that could not hold it.
80 func (e *UnmarshalTypeError) String() string {
81 return "json: cannot unmarshal " + e.Value + " into Go value of type " + e.Type.String()
84 // An UnmarshalFieldError describes a JSON object key that
85 // led to an unexported (and therefore unwritable) struct field.
// The decoder records it through saveError when a key resolves to such a field.
86 type UnmarshalFieldError struct {
89 Field reflect.StructField // the struct field the key resolved to
// String returns the error text, naming the quoted JSON key, the
// unexported field it resolved to, and the type recorded in e.Type.
92 func (e *UnmarshalFieldError) String() string {
93 return "json: cannot unmarshal object key " + strconv.Quote(e.Key) + " into unexported field " + e.Field.Name + " of type " + e.Type.String()
96 // An InvalidUnmarshalError describes an invalid argument passed to Unmarshal.
97 // (The argument to Unmarshal must be a non-nil pointer.)
// It carries the reflect.Type of the rejected argument.
98 type InvalidUnmarshalError struct {
// String returns the error text. Three messages exist: one for a bare nil
// argument, one for a non-pointer argument, and one for a nil pointer;
// the latter two include the name of the argument's type.
102 func (e *InvalidUnmarshalError) String() string {
104 return "json: Unmarshal(nil)"
// A type whose kind is not Ptr was passed by value rather than by pointer.
107 if e.Type.Kind() != reflect.Ptr {
108 return "json: Unmarshal(non-pointer " + e.Type.String() + ")"
// Otherwise the argument was a pointer type holding nil.
110 return "json: Unmarshal(nil " + e.Type.String() + ")"
// unmarshal decodes the JSON held by d into v.
// Anything other than a non-nil pointer is rejected with an
// *InvalidUnmarshalError. Panics raised while decoding are recovered
// here; runtime errors are singled out by the type test below.
113 func (d *decodeState) unmarshal(v interface{}) (err os.Error) {
115 if r := recover(); r != nil {
116 if _, ok := r.(runtime.Error); ok {
123 rv := reflect.ValueOf(v)
// The destination must be a pointer that actually points somewhere.
125 if pv.Kind() != reflect.Ptr || pv.IsNil() {
126 return &InvalidUnmarshalError{reflect.TypeOf(v)}
130 // We decode rv not pv.Elem because the Unmarshaler interface
131 // test must be applied at the top level of the value.
136 // decodeState represents the state while decoding a JSON value.
137 type decodeState struct {
139 off int // read offset in data
141 nextscan scanner // for calls to nextValue
143 tempstr string // scratch string; receives ",string"-tagged values before they are re-decoded (see object)
146 // errPhase is used for errors that should not happen unless
147 // there is a bug in the JSON decoder or something is editing
148 // the data slice while the decoder executes.
// It is a single package-level error value.
149 var errPhase = os.NewError("JSON decoder out of sync - data changing underfoot?")
// init prepares d for decoding data and returns a *decodeState, so the
// call can be chained onto new(decodeState) as Unmarshal does.
151 func (d *decodeState) init(data []byte) *decodeState {
158 // error aborts the decoding by panicking with err.
// unmarshal installs the recover that catches this panic.
159 func (d *decodeState) error(err os.Error) {
163 // saveError saves the first err it is called with,
164 // for reporting at the end of the unmarshal.
// Unlike error, it does not interrupt decoding.
165 func (d *decodeState) saveError(err os.Error) {
// Only the first error is kept; later ones find savedError already set.
166 if d.savedError == nil {
171 // next cuts off and returns the next full JSON value in d.data[d.off:].
172 // The next value is known to be an object or array, not a literal.
173 func (d *decodeState) next() []byte {
// nextValue splits the unread input into the value and the remainder,
// using the separate nextscan scanner rather than d.scan.
175 item, rest, err := nextValue(d.data[d.off:], &d.nextscan)
// rest is the unread tail of d.data, so this moves d.off past the value.
179 d.off = len(d.data) - len(rest)
181 // Our scanner has seen the opening brace/bracket
182 // and thinks we're still in the middle of the object.
183 // Invent a closing brace/bracket to get it out.
185 d.scan.step(&d.scan, '}')
187 d.scan.step(&d.scan, ']')
193 // scanWhile processes bytes in d.data[d.off:] until it
194 // receives a scan code not equal to op.
195 // It updates d.off and returns the new scan code.
196 func (d *decodeState) scanWhile(op int) int {
// Input exhausted: there is no further byte to feed the scanner.
199 if d.off >= len(d.data) {
201 d.off = len(d.data) + 1 // mark processed EOF with len+1
// Otherwise hand the byte at the read offset to the scanner's step function.
203 c := int(d.data[d.off])
205 newOp = d.scan.step(&d.scan, c)
214 // value decodes a JSON value from d.data[d.off:] into the value v.
215 // It updates d.off to point past the decoded value.
// Callers pass the zero reflect.Value to skip a value without storing
// it (array does so once a fixed-size array is full).
216 func (d *decodeState) value(v reflect.Value) {
// Locate the end of the value with the auxiliary scanner; the value's
// own bytes are not needed here, only the remainder.
218 _, rest, err := nextValue(d.data[d.off:], &d.nextscan)
222 d.off = len(d.data) - len(rest)
224 // d.scan thinks we're still at the beginning of the item.
225 // Feed in an empty string - the shortest, simplest value -
226 // so that it knows we got to the end of the value.
227 if d.scan.step == stateRedo {
// The two quote bytes below form the empty string "".
230 d.scan.step(&d.scan, '"')
231 d.scan.step(&d.scan, '"')
// Skip leading space, then dispatch on what kind of value begins here.
235 switch op := d.scanWhile(scanSkipSpace); op {
242 case scanBeginObject:
245 case scanBeginLiteral:
250 // indirect walks down v allocating pointers as needed,
251 // until it gets to a non-pointer.
252 // If it encounters an Unmarshaler, indirect stops and returns that,
// together with the zero reflect.Value.
253 // If wantptr is true, indirect stops at the last pointer.
254 func (d *decodeState) indirect(v reflect.Value, wantptr bool) (Unmarshaler, reflect.Value) {
255 // If v is a named type and is addressable,
256 // start with its address, so that if the type has pointer methods,
// the Unmarshaler check below can see them.
258 if v.Kind() != reflect.Ptr && v.Type().Name() != "" && v.CanAddr() {
262 var isUnmarshaler bool
// A type without methods cannot implement Unmarshaler, so the
// interface assertion is only attempted when methods exist.
263 if v.Type().NumMethod() > 0 {
264 // Remember that this is an unmarshaler,
265 // but wait to return it until after allocating
266 // the pointer (if necessary).
267 _, isUnmarshaler = v.Interface().(Unmarshaler)
// A non-nil interface value is inspected here.
270 if iv := v; iv.Kind() == reflect.Interface && !iv.IsNil() {
276 if pv.Kind() != reflect.Ptr {
// wantptr: the pointer's element is not itself a pointer, so this is
// the last pointer in the chain (unless an Unmarshaler was detected).
280 if pv.Elem().Kind() != reflect.Ptr && wantptr && pv.CanSet() && !isUnmarshaler {
// Point pv at a newly allocated zero value of its element type.
284 pv.Set(reflect.New(pv.Type().Elem()))
287 // Using v.Interface().(Unmarshaler)
288 // here means that we have to use a pointer
289 // as the struct field. We cannot use a value inside
290 // a pointer to a struct, because in that case
291 // v.Interface() is the value (x.f) not the pointer (&x.f).
292 // This is an unfortunate consequence of reflect.
293 // An alternative would be to look up the
294 // UnmarshalJSON method and return a FuncValue.
295 return v.Interface().(Unmarshaler), reflect.Value{}
302 // array consumes an array from d.data[d.off-1:], decoding into the value v.
303 // The first byte of the array ('[') has been read already.
// A target that is neither an array nor a slice is recorded as an
// UnmarshalTypeError via saveError.
304 func (d *decodeState) array(v reflect.Value) {
305 // Check for unmarshaler.
306 unmarshaler, pv := d.indirect(v, false)
307 if unmarshaler != nil {
// Hand the complete text of the array to the value's own decoder.
309 err := unmarshaler.UnmarshalJSON(d.next())
317 // Decoding into nil interface? Switch to non-reflect code.
319 ok := iv.Kind() == reflect.Interface
321 iv.Set(reflect.ValueOf(d.arrayInterface()))
325 // Check type of target.
327 if av.Kind() != reflect.Array && av.Kind() != reflect.Slice {
328 d.saveError(&UnmarshalTypeError{"array", v.Type()})
338 // Look ahead for ] - can only happen on first iteration.
339 op := d.scanWhile(scanSkipSpace)
340 if op == scanEndArray {
344 // Back up so d.value can have the byte we just read.
348 // Get element of array, growing if necessary.
// sv is only valid for slices, so fixed-size arrays never grow.
349 if i >= av.Cap() && sv.IsValid() {
// Grow by half of the current capacity.
350 newcap := sv.Cap() + sv.Cap()/2
354 newv := reflect.MakeSlice(sv.Type(), sv.Len(), newcap)
355 reflect.Copy(newv, sv)
358 if i >= av.Len() && sv.IsValid() {
359 // Must be slice; gave up on array during i >= av.Cap().
363 // Decode into element.
367 // Ran out of fixed array: skip.
// The zero reflect.Value tells d.value to consume without storing.
368 d.value(reflect.Value{})
372 // Next token must be , or ].
373 op = d.scanWhile(scanSkipSpace)
374 if op == scanEndArray {
377 if op != scanArrayValue {
383 // Array. Zero the rest.
// Elements beyond the decoded ones receive the element type's zero value.
384 z := reflect.Zero(av.Type().Elem())
385 for ; i < av.Len(); i++ {
// matchName reports whether the JSON object key should be written to the
// struct field called name. The comparison ignores letter case.
//
// strings.EqualFold compares the two strings under Unicode case folding
// in place, so no lowered copies of key and name are allocated. That
// matters because this predicate runs once per candidate field during
// the case-insensitive field lookup.
func matchName(key, name string) bool {
	return strings.EqualFold(key, name)
}
399 // object consumes an object from d.data[d.off-1:], decoding into the value v.
400 // The first byte of the object ('{') has been read already.
// Supported targets are an Unmarshaler, an interface value, a map with
// string keys, and a struct; anything else is recorded as an
// UnmarshalTypeError and the object is skipped.
401 func (d *decodeState) object(v reflect.Value) {
402 // Check for unmarshaler.
403 unmarshaler, pv := d.indirect(v, false)
404 if unmarshaler != nil {
// Hand the complete text of the object to the value's own decoder.
406 err := unmarshaler.UnmarshalJSON(d.next())
414 // Decoding into nil interface? Switch to non-reflect code.
416 if iv.Kind() == reflect.Interface {
417 iv.Set(reflect.ValueOf(d.objectInterface()))
421 // Check type of target: struct or map[string]T
428 // map must have string type
430 if t.Key() != reflect.TypeOf("") {
431 d.saveError(&UnmarshalTypeError{"object", v.Type()})
// Allocate the map before entries are stored into it.
436 mv.Set(reflect.MakeMap(t))
441 d.saveError(&UnmarshalTypeError{"object", v.Type()})
// Neither a usable map nor a usable struct: nothing can be stored.
444 if !mv.IsValid() && !sv.IsValid() {
446 d.next() // skip over { } in input
// mapElem is a reusable scratch element for map targets.
450 var mapElem reflect.Value
453 // Read opening " of string key or closing }.
454 op := d.scanWhile(scanSkipSpace)
455 if op == scanEndObject {
456 // closing } - can only happen on first iteration.
459 if op != scanBeginLiteral {
// Scan to the end of the key literal, then unquote it.
465 op = d.scanWhile(scanContinue)
466 item := d.data[start : d.off-1]
467 key, ok := unquote(item)
472 // Figure out field corresponding to key.
473 var subv reflect.Value
474 destring := false // whether the value is wrapped in a string to be decoded first
477 elemType := mv.Type().Elem()
// Create the scratch element once, and reset it to the zero value on reuse.
478 if !mapElem.IsValid() {
479 mapElem = reflect.New(elemType).Elem()
481 mapElem.Set(reflect.Zero(elemType))
485 var f reflect.StructField
488 // First try for field with that tag.
490 for i := 0; i < sv.NumField(); i++ {
492 tagName, _ := parseTag(f.Tag.Get("json"))
500 // Second, exact match.
501 f, ok = st.FieldByName(key)
504 // Third, case-insensitive match.
505 f, ok = st.FieldByNameFunc(func(s string) bool { return matchName(key, s) })
508 // Extract value; name must be exported.
511 d.saveError(&UnmarshalFieldError{key, st, f})
513 subv = sv.FieldByIndex(f.Index)
// The "string" tag option requests the decode-through-a-string path below.
515 _, opts := parseTag(f.Tag.Get("json"))
516 destring = opts.Contains("string")
520 // Read : before value.
521 if op == scanSkipSpace {
522 op = d.scanWhile(scanSkipSpace)
524 if op != scanObjectKey {
// Destring path: decode the JSON string into d.tempstr, then decode
// the contents of that string into the real destination.
// NOTE(review): literalStore reads item[0] without a length check, so an
// empty d.tempstr (for example the JSON value "") would index out of
// range - confirm a guard exists before this call.
530 d.value(reflect.ValueOf(&d.tempstr))
531 d.literalStore([]byte(d.tempstr), subv)
535 // Write value back to map;
536 // if using struct, subv points into struct already.
538 mv.SetMapIndex(reflect.ValueOf(key), subv)
541 // Next token must be , or }.
542 op = d.scanWhile(scanSkipSpace)
543 if op == scanEndObject {
546 if op != scanObjectValue {
552 // literal consumes a literal from d.data[d.off-1:], decoding into the value v.
553 // The first byte of the literal has been read already
554 // (that's how the caller knows it's a literal).
// Conversion and storing are delegated to literalStore.
555 func (d *decodeState) literal(v reflect.Value) {
556 // All bytes inside literal return scanContinue op code.
558 op := d.scanWhile(scanContinue)
560 // Scan read one byte too far; back up.
// The literal spans d.data[start:d.off] once the offset is corrected.
564 d.literalStore(d.data[start:d.off], v)
567 // literalStore decodes a literal stored in item into v.
// The first byte of item selects how it is interpreted: 'n' for null,
// 't' or 'f' for booleans, and '-' or a digit for numbers; string
// literals are unquoted with unquoteBytes.
// Type mismatches and numeric overflow are recorded as UnmarshalTypeError.
568 func (d *decodeState) literalStore(item []byte, v reflect.Value) {
569 // Check for unmarshaler.
// NOTE(review): item[0] is read without a length check, so item must
// be non-empty - verify every caller guarantees that.
570 wantptr := item[0] == 'n' // null
571 unmarshaler, pv := d.indirect(v, wantptr)
572 if unmarshaler != nil {
573 err := unmarshaler.UnmarshalJSON(item)
581 switch c := item[0]; c {
585 d.saveError(&UnmarshalTypeError{"null", v.Type()})
// null resets interface, pointer and map targets to their zero value.
586 case reflect.Interface, reflect.Ptr, reflect.Map:
587 v.Set(reflect.Zero(v.Type()))
590 case 't', 'f': // true, false
594 d.saveError(&UnmarshalTypeError{"bool", v.Type()})
597 case reflect.Interface:
598 v.Set(reflect.ValueOf(value))
602 s, ok := unquoteBytes(item)
608 d.saveError(&UnmarshalTypeError{"string", v.Type()})
// A JSON string is accepted for a byte slice target only; its contents
// are then decoded as standard base64.
610 if v.Type() != byteSliceType {
611 d.saveError(&UnmarshalTypeError{"string", v.Type()})
614 b := make([]byte, base64.StdEncoding.DecodedLen(len(s)))
615 n, err := base64.StdEncoding.Decode(b, s)
// Keep only the n bytes that Decode actually produced.
620 v.Set(reflect.ValueOf(b[0:n]))
622 v.SetString(string(s))
623 case reflect.Interface:
624 v.Set(reflect.ValueOf(string(s)))
// Anything that does not start like a number is not a valid literal here.
628 if c != '-' && (c < '0' || c > '9') {
// NOTE(review): unlike the other type mismatches in this function, this
// one goes through d.error, which aborts decoding by panicking, rather
// than d.saveError - confirm that the difference is intended.
634 d.error(&UnmarshalTypeError{"number", v.Type()})
// Interface targets receive the number as a float64.
635 case reflect.Interface:
636 n, err := strconv.Atof64(s)
638 d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
641 v.Set(reflect.ValueOf(n))
// Signed, unsigned and floating-point targets each parse with the
// matching strconv routine and reject values that overflow the target.
643 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
644 n, err := strconv.Atoi64(s)
645 if err != nil || v.OverflowInt(n) {
646 d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
651 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
652 n, err := strconv.Atoui64(s)
653 if err != nil || v.OverflowUint(n) {
654 d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
659 case reflect.Float32, reflect.Float64:
// Parse at the bit size of the target type.
660 n, err := strconv.AtofN(s, v.Type().Bits())
661 if err != nil || v.OverflowFloat(n) {
662 d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
670 // The xxxInterface routines build up a value to be stored
671 // in an empty interface. They are not strictly necessary,
672 // but they avoid the weight of reflection in this common case.
674 // valueInterface is like value but returns the decoded value as an
// interface{} instead of storing it through a reflect.Value.
// It skips leading space and dispatches on the scan code that follows.
675 func (d *decodeState) valueInterface() interface{} {
676 switch d.scanWhile(scanSkipSpace) {
680 return d.arrayInterface()
681 case scanBeginObject:
682 return d.objectInterface()
683 case scanBeginLiteral:
684 return d.literalInterface()
689 // arrayInterface is like array but returns []interface{}.
// Each element is decoded with valueInterface and appended to the result.
690 func (d *decodeState) arrayInterface() []interface{} {
693 // Look ahead for ] - can only happen on first iteration.
694 op := d.scanWhile(scanSkipSpace)
695 if op == scanEndArray {
699 // Back up so d.valueInterface can have the byte we just read.
703 v = append(v, d.valueInterface())
705 // Next token must be , or ].
706 op = d.scanWhile(scanSkipSpace)
707 if op == scanEndArray {
710 if op != scanArrayValue {
717 // objectInterface is like object but returns map[string]interface{}.
// Keys are unquoted with unquote and each value is decoded with
// valueInterface; a repeated key overwrites the earlier entry in the map.
718 func (d *decodeState) objectInterface() map[string]interface{} {
719 m := make(map[string]interface{})
721 // Read opening " of string key or closing }.
722 op := d.scanWhile(scanSkipSpace)
723 if op == scanEndObject {
724 // closing } - can only happen on first iteration.
727 if op != scanBeginLiteral {
// Scan to the end of the key literal, then unquote it.
733 op = d.scanWhile(scanContinue)
734 item := d.data[start : d.off-1]
735 key, ok := unquote(item)
740 // Read : before value.
741 if op == scanSkipSpace {
742 op = d.scanWhile(scanSkipSpace)
744 if op != scanObjectKey {
749 m[key] = d.valueInterface()
751 // Next token must be , or }.
752 op = d.scanWhile(scanSkipSpace)
753 if op == scanEndObject {
756 if op != scanObjectValue {
763 // literalInterface is like literal but returns an interface value.
// The first byte of the literal selects how it is interpreted.
764 func (d *decodeState) literalInterface() interface{} {
765 // All bytes inside literal return scanContinue op code.
767 op := d.scanWhile(scanContinue)
769 // Scan read one byte too far; back up.
772 item := d.data[start:d.off]
774 switch c := item[0]; c {
778 case 't', 'f': // true, false
782 s, ok := unquote(item)
// Anything that does not start like a number is not a valid literal here.
789 if c != '-' && (c < '0' || c > '9') {
// Numbers are parsed as float64; a parse failure is recorded against
// the float64 type.
792 n, err := strconv.Atof64(string(item))
794 d.saveError(&UnmarshalTypeError{"number " + string(item), reflect.TypeOf(0.0)})
801 // getu4 decodes \uXXXX from the beginning of s, returning the hex value.
// s must hold at least six bytes and begin with a backslash followed
// by 'u'; the guard below handles input that does not.
803 func getu4(s []byte) int {
804 if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
// The four bytes after \u are parsed as a base-16 number.
807 rune, err := strconv.Btoui64(string(s[2:6]), 16)
814 // unquote converts a quoted JSON string literal s into an actual string t.
815 // The rules are different than for Go, so cannot use strconv.Unquote.
// The work is done by unquoteBytes; ok carries its success flag.
816 func unquote(s []byte) (t string, ok bool) {
817 s, ok = unquoteBytes(s)
// unquoteBytes is the []byte form of unquote. The input must be at
// least two bytes long and be wrapped in double quotes; this is the
// first thing checked.
822 func unquoteBytes(s []byte) (t []byte, ok bool) {
823 if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
828 // Check for unusual characters. If there are none,
829 // then no unquoting is needed, so return a slice of the
// original bytes.
// Backslashes, quotes and control characters count as unusual.
834 if c == '\\' || c == '"' || c < ' ' {
// Bytes below utf8.RuneSelf are single-byte characters.
837 if c < utf8.RuneSelf {
841 rune, size := utf8.DecodeRune(s[r:])
// RuneError with size 1 is how DecodeRune reports malformed UTF-8.
842 if rune == utf8.RuneError && size == 1 {
// The output buffer starts with slack for two maximum-length encoded runes.
851 b := make([]byte, len(s)+2*utf8.UTFMax)
854 // Out of room? Can only happen if s is full of
855 // malformed UTF-8 and we're replacing each
856 // byte with RuneError.
857 if w >= len(b)-2*utf8.UTFMax {
858 nb := make([]byte, (len(b)+utf8.UTFMax)*2)
// NOTE(review): this case lists '\'' next to the quote, backslash and
// slash characters; presumably these are escape characters, and \' is
// not an escape defined by the JSON grammar - confirm the leniency is
// intended.
871 case '"', '\\', '/', '\'':
// A surrogate needs a second \uXXXX escape to form a full code point.
902 if utf16.IsSurrogate(rune) {
903 rune1 := getu4(s[r:])
904 if dec := utf16.DecodeRune(rune, rune1); dec != unicode.ReplacementChar {
905 // A valid pair; consume.
907 w += utf8.EncodeRune(b[w:], dec)
910 // Invalid surrogate; fall back to replacement rune.
911 rune = unicode.ReplacementChar
913 w += utf8.EncodeRune(b[w:], rune)
916 // Quote, control characters are invalid.
917 case c == '"', c < ' ':
921 case c < utf8.RuneSelf:
926 // Coerce to well-formed UTF-8.
// Decoding and re-encoding each rune rewrites malformed bytes as RuneError.
928 rune, size := utf8.DecodeRune(s[r:])
930 w += utf8.EncodeRune(b[w:], rune)