// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Malloc profiling. // Patterned after tcmalloc's algorithms; shorter code. package runtime #include "runtime.h" #include "arch.h" #include "malloc.h" #include "defs.h" #include "go-type.h" // NOTE(rsc): Everything here could use cas if contention became an issue. static Lock proflock; // Per-call-stack allocation information. // Lookup by hashing call stack into a linked-list hash table. typedef struct Bucket Bucket; struct Bucket { Bucket *next; // next in hash list Bucket *allnext; // next in list of all buckets uintptr allocs; uintptr frees; uintptr alloc_bytes; uintptr free_bytes; uintptr hash; uintptr nstk; uintptr stk[1]; }; enum { BuckHashSize = 179999, }; static Bucket **buckhash; static Bucket *buckets; static uintptr bucketmem; // Return the bucket for stk[0:nstk], allocating new bucket if needed. static Bucket* stkbucket(uintptr *stk, int32 nstk) { int32 i; uintptr h; Bucket *b; if(buckhash == nil) { buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]); mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0]; } // Hash stack. h = 0; for(i=0; i>6; } h += h<<3; h ^= h>>11; i = h%BuckHashSize; for(b = buckhash[i]; b; b=b->next) if(b->hash == h && b->nstk == (uintptr)nstk && runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0) return b; b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1); bucketmem += sizeof *b + nstk*sizeof stk[0]; runtime_memmove(b->stk, stk, nstk*sizeof stk[0]); b->hash = h; b->nstk = nstk; b->next = buckhash[i]; buckhash[i] = b; b->allnext = buckets; buckets = b; return b; } // Map from pointer to Bucket* that allocated it. // Three levels: // Linked-list hash table for top N-20 bits. // Array index for next 13 bits. // Linked list for next 7 bits. // This is more efficient than using a general map, // because of the typical clustering of the pointer keys. typedef struct AddrHash AddrHash; typedef struct AddrEntry AddrEntry; struct AddrHash { AddrHash *next; // next in top-level hash table linked list uintptr addr; // addr>>20 AddrEntry *dense[1<<13]; }; struct AddrEntry { AddrEntry *next; // next in bottom-level linked list uint32 addr; Bucket *b; }; enum { AddrHashBits = 12 // 1MB per entry, so good for 4GB of used address space }; static AddrHash *addrhash[1<>20)*HashMultiplier) >> (32-AddrHashBits); for(ah=addrhash[h]; ah; ah=ah->next) if(ah->addr == (addr>>20)) goto found; ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1); addrmem += sizeof *ah; ah->next = addrhash[h]; ah->addr = addr>>20; addrhash[h] = ah; found: if((e = addrfree) == nil) { e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0); addrmem += 64*sizeof *e; for(i=0; i+1<64; i++) e[i].next = &e[i+1]; e[63].next = nil; } addrfree = e->next; e->addr = (uint32)~(addr & ((1<<20)-1)); e->b = b; h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20. e->next = ah->dense[h]; ah->dense[h] = e; } // Get the bucket associated with addr and clear the association. static Bucket* getaddrbucket(uintptr addr) { uint32 h; AddrHash *ah; AddrEntry *e, **l; Bucket *b; h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits); for(ah=addrhash[h]; ah; ah=ah->next) if(ah->addr == (addr>>20)) goto found; return nil; found: h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20. for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) { if(e->addr == (uint32)~(addr & ((1<<20)-1))) { *l = e->next; b = e->b; e->next = addrfree; addrfree = e; return b; } } return nil; } // Called by malloc to record a profiled block. void runtime_MProf_Malloc(void *p, uintptr size) { M *m; int32 nstk; uintptr stk[32]; Bucket *b; m = runtime_m(); if(m->nomemprof > 0) return; m->nomemprof++; #if 0 nstk = runtime_callers(1, stk, 32); #else nstk = 0; #endif runtime_lock(&proflock); b = stkbucket(stk, nstk); b->allocs++; b->alloc_bytes += size; setaddrbucket((uintptr)p, b); runtime_unlock(&proflock); m = runtime_m(); m->nomemprof--; } // Called when freeing a profiled block. void runtime_MProf_Free(void *p, uintptr size) { M *m; Bucket *b; m = runtime_m(); if(m->nomemprof > 0) return; m->nomemprof++; runtime_lock(&proflock); b = getaddrbucket((uintptr)p); if(b != nil) { b->frees++; b->free_bytes += size; } runtime_unlock(&proflock); m = runtime_m(); m->nomemprof--; } // Go interface to profile data. (Declared in extern.go) // Assumes Go sizeof(int) == sizeof(int32) // Must match MemProfileRecord in extern.go. typedef struct Record Record; struct Record { int64 alloc_bytes, free_bytes; int64 alloc_objects, free_objects; uintptr stk[32]; }; // Write b's data to r. static void record(Record *r, Bucket *b) { uint32 i; r->alloc_bytes = b->alloc_bytes; r->free_bytes = b->free_bytes; r->alloc_objects = b->allocs; r->free_objects = b->frees; for(i=0; instk && istk); i++) r->stk[i] = b->stk[i]; for(; istk); i++) r->stk[i] = 0; } func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) { Bucket *b; Record *r; runtime_lock(&proflock); n = 0; for(b=buckets; b; b=b->allnext) if(include_inuse_zero || b->alloc_bytes != b->free_bytes) n++; ok = false; if(n <= p.__count) { ok = true; r = (Record*)p.__values; for(b=buckets; b; b=b->allnext) if(include_inuse_zero || b->alloc_bytes != b->free_bytes) record(r++, b); } runtime_unlock(&proflock); } void runtime_MProf_Mark(void (*scan)(byte *, int64)) { // buckhash is not allocated via mallocgc. scan((byte*)&buckets, sizeof buckets); scan((byte*)&addrhash, sizeof addrhash); scan((byte*)&addrfree, sizeof addrfree); }