OSDN Git Service

23c4f906f80f2d367357a6bcd3887bbc7bd63c0c
[pf3gnuchains/gcc-fork.git] / libgo / runtime / mprof.goc
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Malloc profiling.
6 // Patterned after tcmalloc's algorithms; shorter code.
7
8 package runtime
9 #include "runtime.h"
10 #include "arch.h"
11 #include "malloc.h"
12 #include "defs.h"
13 #include "go-type.h"
14
15 typedef struct __go_open_array Slice;
16
17 // NOTE(rsc): Everything here could use cas if contention became an issue.
18 static Lock proflock;
19
20 // Per-call-stack allocation information.
21 // Lookup by hashing call stack into a linked-list hash table.
22 typedef struct Bucket Bucket;
23 struct Bucket
24 {
25         Bucket  *next;  // next in hash list
26         Bucket  *allnext;       // next in list of all buckets
27         uintptr allocs;
28         uintptr frees;
29         uintptr alloc_bytes;
30         uintptr free_bytes;
31         uintptr hash;
32         uintptr nstk;
33         uintptr stk[1];
34 };
// Number of slots in the call-stack hash table.
enum { BuckHashSize = 179999 };
38 static Bucket **buckhash;
39 static Bucket *buckets;
40 static uintptr bucketmem;
41
42 // Return the bucket for stk[0:nstk], allocating new bucket if needed.
43 static Bucket*
44 stkbucket(uintptr *stk, int32 nstk)
45 {
46         int32 i;
47         uintptr h;
48         Bucket *b;
49
50         if(buckhash == nil) {
51                 buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
52                 mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
53         }
54
55         // Hash stack.
56         h = 0;
57         for(i=0; i<nstk; i++) {
58                 h += stk[i];
59                 h += h<<10;
60                 h ^= h>>6;
61         }
62         h += h<<3;
63         h ^= h>>11;
64
65         i = h%BuckHashSize;
66         for(b = buckhash[i]; b; b=b->next)
67                 if(b->hash == h && b->nstk == (uintptr)nstk &&
68                    runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
69                         return b;
70
71         b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
72         bucketmem += sizeof *b + nstk*sizeof stk[0];
73         runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
74         b->hash = h;
75         b->nstk = nstk;
76         b->next = buckhash[i];
77         buckhash[i] = b;
78         b->allnext = buckets;
79         buckets = b;
80         return b;
81 }
82
83 // Map from pointer to Bucket* that allocated it.
84 // Three levels:
85 //      Linked-list hash table for top N-20 bits.
86 //      Array index for next 13 bits.
87 //      Linked list for next 7 bits.
88 // This is more efficient than using a general map,
89 // because of the typical clustering of the pointer keys.
90
91 typedef struct AddrHash AddrHash;
92 typedef struct AddrEntry AddrEntry;
93
94 struct AddrHash
95 {
96         AddrHash *next; // next in top-level hash table linked list
97         uintptr addr;   // addr>>20
98         AddrEntry *dense[1<<13];
99 };
100
101 struct AddrEntry
102 {
103         AddrEntry *next;        // next in bottom-level linked list
104         uint32 addr;
105         Bucket *b;
106 };
107
108 enum {
109         AddrHashBits = 12       // 1MB per entry, so good for 4GB of used address space
110 };
111 static AddrHash *addrhash[1<<AddrHashBits];
112 static AddrEntry *addrfree;
113 static uintptr addrmem;
114
115 // Multiplicative hash function:
116 // hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
117 // This is a good multiplier as suggested in CLR, Knuth.  The hash
118 // value is taken to be the top AddrHashBits bits of the bottom 32 bits
119 // of the multiplied value.
enum {
	HashMultiplier = 2654435769U	// 0x9E3779B9
};
123
124 // Set the bucket associated with addr to b.
125 static void
126 setaddrbucket(uintptr addr, Bucket *b)
127 {
128         int32 i;
129         uint32 h;
130         AddrHash *ah;
131         AddrEntry *e;
132
133         h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
134         for(ah=addrhash[h]; ah; ah=ah->next)
135                 if(ah->addr == (addr>>20))
136                         goto found;
137
138         ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
139         addrmem += sizeof *ah;
140         ah->next = addrhash[h];
141         ah->addr = addr>>20;
142         addrhash[h] = ah;
143
144 found:
145         if((e = addrfree) == nil) {
146                 e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
147                 addrmem += 64*sizeof *e;
148                 for(i=0; i+1<64; i++)
149                         e[i].next = &e[i+1];
150                 e[63].next = nil;
151         }
152         addrfree = e->next;
153         e->addr = (uint32)~(addr & ((1<<20)-1));
154         e->b = b;
155         h = (addr>>7)&(nelem(ah->dense)-1);     // entry in dense is top 13 bits of low 20.
156         e->next = ah->dense[h];
157         ah->dense[h] = e;
158 }
159
160 // Get the bucket associated with addr and clear the association.
161 static Bucket*
162 getaddrbucket(uintptr addr)
163 {
164         uint32 h;
165         AddrHash *ah;
166         AddrEntry *e, **l;
167         Bucket *b;
168
169         h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
170         for(ah=addrhash[h]; ah; ah=ah->next)
171                 if(ah->addr == (addr>>20))
172                         goto found;
173         return nil;
174
175 found:
176         h = (addr>>7)&(nelem(ah->dense)-1);     // entry in dense is top 13 bits of low 20.
177         for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
178                 if(e->addr == (uint32)~(addr & ((1<<20)-1))) {
179                         *l = e->next;
180                         b = e->b;
181                         e->next = addrfree;
182                         addrfree = e;
183                         return b;
184                 }
185         }
186         return nil;
187 }
188
189 void
190 runtime_Mprof_Init()
191 {
192         runtime_initlock(&proflock);
193 }
194
195 // Called by malloc to record a profiled block.
196 void
197 runtime_MProf_Malloc(void *p, uintptr size)
198 {
199         int32 nstk;
200         uintptr stk[32];
201         Bucket *b;
202
203         if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
204                 return;
205 #if 0
206         nstk = runtime_callers(1, stk, 32);
207 #else
208         nstk = 0;
209 #endif
210         runtime_lock(&proflock);
211         b = stkbucket(stk, nstk);
212         b->allocs++;
213         b->alloc_bytes += size;
214         setaddrbucket((uintptr)p, b);
215         runtime_unlock(&proflock);
216         __sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
217
218         if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
219                 __go_run_goroutine_gc(100);
220 }
221
222 // Called when freeing a profiled block.
223 void
224 runtime_MProf_Free(void *p, uintptr size)
225 {
226         Bucket *b;
227
228         if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
229                 return;
230
231         runtime_lock(&proflock);
232         b = getaddrbucket((uintptr)p);
233         if(b != nil) {
234                 b->frees++;
235                 b->free_bytes += size;
236         }
237         runtime_unlock(&proflock);
238         __sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
239
240         if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
241                 __go_run_goroutine_gc(101);
242 }
243
244
245 // Go interface to profile data.  (Declared in extern.go)
246 // Assumes Go sizeof(int) == sizeof(int32)
247
248 // Must match MemProfileRecord in extern.go.
249 typedef struct Record Record;
250 struct Record {
251         int64 alloc_bytes, free_bytes;
252         int64 alloc_objects, free_objects;
253         uintptr stk[32];
254 };
255
256 // Write b's data to r.
257 static void
258 record(Record *r, Bucket *b)
259 {
260         uint32 i;
261
262         r->alloc_bytes = b->alloc_bytes;
263         r->free_bytes = b->free_bytes;
264         r->alloc_objects = b->allocs;
265         r->free_objects = b->frees;
266         for(i=0; i<b->nstk && i<nelem(r->stk); i++)
267                 r->stk[i] = b->stk[i];
268         for(; i<nelem(r->stk); i++)
269                 r->stk[i] = 0;
270 }
271
272 func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
273         Bucket *b;
274         Record *r;
275
276         __sync_bool_compare_and_swap(&m->nomemprof, 0, 1);
277
278         runtime_lock(&proflock);
279         n = 0;
280         for(b=buckets; b; b=b->allnext)
281                 if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
282                         n++;
283         ok = false;
284         if(n <= p.__count) {
285                 ok = true;
286                 r = (Record*)p.__values;
287                 for(b=buckets; b; b=b->allnext)
288                         if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
289                                 record(r++, b);
290         }
291         runtime_unlock(&proflock);
292
293         __sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
294
295         if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
296                 __go_run_goroutine_gc(102);
297 }
298
299 void
300 runtime_MProf_Mark(void (*scan)(byte *, int64))
301 {
302         // buckhash is not allocated via mallocgc.
303         scan((byte*)&buckets, sizeof buckets);
304         scan((byte*)&addrhash, sizeof addrhash);
305         scan((byte*)&addrfree, sizeof addrfree);
306 }