OSDN Git Service

log/syslog: Fix name of C function syslog_c.
[pf3gnuchains/gcc-fork.git] / libgo / runtime / mprof.goc
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Malloc profiling.
6 // Patterned after tcmalloc's algorithms; shorter code.
7
8 package runtime
9 #include "runtime.h"
10 #include "arch.h"
11 #include "malloc.h"
12 #include "defs.h"
13 #include "go-type.h"
14
// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;	// protects all profiler data structures below

// Per-call-stack allocation information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list
	Bucket	*allnext;	// next in list of all buckets
	uintptr	allocs;	// allocation count, as of the last GC (see runtime_MProf_GC)
	uintptr	frees;	// free count, as of the last GC
	uintptr	alloc_bytes;	// bytes allocated, as of the last GC
	uintptr	free_bytes;	// bytes freed, as of the last GC
	uintptr	recent_allocs;	// since last gc
	uintptr	recent_frees;
	uintptr	recent_alloc_bytes;
	uintptr	recent_free_bytes;
	uintptr	hash;	// full hash of the stack, to short-circuit comparisons
	uintptr	nstk;	// number of frames stored in stk
	uintptr	stk[1];	// call stack; actually nstk entries (bucket allocated oversize)
};
enum {
	BuckHashSize = 179999,	// number of hash chains in buckhash
};
static Bucket **buckhash;	// hash table; allocated lazily in stkbucket
static Bucket *buckets;	// list of all buckets, linked through allnext
static uintptr bucketmem;	// bytes allocated for buckets, for stats
43
// Return the bucket for stk[0:nstk], allocating new bucket if needed.
// If alloc is false, returns nil rather than creating a missing bucket.
// Called with proflock held (see runtime_MProf_Malloc).
static Bucket*
stkbucket(uintptr *stk, int32 nstk, bool alloc)
{
	int32 i;
	uintptr h;
	Bucket *b;

	// Lazily create the hash table.  SysAlloc memory is not
	// handled by the garbage collector (see runtime_MProf_Mark).
	if(buckhash == nil) {
		buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
	}

	// Hash stack.
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	// Final mixing of the hash value.
	h += h<<3;
	h ^= h>>11;

	// Search the hash chain; compare full hash and depth before
	// the (more expensive) frame-by-frame memory comparison.
	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->hash == h && b->nstk == (uintptr)nstk &&
		   runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	if(!alloc)
		return nil;

	// Allocate the bucket with room for the stack trace appended;
	// FlagNoProfiling keeps the profiler's own allocation out of the profile.
	b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->hash = h;
	b->nstk = nstk;
	// Link into both the hash chain and the global all-buckets list.
	b->next = buckhash[i];
	buckhash[i] = b;
	b->allnext = buckets;
	buckets = b;
	return b;
}
87
88 // Record that a gc just happened: all the 'recent' statistics are now real.
89 void
90 runtime_MProf_GC(void)
91 {
92         Bucket *b;
93         
94         runtime_lock(&proflock);
95         for(b=buckets; b; b=b->allnext) {
96                 b->allocs += b->recent_allocs;
97                 b->frees += b->recent_frees;
98                 b->alloc_bytes += b->recent_alloc_bytes;
99                 b->free_bytes += b->recent_free_bytes;
100                 b->recent_allocs = 0;
101                 b->recent_frees = 0;
102                 b->recent_alloc_bytes = 0;
103                 b->recent_free_bytes = 0;
104         }
105         runtime_unlock(&proflock);
106 }
107
// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-20 bits.
//	Array index for next 13 bits.
//	Linked list for next 7 bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

// One AddrHash covers a 1MB region of address space (key is addr>>20).
struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<13];	// indexed by bits 7..19 of the address
};

// One AddrEntry records a single live profiled allocation.
struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;	// complement of the low 20 bits of the address (see setaddrbucket)
	Bucket *b;	// bucket charged for this allocation
};

enum {
	AddrHashBits = 12	// 1MB per entry, so good for 4GB of used address space
};
static AddrHash *addrhash[1<<AddrHashBits];	// top-level hash table
static AddrEntry *addrfree;	// free list of AddrEntries, refilled 64 at a time
static uintptr addrmem;	// bytes allocated for the address map, for stats

// Multiplicative hash function:
// hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth.  The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};
148
// Set the bucket associated with addr to b.
// Called with proflock held (see runtime_MProf_Malloc).
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	// Top-level lookup: multiplicative hash of the top bits (addr>>20).
	h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>20))
			goto found;

	// No AddrHash for this 1MB region yet; create and link one.
	ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>20;
	addrhash[h] = ah;

found:
	// Take an entry from the free list, refilling it 64 at a time.
	if((e = addrfree) == nil) {
		e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	// Store the complement of the low 20 bits of addr; getaddrbucket
	// compares against the same complemented form.
	e->addr = (uint32)~(addr & ((1<<20)-1));
	e->b = b;
	h = (addr>>7)&(nelem(ah->dense)-1);	// entry in dense is top 13 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}
184
// Get the bucket associated with addr and clear the association.
// Returns nil if no bucket was recorded for addr.
// Called with proflock held (see runtime_MProf_Free).
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	// Top-level lookup, mirroring setaddrbucket.
	h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>20))
			goto found;
	return nil;

found:
	h = (addr>>7)&(nelem(ah->dense)-1);	// entry in dense is top 13 bits of low 20.
	// Walk the bottom-level list via a pointer-to-link so the
	// matching entry can be unlinked in place.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<20)-1))) {
			*l = e->next;	// unlink from the chain
			b = e->b;
			// Recycle the entry onto the free list.
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}
213
// Called by malloc to record a profiled block.
// Charges the allocation of size bytes at p to the bucket for
// the caller's stack, and remembers p -> bucket for the free side.
void
runtime_MProf_Malloc(void *p, uintptr size)
{
	M *m;
	int32 nstk;
	uintptr stk[32];
	Bucket *b;

	m = runtime_m();
	// nomemprof > 0 means we are already inside the profiler;
	// bail out to avoid recursion (the profiler itself allocates).
	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	// Skip 1 frame (this function) when collecting the call stack.
	nstk = runtime_callers(1, stk, 32);
	runtime_lock(&proflock);
	b = stkbucket(stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime_unlock(&proflock);
	// NOTE(review): m is re-fetched here, presumably in case this
	// goroutine moved to a different M above — confirm.
	m = runtime_m();
	m->nomemprof--;
}
238
// Called when freeing a profiled block.
// Credits the free of size bytes at p back to the bucket that
// allocated it, if one was recorded.
void
runtime_MProf_Free(void *p, uintptr size)
{
	M *m;
	Bucket *b;

	m = runtime_m();
	// Already inside the profiler; avoid recursion.
	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	runtime_lock(&proflock);
	// Look up (and remove) the bucket recorded for p;
	// nil if no bucket was recorded for this address.
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	}
	runtime_unlock(&proflock);
	// NOTE(review): m is re-fetched here, presumably in case this
	// goroutine moved to a different M above — confirm.
	m = runtime_m();
	m->nomemprof--;
}
261
262
// Go interface to profile data.  (Declared in extern.go)
// Assumes Go sizeof(int) == sizeof(int32)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;	// cumulative byte counts
	int64 alloc_objects, free_objects;	// cumulative object counts
	uintptr stk[32];	// call stack; zero-padded past the last frame (see record)
};
273
274 // Write b's data to r.
275 static void
276 record(Record *r, Bucket *b)
277 {
278         uint32 i;
279
280         r->alloc_bytes = b->alloc_bytes;
281         r->free_bytes = b->free_bytes;
282         r->alloc_objects = b->allocs;
283         r->free_objects = b->frees;
284         for(i=0; i<b->nstk && i<nelem(r->stk); i++)
285                 r->stk[i] = b->stk[i];
286         for(; i<nelem(r->stk); i++)
287                 r->stk[i] = 0;
288 }
289
// Go entry point for runtime.MemProfile (declared in extern.go).
// Writes one Record per bucket into the Go slice p.
// include_inuse_zero selects whether buckets whose in-use bytes are
// zero (alloc_bytes == free_bytes) are also reported.
// Returns the number of matching buckets and whether p was big enough.
func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
	Bucket *b;
	Record *r;

	runtime_lock(&proflock);
	// First pass: count the buckets that would be reported.
	n = 0;
	for(b=buckets; b; b=b->allnext)
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
	ok = false;
	// Second pass: fill records only if the caller's slice fits them all.
	if(n <= p.__count) {
		ok = true;
		r = (Record*)p.__values;
		for(b=buckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime_unlock(&proflock);
}
309
310 void
311 runtime_MProf_Mark(void (*scan)(byte *, int64))
312 {
313         // buckhash is not allocated via mallocgc.
314         scan((byte*)&buckets, sizeof buckets);
315         scan((byte*)&addrhash, sizeof addrhash);
316         scan((byte*)&addrfree, sizeof addrfree);
317 }
318
// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];	// call stack
};
324
// Go entry point for runtime.ThreadCreateProfile (declared in extern.go).
// Reports the creation-time call stack of every M (OS thread).
// Returns the thread count and whether p was big enough to hold them.
func ThreadCreateProfile(p Slice) (n int32, ok bool) {
	TRecord *r;
	M *first, *m;
	
	// Snapshot the head of the all-Ms list atomically.
	// NOTE(review): walking without a lock assumes the list only
	// grows from the head — confirm against where allm is updated.
	first = runtime_atomicloadp(&runtime_allm);
	n = 0;
	for(m=first; m; m=m->alllink)
		n++;
	ok = false;
	// Fill records only if the caller's slice can hold them all.
	if(n <= p.__count) {
		ok = true;
		r = (TRecord*)p.__values;
		for(m=first; m; m=m->alllink) {
			runtime_memmove(r->stk, m->createstack, sizeof r->stk);
			r++;
		}
	}
}
343
// Go entry point for runtime.Stack (declared in extern.go).
// Formats a stack trace of the calling goroutine into b; if all is
// true the world is stopped so other goroutines could be included.
// Returns the number of bytes written to b.
func Stack(b Slice, all bool) (n int32) {
	byte *pc, *sp;
	
	sp = runtime_getcallersp(&b);
	pc = runtime_getcallerpc(&b);

	if(all) {
		// Stop the world so the combined trace would be consistent.
		runtime_semacquire(&runtime_worldsema);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();
	}

	if(b.__count == 0)
		n = 0;
	else{
		// Redirect print output into the caller's buffer; writenbuf
		// counts down as bytes are written (presumably by the
		// runtime's print routines — confirm).
		G* g = runtime_g();
		g->writebuf = (byte*)b.__values;
		g->writenbuf = b.__count;
		USED(pc);
		USED(sp);
		// Traceback is not implemented in this port; with these
		// calls commented out nothing is written, so n ends up 0.
		// runtime_goroutineheader(g);
		// runtime_traceback(pc, sp, 0, g);
		// if(all)
		//	runtime_tracebackothers(g);
		n = b.__count - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}
	
	if(all) {
		runtime_m()->gcing = 0;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld(false);
	}
}
379
380 static void
381 saveg(byte *pc, byte *sp, G *g, TRecord *r)
382 {
383         int32 n;
384
385         USED(pc);
386         USED(sp);
387         USED(g);
388         // n = runtime_gentraceback(pc, sp, 0, g, 0, r->stk, nelem(r->stk));
389         n = 0;
390         if((size_t)n < nelem(r->stk))
391                 r->stk[n] = 0;
392 }
393
// Go entry point for runtime.GoroutineProfile (declared in extern.go).
// Records a stack trace for every live goroutine into b.
// Returns the goroutine count and whether b was big enough.
func GoroutineProfile(b Slice) (n int32, ok bool) {
	byte *pc, *sp;
	TRecord *r;
	G *gp;
	
	sp = runtime_getcallersp(&b);
	pc = runtime_getcallerpc(&b);
	
	ok = false;
	n = runtime_gcount();
	if(n <= b.__count) {
		// Stop the world so the set of goroutines is stable.
		runtime_semacquire(&runtime_worldsema);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();

		// Re-count under the stopped world: goroutines may have
		// been created between the first count and the stop.
		n = runtime_gcount();
		if(n <= b.__count) {
			G* g = runtime_g();
			ok = true;
			r = (TRecord*)b.__values;
			// Record the current goroutine first.
			saveg(pc, sp, g, r++);
			for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				// NOTE(review): saveg for other goroutines is
				// commented out in this port, so r advances past
				// entries that are never filled in — confirm this
				// is intentional.
				//saveg(gp->sched.pc, gp->sched.sp, gp, r++);
				r++;
			}
		}
	
		runtime_m()->gcing = 0;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld(false);
	}
}
428