1 /* Subroutines for the gcc driver.
2 Copyright (C) 2006, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to
18 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
19 Boston, MA 02110-1301, USA. */
23 #include "coretypes.h"
/* Prototype of the routine this file provides; two alternative
   definitions with this signature appear further down.  */
27 const char *host_detect_local_cpu (int argc, const char **argv);
/* Execute the CPUID instruction and store the results in A, B, C and D.
   The template exchanges %ebx with operand 1 immediately before and after
   CPUID, so the value CPUID leaves in %ebx ends up in B (constraint "=r")
   while %ebx itself gets its previous contents back.  A, C and D are bound
   to %eax, %ecx and %edx by the "=a", "=c" and "=d" constraints.
   NOTE(review): the input-operand line that binds NUM is not part of this
   excerpt; confirm the macro is terminated as intended.  */
30 #define cpuid(num,a,b,c,d) \
31 asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" \
32 : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
/* Feature masks applied to the registers returned by CPUID.  Every mask
   is unsigned: the registers they are tested against are unsigned, and
   bit 31 cannot be formed by shifting a signed 1 without overflowing
   into the sign bit.  */

/* Tested against %edx of CPUID leaf 1.  */
#define bit_CMPXCHG8B (1u << 8)
#define bit_CMOV (1u << 15)
#define bit_MMX (1u << 23)
#define bit_SSE (1u << 25)
#define bit_SSE2 (1u << 26)

/* Tested against %ecx of CPUID leaf 1.  */
#define bit_SSE3 (1u << 0)
#define bit_SSSE3 (1u << 9)
#define bit_CMPXCHG16B (1u << 13)

/* Tested against %ecx of CPUID leaf 0x80000001.  */
#define bit_LAHF_LM (1u << 0)
#define bit_SSE4a (1u << 6)

/* Tested against %edx of CPUID leaf 0x80000001.  */
#define bit_LM (1u << 29)
#define bit_3DNOWP (1u << 30)
#define bit_3DNOW (1u << 31)
51 /* Returns parameters that describe L1_ASSOC associative cache of size
52 L1_SIZEKB with lines of size L1_LINE. */
55 describe_cache (unsigned l1_sizekb, unsigned l1_line,
56 unsigned l1_assoc ATTRIBUTE_UNUSED)
58 char size[1000], line[1000];
59 unsigned size_in_lines;
61 /* At the moment, gcc middle-end does not use the information about the
62 associativity of the cache. */
64 size_in_lines = (l1_sizekb * 1024) / l1_line;
66 sprintf (size, "--param l1-cache-size=%u", size_in_lines);
67 sprintf (line, "--param l1-cache-line-size=%u", l1_line);
69 return concat (size, " ", line, " ", NULL);
72 /* Returns the description of caches for an AMD processor.
   MAX_EXT_LEVEL is compared against 0x80000005, the CPUID leaf queried
   below; presumably it is the highest extended leaf the CPU supports
   (the caller passes its ext_level) -- confirm.  */
75 detect_caches_amd (unsigned max_ext_level)
77 unsigned eax, ebx, ecx, edx;
78 unsigned l1_sizekb, l1_line, l1_assoc;
/* NOTE(review): the statement guarded by this test is not visible in this
   excerpt; confirm it returns before leaf 0x80000005 is queried.  */
80 if (max_ext_level < 0x80000005)
83 cpuid (0x80000005, eax, ebx, ecx, edx);
/* Bits 31-24 of %ecx become the size argument and bits 23-16 the
   associativity argument of describe_cache.
   NOTE(review): no assignment to l1_line is visible in this excerpt;
   confirm it is set, and non-zero, before describe_cache divides by it.  */
86 l1_sizekb = (ecx >> 24) & 0xff;
87 l1_assoc = (ecx >> 16) & 0xff;
89 return describe_cache (l1_sizekb, l1_line, l1_assoc);
92 /* Stores the size of the L1 cache and cache line, and the associativity
93 of the cache according to REG to L1_SIZEKB, L1_LINE and L1_ASSOC. */
96 decode_caches_intel (unsigned reg, unsigned *l1_sizekb, unsigned *l1_line,
/* Tests bit 31 of REG.
   NOTE(review): the guarded statement is not visible in this excerpt;
   presumably a register with bit 31 set is skipped -- confirm.  */
101 if (((reg >> 31) & 1) != 0)
/* Four iterations; presumably one per byte of the 32-bit REG, each byte
   being looked at separately.  The loop body is not visible in this
   excerpt -- confirm.  */
104 for (i = 0; i < 4; i++)
153 /* Returns the description of caches for an Intel processor.
   CPUID leaf 2 is queried and each of the four result registers is handed
   to decode_caches_intel together with the same three output variables;
   the values collected there are then passed to describe_cache.
   NOTE(review): MAX_LEVEL is not used by any line visible in this
   excerpt; presumably it guards the leaf-2 query -- confirm.  */
156 detect_caches_intel (unsigned max_level)
158 unsigned eax, ebx, ecx, edx;
/* All three outputs start at zero, so they stay zero unless
   decode_caches_intel stores something into them.  */
159 unsigned l1_sizekb = 0, l1_line = 0, assoc = 0;
164 cpuid (2, eax, ebx, ecx, edx);
166 decode_caches_intel (eax, &l1_sizekb, &l1_line, &assoc);
167 decode_caches_intel (ebx, &l1_sizekb, &l1_line, &assoc);
168 decode_caches_intel (ecx, &l1_sizekb, &l1_line, &assoc);
169 decode_caches_intel (edx, &l1_sizekb, &l1_line, &assoc);
/* NOTE(review): describe_cache divides by its l1_line argument; confirm
   that a still-zero l1_line cannot reach this call (no guard is visible
   in this excerpt).  */
173 return describe_cache (l1_sizekb, l1_line, assoc);
176 /* This will be called by the spec parser in gcc.c when it sees
177 a %:local_cpu_detect(args) construct. Currently it will be called
178 with either "arch" or "tune" as argument, depending on whether
179 -march=native or -mtune=native is to be substituted.
181 It returns a string containing new command line parameters to be
182 put in place of the above two options, depending on the CPU on which
183 this is executed, e.g. "-march=k8" on an AMD64 machine.
186 ARGC and ARGV are set depending on the actual arguments given
to the %:local_cpu_detect construct. */
/* CPUID-based definition.  Queries the host processor and returns the
   string built by the final concat call: the cache description, then
   "-m" ARGV[0] "=" CPU " ", then any extra options collected in OPTIONS.
   Many statements of this function are not visible in this excerpt; the
   notes below describe only what the visible lines show.  */
188 const char *host_detect_local_cpu (int argc, const char **argv)
190 const char *cpu = NULL;
191 const char *cache = "";
192 const char *options = "";
193 enum processor_type processor = PROCESSOR_I386;
194 unsigned int eax, ebx, ecx, edx;
195 unsigned int max_level;
197 unsigned int ext_level;
/* Feature flags; each is later set to 0 or 1 from a CPUID result bit.  */
198 unsigned char has_mmx = 0, has_3dnow = 0, has_3dnowp = 0, has_sse = 0;
199 unsigned char has_sse2 = 0, has_sse3 = 0, has_ssse3 = 0, has_cmov = 0;
200 unsigned char has_cmpxchg16b = 0, has_lahf_lm = 0;
201 unsigned char has_longmode = 0, has_cmpxchg8b = 0, has_sse4a = 0;
202 unsigned char is_amd = 0;
203 unsigned int family = 0;
/* ARGV[0] is expected to be "arch" or "tune"; ARCH records which.
   NOTE(review): the declarations of ARCH and VENDOR and the statement
   guarded by the test below are not visible in this excerpt.  */
209 arch = strcmp (argv[0], "arch") == 0;
210 if (!arch && strcmp (argv[0], "tune"))
214 /* See if we can use cpuid. */
/* The asm reads the flags register into both outputs, xors operand 2 into
   the first copy, writes that copy back to the flags, reads the flags
   again and finally restores the original flags.  The test that follows
   checks whether bit 0x00200000 differs between the two copies.
   NOTE(review): the input operand (%2) is not visible in this excerpt.  */
215 asm volatile ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;"
216 "pushl %0; popfl; pushfl; popl %0; popfl"
217 : "=&r" (eax), "=&r" (ebx)
220 if (((eax ^ ebx) & 0x00200000) == 0)
224 processor = PROCESSOR_PENTIUM;
226 /* Check the highest input value for eax. */
227 cpuid (0, eax, ebx, ecx, edx);
229 /* We only look at the first four characters. */
/* Leaf 1: feature bits are taken from %edx and %ecx.  */
234 cpuid (1, eax, ebx, ecx, edx);
235 has_cmpxchg8b = !!(edx & bit_CMPXCHG8B);
236 has_cmov = !!(edx & bit_CMOV);
237 has_mmx = !!(edx & bit_MMX);
238 has_sse = !!(edx & bit_SSE);
239 has_sse2 = !!(edx & bit_SSE2);
240 has_sse3 = !!(ecx & bit_SSE3);
241 has_ssse3 = !!(ecx & bit_SSSE3);
242 has_cmpxchg16b = !!(ecx & bit_CMPXCHG16B);
243 /* We don't care for extended family. */
/* NOTE(review): ~(1 << 4) clears only bit 4 of the shifted value; every
   other bit of %eax above bit 7 stays in FAMILY.  If just the 4-bit base
   family field is wanted the mask should be 0x0f -- confirm.  */
244 family = (eax >> 8) & ~(1 << 4);
246 cpuid (0x80000000, eax, ebx, ecx, edx);
/* NOTE(review): the assignment to ext_level is not visible in this
   excerpt; presumably it is taken from the leaf 0x80000000 result.  */
248 if (ext_level >= 0x80000000)
/* Leaf 0x80000001: further feature bits from %ecx and %edx.  */
250 cpuid (0x80000001, eax, ebx, ecx, edx);
251 has_lahf_lm = !!(ecx & bit_LAHF_LM);
252 has_3dnow = !!(edx & bit_3DNOW);
253 has_3dnowp = !!(edx & bit_3DNOWP);
254 has_longmode = !!(edx & bit_LM);
255 has_sse4a = !!(ecx & bit_SSE4a);
/* VENDOR is compared with the first four bytes of "Auth" and "Genu".
   NOTE(review): reading a string literal through an unsigned int pointer
   relies on the literal's alignment and on the compiler tolerating the
   type punning; a memcmp of four bytes would avoid both -- confirm.  */
258 is_amd = vendor == *(unsigned int*)"Auth";
263 cache = detect_caches_amd (ext_level);
264 else if (vendor == *(unsigned int*)"Genu")
265 cache = detect_caches_intel (max_level);
/* Processor selection; the conditions choosing between these assignments
   are mostly not visible in this excerpt.  */
271 processor = PROCESSOR_K6;
273 processor = PROCESSOR_ATHLON;
274 if (has_sse2 || has_longmode)
275 processor = PROCESSOR_K8;
277 processor = PROCESSOR_AMDFAM10;
284 /* Default is PROCESSOR_PENTIUM. */
287 processor = PROCESSOR_PENTIUMPRO;
290 processor = PROCESSOR_PENTIUM4;
293 /* We have no idea. Use something reasonable. */
311 else if (has_cmpxchg8b)
/* Case labels of a switch; presumably over PROCESSOR, choosing the CPU
   name -- the switch header and case bodies are not visible here.  */
331 case PROCESSOR_PENTIUM:
337 case PROCESSOR_PENTIUMPRO:
340 /* It is Core 2 Duo. */
349 /* It is Core Duo. */
354 /* It is Pentium M. */
359 /* It is Pentium III. */
364 /* It is Pentium II. */
369 /* Default to Pentium Pro. */
375 /* For -mtune, we default to -mtune=generic. */
380 case PROCESSOR_GEODE:
389 case PROCESSOR_ATHLON:
395 case PROCESSOR_PENTIUM4:
409 case PROCESSOR_NOCONA:
412 case PROCESSOR_AMDFAM10:
415 case PROCESSOR_GENERIC32:
416 case PROCESSOR_GENERIC64:
/* Extra options are appended to OPTIONS, each followed by a space; the
   conditions guarding the two appends are not visible in this excerpt.  */
427 options = concat (options, "-mcx16 ", NULL);
429 options = concat (options, "-msahf ", NULL);
/* CACHE is either "" or whatever the detect_caches_* helper returned.  */
433 return concat (cache, "-m", argv[0], "=", cpu, " ", options, NULL);
436 /* If we aren't compiling with GCC we just provide a minimal
default value. */
/* Fallback definition that issues no CPUID queries: it returns
   "-m" ARGV[0] "=" CPU with a fixed CPU name.
   NOTE(review): the declarations of CPU and ARCH and the assignment to
   CPU are not visible in this excerpt.  */
438 const char *host_detect_local_cpu (int argc, const char **argv)
/* ARGV[0] is expected to be "arch" or "tune"; the statement guarded by
   the test below is not visible in this excerpt.  */
446 arch = strcmp (argv[0], "arch") == 0;
447 if (!arch && strcmp (argv[0], "tune"))
452 /* FIXME: i386 is wrong for 64bit compiler. How can we tell if
453 we are generating 64bit or 32bit code? */
/* Unlike the CPUID-based definition, this result has no trailing space
   and carries neither a cache description nor extra options.  */
459 return concat ("-m", argv[0], "=", cpu, NULL);
461 #endif /* GCC_VERSION */