/* At the moment, gcc does not use the information
about the associativity of the cache. */
- sprintf (size, "--param l1-cache-size=%u", level1.sizekb);
- sprintf (line, "--param l1-cache-line-size=%u", level1.line);
+ snprintf (size, sizeof (size),
+ "--param l1-cache-size=%u ", level1.sizekb);
+ snprintf (line, sizeof (line),
+ "--param l1-cache-line-size=%u ", level1.line);
- sprintf (size2, "--param l2-cache-size=%u", level2.sizekb);
+ snprintf (size2, sizeof (size2),
+ "--param l2-cache-size=%u ", level2.sizekb);
- return concat (size, " ", line, " ", size2, " ", NULL);
+ return concat (size, line, size2, NULL);
}
/* Detect L2 cache parameters using CPUID extended function 0x80000006. */
};
static void
-detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2)
+detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
+ struct cache_desc *level3)
{
struct cache_desc *cache;
case 2:
cache = level2;
break;
+ case 3:
+ cache = level3;
+ break;
default:
cache = NULL;
}
cache->sizekb = (cache->assoc * part
* cache->line * sets) / 1024;
- }
+ }
}
default:
break;
/* Returns the description of caches for an Intel processor. */
static const char *
-detect_caches_intel (bool xeon_mp, unsigned max_level, unsigned max_ext_level)
+detect_caches_intel (bool xeon_mp, unsigned max_level,
+ unsigned max_ext_level, unsigned *l2sizekb)
{
- struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0};
+ struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
if (max_level >= 4)
- detect_caches_cpuid4 (&level1, &level2);
+ detect_caches_cpuid4 (&level1, &level2, &level3);
else if (max_level >= 2)
detect_caches_cpuid2 (xeon_mp, &level1, &level2);
else
if (level1.sizekb == 0)
return "";
+ /* Let the L3 replace the L2. This assumes inclusive caches
+ and a single-threaded program for now. */
+ if (level3.sizekb)
+ level2 = level3;
+
/* Intel CPUs are equipped with AMD style L2 cache info. Try this
method if other methods fail to provide L2 cache parameters. */
if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
detect_l2_cache (&level2);
+ *l2sizekb = level2.sizekb;
+
return describe_cache (level1, level2);
}
enum vendor_signatures
{
SIG_INTEL = 0x756e6547 /* Genu */,
- SIG_AMD = 0x68747541 /* Auth */,
+ SIG_AMD = 0x68747541 /* Auth */
+};
+
+enum processor_signatures
+{
SIG_GEODE = 0x646f6547 /* Geod */
};
/* Extended features */
unsigned int has_lahf_lm = 0, has_sse4a = 0;
unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
+ unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
+ unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
+ unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
bool arch;
+ unsigned int l2sizekb = 0;
+
if (argc < 1)
return NULL;
__cpuid (1, eax, ebx, ecx, edx);
- /* We don't care for extended family. */
model = (eax >> 4) & 0x0f;
family = (eax >> 8) & 0x0f;
+ if (vendor == SIG_INTEL)
+ {
+ unsigned int extended_model, extended_family;
+
+ extended_model = (eax >> 12) & 0xf0;
+ extended_family = (eax >> 20) & 0xff;
+ if (family == 0x0f)
+ {
+ family += extended_family;
+ model += extended_model;
+ }
+ else if (family == 0x06)
+ model += extended_model;
+ }
has_sse3 = ecx & bit_SSE3;
has_ssse3 = ecx & bit_SSSE3;
+ has_sse4_1 = ecx & bit_SSE4_1;
+ has_sse4_2 = ecx & bit_SSE4_2;
+ has_avx = ecx & bit_AVX;
has_cmpxchg16b = ecx & bit_CMPXCHG16B;
+ has_movbe = ecx & bit_MOVBE;
+ has_popcnt = ecx & bit_POPCNT;
+ has_aes = ecx & bit_AES;
+ has_pclmul = ecx & bit_PCLMUL;
has_cmpxchg8b = edx & bit_CMPXCHG8B;
has_cmov = edx & bit_CMOV;
has_lahf_lm = ecx & bit_LAHF_LM;
has_sse4a = ecx & bit_SSE4a;
+ has_abm = ecx & bit_ABM;
+ has_lwp = ecx & bit_LWP;
has_longmode = edx & bit_LM;
has_3dnowp = edx & bit_3DNOWP;
else if (vendor == SIG_INTEL)
{
bool xeon_mp = (family == 15 && model == 6);
- cache = detect_caches_intel (xeon_mp, max_level, ext_level);
+ cache = detect_caches_intel (xeon_mp, max_level,
+ ext_level, &l2sizekb);
}
}
if (vendor == SIG_AMD)
{
- processor = PROCESSOR_PENTIUM;
+ unsigned int name;
- if (has_mmx)
- processor = PROCESSOR_K6;
- if (has_3dnowp)
- processor = PROCESSOR_ATHLON;
- if (has_sse2 || has_longmode)
- processor = PROCESSOR_K8;
- if (has_sse4a)
+ /* Detect geode processor by its processor signature. */
+ if (ext_level > 0x80000001)
+ __cpuid (0x80000002, name, ebx, ecx, edx);
+ else
+ name = 0;
+
+ if (name == SIG_GEODE)
+ processor = PROCESSOR_GEODE;
+ else if (has_sse4a)
processor = PROCESSOR_AMDFAM10;
+ else if (has_sse2 || has_longmode)
+ processor = PROCESSOR_K8;
+ else if (has_3dnowp)
+ processor = PROCESSOR_ATHLON;
+ else if (has_mmx)
+ processor = PROCESSOR_K6;
+ else
+ processor = PROCESSOR_PENTIUM;
}
- else if (vendor == SIG_GEODE)
- processor = PROCESSOR_GEODE;
else
{
switch (family)
cpu = "pentium";
break;
case PROCESSOR_PENTIUMPRO:
- if (has_longmode)
- /* It is Core 2 Duo. */
+ if (model == 28)
+ cpu = "atom";
+ else if (model >= 28 && l2sizekb < 2048)
+ /* Assume it's a small core if there's less than 2MB of cache. */
+ cpu = "atom";
+ else if (has_longmode)
cpu = "core2";
else if (arch)
{
if (has_sse3)
/* It is Core Duo. */
- cpu = "prescott";
+ cpu = "pentium-m";
else if (has_sse2)
/* It is Pentium M. */
cpu = "pentium-m";
if (arch)
{
if (has_cmpxchg16b)
- options = concat (options, "-mcx16 ", NULL);
+ options = concat (options, " -mcx16", NULL);
if (has_lahf_lm)
- options = concat (options, "-msahf ", NULL);
+ options = concat (options, " -msahf", NULL);
+ if (has_movbe)
+ options = concat (options, " -mmovbe", NULL);
+ if (has_aes)
+ options = concat (options, " -maes", NULL);
+ if (has_pclmul)
+ options = concat (options, " -mpclmul", NULL);
+ if (has_popcnt)
+ options = concat (options, " -mpopcnt", NULL);
+ if (has_abm)
+ options = concat (options, " -mabm", NULL);
+ if (has_lwp)
+ options = concat (options, " -mlwp", NULL);
+
+ if (has_avx)
+ options = concat (options, " -mavx", NULL);
+ else if (has_sse4_2)
+ options = concat (options, " -msse4.2", NULL);
+ else if (has_sse4_1)
+ options = concat (options, " -msse4.1", NULL);
}
done:
- return concat (cache, "-m", argv[0], "=", cpu, " ", options, NULL);
+ return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
}
#else
-/* If we aren't compiling with GCC we just provide a minimal
- default value. */
+/* If we aren't compiling with GCC then the driver will just ignore
+ the "native" target for -march and -mtune and will leave it to the
+ newly built compiler to generate code for its default target. */
-const char *host_detect_local_cpu (int argc, const char **argv)
+const char *host_detect_local_cpu (int argc ATTRIBUTE_UNUSED,
+ const char **argv ATTRIBUTE_UNUSED)
{
- const char *cpu;
- bool arch;
-
- if (argc < 1)
- return NULL;
-
- arch = !strcmp (argv[0], "arch");
-
- if (!arch && strcmp (argv[0], "tune"))
- return NULL;
-
- if (arch)
- {
- /* FIXME: i386 is wrong for 64bit compiler. How can we tell if
- we are generating 64bit or 32bit code? */
- cpu = "i386";
- }
- else
- cpu = "generic";
-
- return concat ("-m", argv[0], "=", cpu, NULL);
+ return NULL;
}
#endif /* __GNUC__ */