2011-11-06 François Dumont <fdumont@gcc.gnu.org>

author fdumont <fdumont@138bc75d-0d04-0410-961f-82ee72b054a4>

Sun, 6 Nov 2011 17:16:00 +0000 (17:16 +0000)

committer fdumont <fdumont@138bc75d-0d04-0410-961f-82ee72b054a4>

Sun, 6 Nov 2011 17:16:00 +0000 (17:16 +0000)
author fdumont <fdumont@138bc75d-0d04-0410-961f-82ee72b054a4>
Sun, 6 Nov 2011 17:16:00 +0000 (17:16 +0000)
committer fdumont <fdumont@138bc75d-0d04-0410-961f-82ee72b054a4>
Sun, 6 Nov 2011 17:16:00 +0000 (17:16 +0000)
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog

index dd1bd56..b04f1a9 100644 (file)
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,9 @@
+2011-11-06  François Dumont <fdumont@gcc.gnu.org>
+
+       * testsuite/performance/23_containers/insert_erase/41975.cc: Add
+       tests to check performance with or without cache of hash code and with
+       string type that has a costlier hash functor than int type.
+
  2011-11-06  Benjamin Kosnik  <bkoz@redhat.com>
             Andrew MacLeod  <amacleod@redhat.com>
  
diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc

index 30fc105..a5dae41 100644 (file)
--- a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
+++ b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
@@ -17,56 +17,167 @@
  // with this library; see the file COPYING3.  If not see
  // <http://www.gnu.org/licenses/>.
  
-#include <cassert>
+#include <sstream>
  #include <unordered_set>
  #include <testsuite_performance.h>
  
-int main()
+namespace
  {
-  using namespace __gnu_test;
+  // Bench using an unordered_set<int>. Hash functor for int is quite
+  // predictable so it helps bench very specific use cases.
+  template<bool use_cache>
+    void bench()
+    {
+      using namespace __gnu_test;
+      std::ostringstream ostr;
+      ostr << "unordered_set<int> " << (use_cache ? "with" : "without")
+          << " cache";
+      const std::string desc = ostr.str();
+
+      time_counter time;
+      resource_counter resource;
+
+      const int nb = 200000;
+      start_counters(time, resource);
+
+      std::__unordered_set<int, std::hash<int>, std::equal_to<int>,
+                          std::allocator<int>, use_cache> us;
+      for (int i = 0; i != nb; ++i)
+       us.insert(i);
+
+      stop_counters(time, resource);
+      ostr.str("");
+      ostr << desc << ": first insert";
+      report_performance(__FILE__, ostr.str().c_str(), time, resource);
+
+      start_counters(time, resource);
+
+      // Here is the worst erase use case when hashtable implementation was
+      // something like vector<forward_list<>>. Erasing from the end was very
+      // costly because we need to return the iterator following the erased
+      // one, as the hashtable is getting emptier at each step there are
+      // more and more empty bucket to loop through to reach the end of the
+      // container and find out that it was in fact the last element.
+      for (int j = nb - 1; j >= 0; --j)
+       {
+         auto it = us.find(j);
+         if (it != us.end())
+           us.erase(it);
+       }
  
-  time_counter time;
-  resource_counter resource;
+      stop_counters(time, resource);
+      ostr.str("");
+      ostr << desc << ": erase from iterator";
+      report_performance(__FILE__, ostr.str().c_str(), time, resource);
  
-  start_counters(time, resource);
+      start_counters(time, resource);
  
-  std::unordered_set<int> us;
-  for (int i = 0; i != 5000000; ++i)
-    us.insert(i);
+      // This is a worst insertion use case for the current implementation as
+      // we insert an element at the begining of the hashtable and then we
+      // insert starting at the end so that each time we need to seek up to the
+      // first bucket to find the first non-empty one.
+      us.insert(0);
+      for (int i = nb - 1; i >= 0; --i)
+       us.insert(i);
  
-  stop_counters(time, resource);
-  report_performance(__FILE__, "Container generation", time, resource);
+      stop_counters(time, resource);
+      ostr.str("");
+      ostr << desc << ": second insert";
+      report_performance(__FILE__, ostr.str().c_str(), time, resource);
  
-  start_counters(time, resource);
+      start_counters(time, resource);
  
-  for (int j = 100; j != 0; --j)
+      for (int j = nb - 1; j >= 0; --j)
+       us.erase(j);
+
+      stop_counters(time, resource);
+      ostr.str("");
+      ostr << desc << ": erase from key";
+      report_performance(__FILE__, ostr.str().c_str(), time, resource);
+    }
+
+  // Bench using unordered_set<string> that show how important it is to cache
+  // hash code as computing string hash code is quite expensive compared to
+  // computing it for int.
+  template<bool use_cache>
+    void bench_str()
      {
-      auto it = us.begin();
-      while (it != us.end())
+      using namespace __gnu_test;
+      std::ostringstream ostr;
+      ostr << "unordered_set<string> " << (use_cache ? "with" : "without")
+          << " cache";
+      const std::string desc = ostr.str();
+
+      time_counter time;
+      resource_counter resource;
+
+      const int nb = 200000;
+      // First generate once strings that are going to be used throughout the
+      // bench:
+      std::vector<std::string> strs;
+      strs.reserve(nb);
+      for (int i = 0; i != nb; ++i)
+      {
+       ostr.str("");
+       ostr << "string #" << i;
+       strs.push_back(ostr.str());
+      }
+
+      start_counters(time, resource);
+
+      std::__unordered_set<std::string, std::hash<std::string>,
+                          std::equal_to<std::string>,
+                          std::allocator<std::string>, use_cache> us;
+      for (int i = 0; i != nb; ++i)
+       us.insert(strs[i]);
+
+      stop_counters(time, resource);
+      ostr.str("");
+      ostr << desc << ": first insert";
+      report_performance(__FILE__, ostr.str().c_str(), time, resource);
+
+      start_counters(time, resource);
+
+      for (int j = nb - 1; j >= 0; --j)
         {
-         if ((*it % j) == 0)
-           it = us.erase(it);
-         else
-           ++it;
+         auto it = us.find(strs[j]);
+         if (it != us.end())
+           us.erase(it);
         }
-    }
  
-  stop_counters(time, resource);
-  report_performance(__FILE__, "Container erase", time, resource);
+      stop_counters(time, resource);
+      ostr.str("");
+      ostr << desc << ": erase from iterator";
+      report_performance(__FILE__, ostr.str().c_str(), time, resource);
  
-  start_counters(time, resource);
+      start_counters(time, resource);
  
-  us.insert(0);
+      us.insert(strs[0]);
+      for (int i = nb - 1; i >= 0; --i)
+       us.insert(strs[i]);
  
-  for (int i = 0; i != 500; ++i)
-    {
-      auto it = us.begin();
-      ++it;
-      assert( it == us.end() );
-    }
+      stop_counters(time, resource);
+      ostr.str("");
+      ostr << desc << ": second insert";
+      report_performance(__FILE__, ostr.str().c_str(), time, resource);
+
+      start_counters(time, resource);
+
+      for (int j = nb - 1; j >= 0; --j)
+       us.erase(strs[j]);
  
-  stop_counters(time, resource);
-  report_performance(__FILE__, "Container iteration", time, resource);
+      stop_counters(time, resource);
+      ostr.str("");
+      ostr << desc << ": erase from key";
+      report_performance(__FILE__, ostr.str().c_str(), time, resource);
+    }
+}
  
+int main()
+{
+  bench<false>();
+  bench<true>();
+  bench_str<false>();
+  bench_str<true>();
    return 0;
  }
author	fdumont <fdumont@138bc75d-0d04-0410-961f-82ee72b054a4>
	Sun, 6 Nov 2011 17:16:00 +0000 (17:16 +0000)
committer	fdumont <fdumont@138bc75d-0d04-0410-961f-82ee72b054a4>
	Sun, 6 Nov 2011 17:16:00 +0000 (17:16 +0000)
libstdc++-v3/ChangeLog		patch \| blob \| history
libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc		patch \| blob \| history