WL#4975: Improvement of InnoDB spinloop

Affects: Server-5.4   —   Status: Complete

Improves the InnoDB spin loop and modifies the default spin values.
In MySQL 5.4.0, a PAUSE instruction is added to the spin loop to decrease the
amount of resources used by the CPU thread that is spinning, so that the CPU
threads that do valuable work can progress with greater speed.

On x86 this will be implemented by using the PAUSE instruction. On SPARC the
Compare and Swap instruction will be used instead, based on a research paper
on this topic for CMT boxes.

For further details, see:
http://mikaelronstrom.blogspot.com/2009/04/mysql-54-patches-improvements-to-spin.html


Added a PAUSE instruction to the SPIN loop
  according to recommendation from Intel.
  Minor modifications to spin values which showed to be slightly better.
  Fix of ifdef error
  Yet another ifdef error
  Fix compiler error
  Fixed bug
  Use AC_TRY_RUN instead of AC_TRY_COMPILE when checking for x86 PAUSE
  instruction support, due to
  http://bugs.opensolaris.org/bugdatabase/printableBug.do?bug_id=6478684
  Added support for rep; nop replacing PAUSE due to Solaris bug
  Replaced abs_top_srcdir with top_srcdir. It is not certain that this
  is an improvement, but abs_top_srcdir is known to cause problems
  in some cases, and top_srcdir is a more commonly used variable
  for the same purpose.

These patches implement the InnoDB spinloop improvements.


=== modified file 'configure.in'
--- configure.in	2008-12-05 18:43:45 +0000
+++ configure.in	2008-12-10 13:02:52 +0000
@@ -2342,6 +2342,17 @@ fi
 fi
 #---END:
 
+#Check for x86 PAUSE instruction
+AC_MSG_CHECKING("for x86 PAUSE instruction")
+AC_TRY_COMPILE(
+[],
+[{__asm__ __volatile__ ("pause");}],
+x86_pause_exists=yes, x86_pause_exists=no)
+if test "$x86_pause_exists" = "yes"
+then
+  AC_DEFINE([HAVE_PAUSE_INSTRUCTION], [1], [Does x86 PAUSE instruction exist])
+fi
+
 # Check if pthread_attr_setscope() exists
 AC_CACHE_CHECK("for pthread_attr_setscope", mysql_cv_pthread_attr_setscope,
 AC_TRY_LINK(

=== modified file 'storage/innobase/include/ut0ut.h'
--- storage/innobase/include/ut0ut.h	2008-10-16 18:45:12 +0000
+++ storage/innobase/include/ut0ut.h	2008-12-10 13:02:52 +0000
@@ -17,6 +17,19 @@ Created 1/20/1994 Heikki Tuuri
 
 typedef time_t	ib_time_t;
 
+#ifdef HAVE_PAUSE_INSTRUCTION
+#define PAUSE_INSTRUCTION(volatile_var) {__asm__ __volatile__ ("pause");}
+#elif
+#ifdef UNIV_SYNC_ATOMIC
+#define PAUSE_INSTRUCTION(volatile_var) \
+  { \
+    os_compare_and_swap(volatile_var, 0, 1); \
+  }
+#elif
+#define PAUSE_INSTRUCTION(volatile_var)
+#endif
+#endif
+
 /************************************************************
 Gets the high 32 bits in a ulint. That is makes a shift >> 32,
 but since there seem to be compiler bugs in both gcc and Visual C++,

=== modified file 'storage/innobase/ut/ut0ut.c'
--- storage/innobase/ut/ut0ut.c	2008-10-16 18:45:12 +0000
+++ storage/innobase/ut/ut0ut.c	2008-12-10 13:02:52 +0000
@@ -336,11 +336,13 @@ ut_delay(
 	ulint	delay)	/* in: delay in microseconds on 100 MHz Pentium */
 {
 	ulint	i, j;
+        volatile lint volatile_var;
 
 	j = 0;
 
 	for (i = 0; i < delay * 50; i++) {
 		j += i;
+                PAUSE_INSTRUCTION(&volatile_var);
 	}
 
 	if (ut_always_false) {


=== modified file 'storage/innobase/srv/srv0srv.c'
--- storage/innobase/srv/srv0srv.c	2008-12-05 13:08:28 +0000
+++ storage/innobase/srv/srv0srv.c	2008-12-10 17:54:23 +0000
@@ -352,10 +352,10 @@ ibool	srv_use_awe			= FALSE;
 ibool	srv_use_adaptive_hash_indexes	= TRUE;
 
 /*-------------------------------------------*/
-ulong	srv_n_spin_wait_rounds	= 20;
+ulong	srv_n_spin_wait_rounds	= 30;
 ulong	srv_n_free_tickets_to_enter = 500;
 ulong	srv_thread_sleep_delay = 10000;
-ulint	srv_spin_wait_delay	= 5;
+ulint	srv_spin_wait_delay	= 6;
 ibool	srv_priority_boost	= TRUE;
 
 ibool	srv_print_thread_releases	= FALSE;


=== modified file 'storage/innobase/include/ut0ut.h'
--- storage/innobase/include/ut0ut.h	2008-12-10 13:02:52 +0000
+++ storage/innobase/include/ut0ut.h	2008-12-12 21:39:12 +0000
@@ -25,7 +25,7 @@ typedef time_t	ib_time_t;
   { \
     os_compare_and_swap(volatile_var, 0, 1); \
   }
-#elif
+#else
 #define PAUSE_INSTRUCTION(volatile_var)
 #endif
 #endif


=== modified file 'storage/innobase/include/ut0ut.h'
--- storage/innobase/include/ut0ut.h	2008-12-12 21:39:12 +0000
+++ storage/innobase/include/ut0ut.h	2008-12-12 22:44:39 +0000
@@ -19,7 +19,7 @@ typedef time_t	ib_time_t;
 
 #ifdef HAVE_PAUSE_INSTRUCTION
 #define PAUSE_INSTRUCTION(volatile_var) {__asm__ __volatile__ ("pause");}
-#elif
+#else
 #ifdef UNIV_SYNC_ATOMIC
 #define PAUSE_INSTRUCTION(volatile_var) \
   { \


=== modified file 'configure.in'
--- configure.in	2008-12-20 15:15:46 +0000
+++ configure.in	2009-02-01 01:34:39 +0000
@@ -2357,10 +2357,20 @@ fi
 
 #Check for x86 PAUSE instruction
 AC_MSG_CHECKING("for x86 PAUSE instruction")
-AC_TRY_COMPILE(
-[],
-[{__asm__ __volatile__ ("pause");}],
-x86_pause_exists=yes, x86_pause_exists=no)
+# We have to actually try running the test program, because of a bug
+# in Solaris on x86_64, where it wrongly reports that PAUSE is not
+# supported when trying to run an application.  See
+# http://bugs.opensolaris.org/bugdatabase/printableBug.do?bug_id=6478684
+AC_TRY_RUN([
+    int main() {
+      __asm__ __volatile__ ("pause");
+      return 0;
+    }
+  ],
+  [x86_pause_exists=yes],
+  [x86_pause_exists=no],
+  [x86_pause_exists=no]  # Cross-compile, assume no PAUSE instruction
+)
 if test "$x86_pause_exists" = "yes"
 then
   AC_DEFINE([HAVE_PAUSE_INSTRUCTION], [1], [Does x86 PAUSE instruction exist])


=== modified file 'configure.in'
--- configure.in	2009-02-01 01:34:39 +0000
+++ configure.in	2009-02-03 11:46:52 +0000
@@ -2371,9 +2371,24 @@ AC_TRY_RUN([
   [x86_pause_exists=no],
   [x86_pause_exists=no]  # Cross-compile, assume no PAUSE instruction
 )
+AC_TRY_RUN([
+    int main() {
+      __asm__ __volatile__ ("rep; nop");
+      return 0;
+    }
+  ],
+  [x86_fake_pause_exists=yes],
+  [x86_fake_pause_exists=no],
+  [x86_fake_pause_exists=no]  # Cross-compile, assume no x86 NOP instruction
+)
 if test "$x86_pause_exists" = "yes"
 then
   AC_DEFINE([HAVE_PAUSE_INSTRUCTION], [1], [Does x86 PAUSE instruction exist])
+else
+  if test "$x86_fake_pause_exists" = "yes"
+  then
+    AC_DEFINE([HAVE_FAKE_PAUSE_INSTRUCTION], [1], [Does x86 NOP instruction exist])
+  fi
 fi
 
 # Check if pthread_attr_setscope() exists

=== modified file 'storage/innobase/include/ut0ut.h'
--- storage/innobase/include/ut0ut.h	2008-12-15 10:19:47 +0000
+++ storage/innobase/include/ut0ut.h	2009-02-03 11:46:52 +0000
@@ -20,6 +20,9 @@ typedef time_t	ib_time_t;
 #ifdef HAVE_PAUSE_INSTRUCTION
 #define PAUSE_INSTRUCTION() {__asm__ __volatile__ ("pause");}
 #else
+#ifdef HAVE_FAKE_PAUSE_INSTRUCTION
+#define PAUSE_INSTRUCTION() {__asm__ __volatile__ ("rep; nop");}
+#else
 #ifdef UNIV_SYNC_ATOMIC
 #define PAUSE_INSTRUCTION() \
   { \
@@ -30,6 +33,7 @@ typedef time_t	ib_time_t;
 #define PAUSE_INSTRUCTION()
 #endif
 #endif
+#endif
 
 /************************************************************
 Gets the high 32 bits in a ulint. That is makes a shift >> 32,


=== modified file 'storage/innobase/include/ut0ut.h'
--- storage/innobase/include/ut0ut.h	2008-12-15 08:44:00 +0000
+++ storage/innobase/include/ut0ut.h	2008-12-15 09:05:35 +0000
@@ -23,7 +23,7 @@ typedef time_t	ib_time_t;
 #ifdef UNIV_SYNC_ATOMIC
 #define PAUSE_INSTRUCTION() \
   { \
-    volatile lint volatile_var;
+    volatile lint volatile_var; \
     os_compare_and_swap(volatile_var, 0, 1); \
   }
 #else


=== modified file 'storage/innobase/include/ut0ut.h'
--- storage/innobase/include/ut0ut.h	2008-12-15 09:05:35 +0000
+++ storage/innobase/include/ut0ut.h	2008-12-15 10:19:47 +0000
@@ -24,7 +24,7 @@ typedef time_t	ib_time_t;
 #define PAUSE_INSTRUCTION() \
   { \
     volatile lint volatile_var; \
-    os_compare_and_swap(volatile_var, 0, 1); \
+    os_compare_and_swap(&volatile_var, 0, 1); \
   }
 #else
 #define PAUSE_INSTRUCTION()