<br><font size=2 face="sans-serif">Here are the results from running this
test:</font>
<br>
<br><tt><font size=2>$ mpirun -np 1 -nofree -mode SMP ./build-tests/perf/dcmf/CS.cnk</font></tt>
<br><tt><font size=2>DCMF_THREAD_SINGLE: Called Enter/Exit 10000 times
at 78.0386 cycles each.</font></tt>
<br><tt><font size=2>DCMF_THREAD_FUNNELED: Called Enter/Exit 10000 times
at 78.0054 cycles each.</font></tt>
<br><tt><font size=2>DCMF_THREAD_SERIALIZED: Called Enter/Exit 10000 times
at 78.0029 cycles each.</font></tt>
<br><tt><font size=2>DCMF_THREAD_MULTIPLE: Called Enter/Exit 10000 times
at 180.063 cycles each.</font></tt>
<br><tt><font size=2>$ mpirun -np 1 -nofree -mode DUAL ./build-tests/perf/dcmf/CS.cnk</font></tt>
<br><tt><font size=2>DCMF_THREAD_SINGLE: Called Enter/Exit 10000 times
at 78.0378 cycles each.</font></tt>
<br><tt><font size=2>DCMF_THREAD_FUNNELED: Called Enter/Exit 10000 times
at 78.0032 cycles each.</font></tt>
<br><tt><font size=2>DCMF_THREAD_SERIALIZED: Called Enter/Exit 10000 times
at 78.0019 cycles each.</font></tt>
<br><tt><font size=2>DCMF_THREAD_MULTIPLE: Called Enter/Exit 10000 times
at 196.044 cycles each.</font></tt>
<br>
<br><font size=2 face="sans-serif">While this is more than a doubling in
the time it takes to lock/unlock (about 78 cycles versus 180-196 cycles),
that alone wouldn't drop the one process/thread performance from 4.47 to
1 MMPS. We will look into it more after we get the benchmark.</font>
<br>
<br>
<br><font size=2 face="sans-serif">Thanks,</font>
<br><font size=2 face="sans-serif">Joe Ratterman</font>
<br>
<br>
<br>
<br>
<br>
<table width=100%>
<tr valign=top>
<td width=40%><font size=1 face="sans-serif"><b>Joseph Ratterman/Rochester/IBM@IBMUS</b>
</font>
<p><font size=1 face="sans-serif">02/05/08 11:19 AM</font>
<td width=59%>
<table width=100%>
<tr valign=top>
<td>
<div align=right><font size=1 face="sans-serif">To</font></div>
<td><font size=1 face="sans-serif">DCMF &lt;dcmf@lists.anl-external.org&gt;</font>
<tr valign=top>
<td>
<div align=right><font size=1 face="sans-serif">cc</font></div>
<td><font size=1 face="sans-serif">Joseph Ratterman/Rochester/IBM@IBMUS</font>
<tr valign=top>
<td>
<div align=right><font size=1 face="sans-serif">Subject</font></div>
<td><font size=1 face="sans-serif">[PATCH] This test will repeatedly call
the low-level critical-section functions for performance testing.</font></table>
<br>
<table>
<tr valign=top>
<td>
<td></table>
<br></table>
<br>
<br>
<br><tt><font size=2>This is helpful when trying to understand performance
degradations in MPI_THREAD_MULTIPLE.<br>
<br>
Signed-off-by: Joe Ratterman &lt;jratt@us.ibm.com&gt;<br>
---<br>
sys/tests/perf/Makefile.in |
2 +-<br>
sys/tests/perf/dcmf/CS.c |
64 +++++++++++++++++++++++++++++++++<br>
sys/tests/perf/{ => dcmf}/Makefile.in | 4 +-<br>
3 files changed, 67 insertions(+), 3 deletions(-)<br>
create mode 100644 sys/tests/perf/dcmf/CS.c<br>
copy sys/tests/perf/{ => dcmf}/Makefile.in (96%)<br>
<br>
diff --git a/sys/tests/perf/Makefile.in b/sys/tests/perf/Makefile.in<br>
index 22a7ac7..4266989 100644<br>
--- a/sys/tests/perf/Makefile.in<br>
+++ b/sys/tests/perf/Makefile.in<br>
@@ -12,6 +12,6 @@<br>
# end_generated_IBM_copyright_prolog
#<br>
<br>
VPATH
=
@abs_srcdir@<br>
-SUBDIRS
=
mpi spi mpid<br>
+SUBDIRS
=
mpi spi mpid dcmf<br>
TESTS
=
<br>
include @abs_top_builddir@/Make.rules<br>
diff --git a/sys/tests/perf/dcmf/CS.c b/sys/tests/perf/dcmf/CS.c<br>
new file mode 100644<br>
index 0000000..080f5df<br>
--- /dev/null<br>
+++ b/sys/tests/perf/dcmf/CS.c<br>
@@ -0,0 +1,64 @@<br>
+/* begin_generated_IBM_copyright_prolog
*/<br>
+/*
*/<br>
+/* ---------------------------------------------------------------- */<br>
+/* (C)Copyright IBM Corp. 2007, 2008
*/<br>
+/* IBM CPL License
*/<br>
+/* ---------------------------------------------------------------- */<br>
+/*
*/<br>
+/* end_generated_IBM_copyright_prolog
*/<br>
+/**<br>
+ * \file perf/dcmf/CS.c<br>
+ * \brief Test the performance of the low-level critical-section functions<br>
+ */<br>
+<br>
+<br>
+#include &lt;tests.h&gt;<br>
+#define NUM 10000<br>
+DCMF_Configure_t config;<br>
+<br>
+<br>
+double time_CS(uint32_t x)<br>
+{<br>
+ uint64_t start, stop;<br>
+ uint32_t i;<br>
+<br>
+ start = DCMF_Timebase();<br>
+ for (i=0; i&lt;x; ++i) {<br>
+ DCMF_CriticalSection_enter(0);<br>
+ DCMF_CriticalSection_exit(0);<br>
+ }<br>
+ stop = DCMF_Timebase();<br>
+<br>
+ return (double)(stop-start)/(double)x;<br>
+}<br>
+<br>
+<br>
+#define time_run(c) time_run_long(c, #c)<br>
+void time_run_long(DCMF_Thread thread_level, char* thread_string)<br>
+{<br>
+ double time;<br>
+ DCMF_Result rc;<br>
+<br>
+ config.thread_level = thread_level;<br>
+ rc = DCMF_Messager_configure (&config, &config);<br>
+ assert(rc == DCMF_SUCCESS);<br>
+ assert(config.thread_level == thread_level);<br>
+ time = time_CS(NUM);<br>
+ printf("%s: Called Enter/Exit %u times at %g cycles each.\n",
thread_string, NUM, time);<br>
+}<br>
+<br>
+<br>
+int main()<br>
+{<br>
+ config.interrupts = DCMF_INTERRUPTS_OFF;<br>
+<br>
+ MPI_INIT;<br>
+<br>
+ time_run(DCMF_THREAD_SINGLE);<br>
+ time_run(DCMF_THREAD_FUNNELED);<br>
+ time_run(DCMF_THREAD_SERIALIZED);<br>
+ time_run(DCMF_THREAD_MULTIPLE);<br>
+<br>
+ MPI_FINALIZE;<br>
+ return (0);<br>
+}<br>
diff --git a/sys/tests/perf/Makefile.in b/sys/tests/perf/dcmf/Makefile.in<br>
similarity index 96%<br>
copy from sys/tests/perf/Makefile.in<br>
copy to sys/tests/perf/dcmf/Makefile.in<br>
index 22a7ac7..4c474b6 100644<br>
--- a/sys/tests/perf/Makefile.in<br>
+++ b/sys/tests/perf/dcmf/Makefile.in<br>
@@ -12,6 +12,6 @@<br>
# end_generated_IBM_copyright_prolog
#<br>
<br>
VPATH
=
@abs_srcdir@<br>
-SUBDIRS
=
mpi spi mpid<br>
-TESTS
=
<br>
+SUBDIRS
=
<br>
+TESTS
=
CS.c<br>
include @abs_top_builddir@/Make.rules<br>
-- <br>
1.5.4<br>
<br>
</font></tt>
<br>