# Invocation command line:
# /root/work/cpu2006-amd421gh/bin/runspec -c amd421gh.cfg -F amd421gh-flags.xml --rate 8 -T all fp
# output_root was not used for this run
############################################################################
# AMD64 SPEC CPU2006 Configuration File for 64-bit Linux (Quad-Core)
#
# Compiler name/version:    PGI 7.2, Pathscale 3.2
# Operating system version: 64-bit SLES10 SP1
# Hardware:                 AMD Opteron (Quad-core)
# FP Base Pointer Size:     64-bit only
# FP Peak Pointer Size:     32/64-bit
# INT Base Pointer Size:    32/64-bit
# INT Peak Pointer Size:    32/64-bit
# Auto Parallelization:     Not Used
#
# Important! Please run with your stack size set to 'unlimited'.
# Failure to do so may cause 483.xalancbmk to get a stack overflow during execution.
#   Using csh:  unlimit
#   Using bash: ulimit -s unlimited
#
# Set your LD_LIBRARY_PATH to the location of the dependency runtime libraries
#
# Please adjust the SMARTHEAP_DIR variable to the directory containing the Smartheap library.
#####################################################################
# Macro section
####################################################################

#################PGI MACRO notes
# Modify this section to use the appropriate architecture flags
# (pgi_tp64 is used by the 64-bit PGI builds, pgi_tp32 by the 32-bit ones).
%define pgi_tp64 -tp barcelona-64
%define pgi_tp32 -tp barcelona

# Huge-page count handed to -Msmartalloc=huge:<n>.
# Change this to 896 (Huge Pages=hp) for systems with 4GB of memory per copy
%define pgi_hp 150
# NOTE(review): pgi_sys_hp is not referenced in the visible part of this file;
# its value matches the vm/nr_hugepages setting recorded in the OS notes.
%define pgi_sys_hp 1200

# Adjust the build jobs to the number of concurrent build processes
%define build_jobs 4

##################PATHSCALE MACRO notes
# If you are building and running on a Linux distro that
# uses the GCC v3.x compilers by default, then you need to either
# use "--define gnu3_fe" on the runspec command line or uncomment
# the "%define gnu3_fe" line below:
#
# %define gnu4_fe
# %define gnu3_fe
#
# gnu4_fe is the default, so really does not need to be defined.
#####################################################################
# Header Section
#####################################################################
ext = amd421ghb
ignore_errors = no
tune = base,peak
output_format = asc,pdf,raw,flags,cfg,html,csv
# NOTE(review): 'flagsusrl' is probably a misspelling of 'flagsurl'. The
# recorded invocation passed the flags file with -F on the command line, so
# confirm how the flags file should be supplied before renaming this key.
flagsusrl = ./amd421gh-flags.xml
size = test,train,ref
check_md5 = yes
reportable = yes
env_vars = no
teeout = yes
teerunout = yes
mean_anyway = yes
# Parallel build width comes from the build_jobs macro.
makeflags = -j %{build_jobs}

####################################################################
# Include file containing the SUT hardware information
# as well as the submit command, tester information and notes
####################################################################
#include: AMD421sut.inc
# ----- Begin inclusion of 'AMD421sut.inc'
############################################################################
#####################################################################
# Submit Section
#####################################################################
# RATE
# This config file is set to run these binaries on a 2P, 8 core system. If this
# does not match the description of your system, change this config file,
# specifically "bind0", "bind1", "bind2", etc. to match your system. Use
# "man numactl" and "numactl --hardware" to better understand how to use
# this command.
# Each bindN pins one copy to one physical CPU and allocates its memory on
# node 0 (even CPUs) or node 1 (odd CPUs).
bind0 = numactl -m 0 --physcpubind=0
bind1 = numactl -m 1 --physcpubind=1
bind2 = numactl -m 0 --physcpubind=2
bind3 = numactl -m 1 --physcpubind=3
bind4 = numactl -m 0 --physcpubind=4
bind5 = numactl -m 1 --physcpubind=5
bind6 = numactl -m 0 --physcpubind=6
bind7 = numactl -m 1 --physcpubind=7

# SPEED
# Below is an example numactl command for an auto-parallel speed run using
# 4 threads on a single Quad-core chip.
# Please adjust as needed for your system
#
# bind0 = numactl -l --physcpubind=0,1,2,3
use_submit_for_speed = 1
# The benchmark command is written to run.sh and then launched under the
# numactl binding selected for the copy.
submit = echo "$command" > run.sh ; $BIND bash run.sh

####################################################################
# Tester information
####################################################################
license_num = 11
prepared_by = IBM Corporation
tester = Advanced Micro Devices
test_sponsor = IBM Corporation
test_date = Jun-2008
hw_vendor = IBM Corporation
hw_model = IBM System x3655 (AMD Opteron 2344 HE)

####################################################################
# Hardware information
####################################################################
default=default=default=default:
hw_avail = Aug-2008
hw_cpu_name = AMD Opteron 2344 HE
hw_cpu_mhz = 1700
hw_fpu = Integrated
hw_nchips = 2
hw_ncores = 8
hw_ncoresperchip = 4
hw_nthreadspercore = 1
hw_ncpuorder = 1,2 chips
hw_pcache = 64 KB I + 64 KB D on chip per core
hw_scache = 512 KB I+D on chip per core
hw_tcache = 2 MB I+D on chip per chip
hw_ocache = None
hw_disk = 1 x 73.4 GB SAS, 15000 RPM
hw_memory = 16 GB (8 x 2 GB, DDR2-667 CL5 Reg Dual Rank)
hw_other = None
sw_file = ext3
sw_os000 = SuSE Linux Enterprise Server 10 (x86_64) SP1,
sw_os001 = Kernel 2.6.16.46-0.12-smp
#sw_os000 = Red Hat Enterprise Linux Server release 5.1,
#sw_os001 = Kernel 2.6.18-53.el5
sw_state = Run level 3 (Full multiuser with network)

#####################################################################
# Notes
#####################################################################
notes_os_000 = 'numactl' was used to bind copies to the cores
notes_os_005 = Environment variable PGI_HUGE_PAGES set to 150
#notes_os_005 = Environment variable PGI_HUGE_PAGES set to 896
notes_os_010 = 'ulimit -s unlimited' was used to set environment stack size
notes_os_015 = 'ulimit -l 2097152' was used to set environment locked pages in memory limit
# Uncomment according to your machine configuration
# 2P configuration
notes_os_020 = Set vm/nr_hugepages=1200 in /etc/sysctl.conf
#notes_os_020 = Set vm/nr_hugepages=1792 in /etc/sysctl.conf
notes_os_025 = mount -t hugetlbfs nodev /mnt/hugepages
#notes_os_030 = powersave -f was used to set the CPU frequency to its maximum.
# ---- End inclusion of '/root/work/cpu2006-amd421gh/config/AMD421sut.inc'

############################ Software Info ############################
fp=default=default=default:
sw_peak_ptrsize = 32/64-bit
sw_base_ptrsize = 64-bit
sw_auto_parallel = No
sw_avail = Jun-2008
sw_compiler000 = PGI Server Complete Version 7.2
sw_compiler001 = PathScale Compiler Suite Version 3.2
sw_other = None

int=default=default=default:
sw_peak_ptrsize = 32/64-bit
sw_base_ptrsize = 32/64-bit
sw_auto_parallel = No
sw_avail = Jun-2008
sw_compiler1 = PGI Server Complete Version 7.2
sw_compiler2 = PathScale Compiler Suite Version 3.2
sw_other = SmartHeap 8.0 32-bit Library for Linux

default=default=default=default:
#####################################################################
# Compiler selection
# Make sure your path is set to use the PGI 7.2 compiler as well
# as the Pathscale 3.2 compiler, which is used in peak
# example: CC = pathcc
# default compiler is PGI
CC = pgcc
CXX = pgcpp
FC = pgf95
# Directory searched (-L) for the SmartHeap library by the peak C++ builds
# that link -lsmartheap.
SMARTHEAP_DIR = /root/work/cpu2006-amd421gh/amd421gh.libs/32

#####################################################################
# Optimization
#####################################################################
default=base=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi

#####################################################################
# BASE FLAGS
#####################################################################
fp=base=default=default:
COPTIMIZE = -fastsse -Msmartalloc=huge:%{pgi_hp} -Mfprelaxed -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
FOPTIMIZE = -fastsse -Mfprelaxed -Msmartalloc=huge:%{pgi_hp} -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
CXXOPTIMIZE = -fastsse -Msmartalloc=huge:%{pgi_hp} -Mfprelaxed --zc_eh -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}

int=base=default=default:
# C uses the 64-bit target macro while C++ uses the 32-bit one.
COPTIMIZE = -fastsse -Msmartalloc=huge:%{pgi_hp} -Mfprelaxed -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
CXXOPTIMIZE = -fastsse -Msmartalloc=huge:%{pgi_hp} -Mfprelaxed --zc_eh -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp32}

#####################################################################
## INT Peak Flags
#####################################################################
int=peak=default=default:
EXTRA_LIBS =
feedback = 0
basepeak = 0

#PATHSCALE compile
400.perlbench=peak=default=default:
CC = pathcc -march=barcelona
COPTIMIZE = -Ofast -IPA:plimit=20000 -LNO:opt=0 -WOPT:if_conv=0 -CG:local_sched_alg=1
PASS1_CFLAGS = -fb_create fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_CFLAGS = -fb_opt fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

#PGI compile C
401.bzip2=peak=default=default:
CC = pgcc
EXTRA_LDFLAGS = -Bstatic_pgi
COPTIMIZE = -fastsse -O4 -Msmartalloc=huge:%{pgi_hp} -Mprefetch=t0 -Mnounroll %{pgi_tp64}
PASS1_CFLAGS = -Mpfi=indirect
PASS1_LDFLAGS = -Mpfi=indirect
PASS2_CFLAGS = -Mpfo=indirect
PASS2_LDFLAGS = -Mpfo=indirect
feedback = 1

#PATHSCALE compile C
403.gcc=peak=default=default:
CC = pathcc -march=barcelona
COPTIMIZE = -O3 -OPT:Ofast -m32
PASS1_CFLAGS = -fb_create fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_CFLAGS = -fb_opt fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

#PGI compile C
429.mcf=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CC = pgcc
COPTIMIZE = -fastsse -Msmartalloc=huge:%{pgi_hp} -Mipa=jobs:%{build_jobs},fast,inline:1 %{pgi_tp32}
feedback = 0
basepeak = 0

#PATHSCALE compile C
445.gobmk=peak=default=default:
CC = pathcc -march=barcelona
COPTIMIZE = -O3 -OPT:alias=restrict -LNO:prefetch=1:ignore_feedback=off -CG:p2align=on
PASS1_CFLAGS = -fb_create fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_CFLAGS = -fb_opt fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

#PGI compile C
456.hmmer=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CC = pgcc
COPTIMIZE = -fastsse -Mvect=partial -Munroll=n:8 -Msmartalloc=huge:%{pgi_hp} -Msafeptr -Mprefetch=t0 -Mfprelaxed -Mipa=jobs:%{build_jobs},const,ptr,arg,inline %{pgi_tp64}
feedback = 0
basepeak = 0

#PGI compiler C
458.sjeng=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CC = pgcc
COPTIMIZE = -fastsse -Msmartalloc=huge:%{pgi_hp} -Mfprelaxed %{pgi_tp64}
# Two-pass build: pass 1 uses -Mpfi, pass 2 uses -Mpfo together with IPA.
PASS1_CFLAGS = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_CFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline:1,noarg
PASS2_LDFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline:1,noarg
feedback = 1
basepeak = 0

#PGI compiler C
462.libquantum=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CC = pgcc
COPTIMIZE = -fastsse -Munroll=m:8 -Msmartalloc=huge:%{pgi_hp} -Mprefetch=distance:4 -Mfprelaxed -Mipa=jobs:%{build_jobs},fast,inline,noarg %{pgi_tp64}
feedback = 0
basepeak = 0

#PATHSCALE compile C
464.h264ref=peak=default=default:
CC = pathcc -march=barcelona
COPTIMIZE = -O3 -IPA:plimit=20000 -OPT:alias=disjoint -LNO:prefetch=0 -CG:ptr_load_use=0:push_pop_int_saved_regs=off
PASS1_CFLAGS = -fb_create fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_CFLAGS = -fb_opt fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

#PATHSCALE compile C++
471.omnetpp=peak=default=default:
CXX = pathCC -march=barcelona
CXXOPTIMIZE = -Ofast -CG:gcm=off -INLINE:aggressive=on -OPT:alias=disjoint -WOPT:if_conv=0 -m32
# Links the SmartHeap library found under SMARTHEAP_DIR.
EXTRA_CXXLIBS = -L$(SMARTHEAP_DIR) -lsmartheap
feedback = 0

#PGI compiler C++
473.astar=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CXX = pgcpp
CXXOPTIMIZE = -fastsse -O4 -Msmartalloc=huge:%{pgi_hp} -Msafeptr=global -Mfprelaxed --zc_eh %{pgi_tp32}
PASS1_CXXFLAGS = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_CXXFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline:6
PASS2_LDFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline:6
feedback = 1
basepeak = 0

#PATHSCALE compile
483.xalancbmk=peak=default=default:
CXX = pathCC -march=barcelona
CXXOPTIMIZE = -Ofast -OPT:unroll_times_max=8 -CG:push_pop_int_saved_regs=off:ptr_load_use=0 -m32
EXTRA_CXXLIBS = -L$(SMARTHEAP_DIR) -lsmartheap
feedback = 0

#####################################################################
## FP Peak Flags
#####################################################################

#PGI compile F77
410.bwaves=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
FC = pgf95
FOPTIMIZE = -fastsse -Msmartalloc -Mprefetch=distance:12,nta -Mpre -Mfprelaxed %{pgi_tp64}
PASS1_FFLAGS = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_FFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline
PASS2_LDFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline
feedback = 1
basepeak = 0

#PATHSCALE compile Fortran
416.gamess=peak=default=default:
FC = pathf95 -march=barcelona
FOPTIMIZE = -O2 -OPT:Ofast:ro=3:unroll_size=256
PASS1_FFLAGS = -fb_create fbdata
PASS2_FFLAGS = -fb_opt fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

#PGI compile C
433.milc=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CC = pgcc
COPTIMIZE = -fastsse -Msmartalloc=huge:%{pgi_hp} -Msafeptr -Mfprelaxed -Mipa=jobs:%{build_jobs},inline,arg,const,ptr,shape %{pgi_tp64}
feedback = 0
basepeak = 0

#PGI compile Fortran
434.zeusmp=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
FC = pgf95
# basepeak is enabled for this benchmark only; no peak-specific optimization
# flags are listed for it.
basepeak = 1

#PGI compile C/F
435.gromacs=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
FC = pgf95
CC = pgcc
FOPTIMIZE = -fastsse -Msmartalloc=huge:%{pgi_hp} -Mfprelaxed -Mfpapprox=rsqrt -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
COPTIMIZE = -fastsse -Msmartalloc=huge:%{pgi_hp} -Mfprelaxed -Mfpapprox=rsqrt -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
feedback = 0
basepeak = 0

#PATHSCALE compile C/F
436.cactusADM=peak=default=default:
FC = pathf95 -march=barcelona
CC = pathcc -march=barcelona
FOPTIMIZE = -Ofast -LNO:blocking=off
COPTIMIZE = -Ofast -LNO:blocking=off
PASS1_FFLAGS = -fb_create fbdata
PASS2_FFLAGS = -fb_opt fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_LDFLAGS = -fb_opt fbdata
PASS1_CFLAGS = -fb_create fbdata
PASS2_CFLAGS = -fb_opt fbdata
feedback = 1

#PGI compile F90
437.leslie3d=peak=default=default:
FC = pgf95
EXTRA_LDFLAGS = -Bstatic_pgi
FOPTIMIZE = -fastsse -Mvect=fuse -Msmartalloc=huge:%{pgi_hp} -Mprefetch=distance:8,t0 -Mfprelaxed %{pgi_tp64}
PASS1_FFLAGS = -Mpfi=indirect
PASS1_LDFLAGS = -Mpfi=indirect
PASS2_FFLAGS = -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inline
PASS2_LDFLAGS = -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inline
feedback = 1
basepeak = 0

#PGI compile C++
444.namd=peak=default=default:
CXX = pgcpp
EXTRA_LDFLAGS = -Bstatic_pgi
CXXOPTIMIZE = -fastsse -Munroll=n:4,m:8 -Msmartalloc=huge:%{pgi_hp} -Mnodepchk -Mfprelaxed --zc_eh %{pgi_tp64}
PASS1_CXXFLAGS = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_CXXFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline
PASS2_LDFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline
feedback = 1
basepeak = 0

#PATHSCALE compile C++
# The -lm flag depends on the level of gcc on the system; select it with the
# gnu3_fe macro described in the macro section.
447.dealII=peak=default=default:
CXX = pathCC -march=barcelona
%ifdef %{gnu3_fe}
CXXOPTIMIZE = -Ofast -INLINE:aggressive=on -LNO:opt=0 -OPT:alias=disjoint -fno-exceptions -m32
feedback = 0
%else
CXXOPTIMIZE = -Ofast -static -INLINE:aggressive=on -fno-exceptions -m32
LDCXXFLAGS = -lm
feedback = 0
%endif

#PATHSCALE compiler C++
450.soplex=peak=default=default:
CXX = pathCC -march=barcelona
CXXOPTIMIZE = -O3 -TENV:frame_pointer=off -LNO:prefetch=1 -OPT:malloc_alg=1 -CG:load_exe=0 -m32
PASS1_CXXFLAGS = -fb_create fbdata
PASS2_CXXFLAGS = -fb_opt fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

#PATHSCALE compiler C++
453.povray=peak=default=default:
CXX = pathCC -march=barcelona
CXXOPTIMIZE = -Ofast
PASS1_CXXFLAGS = -fb_create fbdata
PASS2_CXXFLAGS = -fb_opt fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

#PGI Compile F/C
454.calculix=peak=default=default:
CC = pgcc
FC = pgf95
EXTRA_LDFLAGS = -Bstatic_pgi
FOPTIMIZE = -fastsse -Msmartalloc=huge:%{pgi_hp} -Mprefetch=t0 -Mpre -Mfprelaxed %{pgi_tp64}
COPTIMIZE = -fastsse -Msmartalloc=huge:%{pgi_hp} -Mprefetch=t0 -Mpre -Mfprelaxed %{pgi_tp64}
PASS1_FFLAGS = -Mpfi=indirect
PASS1_CFLAGS = -Mpfi=indirect
PASS1_LDFLAGS = -Mpfi=indirect
PASS2_FFLAGS = -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inline
PASS2_CFLAGS = -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inline
PASS2_LDFLAGS = -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inline
feedback = 1
basepeak = 0

#PATHSCALE compile F90
459.GemsFDTD=peak=default=default:
FC = pathf95 -march=barcelona
FOPTIMIZE = -Ofast -LNO:fission=2:simd=2:prefetch_ahead=1 -CG:load_exe=0
feedback = 0

#PATHSCALE compile F95
465.tonto=peak=default=default:
FC = pathf95 -march=barcelona
FOPTIMIZE = -Ofast -OPT:alias=no_f90_pointer_alias -LNO:blocking=off -CG:load_exe=1 -IPA:plimit=525
feedback = 0

#PATHSCALE compile C
470.lbm=peak=default=default:
CC = pathcc -march=barcelona
COPTIMIZE = -Ofast -CG:sse_cse_regs=0 -CG:locs_shallow_depth=1 -m3dnow
feedback = 0

#PGI compile F/C
481.wrf=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CC = pgcc
FC = pgf95
FOPTIMIZE = -fastsse -Mvect=noaltcode -Msmartalloc -Mprefetch=distance:8 -Mfprelaxed %{pgi_tp64}
COPTIMIZE = -fastsse -Mvect=noaltcode -Msmartalloc -Mprefetch=distance:8 -Mfprelaxed %{pgi_tp64}
feedback = 0
basepeak = 0

#PGI compile C
482.sphinx3=peak=default=default:
CC = pgcc
EXTRA_LDFLAGS = -Bstatic_pgi
COPTIMIZE = -fastsse -Mfprelaxed -Msmartalloc %{pgi_tp64}
PASS1_CFLAGS = -Mpfi=indirect
PASS1_LDFLAGS = -Mpfi=indirect
PASS2_CFLAGS = -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inline
PASS2_LDFLAGS = -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inline
feedback = 1
basepeak = 0

#####################################################################
# Portability
#####################################################################
# Set for all base and peak benchmarks unless changed below per benchmark.
# The FP suite sets PORTABILITY; the INT suite sets only CPORTABILITY.
fp=default=default=default:
PORTABILITY = -DSPEC_CPU_LP64

int=default=default=default:
CPORTABILITY = -DSPEC_CPU_LP64
####################################################################
# INT Portability
###################################################################
400.perlbench=default=default=default:
CPORTABILITY = -DSPEC_CPU_LP64 -DSPEC_CPU_LINUX_X64

# The peak builds of 403.gcc (-m32) and 429.mcf (32-bit PGI target macro)
# clear the LP64 define set for the INT suite.
403.gcc=peak=default=default:
CPORTABILITY =

429.mcf=peak=default=default:
CPORTABILITY =

462.libquantum=default=default=default:
CPORTABILITY = -DSPEC_CPU_LP64 -DSPEC_CPU_LINUX

471.omnetpp=peak=default=default:
# PATHSCALE peak - Needed to avoid -DSPEC_CPU_LP64 on -m32 C++ codes
PORTABILITY =

483.xalancbmk=default=default=default:
CXXPORTABILITY = -DSPEC_CPU_LINUX

483.xalancbmk=peak=default=default:
# PATHSCALE peak - Needed to avoid -DSPEC_CPU_LP64 on -m32 C++ codes
PORTABILITY =

#####################################################################
# FP Portability
#####################################################################
435.gromacs=default=default=default:
LDPORTABILITY = -Mnomain

436.cactusADM=base=default=default:
LDPORTABILITY = -Mnomain

436.cactusADM=peak=default=default:
# PATHSCALE peak port
FPORTABILITY = -fno-second-underscore

447.dealII=peak=default=default:
# PATHSCALE peak port - Needed to avoid -DSPEC_CPU_LP64
PORTABILITY =

# The table workaround define is applied only when gnu3_fe is defined.
%ifdef %{gnu3_fe}
447.dealII=default=default:
CXXPORTABILITY = -DSPEC_CPU_TABLE_WORKAROUND
%else
447.dealII=default=default:
CXXPORTABILITY =
%endif

450.soplex=peak=default=default:
# PATHSCALE peak port - Needed to avoid -DSPEC_CPU_LP64
PORTABILITY =

454.calculix=default=default=default:
LDPORTABILITY = -Mnomain

481.wrf=default=default=default:
CPORTABILITY = -DSPEC_CPU_CASE_FLAG -DSPEC_CPU_LINUX

# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
flagsurl000 = http://www.spec.org/cpu2006/flags/amd421GH-flags.20090713.00.xml