# Invocation command line:
# /root/work/cpu2006v1.1/bin/runspec -c linux64-pgi724-shang-speed.cfg --define NCPUS=16 -T all fp --nopreenv --note-preenv
# output_root was not used for this run
############################################################################
# PGI 7.2 CPU2006 Speed (Auto-par) Configuration File for 64-bit Linux (Quad-Core)
#
# CPU2006 Version 1.1
# Compiler name/version:       PGI 7.2
# Configuration Revision:      2
# Operating system version:    64-bit Linux
# Hardware:                    AMD Opteron (Quad-core)
# FP Base Pointer Size:        64-bit only
# FP Peak Pointer Size:        64-bit only
# INT Base Pointer Size:       32/64-bit
# INT Peak Pointer Size:       32/64-bit
# Auto Parallelization:        Yes
#
# Example runspec commands:
#    runspec -c linux64-pgi724-shang-speed --define SPEC_DIR=$SPEC --define NCPUS=`pgi72/bin/getNCPUS.out` fp
#    runspec -c linux64-pgi724-shang-speed --define SPEC_DIR=$SPEC --define NCPUS=`pgi72/bin/getNCPUS.out` int
#
# Important!  Please run with your stack size set to 'unlimited'.
# Failure to do so may cause 483.xalancbmk to get a stack overflow during execution.
#    Using csh:   unlimit
#    Using bash:  ulimit -s unlimited
#
# Your LD_LIBRARY_PATH must be set to the location of the Smartheap runtime libraries.
# The library should be located in $SPEC/pgi72/linux_lib32.  The LD_LIBRARY_PATH will
# be set automatically if you use the runspec flag '--define SPEC_DIR=$SPEC'.
#
# You can define the base output root directory from the command line using '--define OROOT_DIR=/dir'.
# The full output_root will then be set to '%{OROOT_DIR}/${username}/CPU2006'
#
# If building new binaries, please adjust the SHL_DIR variable to the directory containing
# the Smartheap library.
#
# Huge Pages:
# This configuration does not set the huge page maximum.
# Note that you can override the huge page setting at runtime using the environment variable
# PGI_HUGE_PAGES.  Also, the binaries will run on systems not configured for huge pages
# but will revert to using the default page size.
#
# To configure huge pages on your system, each time you reboot do the following
# as root.
#
#   echo "7168" >/proc/sys/vm/nr_hugepages
#   test -d /mnt/hugetlb || mkdir /mnt/hugetlb
#   mount -t hugetlbfs none /mnt/hugetlb
#   chmod 777 /mnt/hugetlb
#   ulimit -l 2097152
#   su
#   start your CPU2006 run.
#
# Or you can make these settings permanent by adding the following
# in /etc/rc.d/boot.local add:
#
#   # hugepages
#   test -d /mnt/hugetlb || mkdir /mnt/hugetlb
#   mount -t hugetlbfs none /mnt/hugetlb
#   chmod 777 /mnt/hugetlb
#
# in /etc/sysctl.conf set:
#   vm.nr_hugepages = 7168
#
# in /etc/security/limits.conf set the maximum block size that
# can be set in memory:
#   * hard memlock 2097152
#   * soft memlock 2097152
#   * - stack unlimited
#
# Note that this configuration uses the PGI flag "-Mloop32".  You must have
# binutils 2.18 in order to use this flag since older assemblers contain
# a bug which may cause performance issues when this flag is used.  If you
# do not have binutils 2.18 available please set the macro "pgi_loop32"
# to blank.
# Note that binutils 2.18 can be found at http://ftp.gnu.org/gnu/binutils/
#
#####################################################################
# Macro section
####################################################################
# Modify this section to use the appropriate architecture flags.
# The per-benchmark optimization settings expand these macros as
# %{pgi_tp64} and %{pgi_tp32}.
%define pgi_tp64 -tp barcelona-64
%define pgi_tp32 -tp barcelona-32
# NOTE(review): no %{pgi_sys_hp} reference appears in the reviewed portion
# of this file; presumably the system huge-page count -- confirm before removing.
%define pgi_sys_hp 14336

# Adjust the build jobs to the number of concurrent build processes
# (used by makeflags and by every -Mipa=jobs:%{build_jobs} option).
%define build_jobs 8

# Set this to blank if you don't want to use -Mloop32
%define pgi_loop32 -Mloop32

#####################################################################
# Header Section
#####################################################################
ext            = pgi724_shang_speed
ignore_errors  = no
tune           = base,peak
output_format  = asc,pdf,raw,flags,cfg,html,csv
size           = test,train,ref
check_md5      = yes
reportable     = yes
env_vars       = no
teeout         = yes
mean_anyway    = yes
flagsurl000    = http://www.spec.org/cpu2006/flags/pgi72_linux_flags.20090713.xml

# output_root is only overridden when OROOT_DIR is supplied with --define.
%ifdef %{OROOT_DIR}
output_root    = %{OROOT_DIR}/${username}/CPU2006
allow_extension_override = yes
%endif

makeflags      = -j %{build_jobs}

####################################################################
# Include file containing the SUT hardware information
# as well as the submit command, tester information and notes
####################################################################
#include: pgi_sut_speed.cfg
# ----- Begin inclusion of 'pgi_sut_speed.cfg'
############################################################################
# Set NCPUS environment variable for PGI autopar binaries
# (only when NCPUS is supplied with --define).
%ifdef %{NCPUS}
preENV_NCPUS=%{NCPUS}
%endif

# Set location of run time libraries
preENV_LD_LIBRARY_PATH = $[top]/pgi72/linux_lib64:$[top]/pgi72/linux_lib32

# Run the sync command after setup
post_setup=sync

#####################################################################
# Notes
#####################################################################
default=default=default=default:
notes_submit_000 = 'numactl' was used to bind copies to the cores.
notes_os_000 = Environment stack size set to 'unlimited'.
notes_os_005 = The powersaved was disabled, set the CPU frequency to its maximum.
notes_os_010 = Total number of huge pages available is 14336.
notes_os_015 = 'ulimit -l 2097152' was used to set environment locked pages in memory quantity.
notes_os_020 = Set vm/nr_hugepages=14336 in /etc/sysctl.conf
notes_os_025 = mount -t hugetlbfs nodev /mnt/hugepages

#####################################################################
# Submit Section
#####################################################################
# Below is the numactl command used for a speed run; it writes the
# benchmark command to run.sh and binds it to cores 0-15.
# Please adjust as needed for your system
#
# Note that some peak rules (those run serially) may include a
# modified submit command.
use_submit_for_speed = 1
submit = echo "$command" > run.sh ; numactl --physcpubind=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 bash run.sh

####################################################################
# Tester information
####################################################################
license_num     = 11
prepared_by     = IBM Corporation
tester          = Advanced Micro Devices
test_sponsor    = IBM Corporation
hw_vendor       = IBM Corporation
hw_model        = IBM System x3755 (AMD Opteron 8378)

####################################################################
# Hardware information
####################################################################
default=default=default=default:
hw_avail           = Mar-2009
hw_cpu_name        = AMD Opteron 8378
hw_cpu_mhz         = 2400
hw_fpu             = Integrated
hw_nchips          = 4
hw_ncores          = 16
hw_ncoresperchip   = 4
hw_nthreadspercore = 1
hw_ncpuorder       = 1,2,3,4 chips
hw_pcache          = 64 KB I + 64 KB D on chip per core
hw_scache          = 512 KB I+D on chip per core
hw_tcache          = 6 MB I+D on chip per chip
hw_ocache          = None
hw_disk            = 1 x 73.4 GB SAS, 15000 RPM
hw_memory          = 64 GB (16 x 4 GB, DDR2-667 CL5 Reg Dual Rank)
hw_other           = None
sw_file            = ReiserFS
sw_os000           = SuSE Linux Enterprise Server 10 (x86_64) SP1,
sw_os001           = Kernel 2.6.16.46-0.12-smp
sw_state           = Run level 3 (Full multiuser with network)
# ---- End inclusion of '/root/work/cpu2006v1.1/config/pgi_sut_speed.cfg'

############################ Software Info ############################
fp=default=default=default:
# Every fp base flag set in this file targets %{pgi_tp64}, while fp peak
# includes a %{pgi_tp32} build (447.dealII), so base is 64-bit only and
# peak is mixed.
sw_peak_ptrsize    = 32/64-bit
sw_base_ptrsize    = 64-bit
sw_avail           = May-2008
sw_compiler        = PGI Server Complete Version 7.2
sw_other           = binutils 2.18.50

int=default=default=default:
# int base mixes %{pgi_tp64} (C) and %{pgi_tp32} (C++).
sw_peak_ptrsize    = 32/64-bit
sw_base_ptrsize    = 32/64-bit
sw_other000        = SmartHeap 8.1 32-bit Library for Linux
sw_other001        = binutils 2.18.50
sw_avail           = May-2008
sw_compiler1       = PGI Server Complete Version 7.2

default=default=default=default:
#####################################################################
# Compiler selection
#
#
CC      = pgcc
CXX     = pgcpp
FC      = pgf95
# Directory holding the SmartHeap library; adjust when building new binaries.
SHL_DIR = /proj/qa/smartheap/SmartHeap_8.1/lib

#####################################################################
# Optimization
#####################################################################
default=default=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
OPTIMIZE      = -Mvect=cachesize:6291456

#####################################################################
# BASE FLAGS
#####################################################################
fp=base=default=default:
COPTIMIZE   = -fastsse -Msmartalloc=huge -Mconcur -Mfprelaxed -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
FOPTIMIZE   = -fastsse -Mfprelaxed -Msmartalloc=huge -Mconcur -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
CXXOPTIMIZE = -fastsse -Msmartalloc=huge -Mfprelaxed -Mconcur --zc_eh -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}

int=base=default=default:
COPTIMIZE   = -fastsse -Msmartalloc=huge %{pgi_loop32} -Mconcur=innermost -Mfprelaxed -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
CXXOPTIMIZE = -fastsse -Msmartalloc=huge %{pgi_loop32} -Mfprelaxed --zc_eh -Mipa=jobs:%{build_jobs},fast,inline:10 %{pgi_tp32}
#####################################################################
# PEAK FLAGS
#####################################################################
# Conventions used by the peak sections in this part of the file:
#   - Sections that define PASS1_*/PASS2_* flags (-Mpfi / -Mpfo) set
#     feedback = 1; every other section sets feedback = 0.
#   - Sections with basepeak = 1 define no active peak flags (any flag
#     line they carry is commented out).
#   - Sections with their own 'submit' line run the benchmark through
#     'numactl -m 0 --physcpubind=0' rather than the default submit command.

400.perlbench=peak=default=default:
COPTIMIZE     = -fastsse -O4 -Msmartalloc=huge -Mnovect -Mnounroll -Mfprelaxed %{pgi_tp64}
PASS1_CFLAGS  = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_CFLAGS  = -Mpfo -Mipa=jobs:%{build_jobs},inline
PASS2_LDFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},inline
feedback      = 1
basepeak      = 0
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh

401.bzip2=peak=default=default:
COPTIMIZE     = -fastsse -O4 -Msmartalloc=huge -Mprefetch=t0 -Mnounroll %{pgi_tp64}
PASS1_CFLAGS  = -Mpfi=indirect
PASS1_LDFLAGS = -Mpfi=indirect
PASS2_CFLAGS  = -Mpfo=indirect
PASS2_LDFLAGS = -Mpfo=indirect
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 1
basepeak      = 0

403.gcc=peak=default=default:
COPTIMIZE     = -fastsse -Msmartalloc=huge -Mprefetch=t0 -Mnodalign %{pgi_loop32} -Mfprelaxed %{pgi_tp32}
PASS1_CFLAGS  = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_CFLAGS  = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline
PASS2_LDFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 1
basepeak      = 0

429.mcf=peak=default=default:
COPTIMIZE     = -fastsse -Msmartalloc=huge -Mipa=jobs:%{build_jobs},fast,inline:1 %{pgi_tp32}
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 0
basepeak      = 0

445.gobmk=peak=default=default:
COPTIMIZE     = -fastsse -O4 -Msmartalloc=huge -Mnovect -Mfprelaxed %{pgi_tp64}
PASS1_CFLAGS  = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_CFLAGS  = -Mpfo -Mipa=jobs:%{build_jobs},fast
PASS2_LDFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 1
basepeak      = 0

456.hmmer=peak=default=default:
COPTIMIZE     = -fastsse -Mvect=partial -Munroll=n:8 -Msmartalloc=huge -Msafeptr -Mprefetch=t0 -Mfprelaxed -Mipa=jobs:%{build_jobs},const,ptr,arg,inline %{pgi_tp64}
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 0
basepeak      = 0

458.sjeng=peak=default=default:
COPTIMIZE     = -fastsse -Msmartalloc=huge -Mfprelaxed %{pgi_tp64}
PASS1_CFLAGS  = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_CFLAGS  = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline:1,noarg
PASS2_LDFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline:1,noarg
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 1
basepeak      = 0

462.libquantum=peak=default=default:
COPTIMIZE     = -fastsse -Munroll=m:8 -Msmartalloc=huge -Mprefetch=distance:8 -Mconcur=innermost,noaltcode -Mfprelaxed -Mipa=jobs:%{build_jobs},fast,noarg %{pgi_tp64}
feedback      = 0
basepeak      = 0

464.h264ref=peak=default=default:
COPTIMIZE     = -fastsse -Msmartalloc=huge -Mfprelaxed %{pgi_tp64}
PASS1_CFLAGS  = -Mpfi=indirect
PASS1_LDFLAGS = -Mpfi=indirect
PASS2_CFLAGS  = -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inline
PASS2_LDFLAGS = -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inline
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 1
basepeak      = 0

471.omnetpp=peak=default=default:
#CXXOPTIMIZE   = -fastsse -Msmartalloc=huge -Mfprelaxed --zc_eh -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp32}
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 0
basepeak      = 1

473.astar=peak=default=default:
CXXOPTIMIZE   = -fastsse -O4 -Msmartalloc=huge -Msafeptr=global %{pgi_loop32} -Mfprelaxed --zc_eh %{pgi_tp32}
PASS1_CXXFLAGS= -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_CXXFLAGS= -Mpfo -Mipa=jobs:%{build_jobs},fast,inline:6
PASS2_LDFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline:6
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 1
basepeak      = 0

483.xalancbmk=peak=default=default:
# Link SmartHeap from the directory named by SHL_DIR (assigned in the
# compiler selection section) so the library path is maintained in one place.
EXTRA_CXXLIBS = -L$(SHL_DIR) -lsmartheap
CXXOPTIMIZE   = --zc_eh -fastsse -O4 -Mfprelaxed -Msmartalloc -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp32}
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 0
basepeak      = 0

410.bwaves=peak=default=default:
feedback      = 0
basepeak      = 1

416.gamess=peak=default=default:
FOPTIMIZE     = -fastsse -Msmartalloc=huge -Mvect=noaltcode -Mprefetch=t0 -Mfprelaxed %{pgi_tp64}
PASS1_FFLAGS  = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_FFLAGS  = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline
PASS2_LDFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 1
basepeak      = 0

433.milc=peak=default=default:
COPTIMIZE     = -fastsse -Msmartalloc=huge -Msafeptr -Mconcur -Mfprelaxed -Mipa=jobs:%{build_jobs},inline,arg,const,ptr,shape %{pgi_tp64}
feedback      = 0
basepeak      = 0

434.zeusmp=peak=default=default:
FOPTIMIZE     = -fastsse -Mfprelaxed -Mconcur -Mprefetch=distance:8,t0 -Msmartalloc=huge,hugebss -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
feedback      = 0
basepeak      = 0

435.gromacs=peak=default=default:
FOPTIMIZE     = -fastsse -Msmartalloc=huge -Mfprelaxed -Mconcur -Mfpapprox=rsqrt -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
COPTIMIZE     = -fastsse -Msmartalloc=huge -Mfprelaxed -Mconcur -Mfpapprox=rsqrt -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
feedback      = 0
basepeak      = 0

436.cactusADM=peak=default=default:
FOPTIMIZE     = -fastsse -Msmartalloc=huge -Mfprelaxed -Mconcur -Mdse -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
COPTIMIZE     = -fastsse -Msmartalloc=huge -Mfprelaxed -Mconcur -Mdse -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
feedback      = 0
basepeak      = 0

437.leslie3d=peak=default=default:
FOPTIMIZE     = -fastsse -Mvect=fuse -Msmartalloc=huge -Mprefetch=distance:8,t0 -Mfprelaxed %{pgi_tp64}
PASS1_FFLAGS  = -Mpfi=indirect
PASS1_LDFLAGS = -Mpfi=indirect
PASS2_FFLAGS  = -Mpfo=indirect -Mconcur=noaltcode -Mipa=jobs:%{build_jobs},fast,inline
PASS2_LDFLAGS = -Mpfo=indirect -Mconcur=noaltcode -Mipa=jobs:%{build_jobs},fast,inline
feedback      = 1
basepeak      = 0
444.namd=peak=default=default:
CXXOPTIMIZE   = -fastsse -Munroll=n:4,m:8 -Msmartalloc=huge -Mnodepchk -Mfprelaxed --zc_eh %{pgi_tp64}
PASS1_CXXFLAGS= -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_CXXFLAGS= -Mpfo -Mipa=jobs:%{build_jobs},fast,inline
PASS2_LDFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 1
basepeak      = 0

447.dealII=peak=default=default:
CXXOPTIMIZE   = -fastsse -alias=ansi -Msmartalloc=huge -Mprefetch=t0 -Mnovect -Mfprelaxed --zc_eh -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp32}
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 0
basepeak      = 0

450.soplex=peak=default=default:
#CXXOPTIMIZE   = -fastsse -Msmartalloc=huge -Mfprelaxed --zc_eh -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
feedback      = 0
basepeak      = 1

453.povray=peak=default=default:
CXXOPTIMIZE   = -fastsse -Msmartalloc=huge -Mprefetch=t0 -Mfprelaxed %{pgi_tp64}
PASS1_CXXFLAGS= -Mpfi=indirect
PASS1_LDFLAGS = -Mpfi=indirect
PASS2_CXXFLAGS= -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inlinenopfo:3,staticfunc
PASS2_LDFLAGS = -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inlinenopfo:3,staticfunc
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 1
basepeak      = 0

454.calculix=peak=default=default:
FOPTIMIZE     = -fastsse -Msmartalloc=huge %{pgi_loop32} -Mprefetch=t0 -Mpre -Mfprelaxed %{pgi_tp64}
COPTIMIZE     = -fastsse -Msmartalloc=huge %{pgi_loop32} -Mprefetch=t0 -Mpre -Mfprelaxed %{pgi_tp64}
PASS1_FFLAGS  = -Mpfi=indirect
PASS1_CFLAGS  = -Mpfi=indirect
PASS1_LDFLAGS = -Mpfi=indirect
PASS2_FFLAGS  = -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inline
PASS2_CFLAGS  = -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inline
PASS2_LDFLAGS = -Mpfo=indirect -Mipa=jobs:%{build_jobs},fast,inline
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 1
basepeak      = 0

459.GemsFDTD=peak=default=default:
#FOPTIMIZE     = -fastsse -Mfprelaxed -Msmartalloc=huge -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
feedback      = 0
basepeak      = 1

465.tonto=peak=default=default:
FOPTIMIZE     = -fastsse -O4 -Mvect=noaltcode -Msmartalloc=huge -Mprefetch=distance:8,t0 -Mfprelaxed -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 0
basepeak      = 0

470.lbm=peak=default=default:
feedback      = 0
basepeak      = 1

481.wrf=peak=default=default:
FOPTIMIZE     = -fastsse -Mvect=noaltcode -Msmartalloc=huge -Mprefetch=distance:8 -Mconcur=noaltcode -Mfprelaxed %{pgi_tp64}
COPTIMIZE     = -fastsse -Mvect=noaltcode -Msmartalloc=huge -Mprefetch=distance:8 -Mconcur=noaltcode -Mfprelaxed %{pgi_tp64}
feedback      = 0
basepeak      = 0

482.sphinx3=peak=default=default:
COPTIMIZE     = -fastsse -Mfprelaxed -Msmartalloc %{pgi_tp64}
# This section sets only COPTIMIZE, so the profile-feedback options must
# go on the C compile passes; PASS*_FFLAGS would never reach them.
PASS1_CFLAGS  = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_CFLAGS  = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline
PASS2_LDFLAGS = -Mpfo -Mipa=jobs:%{build_jobs},fast,inline
submit = echo "$command" > run.sh ; numactl -m 0 --physcpubind=0 bash run.sh
feedback      = 1
basepeak      = 0

#####################################################################
# Portability
#####################################################################
# Suite-wide defaults come first; the per-benchmark sections after them
# add to or blank those settings. The peak sections for 403.gcc, 429.mcf
# and 447.dealII blank the portability flag because their peak builds
# use %{pgi_tp32}.

fp=default=default=default:
PORTABILITY = -DSPEC_CPU_LP64

int=default=default=default:
CPORTABILITY = -DSPEC_CPU_LP64

400.perlbench=default=default=default:
CPORTABILITY = -DSPEC_CPU_LP64 -DSPEC_CPU_LINUX_X64

403.gcc=peak=default=default:
CPORTABILITY =

429.mcf=peak=default=default:
CPORTABILITY =

435.gromacs=default=default=default:
LDPORTABILITY = -Mnomain

436.cactusADM=default=default=default:
LDPORTABILITY = -Mnomain

447.dealII=peak=default=default:
PORTABILITY=

454.calculix=default=default=default:
LDPORTABILITY = -Mnomain

462.libquantum=default=default=default:
CPORTABILITY = -DSPEC_CPU_LP64 -DSPEC_CPU_LINUX

481.wrf=default=default=default:
CPORTABILITY = -DSPEC_CPU_CASE_FLAG -DSPEC_CPU_LINUX

483.xalancbmk=default=default=default:
CXXPORTABILITY = -DSPEC_CPU_LINUX

# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
notes_000 =Environment variables set by runspec before the start of the run:
notes_005 =LD_LIBRARY_PATH = "/root/work/cpu2006v1.1/pgi72/linux_lib64:/root/work/cpu2006v1.1/pgi72/linux_lib32"
notes_010 =NCPUS = "16"
notes_015 =