# Invocation command line: # /home/cpu2017-1.0.5/bin/harness/runcpu --configfile amd1812na_rate_revA1.cfg --tune all --nopower --runmode rate --tune base:peak --size test:train:refrate fprate # output_root was not used for this run ############################################################################ ################################################################################ # AMD1812 SPEC CPU2017 V1.0.5 Rate Configuration File for 64-bit Linux # # File name : amd1812na_rate_revA1.cfg # Creation Date : December 14, 2018 # CPU2017 Version : 1.0.5 # Supported benchmarks : All Rate benchmarks (intrate, fprate) # Compiler name/version : AOCC v1.3.0 # Operating system version : RHEL 7.4 # Supported OS's : Ubuntu 18.04, RHEL 7.4/7.5/7.6, SLES 12 SP3/15 # Hardware : AMD Naples (AMD64) # FP Base Pointer Size : 64-bit # FP Peak Pointer Size : 32/64-bit # INT Base Pointer Size : 64-bit # INT Peak Pointer Size : 32/64-bit # Auto Parallelization : No # # Note: DO NOT EDIT THIS FILE, the only edits required to properly run these # binaries are made in the ini Python file. Please consult Readme.amd1812na_revA1.txt # for a few uncommon exceptions which require edits to this file. # # Description:# # This binary package automates away many of the complexities necessary to set # up and run SPEC CPU2017 under optimized conditions on AMD Naples-based # server platforms within Linux (AMD64). # # The binary package was built specifically for AMD Naples microprocessors and # is not intended to run on other products. # # Please install the binary package by following the instructions in # "Readme.amd1812na_revA1.txt" under the "How To Use the Binaries" section. # # The binary package is designed to work without alteration on two socket AMD # Naples-based servers with 32 cores per socket, SMT enabled and 512 GiB of DDR4 # memory distributed evenly among all 16 channels using 16 GiB DIMMs. 
# # To run the binary package on other Naples configurations, please review # "Readme.amd1812na_revA1.txt" for instructions on how to easily modify # the CPU2017 configuration include file "amd1812na_rate_revA1.inc". Typically, # the number of sockets, number of cores per socket, SMT state and memory # size values will need to be changed in "amd1812na_rate_revA1.inc". Furthermore, # modify necessary system specific and test specific documentation within this # file. Again, refer to "Readme.amd1812na_revA1.txt" for more information. # # In most cases, it should be unnecessary to edit "amd1812na_rate_revA1.cfg" or any # other file besides "ini_amd1812na_rate_revA1.py". # # The config file automatically sets the number of rate copies equal to the # number of logical processors and automatically binds each copy to a unique # logical core. # # The run script and accompanying binary package are designed to work on Ubuntu # 18.04/16.04, RHEL 7.4/7.5 and SLES 12 SP3. # # Important! If you write your own run script, please set the stack size to # "unlimited" when executing this binary package. Failure to do so may cause # some benchmarks to overflow the stack. For example, to set stack size within # the bash shell, include the following line somewhere at the top of your run # script before the runcpu invocation: # # ulimit -s unlimited # # Modification of this config file should only be necessary if you intend to # rebuild the binaries. General instructions for rebuilding the binaries are # found in-line below. # ################################################################################ # Include file name ################################################################################ # The include file contains fields that are commonly changed. This file is auto- # generated based upon INI file settings and should not need user modification # for runs. 
# Name of the generated include file. The include directive below is commented
# out because the include file's contents appear inline after the
# "Begin inclusion" marker.
%define inc_file_name amd1812na_rate_revA1.inc
#include: %{inc_file_name}
#  ----- Begin inclusion of 'amd1812na_rate_revA1.inc'
################################################################################
# File name: amd1812na_rate_revA1.inc
# File generation code date: December 14, 2018
# File generation date/time: March 04, 2019 / 16:44:47
#
# This file is automatically generated during a SPEC CPU2017 run.
#
# To modify inc file generation, please consult the readme file or the run
# script.
################################################################################
# The following macros are generated for use in the cfg file.
################################################################################
# Each macro sits on its own line so that it is not swallowed by a preceding
# '#' comment.
%define logical_core_count 128
%define physical_core_count 64
# The following macros define the rate copy counts for the peak benchmarks.
# Int Rate Peak copy counts:
%define copies_500perlbench 128
%define copies_502gcc 128
%define copies_505mcf 128
%define copies_520omnetpp 128
%define copies_523xalancbmk 128
%define copies_525x264 128
%define copies_531deepsjeng 128
%define copies_541leela 128
%define copies_548exchange 128
%define copies_557xz 128
# FP Rate Peak copy counts:
%define copies_503bwaves 64
%define copies_507cactus 128
%define copies_508namd 128
%define copies_510parest 64
%define copies_511povray 128
%define copies_519lbm 128
%define copies_521wrf 128
%define copies_526blender 128
%define copies_527cam4 128
%define copies_538imagick 128
%define copies_544nab 128
%define copies_549fotonik 64
%define copies_554roms 64
# Base copy counts:
%define copies_base_int 128
%define copies_base_fp 128
################################################################################
# Bind commands for assigning affinity
################################################################################
# One bindN entry per logical CPU (0-127). Each entry runs numactl with
# node-local memory allocation and pins execution to physical CPU N.
bind0 = numactl --localalloc --physcpubind=0
bind1 = numactl --localalloc --physcpubind=1
bind2 = numactl --localalloc --physcpubind=2
bind3 = numactl --localalloc --physcpubind=3
bind4 = numactl --localalloc --physcpubind=4
bind5 = numactl --localalloc --physcpubind=5
bind6 = numactl --localalloc --physcpubind=6
bind7 = numactl --localalloc --physcpubind=7
bind8 = numactl --localalloc --physcpubind=8
bind9 = numactl --localalloc --physcpubind=9
bind10 = numactl --localalloc --physcpubind=10
bind11 = numactl --localalloc --physcpubind=11
bind12 = numactl --localalloc --physcpubind=12
bind13 = numactl --localalloc --physcpubind=13
bind14 = numactl --localalloc --physcpubind=14
bind15 = numactl --localalloc --physcpubind=15
bind16 = numactl --localalloc --physcpubind=16
bind17 = numactl --localalloc --physcpubind=17
bind18 = numactl --localalloc --physcpubind=18
bind19 = numactl --localalloc --physcpubind=19
bind20 = numactl --localalloc --physcpubind=20
bind21 = numactl --localalloc --physcpubind=21
bind22 = numactl --localalloc --physcpubind=22
bind23 = numactl --localalloc --physcpubind=23
bind24 = numactl --localalloc --physcpubind=24
bind25 = numactl --localalloc --physcpubind=25
bind26 = numactl --localalloc --physcpubind=26
bind27 = numactl --localalloc --physcpubind=27
bind28 = numactl --localalloc --physcpubind=28
bind29 = numactl --localalloc --physcpubind=29
bind30 = numactl --localalloc --physcpubind=30
bind31 = numactl --localalloc --physcpubind=31
bind32 = numactl --localalloc --physcpubind=32
bind33 = numactl --localalloc --physcpubind=33
bind34 = numactl --localalloc --physcpubind=34
bind35 = numactl --localalloc --physcpubind=35
bind36 = numactl --localalloc --physcpubind=36
bind37 = numactl --localalloc --physcpubind=37
bind38 = numactl --localalloc --physcpubind=38
bind39 = numactl --localalloc --physcpubind=39
bind40 = numactl --localalloc --physcpubind=40
bind41 = numactl --localalloc --physcpubind=41
bind42 = numactl --localalloc --physcpubind=42
bind43 = numactl --localalloc --physcpubind=43
bind44 = numactl --localalloc --physcpubind=44
bind45 = numactl --localalloc --physcpubind=45
bind46 = numactl --localalloc --physcpubind=46
bind47 = numactl --localalloc --physcpubind=47
bind48 = numactl --localalloc --physcpubind=48
bind49 = numactl --localalloc --physcpubind=49
bind50 = numactl --localalloc --physcpubind=50
bind51 = numactl --localalloc --physcpubind=51
bind52 = numactl --localalloc --physcpubind=52
bind53 = numactl --localalloc --physcpubind=53
bind54 = numactl --localalloc --physcpubind=54
bind55 = numactl --localalloc --physcpubind=55
bind56 = numactl --localalloc --physcpubind=56
bind57 = numactl --localalloc --physcpubind=57
bind58 = numactl --localalloc --physcpubind=58
bind59 = numactl --localalloc --physcpubind=59
bind60 = numactl --localalloc --physcpubind=60
bind61 = numactl --localalloc --physcpubind=61
bind62 = numactl --localalloc --physcpubind=62
bind63 = numactl --localalloc --physcpubind=63
bind64 = numactl --localalloc --physcpubind=64
bind65 = numactl --localalloc --physcpubind=65
bind66 = numactl --localalloc --physcpubind=66
bind67 = numactl --localalloc --physcpubind=67
bind68 = numactl --localalloc --physcpubind=68
bind69 = numactl --localalloc --physcpubind=69
bind70 = numactl --localalloc --physcpubind=70
bind71 = numactl --localalloc --physcpubind=71
bind72 = numactl --localalloc --physcpubind=72
bind73 = numactl --localalloc --physcpubind=73
bind74 = numactl --localalloc --physcpubind=74
bind75 = numactl --localalloc --physcpubind=75
bind76 = numactl --localalloc --physcpubind=76
bind77 = numactl --localalloc --physcpubind=77
bind78 = numactl --localalloc --physcpubind=78
bind79 = numactl --localalloc --physcpubind=79
bind80 = numactl --localalloc --physcpubind=80
bind81 = numactl --localalloc --physcpubind=81
bind82 = numactl --localalloc --physcpubind=82
bind83 = numactl --localalloc --physcpubind=83
bind84 = numactl --localalloc --physcpubind=84
bind85 = numactl --localalloc --physcpubind=85
bind86 = numactl --localalloc --physcpubind=86
bind87 = numactl --localalloc --physcpubind=87
bind88 = numactl --localalloc --physcpubind=88
bind89 = numactl --localalloc --physcpubind=89
bind90 = numactl --localalloc --physcpubind=90
bind91 = numactl --localalloc --physcpubind=91
bind92 = numactl --localalloc --physcpubind=92
bind93 = numactl --localalloc --physcpubind=93
bind94 = numactl --localalloc --physcpubind=94
bind95 = numactl --localalloc --physcpubind=95
bind96 = numactl --localalloc --physcpubind=96
bind97 = numactl --localalloc --physcpubind=97
bind98 = numactl --localalloc --physcpubind=98
bind99 = numactl --localalloc --physcpubind=99
bind100 = numactl --localalloc --physcpubind=100
bind101 = numactl --localalloc --physcpubind=101
bind102 = numactl --localalloc --physcpubind=102
bind103 = numactl --localalloc --physcpubind=103
bind104 = numactl --localalloc --physcpubind=104
bind105 = numactl --localalloc --physcpubind=105
bind106 = numactl --localalloc --physcpubind=106
bind107 = numactl --localalloc --physcpubind=107
bind108 = numactl --localalloc --physcpubind=108
bind109 = numactl --localalloc --physcpubind=109
bind110 = numactl --localalloc --physcpubind=110
bind111 = numactl --localalloc --physcpubind=111
bind112 = numactl --localalloc --physcpubind=112
bind113 = numactl --localalloc --physcpubind=113
bind114 = numactl --localalloc --physcpubind=114
bind115 = numactl --localalloc --physcpubind=115
bind116 = numactl --localalloc --physcpubind=116
bind117 = numactl --localalloc --physcpubind=117
bind118 = numactl --localalloc --physcpubind=118
bind119 = numactl --localalloc --physcpubind=119
bind120 = numactl --localalloc --physcpubind=120
bind121 = numactl --localalloc --physcpubind=121
bind122 = numactl --localalloc --physcpubind=122
bind123 = numactl --localalloc --physcpubind=123
bind124 = numactl --localalloc --physcpubind=124
bind125 = numactl --localalloc --physcpubind=125
bind126 = numactl --localalloc --physcpubind=126
bind127 = numactl --localalloc --physcpubind=127
# Write the benchmark command to run.sh, then execute it with bash, prefixed
# by $BIND.
submit = echo "$command" > run.sh ; $BIND bash run.sh
################################################################################
# The remainder of this file defines CPU2017 report parameters.
################################################################################
# SPEC CPU 2017 report header
################################################################################
# Values follow '=' with no space; the spacing after '=' is kept exactly as
# written in case the tools treat leading whitespace as part of the value.
license_num =55 # (Your SPEC license number)
tester =Dell Inc.
test_sponsor =Dell Inc.
hw_vendor =Dell Inc.
hw_model000 =PowerEdge R7425 (AMD EPYC 7551, 2.00GHz)
#--------- If you install new compilers, edit this section --------------------
sw_compiler000 =C/C++: Version 1.3.0 of AOCC
sw_compiler001 =Fortran: Version 4.8.2 of GCC
################################################################################
# Hardware, firmware and software information
################################################################################
hw_avail =Jan-2019
sw_avail =Feb-2019
hw_cpu_name =AMD EPYC 7551
hw_cpu_nominal_mhz =2000
hw_cpu_max_mhz =3000
hw_ncores =64
hw_nthreadspercore =2
hw_ncpuorder =1,2 chips
hw_other =None # Other perf-relevant hw, or "None"
fw_bios =Version 1.7.6 released Jan-2019
sw_base_ptrsize =64-bit
hw_pcache =64 KB I + 32 KB D on chip per core
hw_scache =512 KB I+D on chip per core
hw_tcache =64 MB I+D on chip per chip, 8 MB shared / 4 cores
hw_ocache =None
################################################################################
# Notes
################################################################################
notes_015 =Binaries were compiled on a system with 2 x AMD EPYC 7601 CPU + 512GB Memory using RHEL 7.6
notes_020 =
notes_025 =NA: The test sponsor attests, as of date of publication, that CVE-2017-5754 (Meltdown)
notes_030 =is mitigated in the system as tested and documented.
# General notes (continued): security-mitigation attestations and jemalloc
# build details. One field per line; values are kept exactly as written.
notes_035 =Yes: The test sponsor attests, as of date of publication, that CVE-2017-5753 (Spectre variant 1)
notes_040 =is mitigated in the system as tested and documented.
notes_045 =Yes: The test sponsor attests, as of date of publication, that CVE-2017-5715 (Spectre variant 2)
notes_050 =is mitigated in the system as tested and documented.
notes_055 =
notes_060 =jemalloc: configured and built with GCC v4.8.5
notes_065 =in RHEL v7.2 under default conditions.
notes_070 =jemalloc: sources available from jemalloc.net or
notes_075 =https://github.com/jemalloc/jemalloc/releases
notes_080 =jemalloc uses environment variable MALLOC_CONF
notes_085 =with values narenas and lg_chunk:
notes_090 =narenas: sets the maximum number of arenas to use
notes_095 =for automatic multiplexing
notes_100 =of threads and arenas.
notes_105 =lg_chunk: set the virtual memory chunk size (log
notes_110 =base 2). For example,
notes_115 =lg_chunk:21 sets the default chunk size to 2^21 =
notes_120 =2MiB
# Submit notes: how copies were bound to CPUs.
notes_submit_000 ='numactl' was used to bind copies to the cores.
notes_submit_005 =See the configuration file for details.
# OS notes: shell limits, harness invocation and VM tuning.
notes_os_000 ='ulimit -s unlimited' was used to set environment stack size
notes_os_005 ='ulimit -l 2097152' was used to set environment locked pages in memory limit
notes_os_010 =
# The harness command recorded at the top of this file is runcpu, so the notes
# name runcpu rather than runspec.
notes_os_015 =runcpu command invoked through numactl i.e.:
notes_os_020 =numactl --interleave=all runcpu
notes_os_025 =
notes_os_030 =Set dirty_ratio=8 to limit dirty cache to 8% of memory
notes_os_035 =Set swappiness=1 to swap only if necessary
notes_os_040 =Set zone_reclaim_mode=1 to free local node memory and avoid remote memory
notes_os_045 =sync then drop_caches=3 to reset caches before invoking runcpu
notes_os_050 =
notes_os_055 =dirty_ratio, swappiness, zone_reclaim_mode and drop_caches were
notes_os_060 =all set using privileged echo (e.g. echo 1 > /proc/sys/vm/swappiness).
# OS notes (continued).
notes_os_065 =
notes_os_070 =Transparent huge pages were enabled for this run (OS default)
# Compiler notes: where the AOCC toolchain and its plugins come from.
notes_comp_000 =The AMD64 AOCC Compiler Suite is available at
notes_comp_005 =http://developer.amd.com/amd-aocc/
notes_comp_010 =
notes_comp_015 =The AOCC Gold Linker plugin was installed and used for the link stage.
notes_comp_020 =
notes_comp_025 =The AOCC Fortran Plugin version 1.3.0 was used to leverage AOCC optimizers
notes_comp_030 =with gfortran. It is available here:
notes_comp_035 =http://developer.amd.com/amd-aocc/
################################################################################
# The following note fields describe platform settings.
################################################################################
# example: (uncomment as necessary)
notes_plat_000 =BIOS settings:
# notes_plat_005 = cTDP = 200
notes_plat_005 = Determinism Slider set to Power Determinism
#notes_plat_015 = Software Prefetcher disabled
# notes_plat_015 = Fan Speed = Maximum
################################################################################
# The following are custom fields:
################################################################################
# Use custom_fields to enter lines that are not listed here. For example:
# notes_plat_100 = Energy Bias set to Max Performance
# new_field = Ambient temperature set to 10C
################################################################################
# The following fields must be set here for Int Rate.
################################################################################
intrate:
notes_comp020 =
notes_comp030 =jemalloc: configured and built with GCC v4.8.5 in RHEL v7.2 under default conditions.
# Int Rate compiler notes (continued from the intrate: section header).
notes_comp040 =https://github.com/jemalloc/jemalloc/releases/download/5.1.0/jemalloc-5.1.0.tar.bz2
# The blank separator has its own key. It previously reused notes_comp040,
# which assigned the same field twice and shadowed the download URL above.
notes_comp045 =
notes_comp050 =jemalloc uses environment variable MALLOC_CONF with values narenas and lg_chunk:
notes_comp060 = narenas: sets the maximum number of arenas to use for automatic multiplexing of
notes_comp070 = threads and arenas.
notes_comp080 = lg_chunk: set the virtual memory chunk size (log base 2). For example,
notes_comp090 = lg_chunk:21 sets the default chunk size to 2^21 = 2MiB.
sw_other000 =jemalloc: jemalloc memory allocator library
# Stray trailing ';' removed so the value matches the fprate entry below.
sw_other001 = V5.1.0
sw_peak_ptrsize =32/64-bit
################################################################################
# The following fields must be set here for FP Rate.
################################################################################
fprate:
sw_other000 =jemalloc: jemalloc memory allocator library
sw_other001 = V5.1.0
sw_peak_ptrsize =64-bit
################################################################################
# The following fields must be set here or they will be overwritten by sysinfo.
################################################################################
intrate,fprate:
hw_disk =1 x 120 GB SATA SSD
hw_memory000 =1 TB (16 x 64 GB 4Rx4 PC4-2666V-L)
hw_memory001 =
hw_memory002 =
hw_nchips =2
prepared_by =Dell Inc.
# Software fields (continued from the intrate,fprate: section).
sw_file =ext4
sw_os000 =Ubuntu 18.04.2 LTS
sw_os001 =kernel 4.15.0-45-generic
sw_state =Run level 5 (multi-user)
################################################################################
# End of inc file
################################################################################
# Switch back to the default block after the include file:
default:
#  ---- End inclusion of '/home/cpu2017-1.0.5/config/amd1812na_rate_revA1.inc'
# Switch back to default block after the include file:
default:
################################################################################
# Binary label extension and "allow_build" switch
################################################################################
# Only modify the binary label extension if you plan to rebuild the binaries.
%define ext amd1812na_rate_revA
# If you plan to recompile these CPU2017 binaries, please choose a new extension
# name (ext above) to avoid confusion with the current binary set on your system
# under test, and to avoid confusion for SPEC submission reviewers. You will
# also need to set "allow_build" to true below. Finally, you must modify the
# Paths section below to point to your library locations if the paths are not
# already set up in your build environment.
# Change the following line to true if you intend to REBUILD the binaries (AMD
# does not support this). Valid values are "true" or "false" (no quotes).
%define allow_build false
#
################################################################################
# Paths -- MODIFY AS NEEDED (modification should not be necessary for runs)
################################################################################
# Set location of runtime libraries for runs or builds.
# preenv is switched on here; a preENV_ setting is used below to set
# LD_LIBRARY_PATH.
preenv = 1
%define lib_dir amd1812na_rate_revA_lib
# The following path will have to be changed on the build system if different:
JEMALLOC_LIB32_PATH = /root/work/lib/jemalloc510/lib32
%if '%{allow_build}' eq 'false'
fail_build = 1
# Runtime libraries: the 64-bit directory first, then the 32-bit directory,
# then whatever LD_LIBRARY_PATH was already set. All entries are separated
# with ':' (the 64/32 pair was previously separated by ';').
preENV_LD_LIBRARY_PATH = $[top]/%{lib_dir}/64:$[top]/%{lib_dir}/32:%{ENV_LD_LIBRARY_PATH}
%elif '%{allow_build}' eq 'true'
# If you intend to rebuild, be sure to set the library paths either in the
# build script or here:
% define build_ncpus 16 # controls number of simultaneous compiles
fail_build = 0
makeflags = --jobs=%{build_ncpus} --load-average=%{build_ncpus}
%else
% error The value of "allow_build" is %{allow_build}, but it can only be "true" or "false". This error was generated
%endif
# -------
# Enable automated data collection per benchmark
# -------
# Data collection is not enabled for reportable runs.
# teeout is necessary to get data collection stdout into the logs. Best
# practices for the individual data collection items would be to have
# them store important output in separate files. Filenames could be
# constructed from $SPEC (environment), $lognum (result number from runcpu),
# and benchmark name/number.
teeout = yes
# Run runcpu with '-v 35' (or greater) to log lists of variables which can
# be used in substitutions as below.
# Data-collection hooks: the start macro passes the benchmark name, number,
# iteration, size, label and log number to the collector; the stop macro ends
# collection. monitor_specrun_wrapper runs them around $command.
%define data-collection-start $[top]/data-collection/data-collection start benchname=$name benchnum=$num benchmark=$benchmark iteration=$iter size=$size label='$label' lognum='$lognum'
%define data-collection-stop $[top]/data-collection/data-collection stop
monitor_specrun_wrapper = %{data-collection-start} ; $command ; %{data-collection-stop}
################################################################################
# Header settings
################################################################################
backup_config = 0 # set to 0 if you do not want backup files
bench_post_setup = sync
# command_add_redirect: If set, the generated ${command} will include
# redirection operators (stdout, stderr), which are passed along to the shell
# that executes the command. If this variable is not set, specinvoke does the
# redirection. NOTE: this value must be "yes" for the affinity generation
# section of this config file.
command_add_redirect = yes
check_md5 = yes
env_vars = no
flagsurl000 = http://www.spec.org/cpu2017/flags/gcc.2018-02-16.xml
flagsurl001 = http://www.spec.org/cpu2017/flags/aocc130-flags-revA2.xml
#flagsurl03 = $[top]/amd1812-INVALID-platform-revA-I.xml
ignore_errors = yes
# label: Arbitrary string, tags your binaries & directories.
# Header settings (continued). The label reuses the ext macro.
label = %{ext}
line_width = 1020
log_line_width = 1020
mean_anyway = yes
output_format = all
reportable = yes
size = test,train,ref
teeout = yes
teerunout = yes
tune = base,peak
################################################################################
# Compilers
################################################################################
default:
CC = clang
CXX = clang++
FC = gfortran
CLD = clang
CXXLD = clang++
FLD = clang
CC_VERSION_OPTION = --version
CXX_VERSION_OPTION = --version
FC_VERSION_OPTION = --version
################################################################################
# Portability Flags
################################################################################
default:# data model applies to all benchmarks
EXTRA_PORTABILITY = -DSPEC_LP64
# Benchmark-specific portability (anything other than data model
# is allowed only where need is proven)
500.perlbench_r,600.perlbench_s: #lang='C'
PORTABILITY = -DSPEC_LINUX_X64
521.wrf_r,621.wrf_s: #lang='F,C'
CPORTABILITY = -DSPEC_CASE_FLAG
FPORTABILITY = -fconvert=big-endian
523.xalancbmk_r,623.xalancbmk_s: #lang='CXX'
PORTABILITY = -DSPEC_LINUX
526.blender_r: #lang='CXX,C'
CPORTABILITY = -funsigned-char
CXXPORTABILITY = -D__BOOL_DEFINED
527.cam4_r,627.cam4_s: #lang='F,C'
PORTABILITY = -DSPEC_CASE_FLAG
################################################################################
# Tuning Flags
################################################################################
#####################
# Base tuning flags #
#####################
# Long flag lists use a trailing backslash to continue onto the next line.
default=base:
COPTIMIZE = -O3 -flto -ffast-math -march=znver1 -mno-avx2 -fstruct-layout=3 \
    -mllvm -unroll-threshold=50 -fremap-arrays \
    -mllvm -inline-threshold=1000 \
    -flv-function-specialization \
    -mllvm -enable-gvn-hoist -mllvm -function-specialize
CXXOPTIMIZE = -O3 -flto -march=znver1 \
    -mllvm -unroll-threshold=100 -finline-aggressive \
    -fremap-arrays -mllvm -inline-threshold=1000 \
    -mllvm -enable-vectorize-compares=false
FOPTIMIZE = -O3 -mavx -madx -funroll-loops -ffast-math
EXTRA_FFLAGS = -fplugin=dragonegg.so \
    -fplugin-arg-dragonegg-llvm-option="-merge-constant -enable-vectorize-compares:false"
EXTRA_FLIBS = -lgfortran -lamdlibm -lm
EXTRA_LDFLAGS = -flto -Wl,-plugin-opt=-merge-constant \
    -Wl,-plugin-opt=-lsr-in-nested-loop \
    -Wl,-plugin-opt=-enable-vectorize-compares=false
EXTRA_LIBS = -lpthread -ldl -ljemalloc
# The following is necessary for 502/602 gcc:
LDOPTIMIZE = -z muldefs
MATHLIBOPT = -lamdlibm -lm
########################
# intrate tuning flags #
########################
intrate:
FOPTIMIZE = -O3 -mavx -madx -funroll-loops -ffast-math -frepack-arrays
EXTRA_FFLAGS = -O3 -flto -madx -fplugin=dragonegg.so \
    -specs=integrated-as.specs \
    -fplugin-arg-dragonegg-llvm-option=-disable-indvar-simplify \
    -fplugin-arg-dragonegg-llvm-option=-unroll-aggressive \
    -fplugin-arg-dragonegg-llvm-option=-unroll-threshold:150
EXTRA_LDFLAGS = -flto \
    -Wl,-mllvm -Wl,-function-specialize \
    -Wl,-mllvm -Wl,-enable-vectorize-compares
LDFFLAGS = -Wl,-mllvm -Wl,-inline-recursion=4 \
    -Wl,-mllvm -Wl,-lsr-in-nested-loop \
    -Wl,-mllvm -Wl,-enable-iv-split \
    -Wl,-mllvm -Wl,-merge-constant \
    -Wl,-mllvm -Wl,-unroll-aggressive \
    -Wl,-mllvm -Wl,-unroll-threshold=150
copies = %{copies_base_int}
#######################
# fprate tuning flags #
#######################
fprate:
sw_peak_ptrsize =64-bit
CXX = clang++ -std=c++98
copies = %{copies_base_fp}
#####################
# Peak tuning flags #
#####################
default=peak:
COPTIMIZE = -Ofast -flto -march=znver1 -fstruct-layout=3 \
    -mllvm -vectorize-memory-aggressively -mno-avx2 \
    -mllvm -unroll-threshold=100 -fremap-arrays \
    -mllvm -inline-threshold=1000
CXXOPTIMIZE = -Ofast -flto -march=znver1 -finline-aggressive \
    -mllvm -unroll-threshold=100 -fremap-arrays \
    -mllvm -inline-threshold=1000
FOPTIMIZE = -O3 -mavx2 -madx -funroll-loops -ffast-math
EXTRA_FFLAGS = -fplugin=dragonegg.so \
    -fplugin-arg-dragonegg-llvm-option="-merge-constant -inline-threshold:1000"
EXTRA_FLIBS = -lgfortran -lamdlibm -lm
EXTRA_LDFLAGS = -flto -Wl,-plugin-opt=-merge-constant \
    -Wl,-plugin-opt=-lsr-in-nested-loop
EXTRA_LIBS = -lpthread -ldl -ljemalloc
# Feedback is off by default for peak; the sections below that need it set
# feedback = 1.
feedback = 0
PASS1_CFLAGS = -fprofile-instr-generate
PASS2_CFLAGS = -fprofile-instr-use
PASS1_FFLAGS = -fprofile-generate
PASS2_FFLAGS = -fprofile-use
PASS1_CXXFLAGS = -fprofile-instr-generate
PASS2_CXXFLAGS = -fprofile-instr-use
PASS1_LDFLAGS = -fprofile-instr-generate
PASS2_LDFLAGS = -fprofile-instr-use
fdo_run1 = $command ; llvm-profdata merge -output=default.profdata *.profraw
# Per-benchmark peak sections. Each one sets its copy count from the macros
# named copies_<benchmark>.
500.perlbench_r=peak: #lang='C'
feedback = 1
copies = %{copies_500perlbench}
# 502.gcc_r peak is built with -m32 and links jemalloc from
# JEMALLOC_LIB32_PATH.
502.gcc_r=peak: #lang='C'
EXTRA_PORTABILITY = -D_FILE_OFFSET_BITS=64
EXTRA_COPTIMIZE = -fgnu89-inline
CC = clang -m32
CLD = clang -m32
EXTRA_LIBS = -lpthread -ldl -L$[JEMALLOC_LIB32_PATH] -ljemalloc
MATHLIBOPT = -lm
copies = %{copies_502gcc}
503.bwaves_r=peak:
copies = %{copies_503bwaves}
505.mcf_r=peak:
copies = %{copies_505mcf}
507.cactuBSSN_r=peak:
copies = %{copies_507cactus}
508.namd_r=peak:
copies = %{copies_508namd}
510.parest_r=peak:
copies = %{copies_510parest}
511.povray_r=peak:
copies = %{copies_511povray}
519.lbm_r=peak:
copies = %{copies_519lbm}
520.omnetpp_r=peak:
copies = %{copies_520omnetpp}
521.wrf_r,621.wrf_s=peak: #lang='F,C'
COPTIMIZE = -O3 -mavx -ffast-math
FOPTIMIZE = -O3 -mavx -funroll-loops -ffast-math
copies = %{copies_521wrf}
# 523.xalancbmk_r peak is built with -m32 and links jemalloc from
# JEMALLOC_LIB32_PATH.
523.xalancbmk_r=peak: #lang='CXX'
EXTRA_PORTABILITY = -D_FILE_OFFSET_BITS=64
CXX = clang++ -m32
CXXLD = clang++ -m32
EXTRA_LIBS = -lpthread -ldl -L$[JEMALLOC_LIB32_PATH] -ljemalloc
MATHLIBOPT = -lm
copies = %{copies_523xalancbmk}
525.x264_r=peak: #lang='C'
COPTIMIZE = -Ofast -flto -march=znver1 -mno-avx2 -fstruct-layout=5 \
    -mllvm -vectorize-memory-aggressively \
    -mllvm -unroll-threshold=50 -fremap-arrays \
    -mllvm -inline-threshold=1000 -mllvm -enable-gvn-hoist \
    -flv-function-specialization
EXTRA_LDFLAGS = -flto \
    -Wl,-mllvm -Wl,-function-specialize \
    -Wl,-mllvm -Wl,-enable-vectorize-compares
EXTRA_LIBS = -lamdlibm -ljemalloc -lpthread -ldl
MATHLIBOPT = -lamdlibm -lm
feedback = 1
PASS1_CFLAGS = -fprofile-instr-generate
PASS2_CFLAGS = -fprofile-instr-use
PASS1_LDFLAGS = -fprofile-instr-generate
PASS2_LDFLAGS = -fprofile-instr-use
fdo_run1 = $command ; llvm-profdata merge -output=default.profdata *.profraw
copies = %{copies_525x264}
526.blender_r=peak: #lang='CXX,C'
copies = %{copies_526blender}
527.cam4_r=peak: #lang='F,C'
copies = %{copies_527cam4}
531.deepsjeng_r=peak: #lang='CXX'
copies = %{copies_531deepsjeng}
538.imagick_r=peak: #lang='C'
copies = %{copies_538imagick}
541.leela_r=peak: #lang="C++"
CXXOPTIMIZE = -Ofast -flto -march=znver1 -mllvm -unroll-count=8 \
    -mllvm -unroll-threshold=100
feedback = 1
copies = %{copies_541leela}
544.nab_r=peak: #lang='C'
copies = %{copies_544nab}
548.exchange2_r=peak: #lang='Fortran'
copies = %{copies_548exchange}
549.fotonik3d_r=peak:
copies = %{copies_549fotonik}
554.roms_r=peak:
copies = %{copies_554roms}
557.xz_r=peak:
copies = %{copies_557xz}
# The following settings were obtained by running the sysinfo_program
# 'specperl $[top]/bin/sysinfo' (sysinfo:SHA:32259ebd59f3e93740723202f27c44c82ee68e0f2e40cd2ca50cfd5519772397)
default:
notes_plat_sysinfo_000 = Sysinfo program /home/cpu2017-1.0.5/bin/sysinfo
notes_plat_sysinfo_005 = Rev: r5974 of 2018-05-19 9bcde8f2999c33d61f64985e45859ea9
notes_plat_sysinfo_010 = running on r7425 Tue Mar 5 04:15:38 2019
notes_plat_sysinfo_015 =
notes_plat_sysinfo_020 = SUT (System Under Test) info as seen by some common utilities.
notes_plat_sysinfo_025 = For more information on this section, see
notes_plat_sysinfo_030 = https://www.spec.org/cpu2017/Docs/config.html#sysinfo
notes_plat_sysinfo_035 =
notes_plat_sysinfo_040 = From /proc/cpuinfo
notes_plat_sysinfo_045 = model name : AMD EPYC 7551 32-Core Processor
notes_plat_sysinfo_050 = 2 "physical id"s (chips)
notes_plat_sysinfo_055 = 128 "processors"
notes_plat_sysinfo_060 = cores, siblings (Caution: counting these is hw and system dependent.
The following notes_plat_sysinfo_065 = excerpts from /proc/cpuinfo might not be reliable. Use with caution.) notes_plat_sysinfo_070 = cpu cores : 32 notes_plat_sysinfo_075 = siblings : 64 notes_plat_sysinfo_080 = physical 0: cores 0 1 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 notes_plat_sysinfo_085 = 29 30 31 notes_plat_sysinfo_090 = physical 1: cores 0 1 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 notes_plat_sysinfo_095 = 29 30 31 notes_plat_sysinfo_100 = notes_plat_sysinfo_105 = From lscpu: notes_plat_sysinfo_110 = Architecture: x86_64 notes_plat_sysinfo_115 = CPU op-mode(s): 32-bit, 64-bit notes_plat_sysinfo_120 = Byte Order: Little Endian notes_plat_sysinfo_125 = CPU(s): 128 notes_plat_sysinfo_130 = On-line CPU(s) list: 0-127 notes_plat_sysinfo_135 = Thread(s) per core: 2 notes_plat_sysinfo_140 = Core(s) per socket: 32 notes_plat_sysinfo_145 = Socket(s): 2 notes_plat_sysinfo_150 = NUMA node(s): 8 notes_plat_sysinfo_155 = Vendor ID: AuthenticAMD notes_plat_sysinfo_160 = CPU family: 23 notes_plat_sysinfo_165 = Model: 1 notes_plat_sysinfo_170 = Model name: AMD EPYC 7551 32-Core Processor notes_plat_sysinfo_175 = Stepping: 2 notes_plat_sysinfo_180 = CPU MHz: 2397.890 notes_plat_sysinfo_185 = BogoMIPS: 3992.30 notes_plat_sysinfo_190 = Virtualization: AMD-V notes_plat_sysinfo_195 = L1d cache: 32K notes_plat_sysinfo_200 = L1i cache: 64K notes_plat_sysinfo_205 = L2 cache: 512K notes_plat_sysinfo_210 = L3 cache: 8192K notes_plat_sysinfo_215 = NUMA node0 CPU(s): 0,8,16,24,32,40,48,56,64,72,80,88,96,104,112,120 notes_plat_sysinfo_220 = NUMA node1 CPU(s): 2,10,18,26,34,42,50,58,66,74,82,90,98,106,114,122 notes_plat_sysinfo_225 = NUMA node2 CPU(s): 4,12,20,28,36,44,52,60,68,76,84,92,100,108,116,124 notes_plat_sysinfo_230 = NUMA node3 CPU(s): 6,14,22,30,38,46,54,62,70,78,86,94,102,110,118,126 notes_plat_sysinfo_235 = NUMA node4 CPU(s): 1,9,17,25,33,41,49,57,65,73,81,89,97,105,113,121 notes_plat_sysinfo_240 = NUMA node5 CPU(s): 
3,11,19,27,35,43,51,59,67,75,83,91,99,107,115,123 notes_plat_sysinfo_245 = NUMA node6 CPU(s): 5,13,21,29,37,45,53,61,69,77,85,93,101,109,117,125 notes_plat_sysinfo_250 = NUMA node7 CPU(s): 7,15,23,31,39,47,55,63,71,79,87,95,103,111,119,127 notes_plat_sysinfo_255 = Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov notes_plat_sysinfo_260 = pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm notes_plat_sysinfo_265 = constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid amd_dcm aperfmperf pni notes_plat_sysinfo_270 = pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c notes_plat_sysinfo_275 = rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch notes_plat_sysinfo_280 = osvw skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb notes_plat_sysinfo_285 = hw_pstate sme ssbd ibpb vmmcall fsgsbase bmi1 avx2 smep bmi2 rdseed adx smap notes_plat_sysinfo_290 = clflushopt sha_ni xsaveopt xsavec xgetbv1 xsaves clzero irperf xsaveerptr arat npt notes_plat_sysinfo_295 = lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter notes_plat_sysinfo_300 = pfthreshold avic v_vmsave_vmload vgif overflow_recov succor smca notes_plat_sysinfo_305 = notes_plat_sysinfo_310 = /proc/cpuinfo cache data notes_plat_sysinfo_315 = cache size : 512 KB notes_plat_sysinfo_320 = notes_plat_sysinfo_325 = From numactl --hardware WARNING: a numactl 'node' might or might not correspond to a notes_plat_sysinfo_330 = physical chip. 
notes_plat_sysinfo_335 = available: 8 nodes (0-7) notes_plat_sysinfo_340 = node 0 cpus: 0 8 16 24 32 40 48 56 64 72 80 88 96 104 112 120 notes_plat_sysinfo_345 = node 0 size: 128638 MB notes_plat_sysinfo_350 = node 0 free: 128392 MB notes_plat_sysinfo_355 = node 1 cpus: 2 10 18 26 34 42 50 58 66 74 82 90 98 106 114 122 notes_plat_sysinfo_360 = node 1 size: 129020 MB notes_plat_sysinfo_365 = node 1 free: 128767 MB notes_plat_sysinfo_370 = node 2 cpus: 4 12 20 28 36 44 52 60 68 76 84 92 100 108 116 124 notes_plat_sysinfo_375 = node 2 size: 129020 MB notes_plat_sysinfo_380 = node 2 free: 128792 MB notes_plat_sysinfo_385 = node 3 cpus: 6 14 22 30 38 46 54 62 70 78 86 94 102 110 118 126 notes_plat_sysinfo_390 = node 3 size: 129020 MB notes_plat_sysinfo_395 = node 3 free: 128732 MB notes_plat_sysinfo_400 = node 4 cpus: 1 9 17 25 33 41 49 57 65 73 81 89 97 105 113 121 notes_plat_sysinfo_405 = node 4 size: 129020 MB notes_plat_sysinfo_410 = node 4 free: 128760 MB notes_plat_sysinfo_415 = node 5 cpus: 3 11 19 27 35 43 51 59 67 75 83 91 99 107 115 123 notes_plat_sysinfo_420 = node 5 size: 129020 MB notes_plat_sysinfo_425 = node 5 free: 128783 MB notes_plat_sysinfo_430 = node 6 cpus: 5 13 21 29 37 45 53 61 69 77 85 93 101 109 117 125 notes_plat_sysinfo_435 = node 6 size: 129020 MB notes_plat_sysinfo_440 = node 6 free: 128742 MB notes_plat_sysinfo_445 = node 7 cpus: 7 15 23 31 39 47 55 63 71 79 87 95 103 111 119 127 notes_plat_sysinfo_450 = node 7 size: 124965 MB notes_plat_sysinfo_455 = node 7 free: 124728 MB notes_plat_sysinfo_460 = node distances: notes_plat_sysinfo_465 = node 0 1 2 3 4 5 6 7 notes_plat_sysinfo_470 = 0: 10 16 16 16 28 28 22 28 notes_plat_sysinfo_475 = 1: 16 10 16 16 28 28 28 22 notes_plat_sysinfo_480 = 2: 16 16 10 16 22 28 28 28 notes_plat_sysinfo_485 = 3: 16 16 16 10 28 22 28 28 notes_plat_sysinfo_490 = 4: 28 28 22 28 10 16 16 16 notes_plat_sysinfo_495 = 5: 28 28 28 22 16 10 16 16 notes_plat_sysinfo_500 = 6: 22 28 28 28 16 16 10 16 notes_plat_sysinfo_505 = 
7: 28 22 28 28 16 16 16 10 notes_plat_sysinfo_510 = notes_plat_sysinfo_515 = From /proc/meminfo notes_plat_sysinfo_520 = MemTotal: 1052392348 kB notes_plat_sysinfo_525 = HugePages_Total: 0 notes_plat_sysinfo_530 = Hugepagesize: 2048 kB notes_plat_sysinfo_535 = notes_plat_sysinfo_540 = /usr/bin/lsb_release -d notes_plat_sysinfo_545 = Ubuntu 18.04.2 LTS notes_plat_sysinfo_550 = notes_plat_sysinfo_555 = From /etc/*release* /etc/*version* notes_plat_sysinfo_560 = debian_version: buster/sid notes_plat_sysinfo_565 = os-release: notes_plat_sysinfo_570 = NAME="Ubuntu" notes_plat_sysinfo_575 = VERSION="18.04.2 LTS (Bionic Beaver)" notes_plat_sysinfo_580 = ID=ubuntu notes_plat_sysinfo_585 = ID_LIKE=debian notes_plat_sysinfo_590 = PRETTY_NAME="Ubuntu 18.04.2 LTS" notes_plat_sysinfo_595 = VERSION_ID="18.04" notes_plat_sysinfo_600 = HOME_URL="https://www.ubuntu.com/" notes_plat_sysinfo_605 = SUPPORT_URL="https://help.ubuntu.com/" notes_plat_sysinfo_610 = notes_plat_sysinfo_615 = uname -a: notes_plat_sysinfo_620 = Linux r7425 4.15.0-45-generic #48-Ubuntu SMP Tue Jan 29 16:28:13 UTC 2019 x86_64 notes_plat_sysinfo_625 = x86_64 x86_64 GNU/Linux notes_plat_sysinfo_630 = notes_plat_sysinfo_635 = Kernel self-reported vulnerability status: notes_plat_sysinfo_640 = notes_plat_sysinfo_645 = CVE-2017-5754 (Meltdown): Not affected notes_plat_sysinfo_650 = CVE-2017-5753 (Spectre variant 1): Mitigation: __user pointer sanitization notes_plat_sysinfo_655 = CVE-2017-5715 (Spectre variant 2): Mitigation: Full AMD retpoline, IBPB notes_plat_sysinfo_660 = notes_plat_sysinfo_665 = run-level 5 Mar 4 16:41 notes_plat_sysinfo_670 = notes_plat_sysinfo_675 = SPEC is set to: /home/cpu2017-1.0.5 notes_plat_sysinfo_680 = Filesystem Type Size Used Avail Use% Mounted on notes_plat_sysinfo_685 = /dev/sda2 ext4 109G 19G 85G 19% / notes_plat_sysinfo_690 = notes_plat_sysinfo_695 = Additional information from dmidecode follows. WARNING: Use caution when you interpret notes_plat_sysinfo_700 = this section. 
The 'dmidecode' program reads system data which is "intended to allow notes_plat_sysinfo_705 = hardware to be accurately determined", but the intent may not be met, as there are notes_plat_sysinfo_710 = frequent changes to hardware, firmware, and the "DMTF SMBIOS" standard. notes_plat_sysinfo_715 = BIOS Dell Inc. 1.7.6 01/14/2019 notes_plat_sysinfo_720 = Memory: notes_plat_sysinfo_725 = 16x 80CE863280CE M386A8K40BM2-CTD 64 GB 4 rank 2666 notes_plat_sysinfo_730 = 16x Not Specified Not Specified notes_plat_sysinfo_735 = notes_plat_sysinfo_740 = (End of data from sysinfo program) hw_cpu_name = AMD EPYC 7551 hw_disk = 109 GB add more disk info here hw_memory001 = 1003.640 GB fixme: If using DDR4, the format is: hw_memory002 = 'N GB (N x N GB nRxn PC4-nnnnX-X)' hw_nchips = 2 prepared_by = root (is never output, only tags rawfile) sw_file = ext4 sw_os001 = Ubuntu 18.04.2 LTS sw_os002 = 4.15.0-45-generic sw_state = Run level 5 (add definition here) # End of settings added by sysinfo_program # The following section was added automatically, and contains settings that # did not appear in the original configuration file, but were added to the # raw file after the run. 544.nab_r: # The following setting was inserted automatically as a result of # post-run basepeak application. basepeak = 1 519.lbm_r: # The following setting was inserted automatically as a result of # post-run basepeak application. basepeak = 1 default: notes_000 =Environment variables set by runcpu before the start of the run: notes_005 =LD_LIBRARY_PATH = "/home/cpu2017-1.0.5/amd1812na_rate_revA_lib/64;/home/cpu2017-1.0.5/amd1812na_rate_revA_lib/32:" notes_010 =