# Invocation command line: # /lfs/lfs09/emelnich/SPECMPI2007/mpi2007-skl-qual-opa-avx512/bin/runspec --reportable --config intel_mpi2007.avx512.cfg --define fabric=shm:tmi --define ppn=40 --flagsurl EM64T_Intel160_flags.xml --size lref --iterations 3 -I --output_format=txt --ranks=1280 large # output_root was not used for this run ############################################################################ ##################################################################### # # Config file to run SPEC MPI2007 with Intel Software Toolchain # (Intel Compiler 16.0.2 and Intel MPI 5.1.3) # ##################################################################### env_vars = 1 basepeak = 1 reportable = 0 ignore_errors = 1 iterations = 3 makeflags = -j 8 tune = base #size = mref use_version_url = 1 version_url = version.txt use_submit_for_speed = 1 output_format = all FC = mpiifort CC = mpiicc CXX = mpiicpc ##################################################################### # Portability flags ##################################################################### 121.pop2=default=default=default: CPORTABILITY = -DSPEC_MPI_CASE_FLAG 126.lammps=default=default=default: CXXPORTABILITY = -DMPICH_IGNORE_CXX_SEEK 127.wrf2=default=default=default: CPORTABILITY = -DSPEC_MPI_CASE_FLAG -DSPEC_MPI_LINUX 129.tera_tf=default=default=default: srcalt=add_rank_support 130.socorro=default=default=default: srcalt=nullify_ptrs FPORTABILITY = -assume nostd_intent_in 143.dleslie=default=default=default: srcalt=integer_overflow %ifdef %{fabric} FABRIC=%{fabric} %else FABRIC=shm:tmi %endif ################################################################# # Optimization flags ################################################################# default=default=default=default: OPTIMIZE = -O3 -xCORE-AVX512 -no-prec-div submit = mpiexec.hydra -f \$LSB_DJOB_HOSTFILE -genv I_MPI_DEBUG=5 -genv I_MPI_DEBUG_OUTPUT=mpi_debug.out -genv I_MPI_FABRICS %{fabric} -genv I_MPI_PROVIDER psm2 -genv I_MPI_FALLBACK 0 
-genv I_MPI_COMPATIBILITY=3 -genv I_MPI_HYDRA_PMI_CONNECT=alltoall -n $ranks -ppn %{ppn} $command ################################################################## ## Notes ################################################################## test_sponsor = Intel Corporation license_num = 13 tester = Intel Corporation test_date = Jul-2017 hw_avail = Jul-2017 sw_avail = Sep-2017 prepared_by = Intel Corporation system_vendor = Intel Corporation node_fileserver_hw_cpu_char = Intel Turbo Boost Technology disabled node_fileserver_hw_disk = 2.1 TB node_fileserver_sw_state = Multi-User node_fileserver_sw_sharedfile = Lustre FS node_fileserver_sw_other = None node_fileserver_sw_os000= Redhat* Enterprise Linux* Server Release 7.2, node_fileserver_sw_os001 = Kernel 3.10.0-514.6.2.0.1.el7.x86_64.knl1 node_fileserver_sw_localfile = None node_fileserver_purpose = fileserver node_fileserver_order = 2 node_fileserver_label = Lustre FS node_fileserver_hw_vendor = Intel node_fileserver_hw_tcache = 20 MB I+D on chip per chip node_fileserver_hw_scache = 2 MB I+D on chip per chip node_fileserver_hw_pcache = 32 KB I + 32 KB D on chip per core node_fileserver_hw_other = None node_fileserver_hw_ocache = None node_fileserver_hw_nthreadspercore = 2 node_fileserver_hw_ncpuorder = 1-2 chips node_fileserver_hw_ncoresperchip = 8 node_fileserver_hw_ncores = 16 node_fileserver_hw_nchips = 2 node_fileserver_hw_model = Intel Server System R2224GZ4GC4 node_fileserver_hw_memory = 64 GB (8 x 8GB 1600MHz Reg ECC DDR3) node_fileserver_hw_cpu_name = Intel Xeon E5-2680 node_fileserver_hw_cpu_mhz = 2700 node_fileserver_hw_adapter_fs_slot_type = PCI-Express x16 node_fileserver_hw_adapter_fs_ports_used = 1 node_fileserver_hw_adapter_fs_interconnect = Intel Omni-Path Fabric Adapter 100 series node_fileserver_hw_adapter_fs_firmware = 0.9-46 node_fileserver_hw_adapter_fs_driver = IFS 10.4 node_fileserver_hw_adapter_fs_data_rate = 12.5 GB/s node_fileserver_hw_adapter_fs_count = 1 node_fileserver_count = 11 
node_compute_sw_state = Multi-User node_compute_sw_sharedfile = LFS node_compute_sw_other = IBM Platform LSF Standard 9.1.1.1 node_compute_sw_os000= Oracle Linux Server release 7.3, Kernel node_compute_sw_os001 = 3.10.0-514.6.2.0.1.el7.x86_64.knl1 node_compute_sw_localfile = Linux/xfs node_compute_purpose = compute node_compute_order = 1 node_compute_label = Endeavor Node node_compute_hw_vendor = Intel node_compute_hw_scache = 1 MB I+D on chip per core node_compute_hw_pcache = 32 KB I + 32 KB D on chip per core node_compute_hw_other = None node_compute_hw_ocache = None node_compute_hw_nthreadspercore = 2 node_compute_hw_ncpuorder = 1-2 chips node_compute_hw_ncoresperchip = 20 node_compute_hw_ncores = 40 node_compute_hw_nchips = 2 node_compute_hw_model000= Intel Server System R2208WFTZS node_compute_hw_model001 = (Intel Xeon Gold 6148, 2.4 GHz) node_compute_hw_memory = 192 GB (12 x 16 GB 2Rx4 DDR4-2666 ECC Registered) node_compute_hw_disk = 1 x 800 GB SSD (INTEL SSDSC2BA80) node_compute_hw_cpu_name = Intel Xeon Gold 6148 node_compute_hw_cpu_mhz = 2400 node_compute_hw_adapter_ib_slot_type = PCI-Express x16 node_compute_hw_adapter_ib_ports_used = 1 node_compute_hw_adapter_ib_model = Intel Omni-Path Edge Switch 100 series node_compute_hw_adapter_ib_interconnect = Intel Omni-Path Fabric Adapter 100 series node_compute_hw_adapter_ib_firmware = 0.9-46 node_compute_hw_adapter_ib_driver = IFS 10.4 node_compute_hw_adapter_ib_data_rate = 12.5 GB/s node_compute_hw_adapter_ib_count = 1 node_compute_hw_adapter_fs_slot_type = PCI-Express x16 node_compute_hw_adapter_fs_ports_used = 1 node_compute_hw_adapter_fs_interconnect = Intel Omni-Path Fabric Adapter 100 series node_compute_hw_adapter_fs_firmware = 0.9-46 node_compute_hw_adapter_fs_driver = IFS 10.4 node_compute_hw_adapter_fs_data_rate = 12.5 GB/s node_compute_hw_adapter_fs_count = 1 node_compute_count = 32 interconnect_ib_purpose = MPI traffic interconnect_ib_order = 1 interconnect_ib_label = Intel Omni-Path 
interconnect_ib_hw_vendor = Intel interconnect_ib_hw_topo = Fat tree interconnect_ib_hw_switch_3600_ports = 48 interconnect_ib_hw_switch_3600_model = Intel Omni-Path Edge Switch 100 series interconnect_ib_hw_switch_3600_firmware = 0.9-46 interconnect_ib_hw_switch_3600_data_rate = 12.5 GB/s interconnect_ib_hw_switch_3600_count = 24 interconnect_ib_hw_model = Intel Omni-Path 100 series interconnect_fs_purpose = Cluster File System interconnect_fs_order = 1 interconnect_fs_label = Intel Omni-Path interconnect_fs_hw_vendor = Intel Corporation interconnect_fs_hw_topo = Fat tree interconnect_fs_hw_switch_fs_ports = 48 interconnect_fs_hw_switch_fs_model = Intel Omni-Path Edge Switch 100 series interconnect_fs_hw_switch_fs_firmware = 0.9-46 interconnect_fs_hw_switch_fs_data_rate = 12.5 GB/s interconnect_fs_hw_switch_fs_count = 1 interconnect_fs_hw_model = Intel Omni-Path 100 series system_name000 = Intel Server System R2208WFTZS (Intel Xeon Gold 6148, 2.40 GHz) # ## ## Computation node info ## #node_compute_label = Endeavor Node #node_compute_order = 1 #node_compute_count = 4 #node_compute_purpose = compute #node_compute_hw_vendor = Intel #node_compute_hw_model = R2308WTTYS #node_compute_hw_cpu_name = Intel Xeon E5-2697 v4 #node_compute_hw_ncpuorder = 1-2 chips #node_compute_hw_nchips = 2 #node_compute_hw_ncores = 36 #node_compute_hw_ncoresperchip = 18 #node_compute_hw_nthreadspercore = 2 #node_compute_hw_cpu_char000 = Intel Turbo Boost Technology disabled, #node_compute_hw_cpu_char001 = 9.6 GT/s QPI, Hyper-Threading enabled #node_compute_hw_cpu_mhz = 2300 #node_compute_hw_pcache = 32 KB I + 32 KB D on chip per core #node_compute_hw_scache = 256 KB I+D on chip per core #node_compute_hw_tcache000= 45 MB I+D on chip per chip, 45 MB #node_compute_hw_tcache001 = shared / 18 cores #node_compute_hw_ocache = None #node_compute_hw_memory = 128 GB (8 x 16 GB 2Rx4 PC4-19200R-15, ECC) #node_compute_hw_disk = ATA INTEL SSDSA2BZ20, SSDSC2BB80 #node_compute_hw_other = None 
#node_compute_hw_adapter_ib_model = Mellanox MCX353A-FCAT ConnectX-3 #node_compute_hw_adapter_ib_count = 1 #node_compute_hw_adapter_ib_slot_type = PCIe x8 Gen3 #node_compute_hw_adapter_ib_data_rate = InfiniBand 4x FDR #node_compute_hw_adapter_ib_ports_used = 1 #node_compute_hw_adapter_ib_interconnect = InfiniBand #node_compute_hw_adapter_ib_driver = OFED 3.5-2-MIC-rc1 #node_compute_hw_adapter_ib_firmware = 2.31.5050 #node_compute_hw_adapter_fs_model000= Intel (ESB2) 82575EB Dual-Port Gigabit #node_compute_hw_adapter_fs_model001 = Ethernet Controller #node_compute_hw_adapter_fs_count = 1 #node_compute_hw_adapter_fs_slot_type = PCI-Express x8 #node_compute_hw_adapter_fs_data_rate = 1Gbps Ethernet #node_compute_hw_adapter_fs_ports_used = 2 #node_compute_hw_adapter_fs_interconnect = Ethernet #node_compute_hw_adapter_fs_driver = e1000 #node_compute_hw_adapter_fs_firmware = None #node_compute_sw_os = Oracle Linux Server 6.7, kernel 3.10.0-229 #node_compute_sw_localfile = Linux/xfs #node_compute_sw_sharedfile = NFS #node_compute_sw_state = Multi-User #node_compute_sw_other = IBM Platform LSF Standard 9.1.1.1 # ## ## Fileserver node info ## #node_fileserver_label = NFS #node_fileserver_order = 2 #node_fileserver_count = 1 #node_fileserver_purpose = fileserver #node_fileserver_hw_vendor = Intel #node_fileserver_hw_model = S7000FC4UR #node_fileserver_hw_cpu_name = Intel Xeon CPU #node_fileserver_hw_ncpuorder = 1-4 chips #node_fileserver_hw_nchips = 4 #node_fileserver_hw_ncores = 16 #node_fileserver_hw_ncoresperchip = 4 #node_fileserver_hw_nthreadspercore = 2 #node_fileserver_hw_cpu_char = -- #node_fileserver_hw_cpu_mhz = 2926 #node_fileserver_hw_pcache = 32 KB I + 32 KB D on chip per core #node_fileserver_hw_scache = 8 MB I+D on chip per chip, 4 MB shared / 2 cores #node_fileserver_hw_tcache = None #node_fileserver_hw_ocache = None #node_fileserver_hw_memory = 64 GB #node_fileserver_hw_disk = 8 disks, 500GB/disk, 2.7TB total #node_fileserver_hw_other = None 
#node_fileserver_hw_adapter_fs_model000 = Intel 82563GB Dual-Port Gigabit #node_fileserver_hw_adapter_fs_model001 = Ethernet Controller #node_fileserver_hw_adapter_fs_count = 1 #node_fileserver_hw_adapter_fs_slot_type = PCI-Express x8 #node_fileserver_hw_adapter_fs_data_rate = 1Gbps Ethernet #node_fileserver_hw_adapter_fs_ports_used = 1 #node_fileserver_hw_adapter_fs_interconnect = Ethernet #node_fileserver_hw_adapter_fs_driver = e1000e #node_fileserver_hw_adapter_fs_firmware = N/A #node_fileserver_sw_os = RedHat EL 5 Update 4 #node_fileserver_sw_localfile = None #node_fileserver_sw_sharedfile = NFS #node_fileserver_sw_state = Multi-User #node_fileserver_sw_other = None # ## ## IB interconnect ## #interconnect_ib_label = IB Switch #interconnect_ib_order = 1 #interconnect_ib_purpose = MPI traffic #interconnect_ib_hw_vendor = Mellanox #interconnect_ib_hw_model = Mellanox MSX6025F-1BFR #interconnect_ib_hw_switch_3600_model = Mellanox MSX6025F-1BFR #interconnect_ib_hw_switch_3600_count = 46 #interconnect_ib_hw_switch_3600_ports = 36 #interconnect_ib_hw_topo = Fat tree #interconnect_ib_hw_switch_3600_data_rate = InfiniBand 4x FDR #interconnect_ib_hw_switch_3600_firmware = 9.2.8000 # ## ## Cluster file system interconnect ## #interconnect_fs_label = Gigabit Ethernet #interconnect_fs_order = 2 #interconnect_fs_purpose = Cluster File System #interconnect_fs_hw_vendor = Force10 Networks, Cisco Systems #interconnect_fs_hw_model = Force10 S50N, Force10 C300, Cisco WS-C4948E-F #interconnect_fs_hw_switch_fs_model = Force10 S50N, Force10 C300, Cisco WS-C4948E-F #interconnect_fs_hw_switch_fs_count = 13 #interconnect_fs_hw_switch_fs_ports = 48 #interconnect_fs_hw_topo = Star #interconnect_fs_hw_switch_fs_data_rate = 1Gbps Ethernet, 10Gbps Ethernet #interconnect_fs_hw_switch_fs_firmware = 8.3.2.0, 12.2(54)WO # ## ## Hardware ## #system_class = Homogeneous #hw_total_nodes = 16 #hw_total_chips = 32 #hw_total_cores = 576 #hw_total_threads = 576 #hw_total_memory = 2048 GB #max_ranks = 
576 #max_peak_ranks = 576 # ## ## Software ## #sw_c_compiler000= Intel C++ Composer XE 2013 for Linux #sw_c_compiler001 = Version 16.0.2.181 Build 20160204 #sw_cxx_compiler000= Intel C++ Composer XE 2013 for Linux #sw_cxx_compiler001 = Version 16.0.2.181 Build 20160204 #sw_f_compiler000= Intel Fortran Composer XE 2013 for Linux #sw_f_compiler001 = Version 16.0.2.181 Build 20160204 #sw_auto_parallel = #sw_base_ptrsize = 64-bit #sw_peak_ptrsize = 64-bit #sw_mpi_library = Intel MPI Library 5.1.3.181 for Linux #sw_mpi_other = None #sw_preprocessors = No #sw_other = None # ## ## General notes ## #notes_000 = MPI startup command: #notes_005 = mpiexec.hydra command was used to start MPI jobs. #notes_010 = #notes_015 = BIOS settings: #notes_020 = Intel Hyper-Threading Technology (SMT): Enabled (default is Enabled) #notes_025 = Intel Turbo Boost Technology (Turbo) : Disabled (default is Enabled) #notes_030 = #notes_035 = RAM configuration: #notes_040 = Compute nodes have 4x8-GB RDIMM on each memory channel. #notes_045 = #notes_050 = Network: #notes_055 = Forty six 36-port switches: 18 core switches and 28 leaf switches. #notes_060 = Each leaf has one link to each core. Remaining 18 ports on 25 of 28 leafs #notes_065 = are used for compute nodes. On the remaining 3 leafs the ports are used #notes_070 = for FS nodes and other peripherals. #notes_075 = #notes_080 = Job placement: #notes_085 = Each MPI job was assigned to a topologically compact set of nodes, i.e. #notes_090 = the minimal needed number of leaf switches was used for each job: 1 switch #notes_095 = for 36/72/144/288/576 ranks, 2 switches for 1152 ranks, 4 switches for 2304. #notes_100 = #notes_105 = IBM Platform LSF was used for job submission. It has no impact on performance. #notes_110 = Information can be found at: http://www.ibm.com # The following section was added automatically, and contains settings that # did not appear in the original configuration file, but were added to the # raw file after the run. 
default: flagsurl000 = http://www.spec.org/mpi2007/flags/EM64T_Intel140_flags.20170822.xml sw_base_ptrsize = 64-bit sw_peak_ptrsize = Not Applicable sw_other = None sw_mpi_library = Intel MPI Library 17u4 for Linux sw_mpi_other = None sw_preprocessors = No notes_000 = MPI startup command: notes_005 = mpiexec.hydra command was used to start MPI jobs. notes_010 = Software environment: notes_015 = export I_MPI_COMPATIBILITY=3 notes_020 = export I_MPI_FABRICS=shm:tmi notes_025 = export I_MPI_HYDRA_PMI_CONNECT=alltoall notes_030 = Network: notes_035 = Endeavor Omni-Path fabric consists of 48-port switches: 24 core switches notes_040 = connected to each leaf of the rack switch. notes_045 = Job placement: notes_050 = Each MPI job was assigned to a topologically compact set of nodes, i.e. notes_055 = the minimal needed number of leaf switches was used for each job: 1 switch notes_060 = for 40/80/160/320/640 ranks, 2 switches for 1280 and 1980 ranks. notes_065 = IBM Platform LSF was used for job submission. It has no impact on performance. notes_070 = Information can be found at: http://www.ibm.com node_compute_hw_adapter_fs_model = Intel Omni-Path Fabric Adapter 100 series node_compute_hw_cpu_char000 = Intel Turbo Boost Technology up to 3.7 GHz node_compute_hw_tcache000 = 27.5 MB I+D on chip per chip node_fileserver_hw_adapter_fs_model000 = Intel Omni-Path Fabric Adapter 100 series sw_c_compiler000 = Intel C++ Composer XE 2017 for Linux sw_c_compiler001 = Version 17.0.4.196 Build 20170411 sw_cxx_compiler000 = Intel C++ Composer XE 2017 for Linux sw_cxx_compiler001 = Version 17.0.4.196 Build 20170411 sw_f_compiler000 = Intel Fortran Composer XE 2017 for Linux sw_f_compiler001 = Version 17.0.4.196 Build 20170411 system_class = Homogeneous