# Invocation command line:
# /home/rlieberm/hpc2021-1.0.2/bin/harness/runhpc -c amdgpu_clang.cfg -l --reportable -n 3 -T base --define model=mpi --threads=1 --ranks=512 -i ref tiny --rebuild
# output_root was not used for this run
############################################################################
######################################################################
# Example configuration file for AOMP LLVM/Clang compiler.
#
# Defines: "model"   => "mpi", "omp", "omp_target", "omp_host_target"
#                       default "mpi"
#          "label"   => ext base label,
#                       default "clang"
#          "gputype" => "host", "x86", "gfx900", "gfx906", "gfx908"
#                       default "host"
#
# Example runhpc commands
#
# MPI-only Command:
# runhpc -c amdgpu_clang.cfg -I -l -n 1 -T base --define model=mpi --threads=1 --ranks=16 -i test 628
#
# MPI+OpenMP Command:
# runhpc -c amdgpu_clang.cfg -I -l -n 1 -T base --define model=omp --threads=16 --ranks=1 -i test 628
#
# MPI+OpenMP target offload Command:
# runhpc -c amdgpu_clang.cfg -I -l -n 1 -T base --define model=omp_target --define gputype=gfx908 --threads=1 --ranks=4 -i test 628
#
# MPI+OpenMP target offload to host Command:
# runhpc -c amdgpu_clang.cfg -I -l -n 1 -T base --define model=omp_host_target --define gputype=x86 --threads=16 --ranks=1 -i test 628
#
#######################################################################

%ifndef %{label}                # IF label is not set use clang
%   define label clang
%endif

%ifndef %{model}                # IF model is not set use mpi
%   define model mpi
%endif

%ifndef %{gputype}
%   define gputype host
%endif

######################################################################
# The header section of the config file.  Must appear
# before any instances of "section markers" (see below)
#
# ext  = how the binaries you generated will be identified
# tune = specify "base" or "peak" or "all"
label                = %{label}_%{model}_%{gputype}
tune                 = base
output_format        = text
use_submit_for_speed = 1

makeflags = -j 16

#strict_rundir_verify=0

#include: desc_amdgpu.inc
# ----- Begin inclusion of 'desc_amdgpu.inc'
############################################################################
# Tester Information
license_num  = 0017
test_sponsor = Advanced Micro Devices
tester       = Advanced Micro Devices

######################################################
# SUT Section
######################################################
# General SUT info
system_vendor = Advanced Micro Devices
system_name   = Dallas Milan Cluster: Gigabyte H262-Z63 (AMD EPYC 7763)
hw_avail      = Apr-2021
sw_avail      = Aug-2021

# Computation node info
# [Node_Description: Hardware]
node_compute_syslbl             = Gigabyte H262-Z63
node_compute_order              = 1
node_compute_count              = 4
node_compute_purpose            = compute
node_compute_hw_vendor          = Gigabyte
node_compute_hw_model           = Gigabyte H262-Z63
node_compute_hw_cpu_name        = AMD EPYC 7763
node_compute_hw_ncpuorder       = 1,2 chips
node_compute_hw_nchips          = 2
node_compute_hw_ncores          = 128
node_compute_hw_ncoresperchip   = 64
node_compute_hw_nthreadspercore = 1
node_compute_hw_cpu_char        = Max Boost Clock disabled
node_compute_hw_cpu_mhz         = 2450
node_compute_hw_pcache          = 32 KB I + 32 KB D on chip per core
node_compute_hw_scache          = 512 KB I+D on chip per core
node_compute_hw_tcache000       = 256 MB I+D on chip per chip
node_compute_hw_tcache001       = 32 MB shared / 8 cores
node_compute_hw_ocache          = None
node_compute_hw_memory          = 512 GB (16 x 32 GB 2Rx4 PC4-3200AA-R)
node_compute_hw_disk            = Intel SSD 520 Series 240GB, 2.5in SATA 6Gb/s
node_compute_hw_other           = None

#[Node_Description: Accelerator]

#[Node_Description: Software]
node_compute_hw_adapter_fs_model000     = ConnectX-6 Dual port, model number:
node_compute_hw_adapter_fs_model001     = MCX653106A
node_compute_hw_adapter_fs_count        = 0
node_compute_hw_adapter_fs_slot_type    = None
node_compute_hw_adapter_fs_data_rate    = None
node_compute_hw_adapter_fs_ports_used   = 0
node_compute_hw_adapter_fs_interconnect = None
node_compute_hw_adapter_fs_driver       = None
node_compute_hw_adapter_fs_firmware     = None
node_compute_sw_os000      = CentOS Linux release 8.3.2011
node_compute_sw_os001      = Kernel 4.18.0-193 [native to CentOS 8.3]
node_compute_sw_localfile  = xfs
node_compute_sw_sharedfile = NFS share
node_compute_sw_state      = Multi-user, run level 3
node_compute_sw_other      = None

#[Fileserver]

#[Interconnect]
interconnect_fs_syslbl                 = Mellanox
interconnect_fs_order                  = 0
interconnect_fs_purpose                = MPI Traffic
interconnect_fs_hw_vendor              = Mellanox
interconnect_fs_hw_model000            = NVIDIA MCX653106A-EFAT ConnectX-6 VPI Adapter
interconnect_fs_hw_model001            = Card HDR100/EDR/100GbE
interconnect_fs_hw_switch_fs_model000  = MLNX_OFED_LINUX-5.2.1.0 (OFED-5.2.1.0)
interconnect_fs_hw_switch_fs_model001  = Switch: 27_2008_2202-MQM8790-HS2X_Ax
interconnect_fs_hw_switch_fs_count     = 2
interconnect_fs_hw_switch_fs_ports     = 40
interconnect_fs_hw_topo                = non-blocking fat tree
interconnect_fs_hw_switch_fs_data_rate = InfiniBand HDR 100 Gb/s
interconnect_fs_hw_switch_fs_firmware  = HCA: 20.29.1016

#[Software]
sw_compiler001 = C/C++/Fortran: Version 13.0-0
sw_compiler002 = MLSE ROCm 4.3.0 Compilers
sw_compiler003 = Compiler available by installing ROCm 4.3 or
sw_compiler004 = getting
sw_compiler005 = https://repo.radeon.com/rocm/apt/4.3/pool/main/l/llvm-amdgpu/llvm-amdgpu_13.0.0.21295.40300_amd64.deb
sw_compiler006 = https://repo.radeon.com/rocm/apt/4.3/pool/main/o/openmp-extras4.3.0/openmp-extras4.3.0_12.43.0.40300-52_amd64.deb
sw_mpi_library = OpenMPI Version 4.0.5
sw_mpi_other   = None
system_class   = Homogenous Cluster
sw_other       = None

#[General notes]

#######################################################################
# End of SUT section
# If this config file were to be applied to several SUTs, edits would
# be needed only ABOVE this point.
######################################################################
# ---- End inclusion of '/home/rlieberm/hpc2021-1.0.2/config/desc_amdgpu.inc'

flagsurl000 = http://www.spec.org/hpc2021/flags/amd2021_flags.xml

default:
CC  = mpicc
CXX = mpicxx
FC  = mpif90
sw_compiler000 = LLVM/Clang 13.0

CC_VERSION_OPTION  = --version
CXX_VERSION_OPTION = --version
FC_VERSION_OPTION  = --version

#preENV_OMP_PROC_BIND=true
MPIRUN_OPTS = --bind-to none   #socket # core
submit = mpirun ${MPIRUN_OPTS} -np $ranks $command

#######################################################################
default=base=default:
OPTIMIZE         = -O3
COPTIMIZE        =
CXXOPTIMIZE      =
PORTABILITY      = -I${AOMP}/include
PORTABILITY_LIBS = -lm
FPPPORTABILITY  += -DSPEC_USE_MPIFH -I${MPI}/include/

%if %{model} eq 'mpi'
pmodel=MPI
MPIRUN_OPTS += --mca topo basic
submit = mpirun ${MPIRUN_OPTS} -np $ranks $command
%endif

%if %{model} eq 'omp'
pmodel=OMP
OPTIMIZE += -fopenmp
MPIRUN_OPTS  = --bind-to core
MPIRUN_OPTS += --map-by ppr:1:numa:pe=16   # 16 cores per numa
#MPIRUN_OPTS += --map-by ppr:1:numa:pe=64  # 64 cores per numa
submit = mpirun ${MPIRUN_OPTS} -np $ranks $command
%endif

%if %{model} eq 'omp_target'
pmodel=TGT
OPTIMIZE += -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=%{gputype}
513.soma_t,613.soma_s:
PORTABILITY += -DSPEC_NO_VAR_ARRAY_REDUCE
%endif

%if %{model} eq 'omp_host_target'
pmodel=TGT
OPTIMIZE += -fopenmp -fopenmp-targets=x86_64-pc-linux-gnu -Xopenmp-target=x86_64-pc-linux-gnu -mcpu=%{gputype}
521.miniswp_t,621.miniswp_s:
PORTABILITY += -DSPEC_USE_HOST_THREADS
%endif

# HIP is not a supported hpc2021 model; it is included here only for experimentation.
%if %{model} eq 'hip'
OPTIMIZE += -DSPEC_HIP -DSPEC_CUDA --amdgpu-target=%{gputype}
%endif

# No peak flags set, so make peak use the same flags as base
default=peak=default:
basepeak=1

# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
notes_submit_000 = MPI startup command:
notes_submit_005 =   mpirun command was used to start MPI jobs.
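
# Illustration: with the macro defaults above and the reportable invocation at
# the top of this file (--define model=mpi --ranks=512), the submit line should
# expand to roughly the command sketched below for each benchmark. The binary
# name is a placeholder, not a value taken from this result.
#
#   mpirun --bind-to none --mca topo basic -np 512 <benchmark_binary>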
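
# Illustration: for a run with --define model=omp_target --define gputype=gfx908,
# the Fortran compile lines produced from this config should look roughly like
# the sketch below. The source file name is a placeholder, and per-benchmark
# portability flags may differ.
#
#   mpif90 -O3 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
#          -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 \
#          -I${AOMP}/include -DSPEC_USE_MPIFH -I${MPI}/include/ -c <source>.F90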