# Invocation command line:
# /home/opt/app/hpc2021/bin/harness/runhpc --reportable --define EXPID=submission/7.29/small.acc_rank_7 -c xfusion.acc.base-peak.cfg -T base,peak --define model=acc --pmodel ACC --define RANKS=7 --size ref --iterations 2 --flagsurl ./config/flags/nvhpc_flags.xml --output-format=all small
# output_root was not used for this run
############################################################################
######################################################################
# Example configuration file for the NVIDIA HPC SDK Compilers
#
# Before using this config file, copy it to a new config (such as nvhpc.cfg) and edit as needed
#
# Defines: "model" => "mpi", "acc", "omp", "tgt", "tgtgpu"  default "mpi"
#          "label" => ext base label, default "nv"
#
# MPI-only Command:
# runhpc -c nvhpc --reportable -T base --define model=mpi --ranks=40 tiny
#
# OpenACC offload to GPU Command:
# runhpc -c nvhpc --reportable -T base --define model=acc --ranks=4 tiny
# Add "--define ucx" if using OpenMPI 4 with UCX support.
#
# OpenACC offload to Multicore CPU Command:
# runhpc -c nvhpc --reportable -T base --define model=accmc --ranks=4 tiny
#
# OpenMP Command:
# runhpc -c nvhpc --reportable -T base --define model=omp --ranks=1 --threads=40 tiny
#
# OpenMP Target Offload to Host Command:
# runhpc -c nvhpc --reportable -T base --define model=tgt --ranks=1 --threads=40 tiny
#
# OpenMP Target Offload to GPU Command:
# runhpc -c nvhpc --reportable -T base --define model=tgtgpu --ranks=4 tiny
#
#######################################################################

expid=
%ifdef %{EXPID}
expid=%{EXPID}
%endif

allow_label_override = yes  # label controls srcalt: simd - for simd

build_in_build_dir=0        # build in run dir

basepeak=0

%ifndef %{label}            # IF label is not set use xfusion
%   define label xfusion
%endif

%ifndef %{model}            # IF model is not set use acc
%   define pmodel ACC
%endif

teeout = yes
makeflags=-j

# Tester Information
license_num  = 6488
test_sponsor = xFusion
tester       = xFusion

######################################################
# SUT Section
######################################################
#include: Example_SUT.inc
#  ----- Begin inclusion of 'Example_SUT.inc'
############################################################################
######################################################
# Example configuration information for a
# system under test (SUT) Section
######################################################

# General SUT info
system_vendor = xFusion
system_name   = FusionServer G5500 V6 (Intel Xeon Platinum 8380, Nvidia A100-PCIE-80G)
node_compute_sw_accel_driver = NVIDIA UNIX x86_64 Kernel Module 515.43.04
hw_avail      = Apr-2021
sw_avail      = May-2022
prepared_by   = xFusion

# Computation node info
# [Node_Description: Hardware]
node_compute_syslbl = FusionServer G5500 V6
node_compute_order = 1
node_compute_count = 1
node_compute_purpose = compute
node_compute_hw_vendor = xFusion
node_compute_hw_model = FusionServer G5500 V6
node_compute_hw_cpu_name = Intel Xeon Platinum 8380
node_compute_hw_ncpuorder = 1, 2 chips
node_compute_hw_nchips = 2
node_compute_hw_ncores = 80
node_compute_hw_ncoresperchip = 40
node_compute_hw_nthreadspercore = 1
node_compute_hw_cpu_char = Intel Turbo Boost Technology up to 3.4 GHz
node_compute_hw_cpu_mhz = 2300
node_compute_hw_pcache = 32 KB I + 48 KB D on chip per core
node_compute_hw_scache = 1.25 MB I+D on chip per core
node_compute_hw_tcache = 60 MB I+D on chip per chip
node_compute_hw_ocache = None
node_compute_hw_memory = 1 TB (16 x 64 GB 2Rx4 PC4-3200A-R)
node_compute_hw_disk = 1 x 3.2 TB NVMe SSD
node_compute_hw_other = None

#[Node_Description: Accelerator]
node_compute_hw_accel_model = Tesla A100 PCIe 80GB
node_compute_hw_accel_count = 8
node_compute_hw_accel_vendor = Nvidia Corporation
node_compute_hw_accel_type = GPU
node_compute_hw_accel_connect = PCIe Gen4 x16
node_compute_hw_accel_ecc = Yes
node_compute_hw_accel_desc = Nvidia Tesla A100 PCIe 80GB

#[Node_Description: Software]
node_compute_hw_adapter_fs_model = None
node_compute_hw_adapter_fs_count = 0
node_compute_hw_adapter_fs_slot_type = None
node_compute_hw_adapter_fs_data_rate = None
node_compute_hw_adapter_fs_ports_used = 0
node_compute_hw_adapter_fs_interconnect = None
node_compute_hw_adapter_fs_driver = None
node_compute_hw_adapter_fs_firmware = None
node_compute_sw_os000 = CentOS Linux release 8.2.2004
node_compute_sw_os001 = 4.18.0-193.el8.x86_64
node_compute_sw_localfile = xfs
node_compute_sw_sharedfile = None
node_compute_sw_state = Multi-user, run level 3
node_compute_sw_other = None

#[Fileserver]

#[Interconnect]
interconnect_fs_syslbl = None
interconnect_fs_order = 0
interconnect_fs_purpose = None
interconnect_fs_hw_vendor = None
interconnect_fs_hw_model = None
interconnect_fs_hw_switch_fs_model = None
interconnect_fs_hw_switch_fs_count = 0
interconnect_fs_hw_switch_fs_ports = 0
interconnect_fs_hw_topo = None
interconnect_fs_hw_switch_fs_data_rate = None
interconnect_fs_hw_switch_fs_firmware = None

#######################################################################
# End of SUT section
# If this config file were to be applied to several SUTs, edits would
# be needed only ABOVE this point.
######################################################################
#  ---- End inclusion of '/home/HPC2021F1.0.1/config/Example_SUT.inc'

######################################################################
# The header section of the config file.  Must appear
# before any instances of "section markers" (see below)
#
# ext  = how the binaries you generated will be identified
# tune = specify "base" or "peak" or "all"
label = %{label}_%{model}
tune = all
output_format = all
use_submit_for_speed = 1

# Compiler Settings
default:
CC  = mpicc
CXX = mpicxx
FC  = mpif90

system_class   = SMP
sw_compiler    = Nvidia HPC SDK 22.5
sw_mpi_library = OpenMPI Version 4.0.5, included with NVHPC SDK

# Compiler Version Flags
CC_VERSION_OPTION  = -V
CXX_VERSION_OPTION = -V
FC_VERSION_OPTION  = -V

%ifdef %{ucx}
# if using OpenMPI with UCX support, these settings are needed with use of CUDA Aware MPI
# without these flags, LBM is known to hang when using OpenACC and OpenMP Target to GPUs
preENV_UCX_MEMTYPE_CACHE=n
preENV_UCX_TLS=self,shm,cuda_copy
%endif

MPIRUN_OPTS = --allow-run-as-root --bind-to none
submit = mpirun --allow-run-as-root -x UCX_MEMTYPE_CACHE=n -np $ranks perl $[top]/bind.pl $command

# Optimization
default:
pmodel=ACC

default=base=default:
ranks = %{RANKS}
OPTIMIZE = -w -fast -acc=gpu -Mfprelaxed -Mnouniform -Mstack_arrays -DSPEC_ACCEL_AWARE_MPI
CXXPORTABILITY = --c++17

505.lbm_t=peak=default:
ranks = %{RANKS}
OPTIMIZE = -w -fast -acc=gpu -O3 -Mfprelaxed -Mnouniform -DSPEC_ACCEL_AWARE_MPI

513.soma_t=peak=default:
basepeak=1

518.tealeaf_t=peak=default:
ranks = %{RANKS}
OPTIMIZE = -w -fast -acc=gpu -Msafeptr -DSPEC_ACCEL_AWARE_MPI

519.clvleaf_t=peak=default:
basepeak=1

521.miniswp_t=peak=default:
ranks = %{RANKS}
OPTIMIZE = -w -fast -acc=gpu -gpu=pinned # -DSPEC_ACCEL_AWARE_MPI

528.pot3d_t=peak=default:
basepeak=1

532.sph_exa_t=peak=default:
ranks = %{RANKS}
OPTIMIZE = -w -fast -acc=gpu -O3 -Mfprelaxed -Mnouniform -Mstack_arrays -static-nvidia -DSPEC_ACCEL_AWARE_MPI

534.hpgmgfv_t=peak=default:
ranks = %{RANKS}
OPTIMIZE = -w -fast -acc=gpu -static-nvidia -DSPEC_ACCEL_AWARE_MPI

535.weather_t=peak=default:
ranks = %{RANKS}
OPTIMIZE = -w -fast -acc=gpu -O3 -Mfprelaxed -Mnouniform -Mstack_arrays -static-nvidia -DSPEC_ACCEL_AWARE_MPI

605.lbm_s=peak=default:
ranks = %{RANKS}
OPTIMIZE = -w -fast -acc=gpu -O3 -Mfprelaxed -Mnouniform -DSPEC_ACCEL_AWARE_MPI

613.soma_s=peak=default:
basepeak=1

618.tealeaf_s=peak=default:
ranks = %{RANKS}
OPTIMIZE = -w -fast -acc=gpu -Msafeptr -DSPEC_ACCEL_AWARE_MPI

619.clvleaf_s=peak=default:
basepeak=1

621.miniswp_s=peak=default:
ranks = %{RANKS}
OPTIMIZE = -w -fast -acc=gpu -gpu=pinned # -DSPEC_ACCEL_AWARE_MPI

628.pot3d_s=peak=default:
basepeak=1

632.sph_exa_s=peak=default:
ranks = %{RANKS}
OPTIMIZE = -w -fast -acc=gpu -O3 -Mfprelaxed -Mnouniform -Mstack_arrays -static-nvidia -DSPEC_ACCEL_AWARE_MPI

634.hpgmgfv_s=peak=default:
ranks = %{RANKS}
OPTIMIZE = -w -fast -acc=gpu -static-nvidia -DSPEC_ACCEL_AWARE_MPI

635.weather_s=peak=default:
ranks = %{RANKS}
OPTIMIZE = -w -fast -acc=gpu -O3 -Mfprelaxed -Mnouniform -Mstack_arrays -static-nvidia -DSPEC_ACCEL_AWARE_MPI

# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
flagsurl000 = http://www.spec.org/hpc2021/flags/nv2021_flags_v1.0.3.2022-08-24.xml
notes_submit_000 = MPIRUN_OPTS = --allow-run-as-root --bind-to none
notes_submit_005 = submit = mpirun --allow-run-as-root -x UCX_MEMTYPE_CACHE=n -np $ranks perl $[top]/bind.pl $command
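
# Note: the bind.pl wrapper invoked by the submit line above is part of the
# submission and is not reproduced in this config file. As a rough sketch only
# (not the actual script), a rank-to-accelerator binding wrapper of this kind
# typically reads the MPI local rank, pins each rank to one GPU, and then
# exec's the benchmark command, e.g. in shell form:
#
#   #!/bin/bash
#   # Map each local MPI rank to one of the node's GPUs (illustrative only).
#   local_rank=${OMPI_COMM_WORLD_LOCAL_RANK:-0}
#   ngpus=$(nvidia-smi -L | wc -l)
#   export CUDA_VISIBLE_DEVICES=$(( local_rank % ngpus ))
#   exec "$@"
#
# OMPI_COMM_WORLD_LOCAL_RANK is set by OpenMPI for each launched rank, and
# CUDA_VISIBLE_DEVICES restricts that rank to a single device. The actual
# bind.pl may also handle CPU/NUMA placement, which "--bind-to none" in
# MPIRUN_OPTS leaves to the wrapper.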