# Invocation command line:
# /home/HPC2021v1.1.7/bin/harness/runhpc --reportable --config=6Nsnew_icx_peak.cfg --tune=base,peak --pmodel=OMP --define model=omp --define THREADS=8 --define RANKS=144 --size=ref --define NRNK=24 --iterations=3 tiny small
# output_root was not used for this run
############################################################################

build_in_build_dir=0      # build in run dir
strict_rundir_verify = 1

%ifndef %{label}          # IF label is not set, use the default below
%   define label icx_I204_IMPI204
%endif

%ifndef %{model}          # IF model is not set, use mpi
%   define model mpi
%endif

teeout = yes
makeflags=-j 40

flagsurl000=http://www.spec.org/hpc2021/flags/Intel_compiler_flags.2022-11-10.xml

######################################################################
# The header section of the config file.  Must appear
# before any instances of "section markers" (see below)
#
# ext = how the binaries you generated will be identified
# tune = specify "base" or "peak" or "all"
label                = %{label}_%{model}
tune                 = base
output_format        = text
use_submit_for_speed = 1

default:
AR      = ar
ARFLAGS = cr
CC      = mpiicc -cc=icx
CXX     = mpiicpc -cxx=icx
FC      = mpiifort -fc=ifx

system_class   = Homogeneous Cluster
sw_compiler    = Intel oneAPI Compiler 2022.1.0
sw_mpi_library = Intel MPI Library for Linux OS, Build 20220227
test_sponsor   = Lenovo Global Technology
license_num    = 28
tester         = Lenovo Global Technology
hw_avail       = Nov-2022
sw_avail       = Nov-2022
prepared_by    = Lenovo Global Technology
system_vendor  = Lenovo Global Technology
system_name    = ThinkSystem SR665 V3 (AMD EPYC 9654)

node_compute_syslbl              = ThinkSystem SR665 V3
node_compute_sw_state            = Multi-user, run level 3
node_compute_sw_sharedfile       = None
node_compute_sw_other            = None
node_compute_sw_localfile        = xfs
node_compute_purpose             = Compute
node_compute_order               = 1
node_compute_hw_vendor           = Lenovo Global Technology
node_compute_hw_scache           = 1 MB I+D on chip per core
node_compute_hw_pcache           = 32 KB I + 32 KB D on chip per core
node_compute_hw_tcache000        = 384 MB I+D on chip per chip
node_compute_hw_tcache001        = 32 MB shared / 8 cores
node_compute_hw_other            = None
node_compute_hw_ocache           = None
node_compute_hw_nthreadspercore  = 2
node_compute_hw_ncpuorder        = 1,2 chips
node_compute_hw_ncoresperchip    = 96
node_compute_hw_ncores           = 192
node_compute_hw_nchips           = 2
node_compute_hw_model            = ThinkSystem SR665 V3
node_compute_hw_memory           = 1536 GB (24 x 64 GB 2Rx4 PC5-4800B-R)
node_compute_hw_disk             = 1x ThinkSystem 2.5" 5300 480GB SSD
node_compute_hw_cpu_name         = AMD EPYC 9654
node_compute_hw_cpu_mhz          = 2400
node_compute_hw_cpu_char         = Max Boost Clock up to 3.7 GHz
node_compute_hw_adapter_fs_slot_type    = PCI-Express 4.0 x16
node_compute_hw_adapter_fs_ports_used   = 1
node_compute_hw_adapter_fs_model        = Nvidia Mellanox ConnectX-6 HDR
node_compute_hw_adapter_fs_interconnect = ConnectX-6 HDR
node_compute_hw_adapter_fs_firmware     = 20.28.1002
node_compute_hw_adapter_fs_driver       = 5.7-1.0.2
node_compute_hw_adapter_fs_data_rate    = 200 Gb/s
node_compute_hw_adapter_fs_count        = 1
node_compute_count               = 6

interconnect_fs_syslbl                 = Nvidia Mellanox ConnectX-6 HDR
interconnect_fs_purpose                = MPI Traffic, NFS Access
interconnect_fs_order                  = 0
interconnect_fs_label                  = Nvidia Mellanox ConnectX-6 HDR
interconnect_fs_hw_vendor              = Nvidia
interconnect_fs_hw_topo                = Mesh
interconnect_fs_hw_switch_fs_ports     = 40
interconnect_fs_hw_switch_fs_model     = QM8700
interconnect_fs_hw_switch_fs_firmware  = 3.9.0606
interconnect_fs_hw_switch_fs_data_rate = 200 Gb/s
interconnect_fs_hw_switch_fs_count     = 1
interconnect_fs_hw_model               = Nvidia Mellanox ConnectX-6 HDR

CC_VERSION_OPTION  = -V -c
CXX_VERSION_OPTION = -V -c
FC_VERSION_OPTION  = -V -c

%if %{VEC} eq 'novec'
vec=-no-vec
%elif %{VEC} eq 'avx2'
vec=-xCORE-AVX2
%elif %{VEC} eq 'avx512'
vec=-xCORE-AVX512
%elif %{VEC} eq 'avx512_high'
vec=-xCORE-AVX512 -qopt-zmm-usage=high
%else
vec=-xCORE-AVX512
%endif

default=base,peak=default:
OPTIMIZE    = -Ofast -mprefer-vector-width=512 -march=core-avx2 -ipo #-no-prec-div
COPTIMIZE   = -ansi-alias
CXXOPTIMIZE = -ansi-alias
FOPTIMIZE   = -nostandard-realloc-lhs -align array64byte
PORTABILITY = -lstdc++

%if %{model} eq 'mpi'
submit = mpirun -hosts 192.168.99.15,192.168.99.16,192.168.99.31,192.168.99.32 -np $ranks -ppn %{NRNK} $command
%elif %{model} eq 'omp'
# submit = mpiexec --allow-run-as-root -host 192.168.99.15:24,192.168.99.16:24 -np $ranks $command
# submit = mpiexec -hosts 192.168.99.15,192.168.99.16,192.168.99.31,192.168.99.32 -np $ranks -genv OMP_NUM_THREADS=$threads -ppn %{NRNK} $command
submit = mpiexec -hostfile $[top]/6nodes -np $ranks -genv OMP_NUM_THREADS=$threads -ppn %{NRNK} $command
%endif

%if %{model} eq 'omp'
pmodel=OMP
OPTIMIZE += -fiopenmp
%endif

default=base,peak=default:
ranks   = %{RANKS}
threads = %{THREADS}
ppn     = %{NRNK}

#default=peak=default:
basepeak=0
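# ----------------------------------------------------------------------
# Layout of the reportable run, derived from the defines in the invocation
# above: RANKS=144 with NRNK=24 places 24 MPI ranks on each of the 6
# compute nodes, and THREADS=8 gives every rank 8 OpenMP threads, so
# 24 x 8 = 192 software threads per node -- one per physical core of the
# two 96-core EPYC 9654 processors (half of the 384 hardware threads,
# since SMT exposes 2 threads per core).
#
# The OMP submit command above reads its node list from $[top]/6nodes.
# Intel MPI expects one hostname or IP address per line in a hostfile;
# a minimal sketch with placeholder addresses standing in for the six
# compute nodes (the real file is site-specific):
#
#   node1.cluster.local
#   node2.cluster.local
#   node3.cluster.local
#   node4.cluster.local
#   node5.cluster.local
#   node6.cluster.local
# ----------------------------------------------------------------------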
613.soma_s=default=default:
%if %{model} eq 'omp'
PORTABILITY += -DSPEC_NO_VAR_ARRAY_REDUCE
%endif

513.soma_t=default=default:
%if %{model} eq 'omp'
PORTABILITY += -DSPEC_NO_VAR_ARRAY_REDUCE
%endif

505.lbm_t,605.lbm_s,705.lbm_m=peak:
basepeak=1

513.soma_t,613.soma_s=peak:
basepeak=1

518.tealeaf_t,618.tealeaf_s,718.tealeaf_m=peak:
ranks=288
ppn=48
NRNK=48
threads=4
pmodel=OMP

519.clvleaf_t,619.clvleaf_s,719.clvleaf_m=peak:
ranks=288
ppn=48
NRNK=48
threads=4
pmodel=OMP

521.miniswp_t,621.miniswp_s=peak:
ranks=12
ppn=2
NRNK=2
threads=96
pmodel=OMP
submit = mpiexec -hostfile $[top]/6nodes -np 12 -genv OMP_NUM_THREADS=96 -ppn 2 $command

528.pot3d_t,628.pot3d_s,728.pot3d_m=peak:
ranks=288
ppn=48
NRNK=48
threads=4
pmodel=OMP

532.sph_exa_t=peak:
ranks=24
ppn=4
NRNK=4
threads=48
pmodel=OMP
submit = mpiexec -hostfile $[top]/6nodes -np 24 -genv OMP_NUM_THREADS=48 -ppn 4 $command

534.hpgmgfv_t,634.hpgmgfv_s,734.hpgmgfv_m=peak:
basepeak=1

535.weather_t,635.weather_s,735.weather_m=peak:
basepeak=1

# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
node_compute_sw_os000 = Red Hat Enterprise Linux Server release 8.6,
node_compute_sw_os001 = Kernel 4.18.0-372.9.1.el8.x86_64
notes_submit_000 = The config file option 'submit' was used.
notes_submit_005 = submit = mpiexec -hostfile $[top]/6nodes -np $ranks -genv OMP_NUM_THREADS=$threads -ppn %{NRNK} $command
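# ----------------------------------------------------------------------
# For reference, the peak overrides above keep the same one-thread-per-core
# packing as base, assuming -ppn spreads the ranks evenly over the 6 nodes:
#   base                        144 ranks = 6 x 24,  24 ranks/node x  8 threads = 192
#   tealeaf / clvleaf / pot3d   288 ranks = 6 x 48,  48 ranks/node x  4 threads = 192
#   miniswp                      12 ranks = 6 x  2,   2 ranks/node x 96 threads = 192
#   sph_exa                      24 ranks = 6 x  4,   4 ranks/node x 48 threads = 192
# ----------------------------------------------------------------------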