#!/usr/bin/env bash
#
#
# data collection script for SPECosgcloud_2018 under KVM
# sabaset@us.ibm.com
#
# modified from collect_data_kvm.sh 15 2011-04-18 12:06:42Z robert.wen@hp.com
#




# Directory the full collection run recreates and fills with results.
OUTPUT_DIR=/tmp/data-collection
# Site configuration sourced by the full collection run.
CONFIG=/opt/SPECvirt/collect_data.cfg
#DC_SCRIPT="/opt/SPECvirt/collect_data_kvm.sh"
# Path this script was invoked with; copied into support_files.
DC_SCRIPT=$0

# temp settings
#SCRIPT_REMOTE=/home/cbuser/collect_support_data.sh
# Scratch directories used by the sysinfo / sysinfo_vm and software modes.
BASEDIR_SYS=/tmp/data-collection-sys
BASEDIR_SW=/tmp/data-collection-sw

# Absolute path of this script: $0 as-is when it already starts with a
# slash, otherwise $0 prefixed with the current working directory.
case "$0" in
    /*)
        SCRIPT_LOCAL="$0"
        ;;
    *)
        SCRIPT_LOCAL="$(pwd)/$0"
        ;;
esac

#
# Ping every host the collection run is going to contact.
# Globals:  PHYCLIENT_LIST (read), TILE_LIST (read)
# Outputs:  whatever data_collect_log and ping_host print
# Returns:  0; ping_host calls "exit 2" on the first host that does not answer
#
sanity_check()
{
    local client tile host vmlist

    data_collect_log "Checking hostname..." 3
    ping_host "sut"

    # PHYCLIENT_LIST is deliberately unquoted so gen_list receives it
    # exactly as it did before.
    for client in $(gen_list $PHYCLIENT_LIST)
    do
        ping_host "phyclient${client}"
    done

    # TILE_LIST is deliberately unquoted: it is seq's argument list.
    for tile in $(seq $TILE_LIST)
    do
        ping_host "client${tile}"
        ping_host "wclient${tile}"

        vmlist="appserver batchserver infraserver mailserver webserver"
        # Tiles whose number is 1 modulo 4 additionally have a dbserver.
        if (( tile % 4 == 1 )); then
            vmlist="$vmlist dbserver"
        fi

        for host in $vmlist
        do
            ping_host "${host}${tile}"
        done
    done

    return 0
}

#
# Print a host name and verify it answers one ping within two seconds.
#   $1 - host name to probe
# Outputs:  the host name on stdout
# Returns:  0 when the host answers; otherwise logs an error (class 2) and
#           calls "exit 2"
#
ping_host()
{
    local host="$1"

    printf '%s\n' "$host"
    if ping -w2 -c1 "$host" &>/dev/null
    then
        return 0
    fi

    data_collect_log "${host} not pingable. Please check /etc/hosts!" 2
    exit 2
}

#
# Install root's public key on a host with ssh-copy-id, answering the
# password prompt through an expect script.
#   $1 - host to authorize (contacted as root@<host>)
#   $2 - password sent at the prompt
#
setup_host_key()
{
    local target_host="$1"
    local password="$2"

    # The here-document is unquoted so both locals are expanded into the
    # expect script before expect reads it.
    expect <<EXPECT_SCRIPT
spawn ssh-copy-id -i /root/.ssh/id_rsa.pub root@${target_host}
expect "*?assword:*"
send -- "${password}\r"
send -- "\r"
expect eof
EXPECT_SCRIPT
}

#
# Set up key-based ssh authentication on the SUT, every physical client and
# the client plus workload VMs of every tile.
# Globals:  PHYCLIENT_LIST (read), TILE_LIST (read)
#
prepare_run_env()
{
    local client tile host vmlist

    # NOTE(review): setup_host_key takes a password as $2 but none is passed
    # anywhere in this function, so an empty password line is sent -- confirm.
    setup_host_key sut

    # PHYCLIENT_LIST is deliberately unquoted so gen_list receives it
    # exactly as it did before.
    for client in $(gen_list $PHYCLIENT_LIST)
    do
        setup_host_key "phyclient${client}"
    done

    # TILE_LIST is deliberately unquoted: it is seq's argument list.
    for tile in $(seq $TILE_LIST)
    do
        setup_host_key "client${tile}"

        vmlist="appserver batchserver infraserver mailserver webserver"
        # Tiles whose number is 1 modulo 4 additionally have a dbserver.
        if (( tile % 4 == 1 )); then
            vmlist="$vmlist dbserver"
        fi

        for host in $vmlist
        do
            setup_host_key "${host}${tile}"
        done
    done

    # NOTE(review): looks like leftover debug output; kept so the function's
    # output stays unchanged.
    echo "hi"
}

#
# Create the result directory skeleton below the current directory.
# Returns:  status of the last mkdir
#
build_dir_tree()
{
    local dir

    for dir in ./VIRT ./PHYS ./PHYS/SUT ./PHYS/Clients ./support_files
    do
        mkdir -p "$dir"
    done
}

#
# Append a timestamped message to $OUTPUT_DIR/data-collection.log and
# optionally show it on the terminal.
#   $1   - message text
#   $2   - class: "2" also prints the text to stderr, "3" also prints it to
#          stdout; any other value (or none) only writes the log file
#   $3.. - with class 3, passed to echo ahead of the text (e.g. -n)
# Globals:  OUTPUT_DIR (read)
# Returns:  status of the log-file append
#
data_collect_log()
{
    local text="$1"
    local class="$2"
    local log_file="$OUTPUT_DIR/data-collection.log"

    # Drop text and class so only the optional echo flags remain.
    if [ $# -ge 2 ]; then
        shift 2
    else
        shift $#
    fi

    case "$class" in
        2)
            echo "$text" 1>&2
            ;;
        3)
            # $* is unquoted on purpose: each extra argument is an echo option.
            echo $* "$text"
            ;;
    esac

    # The log directory may not exist yet (e.g. when reporting an unreadable
    # config before the output directory is created); create it so the
    # message is not lost.
    if [ -n "$OUTPUT_DIR" ] && [ ! -d "$OUTPUT_DIR" ]; then
        mkdir -p "$OUTPUT_DIR" 2>/dev/null
    fi

    echo "$(date "+%b %d %Y %T") $text" >> "$log_file"
}

#
# Expand a comma-separated list into one item per line.
#   $1 - list such as "1,3-5,7"; an item of the form <digits>-<digits> is
#        expanded with seq, anything else is printed unchanged
# Outputs:  the expanded items on stdout, one per line
#
gen_list()
{
    local IFS=","
    local LIST="$1"
    local item

    for item in $LIST
    do
        # Require digits on both sides of the dash: a half-open item such as
        # "3-" must not reach seq with a missing operand (which would make
        # seq count from 1).
        if [[ $item =~ ^([0-9]+)-([0-9]+)$ ]]
        then
            seq "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}"
        else
            printf '%s\n' "$item"
        fi
    done
}

#
# Copy one file into the current directory, keeping its path relative to /.
#   $1 - absolute path of the file to copy (e.g. /proc/cpuinfo)
# Returns:  1 when the file does not exist or the target directory cannot
#           be created; otherwise the status of the copy
#
collect_proc_info()
{
    local procfile="$1"
    local rel_dir file_name

    [ -e "$procfile" ] || return 1

    rel_dir=$(dirname "$procfile")
    file_name=$(basename "$procfile")

    # Strip the leading slash so the copy lands below the current directory.
    rel_dir=${rel_dir#/}
    mkdir -p "$rel_dir" &>/dev/null || return 1

    cat "$procfile" > "$rel_dir/$file_name"
}

#
# Snapshot system information of a physical host into the current directory:
# command outputs as *.txt files, selected /proc entries via
# collect_proc_info, and a copy of selected configuration files and logs.
# Zero-size files are removed at the end.
#
collect_sys_info()
{
    # hw and device info
    lspci > lspci.txt

    # os info
    sudo hostname > hostname.txt

    # network
    sudo ifconfig > ifconfig.txt
    sudo route -n > route.txt
    sudo brctl show > brctl.txt

    # storage and filesystem
    collect_proc_info "/proc/partitions"
    vgdisplay -v >vgdisplay.txt 2>/dev/null
    lvdisplay > lvdisplay.txt 2>/dev/null
    sudo mount > mount.txt
    sudo df > df.txt

    # packages and service
    sudo rpm -qa > rpmpkgs.txt
    sudo dpkg -l > dpkg.txt
    sudo chkconfig --list > chkconfig.txt

    # runtime info
    # The [m] bracket keeps grep from matching its own command line. The
    # pattern is quoted so the shell cannot glob-expand it against a file
    # called qemu-kvm in the current directory first.
    sudo ps axlf | grep 'qemu-kv[m]' > running-vms.txt
    sudo virsh list > virsh-list.txt

    # ntp info
    sudo cat /etc/ntp.conf > ntp.conf

    # date
    date > date.txt

    # proc info
    collect_proc_info "/proc/cpuinfo"
    collect_proc_info "/proc/devices"
    collect_proc_info "/proc/meminfo"
    collect_proc_info "/proc/modules"
    collect_proc_info "/proc/swaps"
    collect_proc_info "/proc/version"
    collect_proc_info "/proc/cmdline"

    # other static configuration and logs
    # find lists the paths that exist, the first cpio archives them and the
    # second unpacks the archive here with the leading slash dropped.
    find /etc/fstab \
        /etc/redhat-release \
        /etc/iproute2 \
        /etc/hosts \
        /etc/inittab \
        /etc/sysctl.conf \
        /usr/lib/sysctl.d/specvirt.conf \
        /etc/nsswitch.conf \
        /etc/kvm \
        /etc/security/limits.conf \
        /etc/rc.d/init.d \
        /etc/rc.d/rc3.d \
        /usr/lib/systemd/system/multi-user.target.wants \
        /opt/common_scripts/ostune_misc.sh \
        /var/log/dmesg \
        /boot/grub*/grub.{conf,cfg} \
        /etc/bashrc \
        /etc/exports \
        /etc/rc.d/rc.local \
        /etc/tune-profiles/active-profile \
        2>/dev/null | cpio -o 2>/dev/null | cpio -idm --no-absolute-filenames &>/dev/null

    # clear zero size file
    find . -size 0 -exec rm -f {} \;
}

#
# Snapshot system information of a VM into the current directory: command
# outputs as *.txt files, selected /proc entries via collect_proc_info, and
# a copy of selected configuration files and logs. Zero-size files are
# removed at the end.
#
collect_sys_info_vm()
{
    # hw and device info
    sudo lspci > lspci.txt

    # os info
    sudo hostname > hostname.txt

    # network
    sudo ifconfig > ifconfig.txt
    sudo route -n > route.txt
    sudo brctl show > brctl.txt
    sudo netstat -tulpn > netstat.txt

    # storage and filesystem
    collect_proc_info "/proc/partitions"
    vgdisplay -v >vgdisplay.txt 2>/dev/null
    lvdisplay > lvdisplay.txt 2>/dev/null
    sudo mount > mount.txt
    sudo df > df.txt

    # packages and service
    sudo rpm -qa > rpmpkgs.txt
    sudo dpkg -l > dpkg.txt
    sudo chkconfig --list > chkconfig.txt

    # runtime info
    # The [m] bracket keeps grep from matching its own command line. The
    # pattern is quoted so the shell cannot glob-expand it against a file
    # called qemu-kvm in the current directory first.
    sudo ps axlf | grep 'qemu-kv[m]' > running-vms.txt
    #virsh list > virsh-list.txt

    # ntp info
    sudo cat /etc/ntp.conf > ntp.conf

    # date
    date > date.txt

    # proc info
    collect_proc_info "/proc/cpuinfo"
    collect_proc_info "/proc/devices"
    collect_proc_info "/proc/meminfo"
    collect_proc_info "/proc/modules"
    collect_proc_info "/proc/swaps"
    collect_proc_info "/proc/version"
    collect_proc_info "/proc/cmdline"

    # other static configuration and logs
    # find lists the paths that exist, the first cpio archives them and the
    # second unpacks the archive here with the leading slash dropped.
    find /etc/fstab \
        /etc/redhat-release \
        /etc/iproute2 \
        /etc/hosts \
        /etc/inittab \
        /etc/sysctl.conf \
        /etc/nsswitch.conf \
        /etc/kvm \
        /etc/security/limits.conf \
        /etc/rc.d/init.d \
        /etc/rc.d/rc3.d \
        /opt/common_scripts/ostune_misc.sh \
        /var/log/dmesg \
        /boot/grub*/grub.conf \
        /etc/bashrc \
        /etc/exports \
        /etc/rc.d/rc.local \
        /etc/tune-profiles/active-profile \
        2>/dev/null | cpio -o 2>/dev/null | cpio -idm --no-absolute-filenames &>/dev/null

    # clear zero size file
    find . -size 0 -exec rm -f {} \;
}

#
# Copy this script to a VM, run it there in sysinfo_vm mode and move the
# resulting /tmp/data-collection-sys contents into a local directory.
#   $1 - host name or address
#   $2 - ssh port (default: 22)
#   $3 - remote user (default: none, ssh picks the user)
#   $4 - private key file (default: none, no -i option is passed)
#   $5 - destination directory (default: current directory)
# Globals:  SCRIPT_LOCAL (read), SCRIPT_REMOTE (read)
#
collect_remote_vm_sys_info()
{
    local host="$1"
    local port="${2:-22}"
    local vmuser="$3"
    local key="$4"
    local dst="${5:-.}"
    local target="${vmuser:+${vmuser}@}${host}"
    local -a ssh_opts=(-o "StrictHostKeyChecking no")

    # Callers that pass only a host must not produce a dangling -i option.
    if [ -n "$key" ]; then
        ssh_opts+=(-i "$key")
    fi

    scp -P "$port" "${ssh_opts[@]}" -q "$SCRIPT_LOCAL" "${target}:${SCRIPT_REMOTE}"
    ssh -p "$port" "${ssh_opts[@]}" "$target" bash "$SCRIPT_REMOTE" sysinfo_vm
    # Stream the remote result directory as a tarball and unpack it here; it
    # arrives as ./tmp/data-collection-sys.
    ssh -p "$port" "${ssh_opts[@]}" "$target" tar -cz /tmp/data-collection-sys 2>/dev/null | tar -xz 2>/dev/null
    mv tmp/data-collection-sys/* "$dst"
    rm -fr tmp
}

#
# Copy this script to a host, run it there in sysinfo mode and move the
# resulting /tmp/data-collection-sys contents into the current directory.
#   $1 - host name or address
#   $2 - ssh port (default: 22)
# Globals:  SCRIPT_LOCAL (read), SCRIPT_REMOTE (read)
#
collect_remote_sys_info()
{
    local host="$1"
    # Callers in this file pass only the host; without a default the -P/-p
    # options would swallow the argument that follows them.
    local port="${2:-22}"

    scp -P "$port" -q "$SCRIPT_LOCAL" "${host}:${SCRIPT_REMOTE}"
    ssh -p "$port" "$host" bash "$SCRIPT_REMOTE" sysinfo
    # Stream the remote result directory as a tarball and unpack it here; it
    # arrives as ./tmp/data-collection-sys.
    ssh -p "$port" "$host" tar -cz /tmp/data-collection-sys 2>/dev/null | tar -xz 2>/dev/null
    mv tmp/data-collection-sys/* .
    rm -fr tmp
}

#
# Collect system information of the host "sut" into ./SUT.
# Returns:  1 without collecting anything when ./SUT cannot be entered
#
collect_SUT()
{
    # Bail out instead of collecting into, and then leaving, the wrong
    # directory.
    cd SUT || return 1
    data_collect_log "Collecting SUT info..." 3
    data_collect_log "Entering $(pwd)..."

    collect_remote_sys_info "sut"

    cd ..
}

#
# Collect system information of all physical and virtual clients into
# ./Clients, one sub-directory per host, then link each physical client to
# its virtual clients.
# Globals:  PHYCLIENT_LIST (read), TILE_LIST (read), VCLIENT_LIST (read)
# Returns:  1 without collecting anything when ./Clients cannot be entered
#
collect_clients()
{
    local client vclient dir x

    cd Clients || return 1
    data_collect_log "Collecting physical clients info..." 3
    data_collect_log "Entering $(pwd)..."

    # PHYCLIENT_LIST is deliberately unquoted so gen_list receives it
    # exactly as it did before.
    for client in $(gen_list $PHYCLIENT_LIST)
    do
        dir="phyclient${client}"
        # Only step back up after actually stepping in; an already existing
        # directory is reused instead of derailing the working directory.
        if mkdir -p "$dir" && cd "$dir"
        then
            data_collect_log "Collecting $dir..."
            collect_remote_sys_info "$dir" &
            cd ..
        fi
    done

    # Barrier: all physical clients are done before the virtual ones start.
    wait

    data_collect_log "Collecting virtual clients info..." 3
    # TILE_LIST is deliberately unquoted: it is seq's argument list.
    for vclient in $(seq $TILE_LIST)
    do
        for dir in "client${vclient}" "wclient${vclient}"
        do
            if mkdir -p "$dir" && cd "$dir"
            then
                data_collect_log "Collecting $dir..."
                collect_remote_vm_sys_info "$dir" &
                cd ..
            fi
        done
    done

    wait

    data_collect_log "Linking virtual clients to physical clients..." 3
    for client in $(gen_list $PHYCLIENT_LIST)
    do
        dir="phyclient${client}"
        cd "$dir" || continue
        # Unquoted on purpose: presumably each VCLIENT_LIST entry is a
        # whitespace-separated list of tile numbers -- confirm against the
        # config file.
        for x in ${VCLIENT_LIST[$client]}
        do
            ln -s "../client${x}" "vclient${x}"
            ln -s "../wclient${x}" "vwclient${x}"
        done
        cd ..
    done

    cd ..
}

#
# Collect the physical part of the configuration (SUT and clients) into
# ./PHYS.
# Returns:  1 without collecting anything when ./PHYS cannot be entered
#
collect_physical_configuration()
{
    # Bail out instead of collecting into, and then leaving, the wrong
    # directory.
    cd PHYS || return 1
    data_collect_log "Entering $(pwd)..."

    collect_SUT
    collect_clients

    cd ..
}

#
# Collect the configuration of the workload software on this machine into
# $BASEDIR_SW. The helper functions (get_my_vm_attribute,
# get_my_ai_attribute_with_default) are expected to come from the
# cb_*_common.sh file sourced from this script's directory.
#   $1 - workload type: "cassandra_ycsb" or "hadoop"; anything else is a no-op
# Globals:  BASEDIR_SW (read), HADOOP_CONF_DIR (read), HADOOP_HOME (read);
#           my_role and the *_PATH / *_DIR / *_HDFS variables are written
#
collect_local_software_config()
{
    local type="$1"

    case $type in
        "cassandra_ycsb")

            # The sed expression reduces $0 to its directory part plus "./".
            source "$(echo "$0" | sed -e "s/\(.*\/\)*.*/\1.\//g")/cb_ycsb_common.sh"
            my_role=$(get_my_vm_attribute role)

            echo "${my_role}" > "${BASEDIR_SW}/role"
            # my_role is quoted so an empty role no longer makes the test
            # fail with "unary operator expected".
            if [ "${my_role}" == "seed" ]; then
                mkdir "${BASEDIR_SW}/cassandra_conf"
                mkdir "${BASEDIR_SW}/cassandra"

                nodetool cfstats > "${BASEDIR_SW}/cassandra/nodetool_cfstats"
                nodetool status  > "${BASEDIR_SW}/cassandra/nodetool_status"
                CASSANDRA_CONF_PATH=$(get_my_ai_attribute_with_default cassandra_conf_path /etc/cassandra/)
                cp -r -v "${CASSANDRA_CONF_PATH}"/* "${BASEDIR_SW}/cassandra_conf"

                CASSANDRA_DATA_DIR=$(get_my_ai_attribute_with_default cassandra_data_dir /dbstore)
                du -s -b "${CASSANDRA_DATA_DIR}" > "${BASEDIR_SW}/cassandra/du_datadir"
                du -s -b "${CASSANDRA_DATA_DIR}/cassandra" > "${BASEDIR_SW}/cassandra/du_datadir_cassandra"
                du -s -b "${CASSANDRA_DATA_DIR}/cassandra/data/usertable" > "${BASEDIR_SW}/cassandra/du_datadir_cassandra_usertable"
            fi

            if [ "${my_role}" == "ycsb" ]; then
                mkdir "${BASEDIR_SW}/YCSB"
                # ~/YCSB stays unquoted so the tilde is expanded.
                YCSB_PATH=$(get_my_ai_attribute_with_default ycsb_path ~/YCSB)
                cp -r -v "${YCSB_PATH}/custom_workload.dat" "${BASEDIR_SW}/YCSB/"
                cp -r -v "${YCSB_PATH}/CHANGELOG" "${BASEDIR_SW}/YCSB/"
                cp -r -v "${YCSB_PATH}/workloads" "${BASEDIR_SW}/YCSB/"
            fi

            # java prints its version on stderr; the file lands in the
            # current directory.
            java -version 2>javaVersion.out

        ;;

        "hadoop")
            source "$(echo "$0" | sed -e "s/\(.*\/\)*.*/\1.\//g")/cb_hadoop_common.sh"
            my_role=$(get_my_vm_attribute role)
            echo "${my_role}" > "${BASEDIR_SW}/role"

            mkdir "${BASEDIR_SW}/hadoop_conf"
            mkdir "${BASEDIR_SW}/hadoop"

            cp -r -v "${HADOOP_CONF_DIR}"/* "${BASEDIR_SW}/hadoop_conf"

            source ~/.bashrc
            "${HADOOP_HOME}/bin/hdfs" version > "${BASEDIR_SW}/hadoop/version"
            "${HADOOP_HOME}/bin/hdfs" dfsadmin -report > "${BASEDIR_SW}/hadoop/dfsadmin_report"

            DFS_NAME_DIR=$(get_my_ai_attribute_with_default dfs_name_dir /tmp/cbhadoopname)
            DFS_DATA_DIR=$(get_my_ai_attribute_with_default dfs_data_dir /tmp/cbhadoopdata)
            du -s -b "${DFS_NAME_DIR}" > "${BASEDIR_SW}/hadoop/du_namenodedir"
            du -s -b "${DFS_DATA_DIR}" > "${BASEDIR_SW}/hadoop/du_datanodedir"

            # NOTE(review): DATA_HDFS is read from the same attribute as
            # DFS_DATA_DIR above -- confirm this is not a copy-paste slip.
            DATA_HDFS=$(get_my_ai_attribute_with_default dfs_data_dir /tmp/cbhadoopdata)
            INPUT_HDFS=${DATA_HDFS}/KMeans/Input-comp
            OUTPUT_HDFS=${DATA_HDFS}/KMeans/Output-comp

            if [ "${my_role}" == "hadoopmaster" ]; then
                "${HADOOP_HOME}/bin/hadoop" fs -ls "${DATA_HDFS}" > "${BASEDIR_SW}/hadoop/datahdfs"
                "${HADOOP_HOME}/bin/hadoop" fs -du "${INPUT_HDFS}" > "${BASEDIR_SW}/hadoop/input_hdfs_size"
                "${HADOOP_HOME}/bin/hadoop" fs -du "${OUTPUT_HDFS}" > "${BASEDIR_SW}/hadoop/output_hdfs_size"
            fi

            java -version 2>javaVersion.out

        ;;

    esac
}

#
# Copy this script to a VM, run it there in software mode for the given
# workload type and move the resulting /tmp/data-collection-sw contents
# into a local directory.
#   $1 - host name or address
#   $2 - ssh port (default: 22)
#   $3 - remote user (default: none, ssh picks the user)
#   $4 - private key file (default: none, no -i option is passed)
#   $5 - destination directory (default: current directory)
#   $6 - workload type handed to the remote "software" mode
# Globals:  SCRIPT_LOCAL (read), SCRIPT_REMOTE (read)
#
collect_remote_software_config()
{
    local host="$1"
    local port="${2:-22}"
    local vmuser="$3"
    local key="$4"
    local dst="${5:-.}"
    local type="$6"
    local target="${vmuser:+${vmuser}@}${host}"
    local -a ssh_opts=(-o "StrictHostKeyChecking no")

    # An omitted key must not produce a dangling -i option.
    if [ -n "$key" ]; then
        ssh_opts+=(-i "$key")
    fi

    scp -P "$port" "${ssh_opts[@]}" -q "$SCRIPT_LOCAL" "${target}:${SCRIPT_REMOTE}"
    ssh -p "$port" "${ssh_opts[@]}" "$target" bash "$SCRIPT_REMOTE" "software" "$type"
    # Stream the remote result directory as a tarball and unpack it here; it
    # arrives as ./tmp/data-collection-sw.
    ssh -p "$port" "${ssh_opts[@]}" "$target" tar -cz /tmp/data-collection-sw 2>/dev/null | tar -xz 2>/dev/null
    mv tmp/data-collection-sw/* "$dst"
    rm -fr tmp
}

#
# Start the software-configuration collection for one VM in the background,
# with ./SOFTW as its working directory.
#   $1 - VM type (e.g. appserver)
#   $2 - tile number; the host contacted is <type><tile>
# Returns:  1 when ./SOFTW cannot be created or entered
#
collect_software_config()
{
    local type="$1"
    local tile="$2"
    local host="${type}${tile}"

    data_collect_log "Collecting software configuration on ${host}..."
    # Only step back up after actually stepping in; an already existing
    # SOFTW directory is reused instead of derailing the working directory.
    { mkdir -p SOFTW && cd SOFTW; } || return 1
    # NOTE(review): collect_remote_software_config takes
    # (host, port, user, key, dst, type); here the type lands in the port
    # position -- confirm the intended arguments.
    collect_remote_software_config "$host" "$type" &
    cd ..
}

#
# Start the system-information collection for one VM in the background,
# with ./VM as its working directory.
#   $1 - host name of the VM
# Returns:  1 when ./VM cannot be created or entered
#
collect_vm_config()
{
    local host="$1"

    data_collect_log "Collecting vm configuration on ${host}..."
    # Only step back up after actually stepping in; an already existing VM
    # directory is reused instead of derailing the working directory.
    { mkdir -p VM && cd VM; } || return 1
    collect_remote_vm_sys_info "$host" &
    cd ..
}


#
# Collect the configuration of every VM of one tile, one sub-directory per
# VM type below the current directory.
#   $1 - tile number
# The remote collections are started in the background and not waited for.
#
collect_one_tile()
{
    local tile="$1"
    local host=""
    local vmlist tile_tag

    vmlist="appserver batchserver infraserver mailserver webserver"
    # Tiles whose number is 1 modulo 4 additionally have a dbserver.
    if (( tile % 4 == 1 )); then
        vmlist="$vmlist dbserver"
    fi

    # Tile numbers below 10 are zero-padded in the libvirt XML file names.
    if [ "$tile" -lt 10 ]
    then
        tile_tag="0${tile}"
    else
        tile_tag="${tile}"
    fi

    for host in $vmlist
    do
        # Only step back up after actually stepping in; an already existing
        # directory is reused instead of derailing the working directory.
        { mkdir -p "$host" && cd "$host"; } || continue

        #collect KVM xml for tile
        scp "sut:/etc/libvirt/qemu/${host}${tile_tag}*.xml" .

        data_collect_log "Collecting ${host} in tile${tile}..."

        collect_software_config "$host" "$tile"
        collect_vm_config "${host}${tile}"

        cd ..
    done

    # per tile wait
    #wait
}

#
# Collect the configuration of every tile into ./VIRT/tile<N>.
# Globals:  TILE_LIST (read)
# Returns:  1 without collecting anything when ./VIRT cannot be entered
#
collect_virtual_configuration()
{
    local tile

    cd VIRT || return 1
    data_collect_log "Entering $(pwd)..."

    # TILE_LIST is deliberately unquoted: it is seq's argument list.
    for tile in $(seq $TILE_LIST)
    do
        # Only step back up after actually stepping in; an already existing
        # tile directory is reused instead of derailing the working directory.
        { mkdir -p "tile${tile}" && cd "tile${tile}"; } || continue
        data_collect_log "Collecting tile${tile}..." 3
        data_collect_log "Entering $(pwd)..."
        collect_one_tile "${tile}"
        cd ..
    done

    cd ..
}

#
# Copy the collection scripts into ./support_files.
# Globals:  DC_SCRIPT (read)
# Returns:  1 without copying anything when ./support_files cannot be entered
#
collect_support_files()
{
    cd support_files || return 1
    # NOTE(review): DC_SCRIPT defaults to $0; if that is a relative path it
    # is resolved against this directory, not the one the script was started
    # from -- confirm whether SCRIPT_LOCAL should be used instead.
    cp "$DC_SCRIPT" ./
#	scp -r appserver1:/opt/SPECjAppServer2004/bin/glassfish ./
    cp /opt/SPECvirt/collect_data_kvm.sh ./

    cd ..
}


#################################################################
# main routine starts here
#

#
# Recreate directory $1 from scratch and make it the working directory.
# Exits the script when that fails: the collectors write to, and delete
# files from, the current directory, so they must never run anywhere else.
#
enter_fresh_dir()
{
    local dir="$1"

    if [ -z "$dir" ]
    then
        echo "$0: no working directory given" 1>&2
        exit 1
    fi

    if ! { rm -fr "$dir" 2>/dev/null && mkdir -p "$dir" 2>/dev/null && cd "$dir"; }
    then
        echo "$0: cannot prepare working directory $dir" 1>&2
        exit 1
    fi
}

# Dispatch on the mode given as first argument; without a known mode the
# full collection run is performed.
case "$1" in
    sysinfo)
        # Runs on the target host: gather its system info below $BASEDIR_SYS.
        enter_fresh_dir "$BASEDIR_SYS"
        collect_sys_info
        ;;

    sysinfo_vm)
        # Runs on the target VM: gather its system info below $BASEDIR_SYS.
        enter_fresh_dir "$BASEDIR_SYS"
        collect_sys_info_vm
        ;;

    remote_vm_sysinfo)
        # Args: host port user keypath storedir
        host="$2"
        port="$3"
        user="$4"
        keypath="$5"
        storedir="$6"

        enter_fresh_dir "$storedir"

        sudo chmod 0600 "$keypath"

        SCRIPT_REMOTE=/home/${user}/collect_support_data.sh

        collect_remote_vm_sys_info "${host}" "${port}" "${user}" "${keypath}" "${storedir}"
        ;;

    remote_vm_software)
        # Args: host port user keypath storedir type
        host="$2"
        port="$3"
        user="$4"
        keypath="$5"
        storedir="$6"
        type="$7"

        enter_fresh_dir "$storedir"

        sudo chmod 0600 "$keypath"

        SCRIPT_REMOTE=/home/${user}/collect_support_data.sh

        collect_remote_software_config "${host}" "${port}" "${user}" "${keypath}" "${storedir}" "${type}"
        ;;

    software)
        # Runs on the target VM: gather workload software configuration.
        enter_fresh_dir "$BASEDIR_SW"
        collect_local_software_config "$2"
        ;;

    prepare)
        prepare_run_env
        ;;

    *)
        if [ -r "$CONFIG" ]
        then
            source "$CONFIG"
        else
            data_collect_log "Config $CONFIG not readable, please check!" 2
            exit 1
        fi

        enter_fresh_dir "$OUTPUT_DIR"
        sanity_check

        build_dir_tree

        date > Data-Collection-Start.txt

        collect_support_files

        collect_physical_configuration

        collect_virtual_configuration

        # final wait
        wait

        # Remove the obsolete files
        pwd
        # The cleanup below deletes recursively; never run it anywhere but
        # inside the output directory.
        cd "$OUTPUT_DIR" || exit 1
        for pattern in "*.bak*" "*.orig*" "*_sc2010*" "*.rpmsave*" "*.rpmnew*" "*before*"
        do
            # NUL-delimited so names containing whitespace are removed intact.
            find ./ -name "$pattern" -print0 | xargs -0 rm -rfv --
        done

        date > Data-Collection-End.txt

        data_collect_log "Data has been collected under $OUTPUT_DIR" 3

        mv data-collection.log /tmp/

        exit 0
        ;;
esac
