--
-- Copyright 2015-2019 Intel Corporation.
-- This software and the related documents are Intel copyrighted materials, and your use of them 
-- is governed by the express license under which they were provided to you ("License"). Unless the 
-- License provides otherwise, you may not use, modify, copy, publish, distribute, disclose or 
-- transmit this software or the related documents without Intel's prior written permission.
-- 
-- This software and the related documents are provided as is, with no express or implied warranties, 
-- other than those that are expressly stated in the License.
-- 
--


-- TASK: (Based on, but not identical to, TPC-DS q6)
-- List the top 10 states, in descending order, that have at least 10 customers who,
-- during a given month, bought products with a price tag at least 20% higher than the
-- average price of products in the same category.


-- Helper table: surrogate keys (i_item_sk) of all items whose current price
-- exceeds the average price of their category scaled by q07_HIGHER_PRICE_RATIO.
-- The ratio is supplied via hiveconf (presumably 1.2 for the 20% mentioned in
-- the task description -- confirm against the query parameters).
-- NOTE(review): the comparison is strict (>) although the task text says
-- "at least" -- kept as is to preserve the benchmark results.
DROP TABLE IF EXISTS ${hiveconf:TEMP_TABLE};
CREATE TABLE ${hiveconf:TEMP_TABLE} AS
SELECT priced.i_item_sk
FROM item priced
INNER JOIN
(
  -- one row per category: scaled average price used as the threshold
  SELECT
    cat.i_category,
    AVG(cat.i_current_price) * ${hiveconf:q07_HIGHER_PRICE_RATIO} AS price_threshold
  FROM item cat
  GROUP BY cat.i_category
) category_threshold
  ON category_threshold.i_category = priced.i_category
WHERE priced.i_current_price > category_threshold.price_threshold
;


--Result  --------------------------------------------------------------------
-- Disable output compression so the result files stay human readable.
set hive.exec.compress.output=false;
-- A set command without a value prints the current setting (shows up in the log).
set hive.exec.compress.output;
-- Create the result table. Its data is stored at the location given by RESULT_DIR
-- (output_dir/qXXresult/) as comma separated text lines, one row per line, in the
-- file format named by the BIG_BENCH_hive_default_fileformat_result_table
-- environment variable.
-- Any result table left over from a previous run is dropped first.
DROP TABLE IF EXISTS ${hiveconf:RESULT_TABLE};
CREATE TABLE ${hiveconf:RESULT_TABLE} (
  -- state taken from customer_address.ca_state
  ca_state STRING,
  -- number of matching rows counted for that state by the query below
  cnt      BIGINT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
STORED AS ${env:BIG_BENCH_hive_default_fileformat_result_table} LOCATION '${hiveconf:RESULT_DIR}';

-- Main query: for every non-NULL state, count the store sales of high priced
-- items (see TEMP_TABLE) that were sold during the requested year and month to
-- customers whose current address is in that state. Only states reaching the
-- configured minimum count are kept, ordered by count (descending) and then by
-- state, and cut off at q07_LIMIT rows.
-- NOTE(review): COUNT(*) counts joined sales rows, not distinct customers, so a
-- customer with several matching purchases is counted several times.
INSERT INTO TABLE ${hiveconf:RESULT_TABLE}
SELECT
  ca_state,
  COUNT(*) AS cnt
FROM customer_address addr
INNER JOIN customer cust
  ON addr.ca_address_sk = cust.c_current_addr_sk
INNER JOIN store_sales sales
  ON cust.c_customer_sk = sales.ss_customer_sk
-- Joined against the helper table instead of using "ss_item_sk IN ()":
-- Hive only supports a single "IN" subquery per SQL statement.
INNER JOIN ${hiveconf:TEMP_TABLE} high_price_items
  ON sales.ss_item_sk = high_price_items.i_item_sk
WHERE addr.ca_state IS NOT NULL
AND sales.ss_sold_date_sk IN
(
  -- date keys belonging to the given month
  SELECT d_date_sk
  FROM date_dim
  WHERE d_year = ${hiveconf:q07_YEAR}
  AND d_moy = ${hiveconf:q07_MONTH}
)
GROUP BY ca_state
-- keep only states with at least q07_HAVING_COUNT_GE counted rows
HAVING cnt >= ${hiveconf:q07_HAVING_COUNT_GE}
-- highest counts first, state name as tie breaker for a deterministic order
ORDER BY cnt DESC, ca_state
LIMIT ${hiveconf:q07_LIMIT}
;


-- Cleanup: drop the helper table of high priced items created at the top of this
-- script. It is only an intermediate result and is not needed once the result
-- table has been filled.
DROP TABLE IF EXISTS ${hiveconf:TEMP_TABLE};
