All,
I have been attempting to get some of our larger code for analytics running through BigQuery, but I continue to run into issues for a lot of the states and the amount of data that is present. We are talking about years of data. It may be that my query is not up-to-snuff, but attempting to get the sums based on the the specific group by's is what I need.
What do I need to change within the query to prevent the resources exceeded error?
SELECT
COMPANY_NAME,
RATING_CLASS,
COMPANY_KEY,
-- State Info & Calculations
-- Over is used as a WINDOW function to SUM ALL results within the given query
SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER () AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
-- ZIP3 Info & Calculations
ZIP3,
ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
ZIP3_TOTAL_RESULT_APPEARANCE,
ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5 Info & Calculations
ZIP5,
ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
ZIP5_TOTAL_RESULT_APPEARANCE,
ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM (
SELECT
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
-- ZIP3
ZIP3,
COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP3) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
COUNT(*) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOTAL_RESULT_APPEARANCE,
SUM(CASE
WHEN lowest = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_LOWEST_COUNT,
AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RANK,
SUM(CASE
WHEN top5 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP5_COUNT,
SUM(CASE
WHEN top10 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP10_COUNT,
AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_CENT_DIFF,
SUM(CASE
WHEN DISCOUNTED_lowest = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RANK,
SUM(CASE
WHEN DISCOUNTED_top5 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP5_COUNT,
SUM(CASE
WHEN DISCOUNTED_top10 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP10_COUNT,
AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5
ZIP5,
COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP5) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
COUNT(*) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOTAL_RESULT_APPEARANCE,
SUM(CASE
WHEN lowest = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_LOWEST_COUNT,
AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RANK,
SUM(CASE
WHEN top5 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP5_COUNT,
SUM(CASE
WHEN top10 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP10_COUNT,
AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_CENT_DIFF,
SUM(CASE
WHEN DISCOUNTED_lowest = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RANK,
SUM(CASE
WHEN DISCOUNTED_top5 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP5_COUNT,
SUM(CASE
WHEN DISCOUNTED_top10 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP10_COUNT,
AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
ZIP3,
ZIP5,
LOWEST,
RATIO_TO_MIN,
RATE_ORDER,
TOP5,
TOP10,
CENT_DIFF,
DISCOUNTED_LOWEST,
DISCOUNTED_RATIO_TO_MIN,
DISCOUNTED_RATE_ORDER,
DISCOUNTED_TOP5,
DISCOUNTED_TOP10,
DISCOUNTED_CENT_DIFF,
LOGGING_KEY )
GROUP BY
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
-- ZIP3 General
ZIP3,
ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
ZIP3_TOTAL_RESULT_APPEARANCE,
ZIP3_LOWEST,
ZIP3_AVG_RATIO_TO_LOWEST,
ZIP3_AVG_RANK,
ZIP3_TOP5,
ZIP3_TOP10,
ZIP3_AVG_CENT_DIFF,
ZIP3_DISCOUNTED_LOWEST,
ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP3_DISCOUNTED_AVG_RANK,
ZIP3_DISCOUNTED_TOP5,
ZIP3_DISCOUNTED_TOP10,
ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5 General
ZIP5,
ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
ZIP5_TOTAL_RESULT_APPEARANCE,
ZIP5_LOWEST,
ZIP5_AVG_RATIO_TO_LOWEST,
ZIP5_AVG_RANK,
ZIP5_TOP5,
ZIP5_TOP10,
ZIP5_AVG_CENT_DIFF,
ZIP5_DISCOUNTED_LOWEST,
ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP5_DISCOUNTED_AVG_RANK,
ZIP5_DISCOUNTED_TOP5,
ZIP5_DISCOUNTED_TOP10,
ZIP5_DISCOUNTED_AVG_CENT_DIFF
Updated Query with suggested fixes:
SELECT
main.COMPANY_NAME AS COMPANY_NAME,
main.COMPANY_KEY AS COMPANY_KEY,
main.RATING_CLASS AS RATING_CLASS,
state_count.STATE_MED_SUPP_TOOL_NUM_QUOTE AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
-- ZIP3
main.ZIP3 AS ZIP3,
ZIP3_COUNT.ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE,
ZIP3_SUB.ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
ZIP3_SUB.ZIP3_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
ZIP3_SUB.ZIP3_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
ZIP3_SUB.ZIP3_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
ZIP3_SUB.ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
ZIP3_SUB.ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP3_SUB.ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
ZIP3_SUB.ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
ZIP3_SUB.ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
ZIP3_SUB.ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5
main.ZIP5 AS ZIP5,
ZIP5_COUNT.ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE,
ZIP5_SUB.ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
ZIP5_SUB.ZIP5_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
ZIP5_SUB.ZIP5_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
ZIP5_SUB.ZIP5_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
ZIP5_SUB.ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
ZIP5_SUB.ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP5_SUB.ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
ZIP5_SUB.ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
ZIP5_SUB.ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
ZIP5_SUB.ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM (
SELECT
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
ZIP3,
ZIP5,
STATE,
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824",
"4872666167115776",
"6396348765044736",
"6139303562313728",
"4988973881491456")
AND portal_key NOT IN ("5878607637381120")
GROUP BY
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
ZIP3,
ZIP5,
STATE,
) AS MAIN
LEFT JOIN (
SELECT
ZIP3,
COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE,
COMPANY_KEY,
RATING_CLASS,
AVG(discounted_ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST,
SUM(CASE
WHEN TOP5 = TRUE THEN 1
ELSE 0 END) AS ZIP3_TOP5_COUNT,
SUM(CASE
WHEN LOWEST = TRUE THEN 1
ELSE 0 END) AS ZIP3_LOWEST_COUNT,
SUM(CASE
WHEN TOP10 = TRUE THEN 1
ELSE 0 END) AS ZIP3_TOP10_COUNT,
AVG(discounted_rate_order) AS ZIP3_AVG_RANK,
AVG(discounted_cent_diff) AS ZIP3_AVG_CENT_DIFF,
AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
SUM(CASE
WHEN DISCOUNTED_TOP5 = TRUE THEN 1
ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT,
SUM(CASE
WHEN DISCOUNTED_LOWEST = TRUE THEN 1
ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
SUM(CASE
WHEN DISCOUNTED_TOP10 = TRUE THEN 1
ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT,
AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK,
AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
ZIP3,
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
) AS ZIP3_SUB
ON
main.ZIP3 = ZIP3_SUB.ZIP3
AND main.COMPANY_KEY = ZIP3_SUB.COMPANY_KEY
AND main.RATING_CLASS = ZIP3_SUB.RATING_CLASS
LEFT JOIN (
SELECT
ZIP3,
EXACT_COUNT_DISTINCT(logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
ZIP3 ) AS ZIP3_COUNT
ON
main.ZIP3 = ZIP3_COUNT.ZIP3
LEFT JOIN (
SELECT
ZIP5,
COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE,
COMPANY_KEY,
RATING_CLASS,
AVG(discounted_ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST,
SUM(CASE
WHEN TOP5 = TRUE THEN 1
ELSE 0 END) AS ZIP5_TOP5_COUNT,
SUM(CASE
WHEN LOWEST = TRUE THEN 1
ELSE 0 END) AS ZIP5_LOWEST_COUNT,
SUM(CASE
WHEN TOP10 = TRUE THEN 1
ELSE 0 END) AS ZIP5_TOP10_COUNT,
AVG(discounted_rate_order) AS ZIP5_AVG_RANK,
AVG(discounted_cent_diff) AS ZIP5_AVG_CENT_DIFF,
AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
SUM(CASE
WHEN DISCOUNTED_TOP5 = TRUE THEN 1
ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT,
SUM(CASE
WHEN DISCOUNTED_LOWEST = TRUE THEN 1
ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
SUM(CASE
WHEN DISCOUNTED_TOP10 = TRUE THEN 1
ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT,
AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK,
AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
ZIP5,
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
) AS ZIP5_SUB
ON
main.ZIP5 = ZIP5_SUB.ZIP5
AND main.COMPANY_KEY = ZIP5_SUB.COMPANY_KEY
AND main.RATING_CLASS = ZIP5_SUB.RATING_CLASS
LEFT JOIN (
SELECT
ZIP5,
EXACT_COUNT_DISTINCT(logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
ZIP5 ) AS ZIP5_COUNT
ON
main.ZIP5 = ZIP5_COUNT.ZIP5
LEFT JOIN (
SELECT
STATE,
EXACT_COUNT_DISTINCT(logging_key) AS STATE_MED_SUPP_TOOL_NUM_QUOTE
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
STATE ) AS STATE_COUNT
ON
main.STATE = STATE_COUNT.STATE
Explanation: Due to the fact BigQuery is a shared resource, BigQuery approximates the amount of resources that the query will require and allocate resources accordingly. By splitting out the query into multiple joins on the same table, the computation can be performed by more resources than originally intended. For a more technical explanation as to why restricting the query works well, please see Jordan Tigani on stack overflow response here.
A table can have up to 10,000 columns. The total size for all of your CSV, JSON, Avro, Parquet, and ORC input files can be up to 15 TB. A job configuration can have up to 10,000 source URIs.
The shell command bq cancel job_id will do this now. You can get the job_id from the Query History tab in the BigQuery console. If you started the query via the CLI, it will have logged the job_id to the standard output.
Somehow I feel below is what you need. I can be wrong as it is really just relatively blind attempt to reverse engineer your logic, so do not judge heavily if i am wrong here
Cannot test it, but I feel it is going to be cheap to actually succeed
SELECT
main.COMPANY_NAME AS COMPANY_NAME,
main.COMPANY_KEY AS COMPANY_KEY,
main.RATING_CLASS AS RATING_CLASS,
SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER () AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
-- ZIP3
main.ZIP3 AS ZIP3,
ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
ZIP3_TOTAL_RESULT_APPEARANCE,
ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5
main.ZIP5 AS ZIP5,
ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
ZIP5_TOTAL_RESULT_APPEARANCE,
ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
SELECT COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
) AS main
LEFT JOIN (
SELECT
ZIP3, company_key, rating_class,
COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE,
SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_LOWEST_COUNT,
AVG(discounted_ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) AS ZIP3_AVG_RANK,
SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP5_COUNT,
SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP10_COUNT,
AVG(discounted_cent_diff) AS ZIP3_AVG_CENT_DIFF,
SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK,
SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT,
SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT,
AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY ZIP3, company_key, rating_class
) AS zip3_sub
ON main.ZIP3 = zip3_sub.ZIP3 AND main.company_key = zip3_sub.company_key AND main.rating_class = zip3_sub.rating_class
LEFT JOIN (
SELECT
ZIP5, company_key, rating_class,
COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE,
SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_LOWEST_COUNT,
AVG(discounted_ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) AS ZIP5_AVG_RANK,
SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP5_COUNT,
SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP10_COUNT,
AVG(discounted_cent_diff) AS ZIP5_AVG_CENT_DIFF,
SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK,
SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT,
SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT,
AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY ZIP5, company_key, rating_class
) AS zip5_sub
ON main.ZIP5 = zip5_sub.ZIP5 AND main.company_key = zip5_sub.company_key AND main.rating_class = zip5_sub.rating_class
LEFT JOIN (
SELECT ZIP3, COUNT(DISTINCT logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY ZIP3
) AS zip3_count
ON main.ZIP3 = zip3_count.ZIP3
LEFT JOIN (
SELECT ZIP5, COUNT(DISTINCT logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY ZIP5
) AS zip5_count
ON main.ZIP5 = zip5_count.ZIP5
Also, please note: In BigQuery Legacy SQL - COUNT(DISTINCT) function is probabilistic - gives statistical approximation and is not guaranteed to be exact.
You can use EXACT_COUNT_DISTINCT() function instead - this one gives you exact number but a little more expensive on back-end
And of course whole query can be rewritten for BigQuery Standard SQL when COUNT(DISTINCT) produces exact count and Standard SQL is preferable to use per BigQuery Team recommendation
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With