I am developing a synchronization mechanism to move data form aws redshift to aurora. In order to lower the load on network I/O I am transforming the queries on redshift and add them a checksum query so I will export only the records that have changed. I am wrapping the base query with another Select * query and adding the a checksum function.
I have tried to remove some inner sub queries and after that the new query is working, but I can't change the query since is provided by some other platform.
Base Query:
select x.player_id as playerid,
p.player_nickname,
r.region_code,
s.title as season_name,
x.rating as ranking_score,
x.rank_no as rank_no,
x.rank_no_change as rank_no_change,
x.game_mode,
to_char(ga.total_score, 'FM9D00') as gyo_perf_total_score,
to_char(agressive_score*100/aggresive_weight, 'FM9D000') as gyo_perf_aggressive_score,
to_char(defensive_score*100/defensive_weight, 'FM9D000') as gyo_perf_defensive_score,
to_char(survival_score*100/survival_weight, 'FM9D000') as gyo_perf_survival_score,
ga.match_place_avg,
rounds_played,
kills,
(kills*1.0)/rounds_played as avg_kills_per_round,
assists,
(assists*1.0)/rounds_played as avg_assists_per_round,
headshot_kills,
top10s,
(top10s*1.0)/rounds_played as top10s_ratio,
wins,
(wins*1.0)/rounds_played as win_ratio,
case when top10s = 0 then 0 else (wins*1.0)/(top10s*1.0) end as win_to_top10_ratio,
losses,
ga.match_group as ___matchgroup,
ga.last_update_id as ___lastupdateid_ga,
___lastupdateid_st,
x.last_update_id as ___lastupdateid_rn
from hd_stats.calc_player_ranking x
inner join hd_core.core_player p on x.player_id = p.player_id
and p.player_game_id =
(select g.game_id
from hd_core.core_game g
where lower(g.game_short_title) = 'xxxx')
left join hd_core.core_region r on x.region_id = r.region_id
left join hd_core.core_season s on x.season_id = s.season_id
--gyo perf
left join hd_stats.calc_pubg_gyo_average ga on
ga.player_id = x.player_id
and nvl(x.season_id,0) = nvl(ga.season_id,0)
and nvl(x.region_id,0) = nvl(ga.region_id,0)
and nvl(x.game_mode,'') = nvl(ga.match_mode,'')
left join (select player_id, region_id,
season_id, match_mode,
rounds_played as rounds_played,
kills as kills,
assists as assists,
headshot_kills as headshot_kills,
top10s as top10s,
wins as wins,
losses as losses,
lastupdate as ___lastupdateid_st
from hd_stats.calc_pubg_player_season_stats s
) as y
on x.player_id = y.player_id
and nvl(x.season_id,0) = nvl(y.season_id,0)
and nvl(x.region_id,0) = nvl(y.region_id,0)
and nvl(x.game_mode,'') = nvl(y.match_mode,'')
order by ___lastupdateid_rn
Transformed Query that fails:
Select top 100 *
,func_sha1(
''
)
as synch_checksum
from
(
select x.player_id as playerid,
p.player_nickname,
r.region_code,
s.title as season_name,
x.rating as ranking_score,
x.rank_no as rank_no,
x.rank_no_change as rank_no_change,
x.game_mode,
to_char(ga.total_score, 'FM9D00') as gyo_perf_total_score,
to_char(agressive_score*100/aggresive_weight, 'FM9D000') as gyo_perf_aggressive_score,
to_char(defensive_score*100/defensive_weight, 'FM9D000') as gyo_perf_defensive_score,
to_char(survival_score*100/survival_weight, 'FM9D000') as gyo_perf_survival_score,
ga.match_place_avg,
rounds_played,
kills,
(kills*1.0)/rounds_played as avg_kills_per_round,
assists,
(assists*1.0)/rounds_played as avg_assists_per_round,
headshot_kills,
top10s,
(top10s*1.0)/rounds_played as top10s_ratio,
wins,
(wins*1.0)/rounds_played as win_ratio,
case when top10s = 0 then 0 else (wins*1.0)/(top10s*1.0) end as win_to_top10_ratio,
losses,
ga.match_group as ___matchgroup,
ga.last_update_id as ___lastupdateid_ga,
___lastupdateid_st,
x.last_update_id as ___lastupdateid_rn
from hd_stats.calc_player_ranking x
inner join hd_core.core_player p on x.player_id = p.player_id
and p.player_game_id =
(select g.game_id
from hd_core.core_game g
where lower(g.game_short_title) = 'xxxx')
left join hd_core.core_region r on x.region_id = r.region_id
left join hd_core.core_season s on x.season_id = s.season_id
--gyo perf
left join hd_stats.calc_pubg_gyo_average ga on
ga.player_id = x.player_id
and nvl(x.season_id,0) = nvl(ga.season_id,0)
and nvl(x.region_id,0) = nvl(ga.region_id,0)
and nvl(x.game_mode,'') = nvl(ga.match_mode,'')
left join (select player_id, region_id,
season_id, match_mode,
rounds_played as rounds_played,
kills as kills,
assists as assists,
headshot_kills as headshot_kills,
top10s as top10s,
wins as wins,
losses as losses,
lastupdate as ___lastupdateid_st
from hd_stats.calc_pubg_player_season_stats s
) as y
on x.player_id = y.player_id
and nvl(x.season_id,0) = nvl(y.season_id,0)
and nvl(x.region_id,0) = nvl(y.region_id,0)
and nvl(x.game_mode,'') = nvl(y.match_mode,'')
order by ___lastupdateid_rn
)
Transformed Query that works:
Select top 100 *
,func_sha1(
''
)
as synch_checksum
from
(
select x.player_id as playerid,
p.player_nickname,
r.region_code,
s.title as season_name,
x.rating as ranking_score,
x.rank_no as rank_no,
x.rank_no_change as rank_no_change,
x.game_mode,
to_char(ga.total_score, 'FM9D00') as gyo_perf_total_score,
to_char(agressive_score*100/aggresive_weight, 'FM9D000') as gyo_perf_aggressive_score,
to_char(defensive_score*100/defensive_weight, 'FM9D000') as gyo_perf_defensive_score,
to_char(survival_score*100/survival_weight, 'FM9D000') as gyo_perf_survival_score,
ga.match_place_avg,
rounds_played,
kills,
(kills*1.0)/rounds_played as avg_kills_per_round,
assists,
(assists*1.0)/rounds_played as avg_assists_per_round,
headshot_kills,
top10s,
(top10s*1.0)/rounds_played as top10s_ratio,
wins,
(wins*1.0)/rounds_played as win_ratio,
case when top10s = 0 then 0 else (wins*1.0)/(top10s*1.0) end as win_to_top10_ratio,
losses,
ga.match_group as ___matchgroup,
ga.last_update_id as ___lastupdateid_ga,
___lastupdateid_st,
x.last_update_id as ___lastupdateid_rn
from hd_stats.calc_player_ranking x
inner join hd_core.core_player p on x.player_id = p.player_id
and p.player_game_id = 2
--(select g.game_id
-- from hd_core.core_game g
-- where lower(g.game_short_title) = 'xxxx')
left join hd_core.core_region r on x.region_id = r.region_id
left join hd_core.core_season s on x.season_id = s.season_id
--gyo perf
left join hd_stats.calc_pubg_gyo_average ga on
ga.player_id = x.player_id
and nvl(x.season_id,0) = nvl(ga.season_id,0)
and nvl(x.region_id,0) = nvl(ga.region_id,0)
and nvl(x.game_mode,'') = nvl(ga.match_mode,'')
left join (select player_id, region_id,
season_id, match_mode,
rounds_played as rounds_played,
kills as kills,
assists as assists,
headshot_kills as headshot_kills,
top10s as top10s,
wins as wins,
losses as losses,
lastupdate as ___lastupdateid_st
from hd_stats.calc_pubg_player_season_stats s
) as y
on x.player_id = y.player_id
and nvl(x.season_id,0) = nvl(y.season_id,0)
and nvl(x.region_id,0) = nvl(y.region_id,0)
and nvl(x.game_mode,'') = nvl(y.match_mode,'')
order by ___lastupdateid_rn
)
Expected to get all the records and columns on the base query and another column for the checksum.
Instead I get an error :
Amazon Invalid operation: unrecognized node type: 407; [SQL State=XX000, DB Errorcode=500310]
Leave out the ORDER BY clause. That alleviated the error for me.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With