I thought I'd be able to get what I needed by asking a simpler question referencing a simpler data example here, but I still need some help.
I'm pretty new to querying json style data within BigQuery, and am having trouble with the analytics (events) data that Firebase dumps into BigQuery for me. The format of 1 row of data is below (trimmed out some fluff).
{
"user_dim": {
"user_id": "some_identifier_here",
"user_properties": [
{
"key": "special_key1",
"val": {
"val": {
"str_val": "894",
"int_val": null
}
}
},
{
"key": "special_key2",
"val": {
"val": {
"str_val": "1",
"int_val": null
}
}
},
{
"key": "special_key3",
"val": {
"val": {
"str_val": "23",
"int_val": null
}
}
}
],
"device_info": {
"device_category": "mobile",
"mobile_brand_name": "Samsung",
"mobile_model_name": "model_phone"
},
"dt_a": "1470625311138000",
"dt_b": "1470620345566000"
},
"event_dim": [
{
"name": "user_engagement",
"params": [
{
"key": "firebase_event_origin",
"value": {
"string_value": "auto",
"int_value": null,
"float_value": null,
"double_value": null
}
},
{
"key": "engagement_time_msec",
"value": {
"string_value": null,
"int_value": "30006",
"float_value": null,
"double_value": null
}
}
],
"timestamp_micros": "1470675614434000",
"previous_timestamp_micros": "1470675551092000"
},
{
"name": "new_game",
"params": [
{
"key": "total_time",
"value": {
"string_value": "496048",
"int_value": null,
"float_value": null,
"double_value": null
}
},
{
"key": "armor",
"value": {
"string_value": "2",
"int_value": null,
"float_value": null,
"double_value": null
}
},
{
"key": "reason",
"value": {
"string_value": "power_up",
"int_value": null,
"float_value": null,
"double_value": null
}
}
],
"timestamp_micros": "1470675825988001",
"previous_timestamp_micros": "1470675282500001"
},
{
"name": "user_engagement",
"params": [
{
"key": "firebase_event_origin",
"value": {
"string_value": "auto",
"int_value": null,
"float_value": null,
"double_value": null
}
},
{
"key": "engagement_time_msec",
"value": {
"string_value": null,
"int_value": "318030",
"float_value": null,
"double_value": null
}
}
],
"timestamp_micros": "1470675972778002",
"previous_timestamp_micros": "1470675614434002"
},
{
"name": "won_game",
"params": [
{
"key": "total_time",
"value": {
"string_value": "497857",
"int_value": null,
"float_value": null,
"double_value": null
}
},
{
"key": "level",
"value": {
"string_value": null,
"int_value": "207",
"float_value": null,
"double_value": null
}
},
{
"key": "sword",
"value": {
"string_value": "iron",
"int_value": null,
"float_value": null,
"double_value": null
}
}
],
"timestamp_micros": "1470677171374007",
"previous_timestamp_micros": "1470671343784007"
}
]
}
Based on the answers to my original question I've been able to work just fine with the first part of the object user_dim
. However, whenever I try similar approaches to the event_dim
field (unnesting it) the queries fail with the message "Error: Scalar subquery produced more than one element." I have a suspicion this is due to the fact that event_dim
is an array itself, and contains structs that have arrays in them as well.
If it helps here is the basic query that is giving me the error, although it should be noted that I am quite out of my element working with this type of data in BQ and could be going completely off course:
SELECT
(SELECT name FROM UNNEST(event_dim) WHERE name = 'user_engagement') AS event_name
FROM
my_table;
The end result I am going for is a query that can turn a table that contains many of these types of objects into a table that outputs 1 row per event in each objects event_dim
array. i.e. for the example object above, I'd want it to output 4 rows where the first set of columns are identical and are just the metadata from user_dim
. Then I'd like columns that I can explicitly define based on what I know will exist for each possible event, like event_name, firebase_event_origin, engagement_time_msec, total_time, armor, reason, level, sword
and then fill with the value from that event parameter, or NULL if it doesn't exist.
Based on Mikhail's answer, but over an actual Firebase dataset:
SELECT
user_dim.app_info.app_instance_id,
timestamp_micros,
(SELECT value.int_value FROM UNNEST(dim.params) WHERE key = "level") AS level,
(SELECT value.int_value FROM UNNEST(dim.params) WHERE key = "coins") AS coins,
(SELECT value.int_value FROM UNNEST(dim.params) WHERE key = "powerups") AS powerups
FROM `dataset.table`, UNNEST(event_dim) AS dim
WHERE timestamp_micros=1464718937589000
(saving it here for future reference, and easier copy-pastability)
Hope, below can give you next push
WITH YourTable AS (
SELECT ARRAY[
STRUCT(
"user_engagement" AS name,
ARRAY<STRUCT<key STRING, val STRUCT<str_val STRING, int_val INT64>>>[
STRUCT("firebase_event_origin", STRUCT("auto", NULL)),
STRUCT("engagement_time_msec", STRUCT("30006", NULL))] AS params,
1470675614434000 AS TIMESTAMP_MICROS,
1470675551092000 AS previous_timestamp_micros
),
STRUCT(
"new_game" AS name,
ARRAY<STRUCT<key STRING, val STRUCT<str_val STRING, int_val INT64>>>[
STRUCT("total_time", STRUCT("496048", NULL)),
STRUCT("armor", STRUCT("2", NULL)),
STRUCT("reason", STRUCT("power_up", NULL))] AS params,
1470675825988001 AS TIMESTAMP_MICROS,
1470675282500001 AS previous_timestamp_micros
),
STRUCT(
"user_engagement" AS name,
ARRAY<STRUCT<key STRING, val STRUCT<str_val STRING, int_val INT64>>>[
STRUCT("firebase_event_origin", STRUCT("auto", NULL)),
STRUCT("engagement_time_msec", STRUCT("318030", NULL))] AS params,
1470675972778002 AS TIMESTAMP_MICROS,
1470675614434002 AS previous_timestamp_micros
),
STRUCT(
"won_game" AS name,
ARRAY<STRUCT<key STRING, val STRUCT<str_val STRING, int_val INT64>>>[
STRUCT("total_time", STRUCT("497857", NULL)),
STRUCT("level", STRUCT("207", NULL)),
STRUCT("sword", STRUCT("iron", NULL))] AS params,
1470677171374007 AS TIMESTAMP_MICROS,
1470671343784007 AS previous_timestamp_micros
)
] AS event_dim
)
SELECT
name,
(SELECT val.str_val FROM UNNEST(dim.params) WHERE key = "firebase_event_origin") AS firebase_event_origin,
(SELECT val.str_val FROM UNNEST(dim.params) WHERE key = "engagement_time_msec") AS engagement_time_msec,
(SELECT val.str_val FROM UNNEST(dim.params) WHERE key = "total_time") AS total_time,
(SELECT val.str_val FROM UNNEST(dim.params) WHERE key = "armor") AS armor,
(SELECT val.str_val FROM UNNEST(dim.params) WHERE key = "reason") AS reason,
(SELECT val.str_val FROM UNNEST(dim.params) WHERE key = "level") AS level,
(SELECT val.str_val FROM UNNEST(dim.params) WHERE key = "sword") AS sword
FROM YourTable, UNNEST(event_dim) AS dim
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With