Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

HIVE_METASTORE_ERROR expected 'STRING' but 'STRING' is found

I've been unable to get any query to work against my AWS Glue Partitioned table. The error I'm getting is

HIVE_METASTORE_ERROR: com.facebook.presto.spi.PrestoException: Error: type expected at the position 0 of 'STRING' but 'STRING' is found. (Service: null; Status Code: 0; Error Code: null; Request ID: null)

I've found one other thread that brings up the fact that the database name and table cannot have characters other than alphanumeric and underscores. So, I made sure the database name, table name and all column names adhere to this restriction. The only object that does not adhere to this restriction is my s3 bucket name which would be very difficult to change.

Here are the table definitions and parquet-tools dumps of the data.

AWS Glue Table Definition

{
    "Table": {
        "UpdateTime": 1545845064.0, 
        "PartitionKeys": [
            {
                "Comment": "call_time year", 
                "Type": "INT", 
                "Name": "date_year"
            }, 
            {
                "Comment": "call_time month", 
                "Type": "INT", 
                "Name": "date_month"
            }, 
            {
                "Comment": "call_time day", 
                "Type": "INT", 
                "Name": "date_day"
            }
        ], 
        "StorageDescriptor": {
            "OutputFormat": "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat", 
            "SortColumns": [], 
            "InputFormat": "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat", 
            "SerdeInfo": {
                "SerializationLibrary": "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", 
                "Name": "ser_de_info_system_admin_created", 
                "Parameters": {
                    "serialization.format": "1"
                }
            }, 
            "BucketColumns": [], 
            "Parameters": {}, 
            "Location": "s3://ph-data-lake-cududfs2z3xveg5t/curated/system/admin_created/", 
            "NumberOfBuckets": 0, 
            "StoredAsSubDirectories": false, 
            "Columns": [
                {
                    "Comment": "Unique user ID", 
                    "Type": "STRING", 
                    "Name": "user_id"
                }, 
                {
                    "Comment": "Unique group ID", 
                    "Type": "STRING", 
                    "Name": "group_id"
                }, 
                {
                    "Comment": "Date and time the message was published", 
                    "Type": "TIMESTAMP", 
                    "Name": "call_time"
                }, 
                {
                    "Comment": "call_time year", 
                    "Type": "INT", 
                    "Name": "date_year"
                }, 
                {
                    "Comment": "call_time month", 
                    "Type": "INT", 
                    "Name": "date_month"
                }, 
                {
                    "Comment": "call_time day", 
                    "Type": "INT", 
                    "Name": "date_day"
                }, 
                {
                    "Comment": "Given name for user", 
                    "Type": "STRING", 
                    "Name": "given_name"
                }, 
                {
                    "Comment": "IANA time zone for user", 
                    "Type": "STRING", 
                    "Name": "time_zone"
                }, 
                {
                    "Comment": "Name that links to geneaology", 
                    "Type": "STRING", 
                    "Name": "family_name"
                }, 
                {
                    "Comment": "Email address for user", 
                    "Type": "STRING", 
                    "Name": "email"
                }, 
                {
                    "Comment": "RFC BCP 47 code set in this user's profile language and region", 
                    "Type": "STRING", 
                    "Name": "language"
                }, 
                {
                    "Comment": "Phone number including ITU-T ITU-T E.164 country codes", 
                    "Type": "STRING", 
                    "Name": "phone"
                }, 
                {
                    "Comment": "Date user was created", 
                    "Type": "TIMESTAMP", 
                    "Name": "date_created"
                }, 
                {
                    "Comment": "User role", 
                    "Type": "STRING", 
                    "Name": "role"
                }, 
                {
                    "Comment": "Provider dashboard preferences", 
                    "Type": "STRUCT<portal_welcome_done:BOOLEAN,weekend_digests:BOOLEAN,patients_hidden:BOOLEAN,last_announcement:STRING>", 
                    "Name": "preferences"
                }, 
                {
                    "Comment": "Provider notification settings", 
                    "Type": "STRUCT<digest_email:BOOLEAN>", 
                    "Name": "notifications"
                }
            ], 
            "Compressed": true
        }, 
        "Parameters": {
            "classification": "parquet", 
            "parquet.compress": "SNAPPY"
        }, 
        "Description": "System wide admin_created messages", 
        "Name": "system_admin_created", 
        "TableType": "EXTERNAL_TABLE", 
        "Retention": 0
    }
}

AWS Athena schema

CREATE EXTERNAL TABLE `system_admin_created`(
  `user_id` STRING COMMENT 'Unique user ID', 
  `group_id` STRING COMMENT 'Unique group ID', 
  `call_time` TIMESTAMP COMMENT 'Date and time the message was published', 
  `date_year` INT COMMENT 'call_time year', 
  `date_month` INT COMMENT 'call_time month', 
  `date_day` INT COMMENT 'call_time day', 
  `given_name` STRING COMMENT 'Given name for user', 
  `time_zone` STRING COMMENT 'IANA time zone for user', 
  `family_name` STRING COMMENT 'Name that links to geneaology', 
  `email` STRING COMMENT 'Email address for user', 
  `language` STRING COMMENT 'RFC BCP 47 code set in this user\'s profile language and region', 
  `phone` STRING COMMENT 'Phone number including ITU-T ITU-T E.164 country codes', 
  `date_created` TIMESTAMP COMMENT 'Date user was created', 
  `role` STRING COMMENT 'User role', 
  `preferences` STRUCT<portal_welcome_done:BOOLEAN,weekend_digests:BOOLEAN,patients_hidden:BOOLEAN,last_announcement:STRING> COMMENT 'Provider dashboard preferences', 
  `notifications` STRUCT<digest_email:BOOLEAN> COMMENT 'Provider notification settings')
PARTITIONED BY ( 
  `date_year` INT COMMENT 'call_time year', 
  `date_month` INT COMMENT 'call_time month', 
  `date_day` INT COMMENT 'call_time day')
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
  's3://ph-data-lake-cududfs2z3xveg5t/curated/system/admin_created/'
TBLPROPERTIES (
  'classification'='parquet', 
  'parquet.compress'='SNAPPY')

parquet-tools cat

role = admin
date_created = 2018-01-11T14:40:23.142Z
preferences:
.patients_hidden = false
.weekend_digests = true
.portal_welcome_done = true
email = [email protected]
notifications:
.digest_email = true
group_id = 5a5399df23a804001aa25227
given_name = foo
call_time = 2018-01-11T14:40:23.000Z
time_zone = US/Pacific
family_name = bar
language = en-US
user_id = 5a5777572060a700170240c3

parquet-tools schema

message spark_schema {
  optional binary role (UTF8);
  optional binary date_created (UTF8);
  optional group preferences {
    optional boolean patients_hidden;
    optional boolean weekend_digests;
    optional boolean portal_welcome_done;
    optional binary last_announcement (UTF8);
  }
  optional binary email (UTF8);
  optional group notifications {
    optional boolean digest_email;
  }
  optional binary group_id (UTF8);
  optional binary given_name (UTF8);
  optional binary call_time (UTF8);
  optional binary time_zone (UTF8);
  optional binary family_name (UTF8);
  optional binary language (UTF8);
  optional binary user_id (UTF8);
  optional binary phone (UTF8);
}
like image 587
Zambonilli Avatar asked Dec 26 '18 19:12

Zambonilli


1 Answers

I ran into a similar PrestoException and the cause was using uppercase letters for the column type. Once I changed 'VARCHAR(10)' to 'varchar(10)' then it worked.

like image 181
colini Avatar answered Oct 31 '22 15:10

colini