import pandas as pd
from google.cloud import bigquery
import google.auth
# from google.cloud import bigquery
# Create credentials with Drive & BigQuery API scopes
# Both APIs must be enabled for your project before running this code
credentials, project = google.auth.default(scopes=[
'https://www.googleapis.com/auth/drive',
'https://www.googleapis.com/auth/spreadsheets',
'https://www.googleapis.com/auth/bigquery',
])
client = bigquery.Client(credentials=credentials, project=project)
# Configure the external data source and query job
external_config = bigquery.ExternalConfig('GOOGLE_SHEETS')
# Use a shareable link or grant viewing access to the email address you
# used to authenticate with BigQuery (this example Sheet is public)
sheet_url = (
'https://docs.google.com/spreadsheets'
'/d/1uknEkew2C3nh1JQgrNKjj3Lc45hvYI2EjVCcFRligl4/edit?usp=sharing')
external_config.source_uris = [sheet_url]
external_config.schema = [
bigquery.SchemaField('name', 'STRING'),
bigquery.SchemaField('post_abbr', 'STRING')
]
external_config.options.skip_leading_rows = 1 # optionally skip header row
table_id = 'BambooHRActiveRoster'
job_config = bigquery.QueryJobConfig()
job_config.table_definitions = {table_id: external_config}
# Get Top 10
sql = 'SELECT * FROM workforce.BambooHRActiveRoster LIMIT 10'
query_job = client.query(sql, job_config=job_config) # API request
top10 = list(query_job) # Waits for query to finish
print('There are {} states with names starting with W.'.format(
len(top10)))
The error I get is:
BadRequest: 400 Error while reading table: workforce.BambooHRActiveRoster, error message: Failed to read the spreadsheet. Errors: No OAuth token with Google Drive scope was found.
I can pull data in from a BigQuery table created from CSV upload, but when I have a BigQuery table created from a linked Google Sheet, I continue to receive this error.
I have tried to replicate the sample in Google's documentation (Creating and querying a temporary table):
https://cloud.google.com/bigquery/external-data-drive
You are authenticating as yourself, which is generally fine for BQ if you have the correct permissions. Using tables linked to Google Sheets often requires a service account. Create one (or have your BI/IT team create one), and then you will have to share the underlying Google Sheet with the service account. Finally, you will need to modify your python script to use the service account credentials and not your own.
The quick way around this is to use the BQ interface, select *
from the Sheets-linked table, and save the results to a new table, and query that new table directly in your python script. This works well if this is a one-time upload/analysis. If the data in the sheets will be changing consistently and you will need to routinely query the data, this is not a long-term solution.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With