I have a small pyhton code that build a dataframe with one (or more) nans and then write it to a postgres database with psycopg2 module using copy_from function. Here it is:
table_name = "test"
df = pd.DataFrame([[1.0, 2.0], [3.0, np.nan]], columns=["VALUE0", "VALUE1"], index=pd.date_range("2000-01-01", "2000-01-02"))
database = "xxxx"
user = "xxxxxxx"
password = "xxxxxx"
host = "127.0.0.1"
port = "xxxxx"
def nan_to_null(f,
_NULL=psycopg2.extensions.AsIs('NULL'),
_NaN=np.NaN,
_Float=psycopg2.extensions.Float):
if f != f:
return _NULL
else:
return _Float(f)
psycopg2.extensions.register_adapter(float, nan_to_null)
psycopg2.extensions.register_adapter(np.float, nan_to_null)
psycopg2.extensions.register_adapter(np.float64, nan_to_null)
with psycopg2.connect(database=database,
user=user,
password=password,
host=host,
port=port) as conn:
try:
with conn.cursor() as cur:
cmd = "CREATE TABLE {} (TIMESTAMP timestamp PRIMARY KEY NOT NULL, VALUE0 FLOAT, VALUE1 FLOAT)"
cur.execute(sql.SQL(cmd).format(sql.Identifier(table_name)))
buffer = StringIO()
df.to_csv(buffer, index_label='TIMESTAMP', header=False)
buffer.seek(0)
cur.copy_from(buffer, table_name, sep=",")
conn.commit()
except Exception as e:
conn.rollback()
logging.error(traceback.format_exc())
raise e
The problème is that psycopg2 fail to transform nan into posgres NULL, although I have used this trick: How do I convert numpy NaN objects to SQL nulls? (the nan_to_null function). I cannot make it work, it throws the following exception:
psycopg2.errors.InvalidTextRepresentation: invalid input syntax for type double precision: ""
CONTEXT: COPY test, line 2, column value1: ""
I am using python 3.8 on windows 10 with anaconda 3, psycopg2 v2.8.5 and postgres v12.3. Thanks!
I put here the same code with the solution updated of Adrian Klaver. The line that changed is:
df.to_csv(buffer, index_label='TIMESTAMP', header=False, na_rep='NaN')
We've added na_rep='NaN' in to_csv function. No need to replace nans with another line of code. replacing with 'NULL' does not work.
import psycopg2, logging, numpy as np, pandas as pd
from psycopg2 import sql
import traceback
from io import StringIO
if __name__ == '__main__':
table_name = "test"
df = pd.DataFrame([[1.0, 2.0], [3.0, np.nan]], columns=["VALUE0", "VALUE1"], index=pd.date_range("2000-01-01", "2000-01-02"))
database = "xxxxxx"
user = "xxxxx"
password = "xxxxxx"
host = "127.0.0.1"
port = "xxxxxx"
with psycopg2.connect(database=database,
user=user,
password=password,
host=host,
port=port) as conn:
try:
with conn.cursor() as cur:
#Creating a new table test
cmd = "CREATE TABLE {} (TIMESTAMP timestamp PRIMARY KEY NOT NULL, VALUE0 FLOAT, VALUE1 FLOAT);"
cur.execute(sql.SQL(cmd).format(sql.Identifier(table_name)))
#Writting content
buffer = StringIO()
df.to_csv(buffer, index_label='TIMESTAMP', header=False, na_rep='NaN')
buffer.seek(0)
cur.copy_from(buffer, table_name, sep=",")
#Reading the table content
cmd = "SELECT * FROM {};"
cur.execute(sql.SQL(cmd).format(sql.Identifier(table_name)))
test_data = pd.DataFrame(cur.fetchall())
print(test_data)
print(type(test_data.loc[1, 2]))
#Deleting the test table
cmd = "DROP TABLE {};"
cur.execute(sql.SQL(cmd).format(sql.Identifier(table_name)))
conn.commit()
except Exception as e:
conn.rollback()
logging.error(traceback.format_exc())
raise e
The prints shows that nan is well interpreted and stored in the DB.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With