I am using this example to upload a CSV file into an SQLite database.
This is my code:
from numpy import genfromtxt
from time import time
from datetime import datetime
from sqlalchemy import Column, Integer, Float, Date, String, VARCHAR
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
def Load_Data(file_name):
    data = genfromtxt(file_name, delimiter=',')  # skiprows=1, converters={0: lambda s: str(s)})
    return data.tolist()
Base = declarative_base()
class cdb1(Base):
    # Tell SQLAlchemy what the table name is and if there are any table-specific arguments it should know about
    __tablename__ = 'cdb1'
    __table_args__ = {'sqlite_autoincrement': True}
    # Tell SQLAlchemy the name of each column and its attributes:
    id = Column(Integer, primary_key=True, nullable=False)
    name = Column(VARCHAR(40))
    shack = Column(VARCHAR)
    db = Column(Integer)
    payments = Column(Integer)
    status = Column(VARCHAR)
if __name__ == "__main__":
    t = time()
    print 'creating database'
    # Create the database
    engine = create_engine('sqlite:///cdb.db')
    Base.metadata.create_all(engine)
    # Create the session
    session = sessionmaker()
    session.configure(bind=engine)
    s = session()
    try:
        file_name = 'client_db.csv'
        data = Load_Data(file_name)
        for i in data:
            record = cdb1(**{
                'name': i[0],
                'shack': i[1],
                'db': i[2],
                'payments': i[3],
                'status': i[4]
            })
            s.add(record)  # Add all the records
        s.commit()  # Attempt to commit all the records
    except:
        s.rollback()  # Rollback the changes on error
        print 'error in reading'
    finally:
        s.close()  # Close the connection
    print "Time elapsed: " + str(time() - t) + " s."  # 0.091s
And these are the first few rows of the CSV file:
Name,Shack,DB,Payments,Status
Loyiso Dwala,I156,13542,37,LightsOnly ON
Attwell Fayo,I157,13077,32,LightsON
David Mbhele,G25,13155,33,LightsON
The DB is created OK, but only some of the data is captured in the attributes: the 'payments' and 'db' columns are populated correctly, but everything else comes out as NULL.
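(A likely cause, noted here for context: numpy's genfromtxt defaults to a float dtype, so non-numeric fields are read back as nan. A minimal sketch of reading the file with inferred dtypes instead, assuming the same client_db.csv layout shown above:)

from numpy import genfromtxt

# Sketch only: dtype=None lets genfromtxt infer a type per column, and names=True
# consumes the header row, so the text columns survive as strings instead of nan.
data = genfromtxt('client_db.csv', delimiter=',', dtype=None, names=True, encoding='utf-8')
print(data['Name'][:3])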
UPDATED CORRECT CODE (using a pandas DataFrame):
from numpy import genfromtxt
from time import time
from datetime import datetime
from sqlalchemy import Column, Integer, Float, Date, String, VARCHAR
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import csv
import pandas as pd
#def Load_Data(file_name):
#    data = csv.reader(file_name, delimiter=',')  # skiprows=1, converters={0: lambda s: str(s)})
#    return data.tolist()
Base = declarative_base()
class cdb1(Base):
    # Tell SQLAlchemy what the table name is and if there are any table-specific arguments it should know about
    __tablename__ = 'cdb1'
    __table_args__ = {'sqlite_autoincrement': True}
    # Tell SQLAlchemy the name of each column and its attributes:
    id = Column(Integer, primary_key=True, nullable=False)
    Name = Column(VARCHAR(40))
    Shack = Column(VARCHAR)
    DB = Column(Integer)
    Payments = Column(Integer)
    Status = Column(VARCHAR)
engine = create_engine('sqlite:///cdb.db')
Base.metadata.create_all(engine)
file_name = 'client_db.csv'
df = pd.read_csv(file_name)
df.to_sql(con=engine, index_label='id', name=cdb1.__tablename__, if_exists='replace')
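To confirm the load worked (a quick sketch, assuming the cdb.db file and cdb1 table names used above), the table can be read straight back with pandas:

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('sqlite:///cdb.db')
# Read a few rows back from the freshly written table.
check = pd.read_sql_query('SELECT * FROM cdb1 LIMIT 3', engine)
print(check)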
One of the key aspects of any data science workflow is the sourcing, cleaning, and storing of raw data in a form that can be used upstream. This process is commonly referred to as “Extract-Transform-Load,” or ETL for short.
You can read a CSV file into a DataFrame using the read_csv() function (this function should be familiar to you, but you can run help(pd.read_csv) in the console to refresh your memory!). Then, you can call the .to_sql() method on the DataFrame to load it into a SQL table in a database.
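As a rough end-to-end sketch of that extract/transform/load flow (assuming the same client_db.csv and cdb.db names used elsewhere in this post; the whitespace-stripping step is only an illustrative transform, not something from the original code):

import pandas as pd
from sqlalchemy import create_engine

# Extract: read the raw CSV into a DataFrame.
df = pd.read_csv('client_db.csv')

# Transform: an illustrative cleanup step -- strip stray whitespace from the text columns.
for col in ('Name', 'Shack', 'Status'):
    df[col] = df[col].str.strip()

# Load: write the cleaned frame into the SQLite database.
engine = create_engine('sqlite:///cdb.db')
df.to_sql('cdb1', con=engine, index_label='id', if_exists='replace')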
Are you familiar with pandas DataFrames? They're really simple to use (and debug):
pandas.read_csv(file_name)
In [5]: pandas.read_csv('/tmp/csvt.csv')
Out[5]:
           Name Shack     DB  Payments         Status
0  Loyiso Dwala   I156  13542        37  LightsOnly ON
1  Attwell Fayo   I157  13077        32       LightsON
2  David Mbhele    G25  13155        33       LightsON
For inserting the DataFrame's data into a table, you can simply use pandas.DataFrame.to_sql.
So your main code will end up looking something like this:
engine = create_engine('sqlite:///cdb.db')
Base.metadata.create_all(engine)
file_name = 'client_db.csv'
df = pandas.read_csv(file_name)
df.to_sql(con=engine, index_label='id', name=cdb1.__tablename__, if_exists='replace')
You should read further in the documentation link I added, and set the function parameters to suit your purpose (especially look at if_exists, index, index_label, and dtype).
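As an illustration of those parameters (a sketch only, assuming the same engine and cdb1 table defined above; the dtype mapping is just one plausible choice, not something from the original post):

from sqlalchemy import Integer, VARCHAR

df.to_sql(
    name=cdb1.__tablename__,   # target table name
    con=engine,
    if_exists='append',        # keep the table created by Base.metadata.create_all
    index=True,                # write the DataFrame index...
    index_label='id',          # ...as the 'id' primary-key column
    dtype={                    # explicit SQL types per column
        'Name': VARCHAR(40),
        'Shack': VARCHAR,
        'DB': Integer,
        'Payments': Integer,
        'Status': VARCHAR,
    },
)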