I have a CSV file of about 5000 rows, and I want to split it into five files using Python.
I wrote some code for it, but it is not working:
import codecs
import csv

# Threshold that the running row count is compared against below.
NO_OF_LINES_PER_FILE = 1000


def again(count_file_header,count):
    """Re-read the input CSV while appending to 'write_<count_file_header>.csv'.

    count_file_header: value spliced into the output file name.
    count: running row counter handed over by the caller; it keeps being
        incremented here for every row read.

    NOTE(review): as written, no row is ever written by this function; see
    the comments on the branches below.
    """
    # NOTE(review): every call site passes an int
    # (count + NO_OF_LINES_PER_FILE), so this str + int concatenation raises
    # TypeError. The handle is also never closed in this function.
    f3 = open('write_'+count_file_header+'.csv', 'at')
    # The input file is opened afresh, so each call iterates from its first
    # row again.
    # NOTE(review): 'rb' matches the Python 2 csv module; under Python 3
    # csv.reader expects a text-mode file -- confirm the target version.
    with open('import_1458922827.csv', 'rb') as csvfile:
        candidate_info_reader = csv.reader(csvfile, delimiter=',', quoting=csv.QUOTE_ALL)
        # co is incremented per row but never read in this function.
        co = 0
        for row in candidate_info_reader:
            co = co + 1
            count = count + 1
            # A value compared with itself: this condition is always true, so
            # every row takes the `pass` branch and the elif/else branches
            # below are unreachable.
            if count <= count:
                pass
            elif count >= NO_OF_LINES_PER_FILE:
                count_file_header = count + NO_OF_LINES_PER_FILE
                again(count_file_header,count)
            else:
                writer = csv.writer(f3,delimiter = ',', lineterminator='\n',quoting=csv.QUOTE_ALL)
                writer.writerow(row)


def read_write():
    """Read 'import_1458922827.csv' and append its rows to a 'write_*.csv' file.

    Rows are written directly while the running count is below
    NO_OF_LINES_PER_FILE; from then on each row triggers a call to again().
    """
    # NOTE(review): NO_OF_LINES_PER_FILE is the int 1000, so this str + int
    # concatenation raises TypeError before any row is read. The handle is
    # never closed in this function.
    f3 = open('write_'+NO_OF_LINES_PER_FILE+'.csv', 'at')
    # NOTE(review): 'rb' matches the Python 2 csv module; under Python 3
    # csv.reader expects a text-mode file -- confirm the target version.
    with open('import_1458922827.csv', 'rb') as csvfile:
        candidate_info_reader = csv.reader(csvfile, delimiter=',', quoting=csv.QUOTE_ALL)
        count = 0
        for row in candidate_info_reader:
            count = count + 1
            # count only grows, so once the threshold is reached this branch
            # is taken for every remaining row: again() is called once per
            # row, each time with a different count_file_header.
            if count >= NO_OF_LINES_PER_FILE:
                count_file_header = count + NO_OF_LINES_PER_FILE
                again(count_file_header,count)
            else:
                # A new csv.writer is built around f3 for every row written.
                writer = csv.writer(f3,delimiter = ',', lineterminator='\n',quoting=csv.QUOTE_ALL)
                writer.writerow(row)


# Module-level call: the split runs as soon as this file is executed.
read_write()
The above code creates many files with empty content.
How can I split one file into five CSV files?
CSV Splitter: 1. Open the CSV splitter and enter the CSV file you want to split. Enter the number of rows and finally click on Execute. 2. After the split is done, the files will be saved in the location where your original CSV file is present.
Use a Batch File: You can use a batch file to process the CSV into smaller chunks, customizing the file to deliver different chunks. REM Edit this value to change the name of the file that needs splitting. Include the extension. REM Edit this value to change the number of lines per file.
In Python
Use readlines()
and writelines()
to do that, here is an example:
# Split the input into numbered chunk files (1.csv, 2.csv, ...) holding at
# most `chunk_size` lines each.
# NOTE: this splits on physical lines; a quoted CSV field that contains a
# line break could end up divided between two output files.
chunk_size = 1000

# Read every line up front; the `with` block closes the input handle.
with open('import_1458922827.csv', 'r') as csvfile:
    lines = csvfile.readlines()

# Step through the lines one chunk at a time rather than testing
# `i % 1000` on every single index.
for filename, start in enumerate(range(0, len(lines), chunk_size), start=1):
    # Each output file is closed as soon as its chunk has been written.
    with open(str(filename) + '.csv', 'w') as chunk_file:
        chunk_file.writelines(lines[start:start + chunk_size])
the output file names will be numbered 1.csv
, 2.csv
, ... etc.
From terminal
FYI, you can do this from the command line using split
as follows:
$ split -l 1000 import_1458922827.csv
I suggest you not reinvent the wheel. There is an existing solution. Source here
import os


def split(filehandler, delimiter=',', row_limit=1000,
          output_name_template='output_%s.csv', output_path='.',
          keep_headers=True):
    """Split a CSV stream into numbered files of at most `row_limit` rows.

    Written for Python 3 (uses the built-in next() and open(newline='')).

    Args:
        filehandler: an open, iterable text source of CSV lines (for example
            a file object opened in text mode).
        delimiter: field separator used for both reading and writing.
        row_limit: maximum number of data rows per output file; the header
            row, when kept, is not counted.
        output_name_template: %-style template that receives the 1-based
            piece number.
        output_path: directory in which the pieces are created.
        keep_headers: when true, the first input row is treated as a header
            and repeated at the top of every piece.

    The first piece is always created, even for empty input. Every output
    file is closed before the function returns, including when an error
    interrupts the split.
    """
    import csv

    reader = csv.reader(filehandler, delimiter=delimiter)

    def _open_piece(piece):
        # newline='' lets the csv module control line endings, which avoids
        # blank lines between rows on platforms that translate '\n'.
        path = os.path.join(output_path, output_name_template % piece)
        return open(path, 'w', newline='')

    current_piece = 1
    current_out_file = _open_piece(current_piece)
    try:
        current_out_writer = csv.writer(current_out_file, delimiter=delimiter)
        current_limit = row_limit

        headers = None
        if keep_headers:
            # next() with a default copes with empty input instead of
            # raising, and replaces the Python 2-only reader.next().
            headers = next(reader, None)
            if headers is not None:
                current_out_writer.writerow(headers)

        for i, row in enumerate(reader):
            if i + 1 > current_limit:
                # The current piece is full: close it before starting the
                # next one so that handles do not accumulate.
                current_out_file.close()
                current_piece += 1
                current_limit = row_limit * current_piece
                current_out_file = _open_piece(current_piece)
                current_out_writer = csv.writer(current_out_file,
                                                delimiter=delimiter)
                if headers is not None:
                    current_out_writer.writerow(headers)
            current_out_writer.writerow(row)
    finally:
        current_out_file.close()
Use it like:
# Open the input in a `with` block so the handle is closed once the split
# has finished.
with open('/your/pat/input.csv', 'r') as source_file:
    split(source_file)
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With