I am trying to export about 1M documents from MongoDB to a CSV file using pymongo. My code looks like this:
import csv
import json
from datetime import datetime
from tempfile import NamedTemporaryFile

from bson import json_util
from pymongo import MongoClient
from pymongo.errors import CursorNotFound
# Connect to the server and pick the collection to export.
client = MongoClient('mongodb://login:pass@server:port')
db = client["some_mongo_database"]
collection = db["some_mongo_collection"]

# Half-open time window [fromDate, tillDate) used by both query branches.
DATE_FORMAT = '%Y-%m-%d %H:%M'
fromDate = datetime.strptime("2018-05-15 21:00", DATE_FORMAT)
tillDate = datetime.strptime("2018-05-16 21:00", DATE_FORMAT)
window = {"$gte": fromDate, "$lt": tillDate}

# Branch 1: documents whose LastUpdated falls inside the window.
updated_in_window = {"LastUpdated": dict(window)}

# Branch 2: documents created inside the window whose LastUpdated is None.
created_without_update = {
    "$and": [
        {"Created": dict(window)},
        {"LastUpdated": None},
    ]
}

query = {"$or": [updated_in_window, created_without_update]}

# no_cursor_timeout=True asks the server not to reap this cursor when idle,
# so it must be closed explicitly by whoever consumes it.
# NOTE(review): this flag may not prevent CursorNotFound if follow-up
# requests reach a different server (e.g. behind a load balancer) -- confirm
# against the actual deployment.
cursor = collection.find(query, no_cursor_timeout=True)
After that, if I do:
# Print every matching document. The cursor was opened with
# no_cursor_timeout=True, so close it in `finally` to make sure it is
# released even if iteration raises part-way through.
try:
    for row in cursor:
        print(row)
finally:
    cursor.close()
everything works fine, and I get all the documents. But if I do something like this:
# Stream every matching document into a tab-separated temporary file:
# column 1 is the document id, column 2 is the whole document as JSON.
# delete=False keeps the file on disk after the `with` block exits.
with NamedTemporaryFile("w", delete=False) as temp:
    csv_writer = csv.writer(
        temp, delimiter='\t', quotechar='\b', quoting=csv.QUOTE_MINIMAL
    )
    try:
        for row in cursor:
            # json_util.default serialises BSON-specific values that the
            # plain json encoder rejects.
            # NOTE(review): the first field is a one-element list, so the
            # column contains the list's string form rather than the bare
            # id -- kept as-is to preserve the output; confirm it is intended.
            csv_writer.writerow(
                [[row['_id']], json.dumps(row, default=json_util.default)]
            )
    finally:
        # The cursor was opened with no_cursor_timeout=True; always release
        # it, including when iteration fails (e.g. with CursorNotFound).
        cursor.close()
After about 2 minutes and 200k documents, I receive:
Traceback (most recent call last):
File "mongo_data_loader.py", line 25, in <module>
for row in cursor:
File "/Library/Python/2.7/site-packages/pymongo/cursor.py", line 1169, in next
if len(self.__data) or self._refresh():
File "/Library/Python/2.7/site-packages/pymongo/cursor.py", line 1106, in _refresh
self.__send_message(g)
File "/Library/Python/2.7/site-packages/pymongo/cursor.py", line 975, in __send_message
helpers._check_command_response(first)
File "/Library/Python/2.7/site-packages/pymongo/helpers.py", line 142, in _check_command_response
raise CursorNotFound(errmsg, code, response)
pymongo.errors.CursorNotFound: cursor id 184972541202 not found
What am I doing wrong?
Python 2.7.10
pymongo 3.6.1
mongo db.version() 3.6.5
As a temporary workaround, I wrote:
# Workaround: when the cursor is lost, reopen the query and skip the
# documents that were already written, repeating until one pass completes.
# Uses `collection`, `query` and `csv_writer` defined earlier in the script.
# NOTE(review): skip() is applied without an explicit sort, so this relies on
# the server returning documents in the same order on every attempt --
# confirm, or add a sort, to rule out duplicated or missing rows.
processed = 0
while True:
    cursor = collection.find(query, no_cursor_timeout=True).skip(processed)
    try:
        for row in cursor:
            csv_writer.writerow(
                [[row['_id']], json.dumps(row, default=json_util.default)]
            )
            # Count only rows that were actually written, so a retry resumes
            # right after the last exported document.
            processed += 1
        break
    except CursorNotFound:
        print("Lost cursor. Retry with skip")
    finally:
        # Runs on success, on retry and on unexpected errors alike, so no
        # attempt leaves a no-timeout cursor open.
        cursor.close()
But the question about the behavior described above still stands.
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With