Do I need to close connection in mongodb?

I am using Python and Django to build a web-based application, with MongoDB as the backend database. I have a base class named MongoConnection that uses PyMongo to communicate with MongoDB. I am quite happy with this layer, as it separates the database from the business layer for me. My custom MongoConnection class is as follows:

#!/usr/bin/env python
# encoding: utf-8
# Create your views here.
from pymongo import MongoClient
import pymongo
from pymongo import Connection
import json
from bson import BSON
from bson import json_util


class MongoConnection():
    def __init__ (self, host="localhost",port=27017, db_name='indexer', conn_type="local", username='', password=''):
        self.host = host
        self.port = port
        self.conn = Connection(self.host, self.port)
        self.db = self.conn[db_name]
        self.db.authenticate(username, password)

    def ensure_index(self, table_name, index=None):
        self.db[table_name].ensure_index([(index,pymongo.GEOSPHERE)])

    def create_table(self, table_name, index=None):
        self.db[table_name].create_index( [(index, pymongo.DESCENDING)] )


    def get_one(self,table_name,conditions={}):
        single_doc = self.db[table_name].find_one(conditions)
        json_doc = json.dumps(single_doc,default=json_util.default)
        json_doc = json_doc.replace("$oid", "id")
        json_doc = json_doc.replace("_id", "uid")
        return json.loads(json_doc)

    def get_all(self,table_name,conditions={}, sort_index ='_id', limit=100):
        all_doc = self.db[table_name].find(conditions).sort(sort_index, pymongo.DESCENDING).limit(limit)
        json_doc = json.dumps(list(all_doc),default=json_util.default)
        json_doc = json_doc.replace("$oid", "id")
        json_doc = json_doc.replace("_id", "uid")
        return json.loads(str(json_doc))


    def insert_one(self, table_name, value):
        self.db[table_name].insert(value)

    def update_push(self, table_name, where, what):
        #print where, what
        self.db[table_name].update(where,{"$push":what},upsert=False)

    def update(self, table_name, where, what):
        #print where, what
        self.db[table_name].update(where,{"$set":what},upsert=False)

    def update_multi(self, table_name, where, what):
        self.db[table_name].update(where,{"$set":what},upsert=False, multi=True)

    def update_upsert(self, table_name, where, what):
        self.db[table_name].update(where,{"$set":what},upsert=True)


    def map_reduce(self, table_name, mapper, reducer, query, result_table_name):
        myresult = self.db[table_name].map_reduce(mapper, reducer, result_table_name, query)
        return myresult

    def map_reduce_search(self, table_name, mapper, reducer,query, sort_by, sort = -1, limit = 20):
        if sort_by == "distance":
            sort_direction = pymongo.ASCENDING
        else:
            sort_direction = pymongo.DESCENDING
        myresult = self.db[table_name].map_reduce(mapper,reducer,'results', query)
        results = self.db['results'].find().sort("value."+sort_by, sort_direction).limit(limit)
        json_doc = json.dumps(list(results),default=json_util.default)
        json_doc = json_doc.replace("$oid", "id")
        json_doc = json_doc.replace("_id", "uid")
        return json.loads(str(json_doc))

    def aggregrate_all(self,table_name,conditions={}):
        all_doc = self.db[table_name].aggregate(conditions)['result']
        json_doc = json.dumps(list(all_doc),default=json_util.default)
        json_doc = json_doc.replace("$oid", "id")
        json_doc = json_doc.replace("_id", "uid")
        return json.loads(str(json_doc))

    def group(self,table_name,key, condition, initial, reducer):
        all_doc = self.db[table_name].group(key=key, condition=condition, initial=initial, reduce=reducer)
        json_doc = json.dumps(list(all_doc),default=json_util.default)
        json_doc = json_doc.replace("$oid", "id")
        json_doc = json_doc.replace("_id", "uid")
        return json.loads(str(json_doc))

    def get_distinct(self,table_name, distinct_val, query):
        all_doc = self.db[table_name].find(query).distinct(distinct_val)
        count = len(all_doc)        
        parameter = {}
        parameter['count'] = count
        parameter['results'] = all_doc
        return parameter

    def get_all_vals(self,table_name,conditions={}, sort_index ='_id'):
        all_doc = self.db[table_name].find(conditions).sort(sort_index, pymongo.DESCENDING)
        json_doc = json.dumps(list(all_doc),default=json_util.default)
        json_doc = json_doc.replace("$oid", "id")
        json_doc = json_doc.replace("_id", "uid")
        return json.loads(json_doc)

    def get_paginated_values(self, table_name, conditions ={}, sort_index ='_id', pageNumber = 1):
        all_doc = self.db[table_name].find(conditions).sort(sort_index, pymongo.DESCENDING).skip((pageNumber-1)*15).limit(15)
        json_doc = json.dumps(list(all_doc),default=json_util.default)
        json_doc = json_doc.replace("$oid", "id")
        json_doc = json_doc.replace("_id", "uid")
        return json.loads(json_doc)        

    def get_count(self, table_name,conditions={}, sort_index='_id'):
        count = self.db[table_name].find(conditions).count()
        return count

Now, the problem is that MongoDB uses a huge amount of processing power and RAM; it normally consumes around 80-90% of the CPU.

I suspect I am not closing the MongoDB connection every time I create an instance of this class. Do I need to close the connection manually in MongoDB?

asked May 28 '14 by bor

2 Answers

There's no need to close a Connection instance; it will clean up after itself when Python garbage-collects it.

You should use MongoClient instead of Connection; Connection is deprecated. To take advantage of connection pooling, you could create one MongoClient that lasts for the entire life of your process.
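For example, here is a minimal sketch of a single process-wide client (the module name mongo_db.py is hypothetical; the database name "indexer" is just taken from your code):

# mongo_db.py -- hypothetical module holding one shared client
from pymongo import MongoClient

# One MongoClient per process; it maintains its own connection pool,
# so every request handler can reuse the same sockets.
client = MongoClient("localhost", 27017)
db = client["indexer"]

Then any view can do from mongo_db import db and call db["some_collection"].find_one(...) without opening or closing anything per request.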

PyMongo represents documents as dicts. Why are you encoding each dict it gives you as JSON, then decoding it again? It may be more efficient to modify the objects directly.
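As a rough illustration (not an exact drop-in replacement, since your JSON round-trip also nests the id under "$oid"), get_one could rename the _id key on the dict itself:

    def get_one(self, table_name, conditions=None):
        doc = self.db[table_name].find_one(conditions or {})
        if doc is not None:
            # convert the ObjectId to a string and expose it as "uid",
            # instead of serializing to JSON and string-replacing keys
            doc["uid"] = str(doc.pop("_id"))
        return doc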

That said, I agree with user3683180 that the real problem (the reason MongoDB is taking so much CPU) is in your schema or index design, not in your Python code.

answered by A. Jesse Jiryu Davis

Given the name of your database 'indexer' and the 'unique' property, which requires an index, I'm thinking your CPU usage might have nothing to do with this code.

Try using mongostat and mongotop to see what mongo is spending its time doing. I'm thinking you'll find it's busy crunching data and that your code is just fine.
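If you'd rather check from Python, a quick sketch using PyMongo's serverStatus command shows the same sort of load figures (this assumes a plain local MongoClient; the keys below are standard serverStatus fields):

from pymongo import MongoClient

client = MongoClient("localhost", 27017)
status = client.admin.command("serverStatus")

print(status["opcounters"])    # operation counts (query, insert, update, ...) since startup
print(status["mem"])           # resident/virtual memory used by mongod
print(status["connections"])   # current vs. available connections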

answered by user3683180