Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

MongoDB java driver can do db.collection.group() under sharded environment

Tags:

java

mongodb

As the MongoDB manual states, "The db.collection.group() method does not work with sharded clusters. Use the aggregation framework or map-reduce in sharded environments." But today, surprisingly, I found that it works through the Java driver.

In my test, the sharded collection is called "spams" and contains 4,001,633 documents. It is distributed across 7 shards.

Every document in the collection has a format like this.

shard1:PRIMARY> db.spams.findOne()
{
    "IP" : "113.162.134.245",
    "_id" : ObjectId("4ebe8c84466e8b1a56000028"),
    "attach" : [ ],
    "bot" : "Lethic",
    "charset" : "iso-8859-1",
    "city" : "",
    "classA" : "113",
    "classB" : "113.162",
    "classC" : "113.162.134",
    "content_type" : [ ],
    "country" : "Vietnam",
    "cte" : "7bit",
    "date" : ISODate("2011-11-11T00:07:12Z"),
    "day" : "2011-11-11",
    "from_domain_a" : "domain157939.com",
    "geo" : "VN",
    "host" : "",
    "lang" : "unknown",
    "lat" : 16,
    "long" : 106,
    "sequenceID" : "user648",
    "size" : 1060,
    "smtp-mail-from_a" : "[email protected]",
    "smtp-rcpt-to_a" : "[email protected]",
    "subject_ta" : "nxsy8",
    "uri" : [ ],
    "uri_domain" : [ ],
    "x_p0f_detail" : "2000 SP4, XP SP1+",
    "x_p0f_genre" : "Windows",
    "x_p0f_signature" : "65535:105:1:48:M1402,N,N,S:."
}

What I wanted to do is find the documents whose "date" is after "2012-01-01T00:00:00Z", group them by the field "classA", and count the documents in each group. So I wrote the Java code below:

// Address (host:port) of the MongoDB server this client connects to.
private final String mongoUrl = "172.16.10.61:30000";
// Name of the database that holds the collection under test.
private final String databaseName = "test";
// Name of the collection queried by the group test.
private final String collecName = "spams";
// Collection handle; stays null until init() assigns it.
private DBCollection collection;
// Database handle; assigned by init().
private DB db;

/**
 * Opens a connection to the server at {@code mongoUrl} and stores the
 * database and collection handles used by the query methods.
 *
 * @throws IllegalStateException if the address cannot be resolved or the
 *         driver fails to create the connection; the original exception is
 *         kept as the cause
 */
public void init(){
    Mongo mongo;
    try {
        mongo = new Mongo(new DBAddress(mongoUrl));
    } catch (MongoException e) {
        // Fail fast: continuing with a null client would only surface later
        // as an unrelated NullPointerException.
        throw new IllegalStateException("Cannot connect to MongoDB at " + mongoUrl, e);
    } catch (UnknownHostException e) {
        throw new IllegalStateException("Cannot resolve MongoDB host " + mongoUrl, e);
    }
    db = mongo.getDB(databaseName);
    // NOTE(review): requestStart() is never paired with requestDone() in the
    // visible code -- confirm whether the request scope is closed elsewhere.
    db.requestStart();
    collection = db.getCollection(collecName);
}

/**
 * Runs a {@code group} over the collection: selects documents whose
 * {@code date} is later than 2012-01-01T00:00:00 UTC, groups them by
 * {@code classA}, counts the documents of each group in the {@code cou}
 * field, and prints the elapsed time in milliseconds.
 *
 * @param printResult when {@code true}, every group document is printed to
 *        standard output before the timing line
 * @throws IllegalStateException if the hard-coded cutoff timestamp cannot be
 *         parsed
 */
public void group_range_normal(boolean printResult){
    BasicDBObject key = new BasicDBObject("classA", true);
    BasicDBObject initial = new BasicDBObject("cou", 0);

    // "HH" is the 24-hour field ("hh" is the 12-hour clock). The quoted 'Z'
    // is matched as a plain literal and carries no zone information, so the
    // formatter must be told explicitly to interpret the timestamp as UTC;
    // otherwise the cutoff shifts by the JVM's default time-zone offset.
    DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
    formatter.setTimeZone(java.util.TimeZone.getTimeZone("UTC"));

    Date fromDate;
    try {
        fromDate = formatter.parse("2012-01-01T00:00:00Z");
    } catch (ParseException e) {
        // The input is a compile-time constant, so a failure here is a
        // programming error rather than a recoverable condition.
        throw new IllegalStateException("Invalid cutoff timestamp", e);
    }

    BasicDBObject cond = new BasicDBObject();
    cond.put("date", new BasicDBObject("$gt", fromDate));

    // Server-side JavaScript reducer: increments the per-group counter.
    String reduce = "function(obj,pre){pre.cou++}";

    long runBefore = System.currentTimeMillis();
    BasicDBList returnList = (BasicDBList) collection.group(key, cond, initial, reduce);
    long runAfter = System.currentTimeMillis();

    // Result intentionally unused here; the call is kept so the sequence of
    // commands sent to the server is unchanged.
    db.getLastError();

    if (printResult) {
        for (Object o : returnList) {
            System.out.println(o.toString());
        }
    }

    System.out.println("[Group Range Normal]: " + (runAfter - runBefore) + " ms.");
}

The result looks good:

  ------Group Range Normal------
{ "classA" : "72" , "cou" : 3.0}
{ "classA" : "85" , "cou" : 21.0}
{ "classA" : "115" , "cou" : 23.0}
{ "classA" : "217" , "cou" : 25.0}
{ "classA" : "46" , "cou" : 31.0}
{ "classA" : "117" , "cou" : 58.0}
{ "classA" : "122" , "cou" : 20.0}
{ "classA" : "195" , "cou" : 7.0}
{ "classA" : "190" , "cou" : 44.0}
{ "classA" : "94" , "cou" : 83.0}
{ "classA" : "87" , "cou" : 22.0}
{ "classA" : "95" , "cou" : 53.0}
{ "classA" : "178" , "cou" : 108.0}
{ "classA" : "219" , "cou" : 37.0}
{ "classA" : "76" , "cou" : 1.0}
{ "classA" : "101" , "cou" : 1.0}
{ "classA" : "111" , "cou" : 20.0}
{ "classA" : "194" , "cou" : 34.0}
{ "classA" : "93" , "cou" : 31.0}
{ "classA" : "98" , "cou" : 2.0}
{ "classA" : "180" , "cou" : 45.0}
{ "classA" : "211" , "cou" : 17.0}
{ "classA" : "92" , "cou" : 31.0}
{ "classA" : "177" , "cou" : 21.0}
{ "classA" : "189" , "cou" : 23.0}
{ "classA" : "89" , "cou" : 44.0}
{ "classA" : "78" , "cou" : 12.0}
{ "classA" : "77" , "cou" : 18.0}
{ "classA" : "125" , "cou" : 22.0}
{ "classA" : "200" , "cou" : 16.0}
{ "classA" : "74" , "cou" : 4.0}
{ "classA" : "58" , "cou" : 21.0}
{ "classA" : "80" , "cou" : 12.0}
{ "classA" : "79" , "cou" : 14.0}
{ "classA" : "186" , "cou" : 24.0}
{ "classA" : "105" , "cou" : 2.0}
{ "classA" : "41" , "cou" : 11.0}
{ "classA" : "213" , "cou" : 8.0}
{ "classA" : "220" , "cou" : 10.0}
{ "classA" : "201" , "cou" : 17.0}
{ "classA" : "176" , "cou" : 7.0}
{ "classA" : "112" , "cou" : 46.0}
{ "classA" : "118" , "cou" : 38.0}
{ "classA" : "124" , "cou" : 11.0}
{ "classA" : "82" , "cou" : 19.0}
{ "classA" : "59" , "cou" : 24.0}
{ "classA" : "120" , "cou" : 14.0}
{ "classA" : "114" , "cou" : 17.0}
{ "classA" : "182" , "cou" : 33.0}
{ "classA" : "39" , "cou" : 7.0}
{ "classA" : "90" , "cou" : 7.0}
{ "classA" : "109" , "cou" : 48.0}
{ "classA" : "81" , "cou" : 13.0}
{ "classA" : "27" , "cou" : 16.0}
{ "classA" : "84" , "cou" : 27.0}
{ "classA" : "187" , "cou" : 14.0}
{ "classA" : "91" , "cou" : 25.0}
{ "classA" : "203" , "cou" : 7.0}
{ "classA" : "168" , "cou" : 1.0}
{ "classA" : "123" , "cou" : 25.0}
{ "classA" : "62" , "cou" : 5.0}
{ "classA" : "67" , "cou" : 4.0}
{ "classA" : "2" , "cou" : 48.0}
{ "classA" : "113" , "cou" : 44.0}
{ "classA" : "221" , "cou" : 5.0}
{ "classA" : "121" , "cou" : 26.0}
{ "classA" : "188" , "cou" : 35.0}
{ "classA" : "83" , "cou" : 17.0}
{ "classA" : "119" , "cou" : 21.0}
{ "classA" : "61" , "cou" : 17.0}
{ "classA" : "218" , "cou" : 9.0}
{ "classA" : "49" , "cou" : 15.0}
{ "classA" : "173" , "cou" : 2.0}
{ "classA" : "14" , "cou" : 6.0}
{ "classA" : "159" , "cou" : 4.0}
{ "classA" : "1" , "cou" : 6.0}
{ "classA" : "151" , "cou" : 4.0}
{ "classA" : "181" , "cou" : 2.0}
{ "classA" : "116" , "cou" : 14.0}
{ "classA" : "202" , "cou" : 17.0}
{ "classA" : "42" , "cou" : 2.0}
{ "classA" : "171" , "cou" : 6.0}
{ "classA" : "222" , "cou" : 6.0}
{ "classA" : "209" , "cou" : 1.0}
{ "classA" : "210" , "cou" : 5.0}
{ "classA" : "175" , "cou" : 8.0}
{ "classA" : "71" , "cou" : 3.0}
{ "classA" : "212" , "cou" : 11.0}
{ "classA" : "24" , "cou" : 6.0}
{ "classA" : "110" , "cou" : 18.0}
{ "classA" : "31" , "cou" : 9.0}
{ "classA" : "139" , "cou" : 1.0}
{ "classA" : "196" , "cou" : 2.0}
{ "classA" : "183" , "cou" : 11.0}
{ "classA" : "193" , "cou" : 3.0}
{ "classA" : "207" , "cou" : 5.0}
{ "classA" : "108" , "cou" : 1.0}
{ "classA" : "75" , "cou" : 1.0}
{ "classA" : "106" , "cou" : 3.0}
{ "classA" : "86" , "cou" : 9.0}
{ "classA" : "96" , "cou" : 1.0}
{ "classA" : "174" , "cou" : 2.0}
{ "classA" : "158" , "cou" : 2.0}
{ "classA" : "197" , "cou" : 4.0}
{ "classA" : "141" , "cou" : 6.0}
{ "classA" : "65" , "cou" : 1.0}
{ "classA" : "223" , "cou" : 1.0}
{ "classA" : "184" , "cou" : 2.0}
{ "classA" : "37" , "cou" : 2.0}
{ "classA" : "88" , "cou" : 10.0}
{ "classA" : "149" , "cou" : 1.0}
{ "classA" : "130" , "cou" : 1.0}
{ "classA" : "99" , "cou" : 1.0}
{ "classA" : "208" , "cou" : 1.0}
[Group Range Normal Result]: { "serverUsed" : "/127.0.0.1:27017/172-16-10-61:30000" , "n" : 0 , "lastOp" : { "$ts" : 0 , "$inc" : 0} , "connectionId" : 17496 , "err" :  null  , "ok" : 1.0}
[Group Range Normal Time]: 85 ms.

The version of java driver is mongo-2.9.1.jar. MongoDB is version 2.2.2.

I have also tried it in the mongo shell, but there it reports that group does not work on a sharded cluster.

Does it mean db.collection.group() can work under sharded cluster?

like image 343
Felix Avatar asked Jan 24 '13 17:01

Felix


1 Answer

Indeed, the MongoDB manual states that

The db.collection.group() method does not work with sharded clusters.

and that one must

Use the aggregation framework or map-reduce in sharded environments.

But you are not using db.collection.group(), the JavaScript function available in the mongo shell. You are using the Java driver. In fact, the Javadocs for the driver suggest that DBCollection.group(...) uses the aggregation framework. That's why it works.

like image 84
Helio Santos Avatar answered Oct 06 '22 23:10

Helio Santos