MongoDB聚合以及MapReduce

7. 聚合 : 聚合的结果必须限制在16MB以内
1. 投射 : db.coll.aggregate({"$project" : {"auth" : 1}})
2. db.coll.aggregate({"$group" : {"_id" : "$auth", "count" : {"$sum" : 1}}})
3. db.coll.aggregate({"$sort" : {"auth" : -1}})
   db.coll.aggregate({"$sort" : {"_id" : -1}})
4. db.coll.aggregate({"$limit" : 3})
    5. db.coll.aggregate({"$project" : {"title" : 1}}, {"$group" : {"_id" : "$title", "count" : {"$sum" : 1}}}, {"$sort" : {"_id" : -1}}, {"$limit" : 3})
6. db.coll.aggregate({"$match" : {"auth" : "hbw"}})
7. db.coll.aggregate({"$project" : {"title" : 1, "_id" : 0}})
   db.coll.aggregate({"$project" : {"userid" : "$_id", "_id" : 0}})
管道表达式 :
1. $project : db.coll.aggregate({"$project" : {"auth" : 1, "_id" : 0}})
数学表达式 :
1. $add : db.cll.aggregate({"$project" : {"sum" : {"$add" : ["$val1", "$val2"]}}})
2. $subtract : db.cll.aggregate({"$project" : {"sub" : {"$subtract" : ["$val1", "$val2"]}}})
3. $multiply : db.cll.aggregate({"$project" : {"mul" : {"$multiply" : ["$val1", "$val2"]}}})
4. $divide : db.cll.aggregate({"$project" : {"div" : {"$divide" : ["$val1", "$val2"]}}})
5. $mod : db.cll.aggregate({"$project" : {"mod" : {"$mod" : ["$val1", "$val2"]}}})
日期表达式 :
1. db.colle.insert({"hdt" : new Date("1989-1-10")})
   db.colle.insert({"hdt" : new Date("1888-08-09T12:23:24")})
2. $year : db.colle.aggregate({"$project" : {"hdt" : {"$year" : "$hdt"}}})
3. $month : db.colle.aggregate({"$project" : {"hdt" : {"$month" : "$hdt"}}})
4. $dayOfMonth : db.colle.aggregate({"$project" : {"hdt" : {"$dayOfMonth" : "$hdt"}}})
5. $dayOfWeek : db.colle.aggregate({"$project" : {"hdt" : {"$dayOfWeek" : "$hdt"}}})
6. $dayOfYear : db.colle.aggregate({"$project" : {"hdt" : {"$dayOfYear" : "$hdt"}}})
7. $hour : db.colle.aggregate({"$project" : {"hdt" : {"$hour" : "$hdt"}}})
8. $minute : db.colle.aggregate({"$project" : {"hdt" : {"$minute" : "$hdt"}}})
9. $second : db.colle.aggregate({"$project" : {"hdt" : {"$second" : "$hdt"}}})
字符串表达式 :
1. $substr : 截取的是字节不是字符
db.coll.aggregate({"$project" : {"hhh" : {"$substr" : ["$title", 0, 6]}}})
2. $concat :
db.coll.aggregate({"$project" : {"hhh" : {"$concat" : ["$title", "$auth"]}}})
3. $toLower :
db.coll.aggregate({"$project" : {"hhh" : {"$toLower" : "$auth"}}})
4. $toUpper :
db.coll.aggregate({"$project" : {"hhh" : {"$toUpper" : "$auth"}}})
逻辑表达式 :
1. $cmp : 返回-1、0、1(第一个值小于、等于、大于第二个值)
db.cll.aggregate({"$project" : {"hhh" : {"$cmp" : ["$val1", "$val2"]}}})
2. $strcasecmp : 不区分大小写,只对罗马字符(ASCII)有效
db.cll.aggregate({"$project" : {"hhh" : {"$strcasecmp" : ["$val1", "$val2"]}}})
3. $eq/$ne/$gt/$gte/$lt/$lte : 返回true、false
db.cll.aggregate({"$project" : {"hhh" : {"$eq" : ["$val1", "$val2"]}}})
db.cll.aggregate({"$project" : {"hhh" : {"$ne" : ["$val1", "$val2"]}}})
db.cll.aggregate({"$project" : {"hhh" : {"$gt" : ["$val1", "$val2"]}}})
db.cll.aggregate({"$project" : {"hhh" : {"$gte" : ["$val1", "$val2"]}}})
db.cll.aggregate({"$project" : {"hhh" : {"$lt" : ["$val1", "$val2"]}}})
db.cll.aggregate({"$project" : {"hhh" : {"$lte" : ["$val1", "$val2"]}}})
4. $and :
db.cll.aggregate({"$project" : {"hhh" : {"$and" : [{"$lte" : ["$val1", "$val2"]}, {"$lt" : ["$val1", "$val2"]}]}}})
5. $or :
db.cll.aggregate({"$project" : {"hhh" : {"$or" : [{"$lt" : ["$val1", "$val2"]}, {"$gt" : ["$val1", "$val2"]}]}}})
6. $not :
db.cll.aggregate({"$project" : {"hhh" : {"$not" : {"$lt" : ["$val1", "$val2"]}}}})
7. $cond : 为true返回第一个,为false返回第二个
db.cll.aggregate({"$project" : {"hhh" : {"$cond" : [{"$lt" : ["$val1", "$val2"]}, "hello", "seee"]}}})
8. $ifNull : 为空返回第二个,不为空返回第一个
db.cll.aggregate({"$project" : {"hhh" : {"$ifNull" : [null, "hello"]}}})
db.cll.aggregate({"$project" : {"hhh" : {"$ifNull" : ["good", "hello"]}}})
分组 :
1. $group : 接收到所有文档后才能进行分组
db.cll.aggregate({"$group": {"_id" : "$val1"}})
db.cll.aggregate({"$group" : {"_id" : {"val1" : "$val1", "val2" : "$val2"}}})
2. $sum :
db.cll.aggregate({"$group" : {"_id" : {"val1" : "$val1", "val2" : "$val2"}, "count" : {"$sum" : 1}}})
3. $avg :
db.cll.aggregate({"$group" : {"_id" : {"val1" : "$val1", "val2" : "$val2"}, "count" : {"$sum" : 1}, "avg" : {"$avg" : "$val1"}}})
4. $max、$min :
db.cll.aggregate({"$group" : {"_id" : "$val1", "max" : {"$max" : "$val2"}, "min" : {"$min" : "$val2"}}})
5. $first、$last :
db.cll.aggregate({"$group" : {"_id" : "$val1", "first" : {"$first" : "$val2"}, "last" : {"$last" : "$val2"}}})
数组操作 :
1. $addToSet :
db.cll.update({}, {"$addToSet" : {"contents" : ["hello", "good", "hbw"]}}, true, true)
2. $push :
db.cll.update({}, {"$push" : {"name" : "hello"}}, true, true)
3. $unwind : 数组拆分
db.cll.aggregate({"$unwind" : "$contents"})
db.cll.aggregate({"$project" : {"contents" : 1}}, {"$unwind" : "$contents"}, {"$match" : {"contents.$" : "good"}})
db.cll.aggregate({"$project" : {"contents" : 1}}, {"$unwind" : "$contents"}, {"$match" : {"contents" : "good"}})
4. $sort : 接收到所有文档后才能进行排序
db.cll.aggregate({"$sort" : {"val1" : 1}})
5. $limit :
db.cll.aggregate({"$limit" : 3})
6. $skip : 跳过n个文档,效率低
db.cll.aggregate({"$skip" : 5})
MapReduce : 聚合工具中的明星,以js作为查询语言,复杂,速度慢 Map : 映射;reduce :化简;
1. map = function() { for(var key in this) { emit(key, {count : 1}); }};
   reduce = function(key, emits) { total = 0; for(var i in emits) { total += emits[i].count; } return {"count" : total}; }
   mr = db.runCommand({"mapreduce" : "colle", "map" : map, "reduce" : reduce, "out" : {inline : 1}})
   mr = db.coll.mapReduce(map, reduce, {out : {inline : 1}})
2. shell的使用 : mongo abc.js 可以直接运行js, 在~目录下建.mongorc.js,连接mongo可以自动执行
3. vi news.js :
// Open a connection to the server at 192.168.2.6:27017 and select the
// "test" database (Mongo and getDB are provided by the mongo shell).
conn = new Mongo("192.168.2.6:27017");
db = conn.getDB("test");


/**
 * Map step of the news job; mapReduce calls it once per document with the
 * document bound to `this`.
 *
 * Emits one `{urls, score}` value per tag of the document. The emitted score
 * is the document's `score` divided by its age in milliseconds
 * (`new Date() - this.date`), so more recent documents get a larger weight.
 *
 * Reads `this.tags` (expected to be an array), `this.date`, `this.score` and
 * `this.url`, and calls the `emit(key, value)` function supplied by mapReduce.
 * Kept ES5-only because the function body is executed by the server.
 */
var map = function() {
    // A document without tags emits nothing, as with the original for...in.
    var tags = this.tags || [];

    // The weight depends only on the document, not on the tag, so compute it
    // once instead of once per tag.
    var recency = 1 / (new Date() - this.date);
    var score = recency * this.score;

    // Indexed loop: for...in on an array walks property names and would also
    // visit any enumerable property added to Array.prototype.
    for (var i = 0; i < tags.length; i++) {
        emit(tags[i], {"urls" : [this.url], "score" : score});
    }
};


/**
 * Reduce step of the news job: merges all values emitted for one tag.
 *
 * @param key   the tag the values were emitted under (not used here)
 * @param emits array of `{urls, score}` values, either emitted by map or
 *              returned by an earlier reduce call for the same key
 * @returns     a single `{urls, score}` object: `urls` is the concatenation of
 *              every input's `urls` in order, `score` is the sum of the scores
 *
 * The result has the same shape as the emitted values, so it can itself be
 * passed back in as an element of `emits`.
 */
var reduce = function(key, emits) {
    var total = {"urls" : [], "score" : 0};

    // Indexed loop: for...in on an array walks property names and would also
    // visit any enumerable property added to Array.prototype.
    for (var i = 0; i < emits.length; i++) {
        // Append all urls of this value without touching the input array.
        Array.prototype.push.apply(total.urls, emits[i].urls);
        total.score += emits[i].score;
    }
    return total;
};


// Run the map-reduce over the "news" collection. out: {inline : 1} asks for
// the results to be returned in the reply rather than stored in a collection.
mr = db.news.mapReduce(map, reduce, {out : {inline : 1}})


// Print the returned result document.
printjson(mr);

  执行 : mongo --nodb news.js
4. mapreduce的其他参数 :
1. finalize : 接收reduce的最终结果(key, reducedValue),在输出前对每个key的结果做最后一次处理
vi coll.js :
// Open a connection to the server at 192.168.2.107:27017 and select the
// "test" database (Mongo and getDB are provided by the mongo shell).
conn = new Mongo("192.168.2.107:27017");
db = conn.getDB("test");
/**
 * Map step of the key-counting job; mapReduce calls it once per document with
 * the document bound to `this`.
 *
 * Emits `{count : 1}` under every field name of the document, so that the
 * reduce step can count how many documents contain each field.
 */
var map = function() {
    // for...in is intentional here: `this` is the document object and the
    // loop walks its field names.
    for (var field in this) {
        // A fresh value object per emit, so emitted values never share state.
        emit(field, {count : 1});
    }
};
/**
 * Reduce step of the key-counting job: sums the counts emitted for one key.
 *
 * @param key   the field name the values were emitted under (not used here)
 * @param emits array of `{count : n}` values, either emitted by map or
 *              returned by an earlier reduce call for the same key
 * @returns     `{count : total}` where total is the sum of every `count`
 */
var reduce = function(key, emits) {
    // Declared locally; the original assigned an undeclared `total`, which
    // leaked a global variable shared between invocations.
    var total = 0;

    // Indexed loop: for...in on an array walks property names and would also
    // visit any enumerable property added to Array.prototype.
    for (var i = 0; i < emits.length; i++) {
        total += emits[i].count;
    }
    return {count : total};
};
/**
 * Finalize step of the key-counting job: called once per key with the fully
 * reduced value, after all reduce calls for that key are done.
 *
 * @param key    the field name being finalized (not used here)
 * @param values the reduced value for that key, an object `{count : n}`
 * @returns      the same object with `count` increased by 1
 *
 * The original looped `for (var val in values)` and wrote `val.count`; `val`
 * is a property NAME (a string) there, so the count was never changed. The
 * increment is now applied to the value object itself, which is what the loop
 * body was evidently trying to do.
 */
var final = function(key, values) {
    values.count = values.count + 1;
    return values;
};
// Run the map-reduce over the "coll" collection and print the result.
// mapReduce takes a single options document as its third argument, so
// finalize has to sit next to `out`; passed as a separate fourth argument it
// is ignored and the finalize function never runs.
var result = db.coll.mapReduce(map, reduce, {out : {inline : 1}, finalize : final});
printjson(result);
2. out : 输出集合的名称,系统会自动设置keeptemp : true
db.coll.mapReduce(map, reduce, {out : {inline : 1}})
3. query : 查询后传给map
db.coll.mapReduce(map, reduce, {query : {}})
4. sort : 排序后传给map
db.coll.mapReduce(map, reduce, {sort : {}})
5. limit : 截取前几个文档传给map
db.coll.mapReduce(map, reduce, {limit : 3})
6. scope : 给map的function传参的位置,作用域
db.coll.mapReduce(map, reduce, {scope : {}})
7. verbose : 是否记录详细的服务器日志
db.coll.mapReduce(map, reduce, {verbose : true})

5. 聚合命令 :
db.coll.count({"name" : "hou"})
db.coll.distinct("name")

猜你喜欢

转载自blog.csdn.net/a13662080711/article/details/80161613