Python - MongoDB: How to add additional information when aggregating?
I am a beginner. I wrote a pipeline that works, but I want to add other information to the output, such as the screen name or the number of tweets. I tried adding it under $group, but it gave me a syntax error every time.
Here is my pipeline:
def make_pipeline():
    """Build the aggregation pipeline that finds the most-followed matching user.

    Returns:
        list: Four aggregation stages, in order:

            1. ``$match`` - keep documents whose ``user.statuses_count`` is
               greater than 99 and whose ``user.time_zone`` is "brasilia".
            2. ``$group`` - one result per ``user.id``, carrying the largest
               ``user.followers_count`` seen for that user as ``followers``.
            3. ``$sort``  - order the grouped results by ``followers``,
               descending.
            4. ``$limit`` - keep only the first (top) result.
    """
    # complete aggregation pipeline
    pipeline = [
        {
            "$match": {
                "user.statuses_count": {"$gt": 99},
                "user.time_zone": "brasilia",
            }
        },
        {
            "$group": {
                "_id": "$user.id",
                "followers": {"$max": "$user.followers_count"},
            }
        },
        {"$sort": {"followers": -1}},
        {"$limit": 1},
    ]
    # Return the stages; without this the function yields None and the
    # pipeline that was just built is discarded.
    return pipeline
I am using it on documents like this example:
{ "_id" : objectid("5304e2e3cc9e684aa98bef97"), "text" : "first week of school on :p", "in_reply_to_status_id" : null, "retweet_count" : null, "contributors" : null, "created_at" : "thu sep 02 18:11:25 +0000 2010", "geo" : null, "source" : "web", "coordinates" : null, "in_reply_to_screen_name" : null, "truncated" : false, "entities" : { "user_mentions" : [ ], "urls" : [ ], "hashtags" : [ ] }, "retweeted" : false, "place" : null, "user" : { "friends_count" : 145, "profile_sidebar_fill_color" : "e5507e", "location" : "ireland :)", "verified" : false, "follow_request_sent" : null, "favourites_count" : 1, "profile_sidebar_border_color" : "cc3366", "profile_image_url" : "http://a1.twimg.com/profile_images/1107778717/phpkhoxzmam_normal.jpg", "geo_enabled" : false, "created_at" : "sun may 03 19:51:04 +0000 2009", "description" : "", "time_zone" : null, "url" : null, "screen_name" : "catherinemull", "notifications" : null, "profile_background_color" : "ff6699", "listed_count" : 77, "lang" : "en", "profile_background_image_url" : "http://a3.twimg.com/profile_background_images/138228501/149174881-8cd806890274b828ed56598091c84e71_4c6fd4d8-full.jpg", "statuses_count" : 2475, "following" : null, "profile_text_color" : "362720", "protected" : false, "show_all_inline_media" : false, "profile_background_tile" : true, "name" : "catherine mullane", "contributors_enabled" : false, "profile_link_color" : "b40b43", "followers_count" : 169, "id" : 37486277, "profile_use_background_image" : true, "utc_offset" : null }, "favorited" : false, "in_reply_to_user_id" : null, "id" : numberlong("22819398300") }
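Use $first in the $group stage, as in the first aggregation query below; the second query is an alternative that uses $sort, $limit, and $project instead: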
// Group by user and carry extra per-user fields alongside the follower count.
// The documents are sorted by followers_count (descending) before $group so
// that each $first picks its value from the first document that reaches the
// group, i.e. the one with the highest followers_count for that user.
// Note: there is no trailing $sort/$limit, so this returns one document per
// matching user rather than only the single top user.
db.collectionname.aggregate([
  {
    "$match": {
      "user.statuses_count": { "$gt": 99 },
      "user.time_zone": "brasilia"
    }
  },
  {
    // sort followers_count first
    "$sort": { "user.followers_count": -1 }
  },
  {
    "$group": {
      "_id": "$user.id",
      // use mongo $first method followers count or max followers count
      "followers": { "$first": "$user.followers_count" },
      "screen_name": { "$first": "$user.screen_name" },
      "retweet_count": { "$first": "$retweet_count" }
    }
  }
])
// Alternative without $group: sort the matching documents by follower count,
// keep only the top one, then project the fields of interest from it.
db.collectionname.aggregate([
  {
    "$match": {
      "user.statuses_count": { "$gt": 99 },
      "user.time_zone": "brasilia"
    }
  },
  {
    // sort followers_count
    "$sort": { "user.followers_count": -1 }
  },
  {
    // set limit 1: the max followers_count document comes first
    "$limit": 1
  },
  {
    // use $project here to shape the output document
    "$project": {
      "userid": "$user.id",
      "screen_name": "$user.screen_name",
      "retweet_count": "$retweet_count"
    }
  }
]).pretty()
Comments
Post a Comment