An Approach for Creating Pivot Tables with MongoDB

A Microsoft Excel Pivot table is as much -- if not more -- of a GUI control interface as it is a dimensional data aggregation engine. It's very easy to move and format rows and columns to present the rolled up data just the way you want.

MongoDB, of course, is just a database -- no specialized "pivoting GUI." But very often we want to extract the same kind of data from the database, and to do so efficiently. This article demonstrates the use of the powerful $facet operator and less than 60 lines of javascript to programmatically create an aggregation framework pipeline that does the heavy lifting to yield pivot-table-esque output.

A Brief Overview of $facet

 {region: "A", state: "MA", year: 2001, q1:1, q2:2, q3:3, q4:4}
 ,{region: "A", state: "NH", year: 2001, q1:6, q2:2, q3:3, q4:4}
 ,{region: "A", state: "CT", year: 2001, q1:1, q2:2, q3:3, q4:4}
 ,{region: "B", state: "NY", year: 2001, q1:8, q2:2, q3:3, q4:4}
 ,{region: "B", state: "NJ", year: 2001, q1:1, q2:2, q3:3, q4:4}

 ,{region: "A", state: "MA", year: 2002, q1:52, q2:32, q3:43, q4:54}
 ,{region: "A", state: "NH", year: 2002, q1:21, q2:32, q3:43, q4:54}
 ,{region: "A", state: "CT", year: 2002, q1:67, q2:32, q3:43, q4:54}
 ,{region: "B", state: "NY", year: 2002, q1:8, q2:32, q3:43, q4:54}
 ,{region: "B", state: "NJ", year: 2002, q1:9, q2:32, q3:43, q4:54}

db.collection.aggregate([
    {$group: {_id: "$region", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} }
]}

{ "_id" : "B", "tot_q1" : 26, "tot_q2" : 68 }
{ "_id" : "A", "tot_q1" : 148, "tot_q2" : 102 }

db.collection.aggregate([
    {$group: {_id: "$state", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} }
]}

{ "_id" : "NJ", "tot_q1" : 10, "tot_q2" : 34 }
{ "_id" : "CT", "tot_q1" : 68, "tot_q2" : 34 }
{ "_id" : "NH", "tot_q1" : 27, "tot_q2" : 34 }
{ "_id" : "NY", "tot_q1" : 16, "tot_q2" : 34 }
{ "_id" : "MA", "tot_q1" : 53, "tot_q2" : 34 }

db.collection.aggregate([
{$facet: {
	"byRegion": [
{$group: {_id: "$region", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} }
		     ],
	"byState": [
{$group: {_id: "$state", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} }
		     ]
    }}
		      ]);

{
	"byRegion" : [
		{"_id" : "B","tot_q1" : 26,"tot_q2" : 68},
		{"_id" : "A","tot_q1" : 148,"tot_q2" : 102}
	],
	"byState" : [
		{"_id" : "NJ","tot_q1" : 10,"tot_q2" : 34},
		{"_id" : "CT","tot_q1" : 68,"tot_q2" : 34},
		{"_id" : "NH","tot_q1" : 27,"tot_q2" : 34},
		{"_id" : "NY","tot_q1" : 16,"tot_q2" : 34},
		{"_id" : "MA","tot_q1" : 53,"tot_q2" : 34}
	]
}

db.collection.aggregate([
{$match: {"year": 2002}},  // only get 2002 
{$facet: {
	"byRegion": [
{$group: {_id: "$region", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} }
		     ],
	"byState": [
{$group: {_id: "$state", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} }
		     ]
    }}
		      ]);

{
	"byRegion" : [
		{"_id" : "B","tot_q1" : 17,"tot_q2" : 64},
		{"_id" : "A","tot_q1" : 140,"tot_q2" : 96}
	],
	"byState" : [
		{"_id" : "NJ","tot_q1" : 9,"tot_q2" : 32},
		{"_id" : "CT","tot_q1" : 67,"tot_q2" : 32},
		{"_id" : "NH","tot_q1" : 21,"tot_q2" : 32},
		{"_id" : "NY","tot_q1" : 8,"tot_q2" : 32},
		{"_id" : "MA","tot_q1" : 52,"tot_q2" : 32}
	]
}

$facet: Simple but ... tedious

db.collection.aggregate([
{$facet: {
	"byRegion": [
{$group: {_id: "$region", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} }
		     ],
	"byRegionYear": [
{$group: {_id: {region:"$region",year:"$year"}, tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} }
		     ],
	"byRegionState": [
{$group: {_id: {region:"$region",year:"$state"}, tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} }
		     ],
//...

    }}
		      ]);

db.collection.aggregate([
{$facet: {
	"byRegion": [
{$group: {_id: "$region", x: {$push: {q1:"$q1", q2:"$q2", state:"$state",year:"$year"}}, tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} }
		     ],
	"byRegionYear": [
{$group: {_id: {region:"$region",year:"$year"}, x: {$push: {q1:"$q1", q2:"$q2", state:"$state"}}, tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} }
		     ],
	"byRegionState": [
{$group: {_id: {region:"$region",year:"$state"}, x: {$push: {q1:"$q1", q2:"$q2", year:"$year"}}, tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} }
		     ],
//...

    }}
		      ]);

"byRegion" : [
	{
		"_id" : "B",
		"x" : [
			{"state" : "NY","year" : 2001,"q1" : 8,"q2" : 2	},
			{"state" : "NJ","year" : 2001,"q1" : 1,"q2" : 2	},
			{"state" : "NY","year" : 2002,"q1" : 8,"q2" : 32},
			{"state" : "NJ","year" : 2002,"q1" : 9,"q2" : 32}
		],
		"tot_q1" : 26,
		"tot_q2" : 68
	},
        // ...

Javascript to the rescue

function getCombinations(items) {
    var result = [];
    var f = function(keyarr, items, lvl) {
	for (var i = 0; i < items.length; i++) {
	    var nka = keyarr.concat(items[i]);
	    result.push(nka);
	    f(nka, items.slice(i + 1), lvl+1);
	}
    }
    f([], items, 0);
    return result;
}

function createPivotFacets(dims, measures) {
    var fcts = {};

    // This is where we build the $project stage to turn
    //   sum_q1: nnn
    //   avg_q1: nnn
    //   max_q1: nnn
    // into
    //   q1: { sum:nnn, avg:nnn, ... }
    // Only need to do this once; the $project is the same for all
    // variations of $group.			    
    var prj = {_id:1, _x:1, _n:1};
    measures.forEach(function(m) {
	    var z = {};
	    ["sum","avg","max","min"].forEach(function(f) { 	    
		    z[f] = "$"+f+"_"+m;
		    prj[m] = z;
		})
	});

    var combinations = getCombinations(dims);

    for (var i = 0; i < combinations.length; i++) {
	var karr = combinations[i];
	var parr = dims.filter(function(i) {return karr.indexOf(i) < 0;});

	var grp = {};

	var idd = {};
	karr.forEach(function(k) { idd[k] = "$" + k; });
	grp['_id'] = idd;

	if(parr.length > 0) {
	    var pdd = {};
	    measures.forEach(function(m) { pdd[m] = "$"+m });

	    parr.forEach(function(k) { pdd[k] = "$" + k; });
	    grp['_x'] = {"$push": pdd};
	}

	grp['_n'] = {"$sum":1}; // handy but basically {$size: "$_x"}

	measures.forEach(function(m) {
		["sum","avg","max","min"].forEach(function(f) { 
			var z = {};
			z["$"+f] = "$"+m;
			grp[f+"_"+m] = z;
		    });
	    });

	fcts[karr.join(':')] = [ {$group:grp}, {$project:prj} ];
    }

    return {"$facet":fcts};
}

pipe = [
	createPivotFacets(["region","year","state"], ["q1","q2","q3"])
	];
db.collection.aggregate(pipe);

{
  "region" : [
    {
      "_id" : { "region" : "B" },
      "_x" : [
        {"q1" : 8,"q2" : 2,"q3" : 3,"year" : 2001,"state" : "NY"},
        {"q1" : 1,"q2" : 2,"q3" : 3,"year" : 2001,"state" : "NJ"},
        {"q1" : 8,"q2" : 32,"q3" : 43,"year" : 2002,"state" : "NY"},
        {"q1" : 9,"q2" : 32,"q3" : 43,"year" : 2002,"state" : "NJ"}
      ],
      "_n" : 4,
      "q1" : {"sum" : 26,"avg" : 6.5,"max" : 9,"min" : 1},
      "q2" : {"sum" : 68,"avg" : 17,"max" : 32,"min" : 2},
      "q3" : {"sum" : 92,"avg" : 23,"max" : 43,"min" : 3}
    },
    {
      "_id" : { "region" : "A" },
      "_x" : [
        {"q1" : 1,"q2" : 2,"q3" : 3,"year" : 2001,"state" : "MA"},
        {"q1" : 6,"q2" : 2,"q3" : 3,"year" : 2001,"state" : "NH"},
        {"q1" : 1,"q2" : 2,"q3" : 3,"year" : 2001,"state" : "CT"},
        {"q1" : 52,"q2" : 32,"q3" : 43,"year" : 2002,"state" : "MA"},
        {"q1" : 21,"q2" : 32,"q3" : 43,"year" : 2002,"state" : "NH"},
        {"q1" : 67,"q2" : 32,"q3" : 43,"year" : 2002,"state" : "CT"}
      ],
      "_n" : 6,
      "q1" : {"sum" : 148,"avg" : 24.666666666666668,"max" : 67,"min" : 1},
      "q2" : {"sum" : 102,"avg" : 17,"max" : 32,"min" : 2},
      "q3" : {"sum" : 138,"avg" : 23,"max" : 43,"min" : 3}
    }
  ],
  "region:year" : [
    {
      "_id" : { "region" : "B", "year" : 2002 },
      "_x" : [
        {"q1" : 8,"q2" : 32,"q3" : 43,"state" : "NY"},
        {"q1" : 9,"q2" : 32,"q3" : 43,"state" : "NJ"}
      ],
      "_n" : 2,
      "q1" : {"sum" : 17,"avg" : 8.5,"max" : 9,"min" : 8},
      "q2" : {"sum" : 64,"avg" : 32,"max" : 32,"min" : 32},
      "q3" : {"sum" : 86,"avg" : 43,"max" : 43,"min" : 43}
    },
    {
      "_id" : { "region" : "A", "year" : 2002 },
      "_x" : [
        {"q1" : 52,"q2" : 32,"q3" : 43,"state" : "MA"},
        {"q1" : 21,"q2" : 32,"q3" : 43,"state" : "NH"},
        {"q1" : 67,"q2" : 32,"q3" : 43,"state" : "CT"}
      ],
      "_n" : 3,
      "q1" : {"sum" : 140,"avg" : 46.666666666666664,"max" : 67,"min" : 21},
      "q2" : {"sum" : 96,"avg" : 32,"max" : 32,"min" : 32},
      "q3" : {"sum" : 129,"avg" : 43,"max" : 43,"min" : 43}
    },
    // ...

There is room for improvement here but it is left as an exercise to the reader to experiment. Suggestions:

An Approach for Creating Pivot Tables with MongoDB

A Brief Overview of `$facet`

`$facet`: Simple but ... tedious

Javascript to the rescue

An Approach for Creating Pivot Tables with MongoDB

A Brief Overview of $facet

$facet: Simple but ... tedious

Javascript to the rescue

A Brief Overview of `$facet`

`$facet`: Simple but ... tedious