Forgive the n00b-ish question but I am new to data structures. I had been recently asked to aggregate a given array over another array and produce a tree based result.
Can someone give me some pointers on how to attain this output?
INPUT
var T = [
['COUNTRY', 'GENDER', 'MARITAL STATUS', 'SALES'],
['India', 'Female', 'Single', 2400],
['India', 'Male', 'Single', 5200],
['India', 'Female', 'Married', 4300],
['India', 'Male', 'Married', 3200],
['England', 'Female', 'Single', 1600],
['England', 'Female', 'Married', 2000],
['England', 'Male', 'Single', 4800],
['England', 'Male', 'Married', 6400],
];
var A = ['GENDER', 'MARITAL STATUS', 'COUNTRY'];
OUTPUT: Use 2*spaces for each leaf node.
TOTAL 29900
Female <Female Total>
Single <Single Female Total>
India <Single Female Total India>
England <Single Female Total England>
Married <Married Female Total>
India <Married Female Total India>
England <Married Female Total England>
Male <Male Total>
Single <Single Male Total>
India <Single Male Total India>
England <Single Male Total England>
Married <Married Male Total>
India <Married Male Total India>
England <Married Male Total England>
The result can be represented by a nested object, each inner object is a sub tree with its total:
{
total: 29900,
Female: {
total: 10300,
Single: {
total: 4000,
India: {
total: 2400
},
...
},
...
},
...
}
Just loop through all then entries, and add values to corresponding sub tree nodes.
For the output, you can use JSON.stringify
and strip unnecessary text from it.
Warning: spoilers below
const T = [
['COUNTRY', 'GENDER', 'MARITAL STATUS', 'SALES'],
['India', 'Female', 'Single', 2400],
['India', 'Male', 'Single', 5200],
['India', 'Female', 'Married', 4300],
['India', 'Male', 'Single', 3200],
['England', 'Female', 'Single', 1600],
['England', 'Female', 'Married', 2000],
['England', 'Male', 'Single', 4800],
['England', 'Male', 'Married', 6400],
]
const A = ['GENDER', 'MARITAL STATUS', 'COUNTRY']
function aggregate(T, A) {
const [fields, ...data] = T
const columns = A.map(name => fields.findIndex(it => name === it))
const count = fields.length - 1
const result = { total: 0 }
data.forEach(values => {
result.total += values[count]
//Go through the tree path, reduce is used here
//to avoid creating extra tracking variable for current position
columns.reduce((ref, index) => {
const key = values[index]
const next = ref[key] || (ref[key] = { total: 0 })
next.total += values[count]
return next
}, result)
})
return result
}
function pack(data) {
return Object.keys(data).reduce((result, key) => {
if (key !== 'total') {
const name = key + " " + data[key].total
result[name] = pack(data[key])
}
return result
}, {})
}
function format(result) {
return JSON.stringify(pack(result), null, 2)
.replace(/[^A-z0-9\n\s]/g, '')
.replace(/\n?\s*\n/g, '\n')
}
function output(it) {
const result = "TOTAL " + it.total + format(it)
console.log(result)
}
output(aggregate(T, A))
A common approach for working with tree structures is to represent them as nested objects, as demonstrated by DarkKnight's answer, then create a string representation from this data structure.
In the case presented by the OP, an alternative approach is to first sort the data, then create the string representation of the tree directly from the sorted data without needing any intermediate data structure of nested objects.
Given the array of columns to aggregate by,
['GENDER', 'MARITAL STATUS', 'COUNTRY']
we can sort the data by these columns:
GENDER STATUS COUNTRY SALES
Female Single India 2400
Female Single England 1600
Female Married India 4300
Female Married England 2000
Male Single India 5200
Male Single England 4800
Male Married India 3200
Male Married England 6400
Looping backwards from the last row, while aggregating, we can build the string representation of the tree bottom up. The last row differs from the previous at level 3 (COUNTRY), which gives the following output:
England 6400
The row before differs on both level 3 (COUNTRY) and 2 (MARITAL STATUS), prepended to the current output:
Married 9600
India 3200
England 6400
After the row before that:
England 4800
Married 9600
India 3200
England 6400
Then, the 5th row differs from the one before on all 3 levels:
Male 19600
Single 10000
India 5200
England 4800
Married 9600
India 3200
England 6400
And so on, until the whole tree is represented:
Total 29900
Female 10300
Single 4000
India 2400
England 1600
Married 6300
India 4300
England 2000
Male 19600
Single 10000
India 5200
England 4800
Married 9600
India 3200
England 6400
Below is working code (ES3 compliant), demonstrating the approach.
var T = [
['COUNTRY', 'GENDER', 'MARITAL STATUS', 'SALES'],
['India', 'Female', 'Single', 2400],
['India', 'Male', 'Single', 5200],
['India', 'Female', 'Married', 4300],
['India', 'Male', 'Married', 3200],
['England', 'Female', 'Single', 1600],
['England', 'Female', 'Married', 2000],
['England', 'Male', 'Single', 4800],
['England', 'Male', 'Married', 6400],
];
var A = ['GENDER', 'MARITAL STATUS', 'COUNTRY'];
var valueField = 'SALES';
// sortBy GENDER ascending
// MARITAL STATUS descending
// COUNTRY descending
var sortDirection = [1, -1, -1];
function find(arr, val){
for(var i = 0 ; i < arr.length; i++){
if(arr[i] === val) return i;
}
return -1;
}
function buildTreeString(
data, aggregateBy, sortDirection, valueField
) {
var i, record,
value, level,
groupBy = [],
result = [],
sums = [],
total = 0;
// get column index of valueField
valueField = find(data[0], valueField);
// get column indexes to groupBy
for(var i = 0; i < aggregateBy.length; i++) {
groupBy[i] = find(data[0], aggregateBy[i]);
}
// sort data
data = data.slice(1)
.sort(function(a, b) {
var i, compare = 0, column;
for(i = 0; i < groupBy.length && !compare; i++) {
column = groupBy[i];
compare = a[column] < b[column] ?
sortDirection[i] :
(a[column] > b[column] ?
-sortDirection[i] : 0);
}
return compare;
});
// loop over data rows, output tree nodes
for(i = 0; i < data.length; i++) {
record = data[i];
value = record[valueField];
total += value;
//loop over columns, starting from deepest level
for(level = groupBy.length - 1; level > -1; level--) {
sums[level] = (sums[level] || 0) + value;
if(i == data.length - 1 ||
record[groupBy[level]] != data[i + 1][groupBy[level]]) {
//output tree node
result.push(
Array(level + 2).join(' ') +
record[groupBy[level]] + ' ' +
sums[level]);
//reset level aggregation
sums[level] = 0;
}
}
}
result.push('Total ' + total);
result.reverse();
return result.join('\n');
}
console.log(
buildTreeString(T, A, sortDirection, valueField)
);
Tree is a good option to implement this question. You can also do the aggregation in one-time pass. The basic idea is
sort the data by the given columns.
loop the array, check the given column's value
2.1 aggregate the group count if column's value is as same as previous line
2.2 output group name and count if column's value is different with previous line
Here is a example which I guided a CS student for his assignment, which is pretty similar to yours.
The method sumaryStage3 at Here implement the logic of steps 2.
Pls ignore the code style and quality. It's not my code.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With