Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide generalized aggregation #4

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/_layouts/default.html
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
<script src="{{ site.baseurl }}/assets/js/vendor/moment-with-locales.min.js"></script>
<script src="{{ site.baseurl }}/assets/js/vendor/Chart-2.7.1.min.js"></script>
<script src="{{ site.baseurl }}/assets/js/vendor/spin-2.3.2.min.js"></script>
<script src="{{ site.baseurl }}/assets/js/charts.js?version=1ff0187"></script>
<script src="{{ site.baseurl }}/assets/js/charts.js?version=e7e9c5a"></script>
</head>
<body>
<section class="page-header">
Expand Down
152 changes: 113 additions & 39 deletions docs/assets/js/charts.js
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,116 @@ function createSpinner(canvas)
};
}

/**
 * Aggregates a time series into one row per week or per month.
 *
 * Every input row carries a 'date' entry (anything d3.isoParse accepts) and
 * any number of value columns. The value columns are taken from the earliest
 * row. The input array and its rows are not modified; the aggregation works
 * on a sorted copy.
 *
 * @param {Array<Object>} data
 *     Rows of the time series, in any order.
 * @param {Object} aggregationConfig
 *     - period: 'week' (d3.utcMonday intervals) or 'month' (d3.utcMonth).
 *     - method: 'sum', 'mean', 'median', 'first', 'last', 'min' or 'max'.
 *     - includeIncomplete: 'start', 'end' or 'both' to keep the partially
 *       covered period at the respective end of the series; any other value
 *       drops partially covered periods at both ends.
 * @returns {Array<Object>}
 *     One row per period with 'date' set to the start of the period. Value
 *     columns are undefined for periods that contain no input rows. An empty
 *     array is returned for empty input.
 * @throws {string}
 *     If data is not an array, or if the period or method is unknown.
 */
function aggregateTimeData(data, aggregationConfig)
{
    if (!(data instanceof Array))
        throw 'expected data array as input';

    // Always hand back an array so that callers can rely on the return type
    if (data.length < 1)
        return [];

    let period;

    switch (aggregationConfig['period'])
    {
        case 'week':
            period = d3.utcMonday;
            break;
        case 'month':
            period = d3.utcMonth;
            break;
        default:
            throw 'unknown aggregation period "' + aggregationConfig['period'] + '"';
    }

    // Resolve the aggregation method once, so that a misconfiguration is
    // reported even if no period happens to contain any data
    let aggregate;

    switch (aggregationConfig['method'])
    {
        case 'sum':
            aggregate = (rows, key) => d3.sum(rows, row => row[key]);
            break;
        case 'mean':
            aggregate = (rows, key) => d3.mean(rows, row => row[key]);
            break;
        case 'median':
            aggregate = (rows, key) => d3.median(rows, row => row[key]);
            break;
        case 'first':
            aggregate = (rows, key) => rows[0][key];
            break;
        case 'last':
            aggregate = (rows, key) => rows[rows.length - 1][key];
            break;
        case 'min':
            aggregate = (rows, key) => d3.min(rows, row => row[key]);
            break;
        case 'max':
            aggregate = (rows, key) => d3.max(rows, row => row[key]);
            break;
        default:
            throw 'unknown aggregation method "' + aggregationConfig['method'] + '"';
    }

    // Work on a copy with proper date objects, sorted chronologically,
    // instead of rewriting and reordering the caller's data
    const rows = data
        .map(row => Object.assign({}, row, {'date': d3.isoParse(row['date'])}))
        .sort((row1, row2) => row1['date'] - row2['date']);

    const dateStart = rows[0]['date'];
    // Ranges are exclusive, so add one more day to include the last date
    const dateEnd = d3.utcDay.offset(rows[rows.length - 1]['date'], 1);

    const includeIncomplete = aggregationConfig['includeIncomplete'];

    const firstBoundary = ['start', 'both'].includes(includeIncomplete)
        ? period.floor(dateStart)
        : period.ceil(dateStart);
    const lastBoundary = ['end', 'both'].includes(includeIncomplete)
        ? period.ceil(dateEnd)
        : period.floor(dateEnd);

    // In d3, ranges include the start value but exclude the end value.
    // Extending the range by one period makes lastBoundary part of the result,
    // so that consecutive boundaries delimit every period to aggregate
    const boundaries = period.range(firstBoundary, period.offset(lastBoundary, 1));

    const keys = Object.keys(rows[0]).filter(key => key !== 'date');
    const aggregatedData = [];

    for (let i = 0; i < boundaries.length - 1; i++)
    {
        const periodStart = boundaries[i];
        const periodEnd = boundaries[i + 1];

        const periodRows = rows.filter(row => row['date'] >= periodStart && row['date'] < periodEnd);

        const aggregatedRow = {'date': periodStart};

        // Periods without any data keep their columns, but without a value
        for (const key of keys)
            aggregatedRow[key] = periodRows.length === 0 ? undefined : aggregate(periodRows, key);

        aggregatedData.push(aggregatedRow);
    }

    return aggregatedData;
}

function createHistoryChart(canvas)
{
const url = $(canvas).data('url');
Expand All @@ -153,48 +263,12 @@ function createHistoryChart(canvas)

const context = canvas.getContext('2d');

if ($(canvas).data('config') && 'aggregate' in $(canvas).data('config') &&
$(canvas).data('config').aggregate == 'weekly')
{
let aggregatedData = Array();
data.sort(
function(row1, row2)
{
let date1 = new Date(row1['date']);
let date2 = new Date(row2['date']);
return date1 - date2;
});

let currentRow = Object();

for (let i = 0; i < data.length; i++)
{
if (i % 7 == 0)
$.each(Object.keys(data[i]).slice(1),
function(keyID, key)
{
currentRow[key] = 0;
});

currentRow['date'] = data[i]['date'];

$.each(Object.keys(data[i]).slice(1),
function(keyID, key)
{
currentRow[key] += data[i][key];
});

if (i % 7 == 6)
// Store a copy of the aggregated data
aggregatedData.push($.extend({}, currentRow));
}

data = aggregatedData;
}

if ($(canvas).data('config') && 'sliceData' in $(canvas).data('config'))
data = data.slice($(canvas).data('config').sliceData[0], $(canvas).data('config').sliceData[1]);

if ($(canvas).data('config') && 'aggregate' in $(canvas).data('config'))
data = aggregateTimeData(data, $(canvas).data('config').aggregate);

const originalDataSeries = Object.keys(data[0]).slice(1);

let dataSeries, visibleDataSeries;
Expand Down
37 changes: 12 additions & 25 deletions docs/pr-usage.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,18 @@
permalink: /pr-usage
---

<div class="chart-placeholder">
<h3>Pull Request Usage</h3>
<canvas
data-url="{{ site.dataURL }}/pull-request-usage.tsv"
data-type="history"
></canvas>
<div class="info-box">
<p>
The percentage of active repositories located in organizations, having more than one contributor, and using pull requests for contributions.
</p>
<p>
In this context, <em>active</em> designates repositories with at least two users that have pushed commits in the last four weeks.
The rationale behind this is that pull request usage is most important in a collaborative environment.
</p>
<p>
High pull request usage might indicate a high number of code reviews.
</p>
</div>
<div class="info-box">
<p>
For reference, the percentage of status usage in these pull requests is visualized.
</p>
<p>
High status usage might indicate high CI usage.
</p>
<h3>Pull Request Usage</h3>

<div class="row">
<div class="col-main">
<div class="chart-container">
<canvas
class="chart"
data-url="{{ site.dataURL }}/pull-request-usage.tsv"
data-type="history"
data-config='{"aggregate": {"period": "month", "method": "first", "includeIncomplete": "both"}}'
></canvas>
</div>
</div>
<div class="info-box">
<p>
Expand Down
157 changes: 156 additions & 1 deletion docs/spec/charts.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
/* global createCollaborationChart, createHistoryChart, createList, createTable, createSpinner */
/* global
aggregateTimeData,
createCollaborationChart,
createHistoryChart,
createList,
createTable,
createSpinner,
d3,
*/

describe('global charts.js', function()
{
Expand Down Expand Up @@ -62,4 +70,151 @@ describe('global charts.js', function()
});
});
});
// Specs for aggregateTimeData: period boundaries, handling of incomplete
// periods at both ends of the series, and the individual aggregation methods
describe('aggregation for time series', function()
{
    // Generate data from startDate to endDate (both inclusive) with a generator functor.
    // The generator receives the zero-based day index and returns the value for that day
    function generateData(startDate, endDate, generator)
    {
        const dates = d3.utcDay.range(d3.isoParse(startDate), d3.utcDay.offset(d3.isoParse(endDate), 1));
        const data = [];

        for (let i = 0; i < dates.length; i++)
            data.push({'date': dates[i], 'value': generator(i)});

        return data;
    }

    // Integer range generator: start, start + 1, ... (optionally wrapping around at modulo)
    function integerRangeGenerator(start, modulo)
    {
        if (modulo)
            return (i => (start + i) % modulo);

        return (i => start + i);
    }

    const dateToString = d3.utcFormat('%Y-%m-%d');

    it('should aggregate over weeks correctly', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'max', 'includeIncomplete': 'both'};
        const generator = integerRangeGenerator(0, 28);
        // 2018-01-01 is a Monday, and 2018-09-30 is a Sunday
        const data = generateData('2018-01-01', '2018-09-30', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(39);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-08');
        expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-15');
        expect(dateToString(aggregatedData[37]['date'])).toEqual('2018-09-17');
        expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-24');
        expect(aggregatedData[0]['value']).toEqual(6);
        expect(aggregatedData[1]['value']).toEqual(13);
        expect(aggregatedData[2]['value']).toEqual(20);
        expect(aggregatedData[4]['value']).toEqual(6);
        expect(aggregatedData[5]['value']).toEqual(13);
        expect(aggregatedData[36]['value']).toEqual(6);
        expect(aggregatedData[37]['value']).toEqual(13);
        expect(aggregatedData[38]['value']).toEqual(20);
    });

    it('should not have off-by-one errors (1)', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'max', 'includeIncomplete': 'both'};
        const generator = integerRangeGenerator(27, 28);
        // 2017-12-31 is a Sunday, and 2018-10-01 is a Monday
        const data = generateData('2017-12-31', '2018-10-01', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(41);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2017-12-25');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-01');
        expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-08');
        expect(dateToString(aggregatedData[3]['date'])).toEqual('2018-01-15');
        expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-17');
        expect(dateToString(aggregatedData[39]['date'])).toEqual('2018-09-24');
        expect(dateToString(aggregatedData[40]['date'])).toEqual('2018-10-01');
        expect(aggregatedData[0]['value']).toEqual(27);
        expect(aggregatedData[1]['value']).toEqual(6);
        expect(aggregatedData[39]['value']).toEqual(20);
        expect(aggregatedData[40]['value']).toEqual(21);
    });

    it('should not have off-by-one errors (2)', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'max', 'includeIncomplete': 'both'};
        const generator = integerRangeGenerator(1, 28);
        // 2018-01-02 is a Tuesday, and 2018-09-29 is a Saturday
        const data = generateData('2018-01-02', '2018-09-29', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(39);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-08');
        expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-15');
        expect(dateToString(aggregatedData[37]['date'])).toEqual('2018-09-17');
        expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-24');
        expect(aggregatedData[0]['value']).toEqual(6);
        expect(aggregatedData[1]['value']).toEqual(13);
        expect(aggregatedData[37]['value']).toEqual(13);
        expect(aggregatedData[38]['value']).toEqual(19);
    });

    it('should not include incomplete periods with incomplete data if requested', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'max', 'includeIncomplete': 'none'};
        const generator = integerRangeGenerator(1, 28);
        // 2018-01-02 is a Tuesday, and 2018-09-29 is a Saturday
        const data = generateData('2018-01-02', '2018-09-29', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(37);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-08');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-15');
        expect(dateToString(aggregatedData[35]['date'])).toEqual('2018-09-10');
        expect(dateToString(aggregatedData[36]['date'])).toEqual('2018-09-17');
        expect(aggregatedData[0]['value']).toEqual(13);
        expect(aggregatedData[1]['value']).toEqual(20);
        expect(aggregatedData[35]['value']).toEqual(6);
        expect(aggregatedData[36]['value']).toEqual(13);
    });

    it('should aggregate sums correctly', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'sum', 'includeIncomplete': 'both'};
        const generator = integerRangeGenerator(0, 10);
        // 2018-01-01 is a Monday, and 2018-09-30 is a Sunday
        const data = generateData('2018-01-01', '2018-09-30', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(39);
        expect(aggregatedData[0]['value']).toEqual(21);
        expect(aggregatedData[1]['value']).toEqual(30);
        expect(aggregatedData[2]['value']).toEqual(39);
        expect(aggregatedData[36]['value']).toEqual(35);
        expect(aggregatedData[37]['value']).toEqual(24);
        expect(aggregatedData[38]['value']).toEqual(33);
    });

    it('should aggregate over months correctly', function()
    {
        const aggregationConfig = {'period': 'month', 'method': 'first', 'includeIncomplete': 'both'};
        const generator = integerRangeGenerator(9, 10);
        const data = generateData('2017-12-31', '2019-01-01', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(14);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2017-12-01');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-01');
        expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-02-01');
        expect(dateToString(aggregatedData[12]['date'])).toEqual('2018-12-01');
        expect(dateToString(aggregatedData[13]['date'])).toEqual('2019-01-01');
        expect(aggregatedData[0]['value']).toEqual(9);
        expect(aggregatedData[1]['value']).toEqual(0);
        expect(aggregatedData[2]['value']).toEqual(1);
        expect(aggregatedData[12]['value']).toEqual(4);
        expect(aggregatedData[13]['value']).toEqual(5);
    });
});
});