Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

collect GitHub activity metrics for reports #1003

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ let
python ${pkgs.writeText "live.py" script}
'';
};
metrics = with lib; collect isDerivation (pkgs.callPackage ./maintainers/metrics { });
update-nix-releases = pkgs.callPackage ./nix/update-nix-releases.nix { };
update-nixpkgs-releases = pkgs.callPackage ./nix/update-nixpkgs-releases.nix { };
in
Expand All @@ -116,6 +117,7 @@ in
inputsFrom = [ nix-dev ];
packages = [
devmode
metrics
update-nix-releases
update-nixpkgs-releases
pkgs.niv
Expand Down
18 changes: 18 additions & 0 deletions maintainers/metrics/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# GitHub metrics

These helper tools show activity metrics on GitHub for repositories the documentation team is working on.
The tools are available in the Nix shell environment for this repository.

The `metrics` tool requires JSON dumps of **all** GitHub issues and pull requests from the repository in question. To create the dumps, run the following command (this may take a while, since Nixpkgs has more than 300 000 items):

```shell-session
github-dump
```

Then, to view the metrics, run:

```shell-session
metrics
```

and follow the command-line help.
21 changes: 21 additions & 0 deletions maintainers/metrics/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{ python3, lib, writeShellApplication, gh }:
# Helper tools for collecting and viewing GitHub activity metrics.
let
  pythonPackages = python3.pkgs;
  inherit (lib.fileset) toSource unions;
in
{
  # Shell application whose body is the contents of ./github-dump.sh;
  # `gh` is listed as its runtime input.
  github-dump = writeShellApplication {
    name = "github-dump";
    runtimeInputs = [ gh ];
    text = builtins.readFile ./github-dump.sh;
  };

  # The `metrics` Python package; its source is restricted to the two
  # files it is built from.
  metrics = pythonPackages.buildPythonPackage {
    name = "metrics";
    propagatedBuildInputs = [ pythonPackages.pandas ];
    src = toSource {
      root = ./.;
      fileset = unions [
        ./metrics.py
        ./setup.py
      ];
    };
  };
}
20 changes: 20 additions & 0 deletions maintainers/metrics/github-dump.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env bash

# Print all pull requests of the repository given as $1 (any state) as JSON
# on stdout, restricted to the fields listed below.
prs() {
  fields="author,labels,state,createdAt,mergedAt,closedAt"
  gh pr list \
    --repo "$1" \
    --state all \
    --limit 1000000 \
    --json "$fields"
}

# Print all issues of the repository given as $1 (any state) as JSON on
# stdout, restricted to the fields listed below.
issues() {
  fields="author,labels,state,closedAt,createdAt"
  gh issue list \
    --repo "$1" \
    --state all \
    --limit 1000000 \
    --json "$fields"
}

# For every repository below, write <repo>-prs.json and <repo>-issues.json
# into the current directory.
# NOTE(review): "nixpkgs" is disabled in the active list; presumably because
# dumping it takes very long. Confirm before re-enabling.
#repos=("nixpkgs" "nix" "nix.dev")
repos=("nix" "nix.dev")
for repo in "${repos[@]}"; do
  slug="nixos/$repo"
  echo "fetching pull requests for $slug"
  prs "$slug" > "${repo}-prs.json"
  echo "fetching issues for $slug"
  issues "$slug" > "${repo}-issues.json"
done
69 changes: 69 additions & 0 deletions maintainers/metrics/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env python

import pandas as pd
import argparse
import os
from datetime import datetime
from enum import Enum, auto


def valid_path(path):
    """Argparse type check: accept *path* only if it exists on disk.

    Args:
        path: File system path given on the command line.

    Returns:
        The unchanged *path*.

    Raises:
        argparse.ArgumentTypeError: If nothing exists at *path*.
    """
    if os.path.exists(path):
        return path
    message = f"The file '{path}' does not exist."
    raise argparse.ArgumentTypeError(message)


def valid_date(date_string):
    """Argparse type check: parse an ISO 8601 string into a ``datetime``.

    Args:
        date_string: Date (or date and time) as typed on the command line.

    Returns:
        The value produced by ``datetime.fromisoformat``.

    Raises:
        argparse.ArgumentTypeError: If the string cannot be parsed.
    """
    try:
        parsed = datetime.fromisoformat(date_string)
    except ValueError:
        message = f"'{date_string}' must be an ISO 8601 date."
        raise argparse.ArgumentTypeError(message)
    return parsed


class Interval(Enum):
    """Time intervals selectable on the command line, looked up by name."""

    # Values are spelled out; they match what auto() assigns (1, 2, 3, ...).
    day = 1
    week = 2
    month = 3
    quarter = 4


def valid_interval(interval):
    """Argparse type check: convert a string into an ``Interval`` member.

    The lookup goes by member name and ignores case.

    Args:
        interval: Interval name as typed on the command line, e.g. "month".

    Returns:
        The ``Interval`` member with that name.

    Raises:
        argparse.ArgumentTypeError: If no member has that name. The message
            lists the accepted names.
    """
    try:
        return Interval[interval.lower()]
    except KeyError:
        # List the plain member names, since those are what the lookup above
        # accepts (str(member) would give "Interval.day" etc.).
        valid_names = ", ".join(member.name for member in Interval)
        raise argparse.ArgumentTypeError(
            f"'{interval}' is not a valid interval. "
            f"Valid values: {valid_names}."
        ) from None


def main():
    """Parse the command line, load the GitHub dumps and print first merges.

    Reads the issue and pull request JSON files named on the command line
    and prints, for every pull request author, the time of their earliest
    merged pull request, sorted by that time.

    The date range, interval and label options are parsed but not yet used
    by any metric.
    """
    # The plain member names are what valid_interval() accepts.
    interval_names = ", ".join(member.name for member in Interval)

    parser = argparse.ArgumentParser(description="View metrics on GitHub activities")
    parser.add_argument("issues", type=valid_path, help="Path to a JSON file with all issues. Must contain at least the fields: author,labels,state,closedAt,createdAt")
    parser.add_argument("pulls", type=valid_path, help="Path to a JSON file with all pull requests. Must contain at least the fields: author,labels,state,createdAt,mergedAt,closedAt")
    # NOTE: `from` is a Python keyword, so this value has to be read with
    # getattr(args, "from") rather than attribute syntax.
    parser.add_argument('-f', '--from', type=valid_date)
    # The default is a datetime, like the values valid_date() produces, so
    # both can be compared with each other.
    parser.add_argument('-t', '--to', nargs='?', type=valid_date, default=datetime.today())
    parser.add_argument(
        '-i', '--interval',
        nargs='?',
        type=valid_interval,
        default=Interval.month,
        help=f'The time interval ({interval_names}). Default is monthly.',
    )
    parser.add_argument('-l', '--labels', nargs='*', type=str)

    args = parser.parse_args()

    # Issues are loaded but not evaluated yet (see the TODO below).
    issues = pd.read_json(args.issues)
    pulls = pd.read_json(args.pulls)

    # add a new column so it's easier to access; entries without an author
    # object get no login and therefore drop out of the grouping below
    pulls["author_login"] = pulls["author"].apply(
        lambda author: author.get("login") if isinstance(author, dict) else None
    )
    # overwrite string dates with the parsed values; assigning the whole
    # column lets it take the datetime dtype
    for column in ("mergedAt", "createdAt", "closedAt"):
        pulls[column] = pd.to_datetime(pulls[column])

    merged = pulls.dropna(subset=['mergedAt'])
    first_merge = merged.groupby('author_login')['mergedAt'].min().reset_index()
    # TODO: actual metrics along the lines of https://www.tweag.io/blog/2024-05-02-right-words-right-place/
    print(first_merge.sort_values(by="mergedAt"))


if __name__ == '__main__':
    main()

11 changes: 11 additions & 0 deletions maintainers/metrics/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from setuptools import setup

# Package the single module metrics.py and declare a `metrics` console
# script that calls its main() function.
setup(
    name="metrics",
    py_modules=["metrics"],
    entry_points={"console_scripts": ["metrics = metrics:main"]},
)
Loading