From 5234d20acc7ea04dc1a4aa296279b747a112c407 Mon Sep 17 00:00:00 2001
From: Valentin Gagarin
Date: Tue, 18 Jun 2024 13:15:19 +0200
Subject: [PATCH] WIP: collect GitHub activity metrics for reports

---
 default.nix                        |  2 +
 maintainers/metrics/README.md      | 18 +++++++
 maintainers/metrics/default.nix    | 21 +++++++++
 maintainers/metrics/github-dump.sh | 20 ++++++++
 maintainers/metrics/metrics.py     | 76 ++++++++++++++++++++++++++++++
 maintainers/metrics/setup.py       | 11 +++++
 6 files changed, 148 insertions(+)
 create mode 100644 maintainers/metrics/README.md
 create mode 100644 maintainers/metrics/default.nix
 create mode 100755 maintainers/metrics/github-dump.sh
 create mode 100755 maintainers/metrics/metrics.py
 create mode 100644 maintainers/metrics/setup.py

diff --git a/default.nix b/default.nix
index 314f4e503..00d853c7a 100644
--- a/default.nix
+++ b/default.nix
@@ -105,6 +105,7 @@ let
         python ${pkgs.writeText "live.py" script}
       '';
     };
+  metrics = with lib; collect isDerivation (pkgs.callPackage ./maintainers/metrics { });
   update-nix-releases = pkgs.callPackage ./nix/update-nix-releases.nix { };
   update-nixpkgs-releases = pkgs.callPackage ./nix/update-nixpkgs-releases.nix { };
 in
@@ -116,6 +117,7 @@ in
     inputsFrom = [ nix-dev ];
     packages = [
       devmode
+      metrics
       update-nix-releases
       update-nixpkgs-releases
       pkgs.niv
diff --git a/maintainers/metrics/README.md b/maintainers/metrics/README.md
new file mode 100644
index 000000000..d74794299
--- /dev/null
+++ b/maintainers/metrics/README.md
@@ -0,0 +1,18 @@
+# GitHub metrics
+
+These helper tools show activity metrics on GitHub for repositories the documentation team is working on.
+The tools are available in the Nix shell environment for this repository.
+
+The `metrics` tool requires a JSON dump of **all** GitHub issues and pull requests from the given repository (this may take a while, since Nixpkgs has more than 300 000 items):
+
+```shell-session
+github-dump
+```
+
+Then, to view the metrics, run:
+
+```shell-session
+metrics nix.dev-issues.json nix.dev-prs.json
+```
+
+and follow the command-line help.
diff --git a/maintainers/metrics/default.nix b/maintainers/metrics/default.nix
new file mode 100644
index 000000000..6a05f55f6
--- /dev/null
+++ b/maintainers/metrics/default.nix
@@ -0,0 +1,21 @@
+{ python3, lib, writeShellApplication, gh }:
+{
+  github-dump = writeShellApplication {
+    name = "github-dump";
+    runtimeInputs = [ gh ];
+    text = builtins.readFile ./github-dump.sh;
+  };
+  metrics = python3.pkgs.buildPythonPackage {
+    name = "metrics";
+    propagatedBuildInputs = with python3.pkgs; [
+      pandas
+    ];
+    src = with lib.fileset; toSource {
+      root = ./.;
+      fileset = unions [
+        ./metrics.py
+        ./setup.py
+      ];
+    };
+  };
+}
diff --git a/maintainers/metrics/github-dump.sh b/maintainers/metrics/github-dump.sh
new file mode 100755
index 000000000..72bf8dc25
--- /dev/null
+++ b/maintainers/metrics/github-dump.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+prs() {
+  fields="author,labels,state,createdAt,mergedAt,closedAt"
+  gh pr list --repo "$1" --state all --limit 1000000 --json "$fields"
+}
+
+issues() {
+  fields="author,labels,state,closedAt,createdAt"
+  gh issue list --repo "$1" --state all --limit 1000000 --json "$fields"
+}
+
+#repos=("nixpkgs" "nix" "nix.dev")
+repos=("nix" "nix.dev")
+for repo in "${repos[@]}"; do
+  echo fetching pull requests for nixos/"$repo"
+  prs nixos/"$repo" > "$repo"-prs.json
+  echo fetching issues for nixos/"$repo"
+  issues nixos/"$repo" > "$repo"-issues.json
+done
diff --git a/maintainers/metrics/metrics.py b/maintainers/metrics/metrics.py
new file mode 100755
index 000000000..3416bbd4e
--- /dev/null
+++ b/maintainers/metrics/metrics.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+"""Show GitHub activity metrics from JSON dumps of issues and pull requests."""
+
+import pandas as pd
+import argparse
+import os
+from datetime import datetime
+from enum import Enum, auto
+
+
+def valid_path(path):
+    """Return `path` unchanged if it exists, otherwise reject the argument."""
+    if not os.path.exists(path):
+        raise argparse.ArgumentTypeError(f"The file '{path}' does not exist.")
+    return path
+
+
+def valid_date(date_string):
+    """Parse an ISO 8601 string into a `datetime`, otherwise reject the argument."""
+    try:
+        return datetime.fromisoformat(date_string)
+    except ValueError:
+        raise argparse.ArgumentTypeError(f"'{date_string}' must be an ISO 8601 date.") from None
+
+
+class Interval(Enum):
+    """Time intervals accepted by the `--interval` option."""
+    day = auto()
+    week = auto()
+    month = auto()
+    quarter = auto()
+
+
+def valid_interval(interval):
+    """Look up an `Interval` by case-insensitive name, otherwise reject the argument."""
+    try:
+        return Interval[interval.lower()]
+    except KeyError:
+        raise argparse.ArgumentTypeError(
+            f"'{interval}' is not a valid interval. "
+            f"Valid values: {', '.join(e.name for e in Interval)}."
+        ) from None
+
+
+def main():
+    """Parse the command line, load both dumps and print each author's first merge."""
+    parser = argparse.ArgumentParser(description="View metrics on GitHub activities")
+    parser.add_argument("issues", type=valid_path, help="Path to a JSON file with all issues. Must contain at least the fields: author,labels,state,closedAt,createdAt")
+    parser.add_argument("pulls", type=valid_path, help="Path to a JSON file with all pull requests. Must contain at least the fields: author,labels,state,createdAt,mergedAt,closedAt")
+    # `from` is a Python keyword, so `args.from` would be a syntax error; store the value as `args.since`
+    parser.add_argument('-f', '--from', dest='since', type=valid_date)
+    # the default must be a `datetime` like the parsed values, since a `date` cannot be ordered against them
+    parser.add_argument('-t', '--to', nargs='?', type=valid_date, default=datetime.today())
+    parser.add_argument('-i', '--interval', nargs='?', type=valid_interval, default=Interval.month, help=f'The time interval ({", ".join(e.name for e in Interval)}). Default is monthly.')
+    parser.add_argument('-l', '--labels', nargs='*', type=str)
+
+    args = parser.parse_args()
+
+    issues = pd.read_json(args.issues)  # not used yet, see the TODO below
+    pulls = pd.read_json(args.pulls)
+
+    # add a new column so it's easier to access
+    pulls["author_login"] = pulls["author"].apply(lambda x: x['login'])
+    # replace string dates with the parsed values; assign whole columns, because `.loc[:, col] = ...` sets values in place and can leave the column with `object` dtype
+    pulls['mergedAt'] = pd.to_datetime(pulls['mergedAt'])
+    pulls['createdAt'] = pd.to_datetime(pulls['createdAt'])
+    pulls['closedAt'] = pd.to_datetime(pulls['closedAt'])
+
+    merged = pulls.dropna(subset=['mergedAt'])
+    first_merge = merged.groupby('author_login')['mergedAt'].min().reset_index()
+    # TODO: actual metrics along the lines of https://www.tweag.io/blog/2024-05-02-right-words-right-place/
+    print(first_merge.sort_values(by="mergedAt"))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/maintainers/metrics/setup.py b/maintainers/metrics/setup.py
new file mode 100644
index 000000000..08fc320c3
--- /dev/null
+++ b/maintainers/metrics/setup.py
@@ -0,0 +1,11 @@
+from setuptools import setup
+
+setup(
+    name='metrics',
+    py_modules=['metrics'],
+    entry_points={
+        'console_scripts': [
+            'metrics = metrics:main',
+        ],
+    },
+)