Initial version of GitHub metrics tables and updates to legacy BigQuery tables to avoid windowing issues #1222

Merged (19 commits) on Jan 14, 2025
7 changes: 0 additions & 7 deletions .circleci/config.yml
@@ -107,13 +107,6 @@ jobs:
OPLABS_RUNTIME: "gha"
OPLABS_ENV: "prod"

- run:
name: GitHub Analytics
command: uv run opdata pulls github_analytics
environment:
OPLABS_RUNTIME: "gha"
OPLABS_ENV: "prod"

- run:
name: GrowThePie Chain Summary
command: uv run opdata pulls growthepie_chain_summary
2 changes: 1 addition & 1 deletion helm/dagster/values.yaml
@@ -318,7 +318,7 @@ dagster-user-deployments:
# deployment pod.
dagsterApiGrpcArgs:
- "--module-name"
- "op_analytics.dagster"
- "op_analytics.dagster.defs"
port: 3030

# Whether or not to include configuration specified for this user code deployment in the pods
114 changes: 114 additions & 0 deletions notebooks/adhoc/platform_github_metrics/utilities_prototyping.ipynb
@@ -0,0 +1,114 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[2m2025-01-14 06:49:00\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mloaded vault from .env file \u001b[0m \u001b[36mcounter\u001b[0m=\u001b[35m001/009\u001b[0m \u001b[36meta\u001b[0m=\u001b[35mNone\u001b[0m \u001b[36mfilename\u001b[0m=\u001b[35mvault.py\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m32\u001b[0m \u001b[36mprocess\u001b[0m=\u001b[35m22565\u001b[0m \u001b[36mrepo\u001b[0m=\u001b[35moptimism\u001b[0m\n",
"\u001b[2m2025-01-14 06:49:00\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mloaded vault: 18 items \u001b[0m \u001b[36mcounter\u001b[0m=\u001b[35m001/009\u001b[0m \u001b[36meta\u001b[0m=\u001b[35mNone\u001b[0m \u001b[36mfilename\u001b[0m=\u001b[35mvault.py\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m76\u001b[0m \u001b[36mprocess\u001b[0m=\u001b[35m22565\u001b[0m \u001b[36mrepo\u001b[0m=\u001b[35moptimism\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Request GET /repos/ethereum-optimism/optimism failed with 403: Forbidden\n",
"Setting next backoff to 1640.40558s\n",
"Request GET /repos/ethereum-optimism/supersim failed with 403: Forbidden\n",
"Setting next backoff to 1640.403205s\n"
]
}
],
"source": [
"from op_analytics.cli.subcommands.pulls.github.actvity.allrepos import (\n",
" GithubActivityData,\n",
")\n",
"\n",
"\n",
"dfs = GithubActivityData.fetch(\n",
" closed_items_last_n_days=4000,\n",
" current_dt=\"2025-01-14\",\n",
" repo_concurrent_workers=2,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'dfs' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdfs\u001b[49m\u001b[38;5;241m.\u001b[39mprs\n",
"\u001b[0;31mNameError\u001b[0m: name 'dfs' is not defined"
]
}
],
"source": [
"dfs.prs"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[2m2025-01-13 19:31:02\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mfetched 116 open in 3.89s \u001b[0m \u001b[36mfetch\u001b[0m=\u001b[35mPULLS\u001b[0m \u001b[36mfilename\u001b[0m=\u001b[35mgithubapi.py\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m172\u001b[0m \u001b[36mprocess\u001b[0m=\u001b[35m90324\u001b[0m \u001b[36mrepo\u001b[0m=\u001b[35moptimism\u001b[0m\n",
"\u001b[2m2025-01-13 19:31:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mfound 157 closed after 2024-12-22 in 6.07s\u001b[0m \u001b[36mfetch\u001b[0m=\u001b[35mPULLS\u001b[0m \u001b[36mfilename\u001b[0m=\u001b[35mgithubapi.py\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m186\u001b[0m \u001b[36mprocess\u001b[0m=\u001b[35m90324\u001b[0m \u001b[36mrepo\u001b[0m=\u001b[35moptimism\u001b[0m\n",
"\u001b[2m2025-01-13 19:31:19\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mfetched 497 open in 9.95s \u001b[0m \u001b[36mfetch\u001b[0m=\u001b[35mISSUES\u001b[0m \u001b[36mfilename\u001b[0m=\u001b[35mgithubapi.py\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m172\u001b[0m \u001b[36mprocess\u001b[0m=\u001b[35m90324\u001b[0m \u001b[36mrepo\u001b[0m=\u001b[35moptimism\u001b[0m\n",
"\u001b[2m2025-01-13 19:31:20\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mfound 17 closed after 2024-12-22 in 0.58s\u001b[0m \u001b[36mfetch\u001b[0m=\u001b[35mISSUES\u001b[0m \u001b[36mfilename\u001b[0m=\u001b[35mgithubapi.py\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m186\u001b[0m \u001b[36mprocess\u001b[0m=\u001b[35m90324\u001b[0m \u001b[36mrepo\u001b[0m=\u001b[35moptimism\u001b[0m\n",
"\u001b[2m2025-01-13 19:31:20\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mfetching comments for 273 prs \u001b[0m \u001b[36mfilename\u001b[0m=\u001b[35mgithubapi.py\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m90\u001b[0m \u001b[36mprocess\u001b[0m=\u001b[35m90324\u001b[0m \u001b[36mrepo\u001b[0m=\u001b[35moptimism\u001b[0m\n",
"\u001b[2m2025-01-13 19:33:54\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mfetching reviews for 273 prs \u001b[0m \u001b[36mfilename\u001b[0m=\u001b[35mgithubapi.py\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m97\u001b[0m \u001b[36mprocess\u001b[0m=\u001b[35m90324\u001b[0m \u001b[36mrepo\u001b[0m=\u001b[35moptimism\u001b[0m\n"
]
}
],
"source": [
"from op_analytics.cli.subcommands.pulls.github.actvity.singlerepo import (\n",
" GithubRepoActivityData,\n",
")\n",
"\n",
"\n",
"dfs = GithubRepoActivityData.fetch(\n",
" \"optimism\",\n",
" current_dt=\"2025-01-01\",\n",
" last_n_days=10,\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -22,6 +22,7 @@ dependencies = [
"py-markdown-table>=1.1.0",
"pyarrow>=17.0.0",
"pydantic>=2.9.2",
"pygithub>=2.5.0",
"pyyaml>=6.0.2",
"stamina>=24.3.0",
"structlog>=24.4.0",
@@ -85,7 +86,6 @@ only-include = ["src/op_analytics"]
[tool.hatch.build.targets.wheel]
packages = ["src/op_analytics"]


[tool.dagster]
module_name = "op_analytics.dagster" ## name of project's Python module and where to find the definitions
module_name = "op_analytics.dagster.defs" ## name of project's Python module and where to find the definitions
code_location_name = "dev_code_location"
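
For context on the module_name change above: Dagster is now pointed at op_analytics.dagster.defs, matching the helm/dagster/values.yaml change earlier in this PR. A minimal sketch of what a module importable at that path could look like, assuming only the public Dagster API; the asset shown is a placeholder, not the project's actual definitions:

# Hypothetical sketch of a module loadable as op_analytics.dagster.defs,
# matching the module_name / --module-name settings changed in this PR.
# The asset below is a placeholder; the real module registers the
# project's assets, jobs, and schedules.
from dagster import Definitions, asset


@asset
def example_asset() -> str:
    # Placeholder asset body for illustration only.
    return "ok"


defs = Definitions(assets=[example_asset])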
2 changes: 1 addition & 1 deletion src/op_analytics/cli/subcommands/chains/app.py
@@ -328,7 +328,7 @@ def noargs_blockbatch():
"""No-args command to run blockbatch models."""
compute_blockbatch(
chains=normalize_chains("ALL"),
models=normalize_blockbatch_models("ALL"),
models=normalize_blockbatch_models("MODELS"),
range_spec="m8hours",
read_from=DataLocation.GCS,
write_to=DataLocation.GCS,
7 changes: 4 additions & 3 deletions src/op_analytics/cli/subcommands/pulls/app.py
@@ -13,7 +13,7 @@
from .defillama.protocols import pull_protocol_tvl
from .defillama.stablecoins import pull_stablecoins
from .defillama.volume_fees_revenue import execute_pull, write_to_clickhouse
from .github_analytics import pull_github_analytics
from .github import execute as github_execute
from .growthepie.chains_daily_fundamentals import pull_growthepie_summary
from .l2beat import pull_l2beat

@@ -65,9 +65,10 @@ def defillama_protocol_tvl():


@app.command()
def github_analytics():
def github():
"""Pull repo analytics data from GitHub."""
pull_github_analytics()
github_execute.execute_pull_traffic()
github_execute.execute_pull_activity()


@app.command()
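
Note that the CI step removed from .circleci/config.yml ran "uv run opdata pulls github_analytics"; with the command renamed above, the equivalent invocation would presumably be "uv run opdata pulls github". A minimal in-process sketch, assuming Typer's test runner and the app object defined in this module:

# Hedged sketch: invoke the renamed "github" command in-process, mirroring
# what "uv run opdata pulls github" would do from the command line.
from typer.testing import CliRunner

from op_analytics.cli.subcommands.pulls.app import app

runner = CliRunner()
result = runner.invoke(app, ["github"])
print(result.exit_code)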
@@ -4,17 +4,15 @@

import polars as pl

from op_analytics.coreutils.bigquery.write import (
most_recent_dates,
)
from op_analytics.coreutils.bigquery.write import most_recent_dates
from op_analytics.coreutils.logger import structlog
from op_analytics.coreutils.request import get_data, new_session
from op_analytics.coreutils.threads import run_concurrently
from op_analytics.coreutils.time import now_dt

from .dataaccess import DefiLlama
from .dexs.by_chain import get_chain_breakdown_df, get_chain_df
from .dexs.protocols import get_protocols_df
from .dexs.by_chain import get_chain_df, get_chain_breakdown_df

log = structlog.get_logger()

Empty file.
Empty file.
@@ -0,0 +1,67 @@
from dataclasses import dataclass

import polars as pl

from op_analytics.coreutils.logger import structlog
from op_analytics.coreutils.threads import run_concurrently
from op_analytics.coreutils.time import now_dt

from .singlerepo import GithubRepoActivityData

log = structlog.get_logger()


# Repos to track
REPOS = [
"optimism",
"supersim",
"superchainerc20-starter",
"superchain-registry",
"superchain-ops",
"docs",
"specs",
"design-docs",
"infra",
]


# Number of days to look back when fetching closed PRs or Issues.
CLOSED_ITEMS_LAST_N_DAYS = 7


@dataclass
class GithubActivityData:
prs: pl.DataFrame
issues: pl.DataFrame

pr_comments: pl.DataFrame
pr_reviews: pl.DataFrame

@classmethod
def fetch(
cls,
current_dt: str | None = None,
closed_items_last_n_days: int | None = None,
repo_concurrent_workers: int = 4,
) -> "GithubActivityData":
current_dt = current_dt or now_dt()
closed_items_last_n_days = closed_items_last_n_days or CLOSED_ITEMS_LAST_N_DAYS

# Fetch analytics for all repos.
repo_dfs: dict[str, GithubRepoActivityData] = run_concurrently(
lambda repo: GithubRepoActivityData.fetch(
repo=repo,
current_dt=current_dt,
closed_items_last_n_days=closed_items_last_n_days,
),
targets=REPOS,
max_workers=repo_concurrent_workers,
)

# Consolidate into one dataframe per table for all repos.
return cls(
prs=pl.concat([_.prs for _ in repo_dfs.values()]),
issues=pl.concat([_.issues for _ in repo_dfs.values()]),
pr_comments=pl.concat([_.pr_comments for _ in repo_dfs.values()]),
pr_reviews=pl.concat([_.pr_reviews for _ in repo_dfs.values()]),
)
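
A usage sketch mirroring the prototyping notebook above; the keyword values are illustrative and the module path is spelled as it appears in this PR ("actvity"):

# Fetch activity for all tracked repos and inspect the consolidated frames.
from op_analytics.cli.subcommands.pulls.github.actvity.allrepos import (
    GithubActivityData,
)

dfs = GithubActivityData.fetch(
    current_dt="2025-01-14",        # defaults to now_dt() when omitted
    closed_items_last_n_days=7,     # defaults to CLOSED_ITEMS_LAST_N_DAYS
    repo_concurrent_workers=2,      # repos fetched in parallel
)
print(dfs.prs.shape, dfs.issues.shape, dfs.pr_comments.shape, dfs.pr_reviews.shape)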
@@ -0,0 +1,28 @@
import polars as pl
from github.PullRequestComment import PullRequestComment

from .pullrequests import user_to_row

COMMENTS_SCHEMA = pl.Schema(
[
("pr_number", pl.Int64),
("id", pl.Int64),
("body", pl.String),
("author_association", pl.String),
("created_at", pl.String),
("updated_at", pl.String),
("user", pl.Struct({"login": pl.String, "id": pl.Int64})),
]
)


def comment_to_row(pr_number: int, comment: PullRequestComment) -> dict:
return {
"pr_number": pr_number,
"id": comment._rawData["id"],
"body": comment._rawData["body"],
"author_association": comment._rawData["author_association"],
"created_at": comment._rawData["created_at"],
"updated_at": comment._rawData["updated_at"],
"user": user_to_row(comment._rawData["user"]),
}
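
A hedged sketch of how COMMENTS_SCHEMA and comment_to_row could be combined into a polars DataFrame for one repo. The module path, the target repo, and the use of PullRequest.get_review_comments() are assumptions for illustration; only public PyGithub calls are used:

# Hypothetical sketch: build a polars DataFrame of PR review comments using
# the schema and row converter above.
import polars as pl
from github import Auth, Github

from op_analytics.cli.subcommands.pulls.github.actvity.prcomments import (
    COMMENTS_SCHEMA,
    comment_to_row,
)


def repo_pr_comments_df(token: str, pr_numbers: list[int]) -> pl.DataFrame:
    gh = Github(auth=Auth.Token(token))
    repo = gh.get_repo("ethereum-optimism/optimism")

    rows = []
    for number in pr_numbers:
        pr = repo.get_pull(number)
        # get_review_comments() yields PullRequestComment objects, the type
        # that comment_to_row expects.
        rows.extend(comment_to_row(number, c) for c in pr.get_review_comments())

    return pl.DataFrame(rows, schema=COMMENTS_SCHEMA)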