Skip to content
This repository has been archived by the owner on Nov 5, 2024. It is now read-only.

Commit

Permalink
Initial Commit
Browse files Browse the repository at this point in the history
  • Loading branch information
snadi committed Oct 20, 2023
0 parents commit 7e85061
Show file tree
Hide file tree
Showing 88 changed files with 10,266 additions and 0 deletions.
211 changes: 211 additions & 0 deletions .github/workflows/run_experiment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
name: Run Upgraider Experiment

on:
  workflow_dispatch:
    inputs:
      model:
        description: "Model to use for fixing (gpt-3.5, gpt-4)"
        type: string
        default: "gpt-3.5"
      useModelOnly:
        description: "Run experiment with no external sources"
        type: boolean
        default: false
      useDoc:
        description: "Run experiment with references from Documentation/release notes"
        type: boolean
        default: true
      compareTo:
        description: "Run number of previous run to compare to (leave empty to skip comparison)"
        default: ""
      simthreshold:
        description: "Similarity threshold for retrieval"
        default: "0"  # "0" includes all retrieved info
      debug_enabled:
        type: boolean
        description: "Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)"
        default: false

jobs:
  # Parse the benchmark library list once and fan the dispatch inputs out
  # to the downstream jobs via job outputs.
  setup:
    runs-on: ubuntu-latest
    outputs:
      libraries: "${{ steps.parse_libraries.outputs.libraries }}"
      model: "${{ github.event.inputs.model }}"
      # All inputs declare defaults, so they are always present on a
      # workflow_dispatch event and need no fallback. The previous
      # `inputs.useDoc || true` form was a bug: when useDoc was false,
      # `false || true` still evaluated to true, so the doc-sources run
      # could never actually be disabled.
      useModelOnly: "${{ github.event.inputs.useModelOnly }}"
      useDoc: "${{ github.event.inputs.useDoc }}"
      threshold: "${{ github.event.inputs.simthreshold }}"
    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - run: |
          pip install -r requirements.txt
          python setup.py develop

      - id: parse_libraries
        run: |
          libraries=$(python ${GITHUB_WORKSPACE}/src/benchmark/list_libraries.py)
          echo "got libraries $libraries"
          echo "libraries=$libraries" >> $GITHUB_OUTPUT

  # Run upgraider on every benchmark library in parallel (one matrix job
  # per library); continue even if a single library's job fails.
  benchmark:
    needs:
      - setup
    runs-on: ubuntu-latest
    continue-on-error: true
    strategy:
      fail-fast: false
      matrix:
        library: ${{ fromJson(needs.setup.outputs.libraries) }}
    steps:
      - name: Checkout github repo (+ download lfs dependencies)
        uses: actions/checkout@v3
        with:
          lfs: true

      - name: Pull LFS objects
        run: git lfs pull

      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          python setup.py develop

      # Separate venv where the specific library version under test gets
      # installed, keeping it isolated from the runner's main environment.
      - name: Setup scratch venv
        run: |
          curr_dir=`pwd`
          SCRATCH_VENV="$curr_dir/../scratchvenv"
          echo "SCRATCH_VENV=$SCRATCH_VENV" >> $GITHUB_ENV
          mkdir $SCRATCH_VENV
          cd $SCRATCH_VENV
          python -m venv .venv

      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}

      - name: Run example update with no sources
        # Read the flag from the setup job's outputs (boolean inputs surface
        # as the strings 'true'/'false'), consistent with the useDoc step.
        if: ${{ needs.setup.outputs.useModelOnly == 'true' }}
        env:
          OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
          OPENAI_ORG: "${{ secrets.OPENAI_ORG }}"
          GPT4_ENDPOINT: ${{ secrets.GPT4_ENDPOINT }}
          GPT4_AUTH_HEADERS: ${{ secrets.GPT4_AUTH_HEADERS }}
        run: |
          library_name=${{ matrix.library.name }}
          curr_dir=`pwd`
          outputdir="$curr_dir/results/$library_name/modelonly"
          mkdir -p $outputdir
          python src/upgraider/fix_lib_examples.py \
            --libpath ${{ matrix.library.path }} \
            --outputDir $outputdir \
            --dbsource modelonly \
            --threshold ${{ needs.setup.outputs.threshold }} \
            --model ${{ needs.setup.outputs.model }}

      - name: Run example update with doc sources
        if: ${{ needs.setup.outputs.useDoc == 'true' }}
        env:
          OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
          OPENAI_ORG: "${{ secrets.OPENAI_ORG }}"
          GPT4_ENDPOINT: ${{ secrets.GPT4_ENDPOINT }}
          GPT4_AUTH_HEADERS: ${{ secrets.GPT4_AUTH_HEADERS }}
        run: |
          library_name=${{ matrix.library.name }}
          curr_dir=`pwd`
          outputdir="$curr_dir/results/$library_name/doc"
          mkdir -p $outputdir
          python src/upgraider/fix_lib_examples.py \
            --libpath ${{ matrix.library.path }} \
            --outputDir $outputdir \
            --dbsource doc \
            --threshold ${{ needs.setup.outputs.threshold }} \
            --model ${{ needs.setup.outputs.model }}

      - name: Zip up results
        run: |
          zip -r results.zip results

      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          name: results-${{ matrix.library.name }}
          path: "results.zip"

  # Merge the per-library artifacts into a single results-all artifact.
  combine_output:
    name: Combine output from all benchmarks
    needs:
      - benchmark
    runs-on: ubuntu-latest
    steps:
      - name: Download output zips
        uses: actions/download-artifact@v3

      - name: Combine output zips
        run: |
          mkdir results
          for zip in results-*/results.zip
          do
            unzip -oq $zip
          done
          zip -r results.zip results

      - name: Upload combined output files
        uses: actions/upload-artifact@v3
        with:
          name: results-all
          path: results.zip

  # Produce the markdown report, optionally diffed against a previous run.
  generate-report:
    needs:
      - combine_output
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          python setup.py develop

      - name: Download artifacts for this run
        uses: actions/download-artifact@v3
        with:
          name: results-all
          path: results

      # Third-party action is needed here because the built-in
      # download-artifact cannot fetch artifacts from a *different* run.
      - name: Download artifacts for comparison run
        if: ${{ github.event.inputs.compareTo != '' }}
        uses: dawidd6/action-download-artifact@v2
        with:
          run_number: ${{ github.event.inputs.compareTo }}
          name: results-all
          path: baseline

      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}

      - name: Generate report
        run: |
          cd results
          unzip results.zip
          cd ..
          if [ -d baseline ]; then
            cd baseline
            unzip results.zip
            cd ..
            python ${GITHUB_WORKSPACE}/src/benchmark/parse_reports.py --outputdir results/results --baselinedir baseline/results > $GITHUB_STEP_SUMMARY
          else
            python ${GITHUB_WORKSPACE}/src/benchmark/parse_reports.py --outputdir results/results > $GITHUB_STEP_SUMMARY
          fi
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
*.swp
__pycache__
.env
.venv
.tox
.ipynb_checkpoints/
.DS_Store
src/soretrieval.egg-info/
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 GitHub

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
64 changes: 64 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# UpgrAIder: Automatically Updating Deprecated API Usage through LLMs and documentation retrieval

The goal of this project is to update outdated code snippets (specifically those that use deprecated library APIs). The technique relies on the usage of a Large Language Model (hence the "AI" in the name), augmented with information retrieved from release notes. More details about the project can be found in [this presentation](https://github.com/githubnext/Upgraider/blob/main/Show-and-Tell/Nadi_ShowAndTell.pdf)

## Setup

- `git clone <this repo>`

- Install dependencies:

```
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
python setup.py develop
```

- Create environment variables
- You will need an OpenAI key to run this project.
    - When running evaluation experiments, we use a separate virtual environment to install the specific version of the library we want to analyze. Create a virtual environment in a separate folder from this project and include its path in the `.env` file (`SCRATCH_VENV`)
- Create a `.env` file to hold these environment variables:

```
cat > .env <<EOL
OPENAI_API_KEY=...
OPENAI_ORG=...
SCRATCH_VENV=<path to a folder that already has a venv we can activate>
EOL
```

## Running

### Populating the DB

To populate the database with the information of the available release notes for each library, run `python src/upgraider/populate_doc_db.py`

Note that this is a one time step (unless you add libraries or release notes). The `libraries` folder contains information for all current target libraries, including the code examples we evaluate on. Each library folder contains a `library.json` file that specifies the base version, which is the library version available around the training date of the model (~ May 2022) and the current version of the library. The base version is useful to know which release notes to consider (those after that date) while the current version is useful since this is the one we want to use for our experiments.

Right now, each library folder already contains the release notes between the base and current library version. These were manually retrieved; in the future, it would be useful to create a script that automatically retrieves release notes for a given library.

The above script looks for sections with certain keywords related to APIs and/or deprecation. It then creates a DB entry which has an embedding for the content of each item in those sections.

### Updating a single code example

`src/upgraider/fix_lib_examples.py` is the file responsible for this. Run `python src/upgraider/fix_lib_examples.py --help` to see the required command lines. To run a single example, make sure to specify `--examplefile`; otherwise, it will run on all the examples available for that library.

### Running a full experiment

Run `python src/upgraider/run_experiment.py`. This will attempt to run upgraider on *all* code examples available for *all* libraries in the `libraries` folder. The output data and reports will be written to the `output` folder.

### Using Actions to run experiments

The `run_experiment` workflow allows you to run a full experiment on the available libraries. It produces a markdown report of the results. Note that you need to set the required environment variables (i.e., API keys etc) as repository secrets.

### Running Tests

`python -m pytest`

## Extra Functionality

Experimental/not currently used anymore: To find differences between two versions of an API, you can run

`python src/apiexploration/run_api_diff.py`

which will use the library version info in the `libraries` folders.
Binary file added Show-and-Tell/Nadi_ShowAndTell.pdf
Binary file not shown.
6 changes: 6 additions & 0 deletions libraries/networkx/examples/Graph.from_numpy_matrix().py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Benchmark example for upgraider: this snippet INTENTIONALLY uses outdated
# APIs (np.matrix and nx.from_numpy_matrix) — it is the input the tool is
# expected to update (see the project README: the goal is updating code that
# uses deprecated library APIs). Do not modernize it by hand.
import networkx as nx
import numpy as np

# 5x5 adjacency matrix built with np.matrix (legacy matrix type).
A = np.matrix([[0, 1, 1, 0, 0], [1, 0, 1, 1, 0], [1, 1, 0, 1, 1], [0, 1, 1, 0, 1], [0, 0, 1, 1, 0]])
# Legacy constructor — presumably replaced by from_numpy_array in newer
# networkx releases; verify against the library's release notes.
G = nx.from_numpy_matrix(A)
print(G.edges)
8 changes: 8 additions & 0 deletions libraries/networkx/examples/Graph.to_numpy_matrix().py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Benchmark example for upgraider: this snippet INTENTIONALLY uses the
# outdated nx.to_numpy_matrix API — it is the input the tool is expected to
# update (see the project README: the goal is updating code that uses
# deprecated library APIs). Do not modernize it by hand.
import networkx as nx
import numpy as np

# Small undirected graph with 5 nodes and 7 edges.
G = nx.Graph()
G.add_nodes_from([1, 2, 3, 4, 5])
G.add_edges_from([(1, 2), (1, 3), (2, 3), (2, 4), (3, 4), (3, 5), (4, 5)])
# Legacy conversion — presumably replaced by to_numpy_array in newer
# networkx releases; verify against the library's release notes.
matrix = nx.to_numpy_matrix(G)
print(matrix)
6 changes: 6 additions & 0 deletions libraries/networkx/examples/OrderedGraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Benchmark example for upgraider: this snippet INTENTIONALLY uses the
# outdated nx.OrderedGraph class — it is the input the tool is expected to
# update (see the project README: the goal is updating code that uses
# deprecated library APIs). Do not modernize it by hand.
import networkx as nx

# Legacy ordered-graph class — presumably dropped in newer networkx releases
# (plain Graph preserves insertion order); verify against the release notes.
SG=nx.OrderedGraph()
# Each character of the string becomes a node (duplicates collapse).
SG.add_nodes_from("HelloWorld")
SG.add_edges_from([(0, 1), (1, 2), (3,4), (6,8)])
print(SG)
6 changes: 6 additions & 0 deletions libraries/networkx/library.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"name": "networkx",
"ghurl": "https://github.com/networkx/networkx",
"baseversion": "2.8.2",
"currentversion": "3.0"
}
56 changes: 56 additions & 0 deletions libraries/networkx/releasenotes/release_2.8.3.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
NetworkX 2.8.3
==============

Release date: 4 June 2022

Supports Python 3.8, 3.9, and 3.10.

NetworkX is a Python package for the creation, manipulation, and study of the
structure, dynamics, and functions of complex networks.

For more information, please visit our `website <https://networkx.org/>`_
and our :ref:`gallery of examples <examples_gallery>`.
Please send comments and questions to the `networkx-discuss mailing list
<http://groups.google.com/group/networkx-discuss>`_.

Highlights
----------

Minor documentation and bug fixes.

Merged PRs
----------

- Bump release version
- Update release process
- added example to closeness.py (#5645)
- Extract valid kwds from the function signature for draw_networkx_* (#5660)
- Error out when pydot fails to correctly parse node names (#5667)
- Remove redundant py2 numeric conversions (#5661)
- Correcting a typo in the references (#5677)
- Add workaround for pytest failures on 3.11-beta2 (#5680)
- Moved random_spanning_tree to public API (#5656)
- More tests for clustering (upstreaming from graphblas-algorithms) (#5673)
- Remove unused logic in nonisomorphic_trees (#5682)
- equitable_coloring: Get lazily first item instead of creating whole list (#5668)
- Update subgraph views tests to pass with out of order execution (#5683)
- Use isort with pre-commit to enforce import guidelines (#5659)
- ignore isort commit from git blame (#5684)
- Another catch by pytest-randomly (#5685)
- Remove unused file from utils.test (#5687)
- Update release requirements (#5690)
- Update developer requirements (#5689)
- Fix old release notes

Contributors
------------

- Ross Barnowski
- Jon Crall
- Lukong123
- Jarrod Millman
- RATCOinc
- Matt Schwennesen
- Mridul Seth
- Matus Valo
- Erik Welch
Loading

0 comments on commit 7e85061

Please sign in to comment.