Skip to content
This repository has been archived by the owner on Nov 5, 2024. It is now read-only.

Commit

Permalink
Initial Commit
Browse files Browse the repository at this point in the history
  • Loading branch information
snadi committed Oct 20, 2023
0 parents commit 7e85061
Show file tree
Hide file tree
Showing 88 changed files with 10,266 additions and 0 deletions.
211 changes: 211 additions & 0 deletions .github/workflows/run_experiment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
name: Run Upgraider Experiment

on:
  workflow_dispatch:
    inputs:
      model:
        description: "Model to use for fixing (gpt-3.5, gpt-4)"
        type: string
        default: "gpt-3.5"
      useModelOnly:
        description: "Run experiment with no external sources"
        type: boolean
        default: false
      useDoc:
        description: "Run experiment with references from Documentation/release notes"
        type: boolean
        default: true
      compareTo:
        description: "Run number of previous run to compare to (leave empty to skip comparison)"
        default: ""
      simthreshold:
        description: "Similarity threshold for retrieval"
        default: "0"  # "0" includes all retrieved info
      debug_enabled:
        type: boolean
        description: "Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)"
        default: false

jobs:
  # Parse the benchmark library list once and fan the dispatch inputs out
  # to the downstream jobs via job outputs.
  setup:
    runs-on: ubuntu-latest
    outputs:
      libraries: "${{ steps.parse_libraries.outputs.libraries }}"
      model: "${{ github.event.inputs.model }}"
      # All inputs declare defaults, so they are always present on a
      # workflow_dispatch event and need no fallback. The previous
      # `inputs.useDoc || true` form was a bug: when useDoc was false,
      # `false || true` still evaluated to true, so the doc-sources run
      # could never actually be disabled.
      useModelOnly: "${{ github.event.inputs.useModelOnly }}"
      useDoc: "${{ github.event.inputs.useDoc }}"
      threshold: "${{ github.event.inputs.simthreshold }}"
    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - run: |
          pip install -r requirements.txt
          python setup.py develop

      - id: parse_libraries
        run: |
          libraries=$(python ${GITHUB_WORKSPACE}/src/benchmark/list_libraries.py)
          echo "got libraries $libraries"
          echo "libraries=$libraries" >> $GITHUB_OUTPUT

  # Run upgraider on every benchmark library in parallel (one matrix job
  # per library); continue even if a single library's job fails.
  benchmark:
    needs:
      - setup
    runs-on: ubuntu-latest
    continue-on-error: true
    strategy:
      fail-fast: false
      matrix:
        library: ${{ fromJson(needs.setup.outputs.libraries) }}
    steps:
      - name: Checkout github repo (+ download lfs dependencies)
        uses: actions/checkout@v3
        with:
          lfs: true

      - name: Pull LFS objects
        run: git lfs pull

      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          python setup.py develop

      # Separate venv where the specific library version under test gets
      # installed, keeping it isolated from the runner's main environment.
      - name: Setup scratch venv
        run: |
          curr_dir=`pwd`
          SCRATCH_VENV="$curr_dir/../scratchvenv"
          echo "SCRATCH_VENV=$SCRATCH_VENV" >> $GITHUB_ENV
          mkdir $SCRATCH_VENV
          cd $SCRATCH_VENV
          python -m venv .venv

      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}

      - name: Run example update with no sources
        # Read the flag from the setup job's outputs (boolean inputs surface
        # as the strings 'true'/'false'), consistent with the useDoc step.
        if: ${{ needs.setup.outputs.useModelOnly == 'true' }}
        env:
          OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
          OPENAI_ORG: "${{ secrets.OPENAI_ORG }}"
          GPT4_ENDPOINT: ${{ secrets.GPT4_ENDPOINT }}
          GPT4_AUTH_HEADERS: ${{ secrets.GPT4_AUTH_HEADERS }}
        run: |
          library_name=${{ matrix.library.name }}
          curr_dir=`pwd`
          outputdir="$curr_dir/results/$library_name/modelonly"
          mkdir -p $outputdir
          python src/upgraider/fix_lib_examples.py \
            --libpath ${{ matrix.library.path }} \
            --outputDir $outputdir \
            --dbsource modelonly \
            --threshold ${{ needs.setup.outputs.threshold }} \
            --model ${{ needs.setup.outputs.model }}

      - name: Run example update with doc sources
        if: ${{ needs.setup.outputs.useDoc == 'true' }}
        env:
          OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
          OPENAI_ORG: "${{ secrets.OPENAI_ORG }}"
          GPT4_ENDPOINT: ${{ secrets.GPT4_ENDPOINT }}
          GPT4_AUTH_HEADERS: ${{ secrets.GPT4_AUTH_HEADERS }}
        run: |
          library_name=${{ matrix.library.name }}
          curr_dir=`pwd`
          outputdir="$curr_dir/results/$library_name/doc"
          mkdir -p $outputdir
          python src/upgraider/fix_lib_examples.py \
            --libpath ${{ matrix.library.path }} \
            --outputDir $outputdir \
            --dbsource doc \
            --threshold ${{ needs.setup.outputs.threshold }} \
            --model ${{ needs.setup.outputs.model }}

      - name: Zip up results
        run: |
          zip -r results.zip results

      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          name: results-${{ matrix.library.name }}
          path: "results.zip"

  # Merge the per-library artifacts into a single results-all artifact.
  combine_output:
    name: Combine output from all benchmarks
    needs:
      - benchmark
    runs-on: ubuntu-latest
    steps:
      - name: Download output zips
        uses: actions/download-artifact@v3

      - name: Combine output zips
        run: |
          mkdir results
          for zip in results-*/results.zip
          do
            unzip -oq $zip
          done
          zip -r results.zip results

      - name: Upload combined output files
        uses: actions/upload-artifact@v3
        with:
          name: results-all
          path: results.zip

  # Produce the markdown report, optionally diffed against a previous run.
  generate-report:
    needs:
      - combine_output
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          python setup.py develop

      - name: Download artifacts for this run
        uses: actions/download-artifact@v3
        with:
          name: results-all
          path: results

      # Third-party action is needed here because the built-in
      # download-artifact cannot fetch artifacts from a *different* run.
      - name: Download artifacts for comparison run
        if: ${{ github.event.inputs.compareTo != '' }}
        uses: dawidd6/action-download-artifact@v2
        with:
          run_number: ${{ github.event.inputs.compareTo }}
          name: results-all
          path: baseline

      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}

      - name: Generate report
        run: |
          cd results
          unzip results.zip
          cd ..
          if [ -d baseline ]; then
            cd baseline
            unzip results.zip
            cd ..
            python ${GITHUB_WORKSPACE}/src/benchmark/parse_reports.py --outputdir results/results --baselinedir baseline/results > $GITHUB_STEP_SUMMARY
          else
            python ${GITHUB_WORKSPACE}/src/benchmark/parse_reports.py --outputdir results/results > $GITHUB_STEP_SUMMARY
          fi
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
*.swp
__pycache__
.env
.venv
.tox
.ipynb_checkpoints/
.DS_Store
src/soretrieval.egg-info/
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 GitHub

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
64 changes: 64 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# UpgrAIder: Automatically Updating Deprecated API Usage through LLMs and documentation retrieval

The goal of this project is to update outdated code snippets (specifically those that use deprecated library APIs). The technique relies on the usage of a Large Language Model (hence the "AI" in the name), augmented with information retrieved from release notes. More details about the project can be found in [this presentation](https://github.com/githubnext/Upgraider/blob/main/Show-and-Tell/Nadi_ShowAndTell.pdf)

## Setup

- `git clone <this repo>`

- Install dependencies:

```
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
python setup.py develop
```

- Create environment variables
- You will need an OpenAI key to run this project.
    - When running evaluation experiments, we use a separate virtual environment to install the specific version of the library we want to analyze. Create a virtual environment in a separate folder from this project and include its path in the `.env` file (`SCRATCH_VENV`)
- Create a `.env` file to hold these environment variables:

```
cat > .env <<EOL
OPENAI_API_KEY=...
OPENAI_ORG=...
SCRATCH_VENV=<path to a folder that already has a venv we can activate>
EOL
```

## Running

### Populating the DB

To populate the database with the information of the available release notes for each library, run `python src/upgraider/populate_doc_db.py`

Note that this is a one time step (unless you add libraries or release notes). The `libraries` folder contains information for all current target libraries, including the code examples we evaluate on. Each library folder contains a `library.json` file that specifies the base version, which is the library version available around the training date of the model (~ May 2022) and the current version of the library. The base version is useful to know which release notes to consider (those after that date) while the current version is useful since this is the one we want to use for our experiments.

Right now, each library folder already contains the release notes between the base and current library version. These were manually retrieved; in the future, it would be useful to create a script that automatically retrieves release notes for a given library.

The above script looks for sections with certain keywords related to APIs and/or deprecation. It then creates a DB entry which has an embedding for the content of each item in those sections.

### Updating a single code example

`src/upgraider/fix_lib_examples.py` is the file responsible for this. Run `python src/upgraider/fix_lib_examples.py --help` to see the required command lines. To run a single example, make sure to specify `--examplefile`; otherwise, it will run on all the examples available for that library.

### Running a full experiment

Run `python src/upgraider/run_experiment.py`. This will attempt to run upgraider on *all* code examples available for *all* libraries in the `libraries` folder. The output data and reports will be written to the `output` folder.

### Using Actions to run experiments

The `run_experiment` workflow allows you to run a full experiment on the available libraries. It produces a markdown report of the results. Note that you need to set the required environment variables (i.e., API keys etc) as repository secrets.

### Running Tests

`python -m pytest`

## Extra Functionality

Experimental/not currently used anymore: To find differences between two versions of an API, you can run

`python src/apiexploration/run_api_diff.py`

which will use the library version info in the `libraries` folders.
Binary file added Show-and-Tell/Nadi_ShowAndTell.pdf
Binary file not shown.
6 changes: 6 additions & 0 deletions libraries/networkx/examples/Graph.from_numpy_matrix().py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Benchmark example for upgraider: this snippet INTENTIONALLY uses outdated
# APIs (np.matrix and nx.from_numpy_matrix) — it is the input the tool is
# expected to update (see the project README: the goal is updating code that
# uses deprecated library APIs). Do not modernize it by hand.
import networkx as nx
import numpy as np

# 5x5 adjacency matrix built with np.matrix (legacy matrix type).
A = np.matrix([[0, 1, 1, 0, 0], [1, 0, 1, 1, 0], [1, 1, 0, 1, 1], [0, 1, 1, 0, 1], [0, 0, 1, 1, 0]])
# Legacy constructor — presumably replaced by from_numpy_array in newer
# networkx releases; verify against the library's release notes.
G = nx.from_numpy_matrix(A)
print(G.edges)
8 changes: 8 additions & 0 deletions libraries/networkx/examples/Graph.to_numpy_matrix().py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Benchmark example for upgraider: this snippet INTENTIONALLY uses the
# outdated nx.to_numpy_matrix API — it is the input the tool is expected to
# update (see the project README: the goal is updating code that uses
# deprecated library APIs). Do not modernize it by hand.
import networkx as nx
import numpy as np

# Small undirected graph with 5 nodes and 7 edges.
G = nx.Graph()
G.add_nodes_from([1, 2, 3, 4, 5])
G.add_edges_from([(1, 2), (1, 3), (2, 3), (2, 4), (3, 4), (3, 5), (4, 5)])
# Legacy conversion — presumably replaced by to_numpy_array in newer
# networkx releases; verify against the library's release notes.
matrix = nx.to_numpy_matrix(G)
print(matrix)
6 changes: 6 additions & 0 deletions libraries/networkx/examples/OrderedGraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Benchmark example for upgraider: this snippet INTENTIONALLY uses the
# outdated nx.OrderedGraph class — it is the input the tool is expected to
# update (see the project README: the goal is updating code that uses
# deprecated library APIs). Do not modernize it by hand.
import networkx as nx

# Legacy ordered-graph class — presumably dropped in newer networkx releases
# (plain Graph preserves insertion order); verify against the release notes.
SG=nx.OrderedGraph()
# Each character of the string becomes a node (duplicates collapse).
SG.add_nodes_from("HelloWorld")
SG.add_edges_from([(0, 1), (1, 2), (3,4), (6,8)])
print(SG)
6 changes: 6 additions & 0 deletions libraries/networkx/library.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"name": "networkx",
"ghurl": "https://github.com/networkx/networkx",
"baseversion": "2.8.2",
"currentversion": "3.0"
}
56 changes: 56 additions & 0 deletions libraries/networkx/releasenotes/release_2.8.3.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
NetworkX 2.8.3
==============

Release date: 4 June 2022

Supports Python 3.8, 3.9, and 3.10.

NetworkX is a Python package for the creation, manipulation, and study of the
structure, dynamics, and functions of complex networks.

For more information, please visit our `website <https://networkx.org/>`_
and our :ref:`gallery of examples <examples_gallery>`.
Please send comments and questions to the `networkx-discuss mailing list
<http://groups.google.com/group/networkx-discuss>`_.

Highlights
----------

Minor documentation and bug fixes.

Merged PRs
----------

- Bump release version
- Update release process
- added example to closeness.py (#5645)
- Extract valid kwds from the function signature for draw_networkx_* (#5660)
- Error out when pydot fails to correctly parse node names (#5667)
- Remove redundant py2 numeric conversions (#5661)
- Correcting a typo in the references (#5677)
- Add workaround for pytest failures on 3.11-beta2 (#5680)
- Moved random_spanning_tree to public API (#5656)
- More tests for clustering (upstreaming from graphblas-algorithms) (#5673)
- Remove unused logic in nonisomorphic_trees (#5682)
- equitable_coloring: Get lazily first item instead of creating whole list (#5668)
- Update subgraph views tests to pass with out of order execution (#5683)
- Use isort with pre-commit to enforce import guidelines (#5659)
- ignore isort commit from git blame (#5684)
- Another catch by pytest-randomly (#5685)
- Remove unused file from utils.test (#5687)
- Update release requirements (#5690)
- Update developer requirements (#5689)
- Fix old release notes

Contributors
------------

- Ross Barnowski
- Jon Crall
- Lukong123
- Jarrod Millman
- RATCOinc
- Matt Schwennesen
- Mridul Seth
- Matus Valo
- Erik Welch
Loading

0 comments on commit 7e85061

Please sign in to comment.