diff --git a/.github/workflows/run_experiment.yml b/.github/workflows/run_experiment.yml new file mode 100644 index 0000000..ebfd63b --- /dev/null +++ b/.github/workflows/run_experiment.yml @@ -0,0 +1,211 @@ +name: Run Upgraider Experiment + +on: + workflow_dispatch: + inputs: + model: + description: "Model to use for fixing (gpt-3.5, gpt-4)" + type: string + default: "gpt-3.5" + useModelOnly: + description: "Run experiment with no external sources" + type: boolean + default: false + useDoc: + description: "Run experiment with references from Documentation/release notes" + type: boolean + default: true + compareTo: + description: "Run number of previous run to compare to (leave empty to skip comparison)" + default: "" + simthreshold: + description: "Similarity threshold for retrieval" + default: "0" # include all info + debug_enabled: + type: boolean + description: "Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)" + default: false +jobs: + setup: + runs-on: ubuntu-latest + outputs: + libraries: "${{ steps.parse_libraries.outputs.libraries }}" + model: "${{ github.event.inputs.model }}" + useModelOnly: "${{ github.event.inputs.useModelOnly || false }}" + useDoc: "${{ github.event.inputs.useDoc || true }}" + threshold: "${{ github.event.inputs.simthreshold || 0 }}" + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - run: | + pip install -r requirements.txt + python setup.py develop + + - id: parse_libraries + run: | + libraries=$(python ${GITHUB_WORKSPACE}/src/benchmark/list_libraries.py) + echo "got libraries $libraries" + echo "libraries=$libraries" >> $GITHUB_OUTPUT + + benchmark: + needs: + - setup + runs-on: ubuntu-latest + continue-on-error: true + strategy: + fail-fast: false + matrix: + library: ${{ fromJson(needs.setup.outputs.libraries) }} + steps: + - name: Checkout github repo (+ download lfs dependencies) + uses: actions/checkout@v3 + with: + lfs: true + + - name: Pull LFS objects + run: git lfs pull + + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + pip install -r requirements.txt + python setup.py develop + + - name: Setup scratch venv + run: | + curr_dir=`pwd` + SCRATCH_VENV="$curr_dir/../scratchvenv" + echo "SCRATCH_VENV=$SCRATCH_VENV" >> $GITHUB_ENV + mkdir $SCRATCH_VENV + cd $SCRATCH_VENV + python -m venv .venv + + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 + if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} + + - name: Run example update with no sources + if: ${{ github.event.inputs.useModelOnly == 'true' }} + env: + OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}" + OPENAI_ORG: "${{ secrets.OPENAI_ORG }}" + GPT4_ENDPOINT: ${{ secrets.GPT4_ENDPOINT }} + GPT4_AUTH_HEADERS: ${{ secrets.GPT4_AUTH_HEADERS }} + run: | + library_name=${{ matrix.library.name }} + curr_dir=`pwd` + outputdir="$curr_dir/results/$library_name/modelonly" + mkdir -p $outputdir + python src/upgraider/fix_lib_examples.py \ + --libpath ${{ matrix.library.path }} \ + --outputDir $outputdir \ + --dbsource modelonly \ + --threshold ${{ needs.setup.outputs.threshold }} \ + --model ${{ needs.setup.outputs.model }} \ + + - name: Run example update with doc sources + if: ${{ needs.setup.outputs.useDoc == 'true' }} + env: + OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}" + OPENAI_ORG: "${{ secrets.OPENAI_ORG }}" + GPT4_ENDPOINT: ${{ secrets.GPT4_ENDPOINT }} + GPT4_AUTH_HEADERS: ${{ 
secrets.GPT4_AUTH_HEADERS }} + run: | + library_name=${{ matrix.library.name }} + curr_dir=`pwd` + outputdir="$curr_dir/results/$library_name/doc" + mkdir -p $outputdir + python src/upgraider/fix_lib_examples.py \ + --libpath ${{ matrix.library.path }} \ + --outputDir $outputdir \ + --dbsource doc \ + --threshold ${{ needs.setup.outputs.threshold }} \ + --model ${{ needs.setup.outputs.model }} \ + + - name: Zip up results + run: | + zip -r results.zip results + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + name: results-${{ matrix.library.name }} + path: "results.zip" + + combine_output: + name: Combine output from all benchmarks + needs: + - benchmark + runs-on: ubuntu-latest + steps: + - name: Download output zips + uses: actions/download-artifact@v3 + + - name: Combine output zips + run: | + mkdir results + for zip in results-*/results.zip + do + unzip -oq $zip + done + zip -r results.zip results + - name: Upload combined output files + uses: actions/upload-artifact@v3 + with: + name: results-all + path: results.zip + + generate-report: + needs: + - combine_output + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + pip install -r requirements.txt + python setup.py develop + + - name: Download artifacts for this run + uses: actions/download-artifact@v3 + with: + name: results-all + path: results + + - name: Download artifacts for comparison run + if: ${{ github.event.inputs.compareTo != '' }} + uses: dawidd6/action-download-artifact@v2 + with: + run_number: ${{ github.event.inputs.compareTo }} + name: results-all + path: baseline + + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 + if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} + + - name: Generate report + run: | + cd results + unzip results.zip + cd .. + if [ -d baseline ]; then + cd baseline + unzip results.zip + cd .. + python ${GITHUB_WORKSPACE}/src/benchmark/parse_reports.py --outputdir results/results --baselinedir baseline/results > $GITHUB_STEP_SUMMARY + else + python ${GITHUB_WORKSPACE}/src/benchmark/parse_reports.py --outputdir results/results > $GITHUB_STEP_SUMMARY + fi diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bc8eb6a --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +*.swp +__pycache__ +.env +.venv +.tox +.ipynb_checkpoints/ +.DS_Store +src/soretrieval.egg-info/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..545898a --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 GitHub + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3586b23
--- /dev/null
+++ b/README.md
@@ -0,0 +1,64 @@
+# UpgrAIder: Automatically Updating Deprecated API Usage through LLMs and Documentation Retrieval
+
+The goal of this project is to update outdated code snippets (specifically those that use deprecated library APIs). The technique relies on a Large Language Model (hence the "AI" in the name), augmented with information retrieved from release notes. More details about the project can be found in [this presentation](https://github.com/githubnext/Upgraider/blob/main/Show-and-Tell/Nadi_ShowAndTell.pdf).
+
+## Setup
+
+- `git clone `
+
+- Install dependencies:
+
+```
+python -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+python setup.py develop
+```
+
+- Create environment variables
+ - You will need an OpenAI key to run this project.
+ - When running evaluation experiments, we use a separate virtual environment to install the specific version of the library we want to analyze. Create a virtual environment in a separate folder from this project and include its path in the `.env` file (`SCRATCH_VENV`).
+ - Create a `.env` file to hold these environment variables (a sketch of the variables you will likely need is shown after this list):
+
+ ```
+ cat > .env <
+ ```
+
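+For reference, here is a sketch of what the `.env` file will likely look like. The variable names below are the ones referenced by the scripts and by the `run_experiment` workflow described later in this README; the values are placeholders, and the exact set you need may vary (for instance, the `GPT4_*` entries should only matter when running with the `gpt-4` model):
+
+```
+# OpenAI credentials used when calling the OpenAI API
+OPENAI_API_KEY=<your OpenAI API key>
+OPENAI_ORG=<your OpenAI organization id>
+# Endpoint and auth headers, likely only needed when running with the gpt-4 model
+GPT4_ENDPOINT=<GPT-4 endpoint URL>
+GPT4_AUTH_HEADERS=<auth headers for the GPT-4 endpoint>
+# Path to the separate scratch virtual environment created above
+SCRATCH_VENV=/absolute/path/to/scratchvenv
+```
+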
+## Running
+
+### Populating the DB
+
+To populate the database with information from the available release notes for each library, run `python src/upgraider/populate_doc_db.py`.
+
+Note that this is a one-time step (unless you add libraries or release notes). The `libraries` folder contains information for all current target libraries, including the code examples we evaluate on. Each library folder contains a `library.json` file that specifies the base version, which is the library version available around the training date of the model (~ May 2022), and the current version of the library. The base version is useful for knowing which release notes to consider (those after that date), while the current version is the one we want to use for our experiments.
+
+Right now, each library folder already contains the release notes between the base and current library versions. These were manually retrieved; in the future, it would be useful to create a script that automatically retrieves release notes for a given library.
+
+The above script looks for sections with certain keywords related to APIs and/or deprecation. It then creates a DB entry with an embedding of the content of each item in those sections.
+
+### Updating a single code example
+
+`src/upgraider/fix_lib_examples.py` is the file responsible for this. Run `python src/upgraider/fix_lib_examples.py --help` to see the available command-line options. To run a single example, make sure to specify `--examplefile`; otherwise, it will run on all the examples available for that library.
+
+### Running a full experiment
+
+Run `python src/upgraider/run_experiment.py`. This will attempt to run upgraider on *all* code examples available for *all* libraries in the `libraries` folder. The output data and reports will be written to the `output` folder.
+
+### Using Actions to run experiments
+
+The `run_experiment` workflow allows you to run a full experiment on the available libraries. It produces a markdown report of the results. Note that you need to set the required environment variables (e.g., API keys) as repository secrets.
+
+### Running Tests
+
+`python -m pytest`
+
+## Extra Functionality
+
+Experimental/not currently used anymore: To find differences between two versions of an API, you can run
+
+`python src/apiexploration/run_api_diff.py`
+
+which will use the library version info in the `libraries` folder.
\ No newline at end of file
diff --git a/Show-and-Tell/Nadi_ShowAndTell.pdf b/Show-and-Tell/Nadi_ShowAndTell.pdf
new file mode 100644
index 0000000..7ca9a57
Binary files /dev/null and b/Show-and-Tell/Nadi_ShowAndTell.pdf differ
diff --git a/libraries/networkx/examples/Graph.from_numpy_matrix().py b/libraries/networkx/examples/Graph.from_numpy_matrix().py
new file mode 100644
index 0000000..647b4c3
--- /dev/null
+++ b/libraries/networkx/examples/Graph.from_numpy_matrix().py
@@ -0,0 +1,6 @@
+import networkx as nx
+import numpy as np
+
+A = np.matrix([[0, 1, 1, 0, 0], [1, 0, 1, 1, 0], [1, 1, 0, 1, 1], [0, 1, 1, 0, 1], [0, 0, 1, 1, 0]])
+G = nx.from_numpy_matrix(A)
+print(G.edges)
\ No newline at end of file
diff --git a/libraries/networkx/examples/Graph.to_numpy_matrix().py b/libraries/networkx/examples/Graph.to_numpy_matrix().py
new file mode 100644
index 0000000..6ce53f4
--- /dev/null
+++ b/libraries/networkx/examples/Graph.to_numpy_matrix().py
@@ -0,0 +1,8 @@
+import networkx as nx
+import numpy as np
+
+G = nx.Graph()
+G.add_nodes_from([1, 2, 3, 4, 5])
+G.add_edges_from([(1, 2), (1, 3), (2, 3), (2, 4), (3, 4), (3, 5), (4, 5)])
+matrix = nx.to_numpy_matrix(G)
+print(matrix)
\ No newline at end of file
diff --git a/libraries/networkx/examples/OrderedGraph.py b/libraries/networkx/examples/OrderedGraph.py
new file mode 100644
index 0000000..8fef9a6
--- /dev/null
+++ b/libraries/networkx/examples/OrderedGraph.py
@@ -0,0 +1,6 @@
+import networkx as nx
+
+SG=nx.OrderedGraph()
+SG.add_nodes_from("HelloWorld")
+SG.add_edges_from([(0, 1), (1, 2), (3,4), (6,8)])
+print(SG)
\ No newline at end of file
diff --git a/libraries/networkx/library.json b/libraries/networkx/library.json
new file mode 100644
index 0000000..9dd05be
--- /dev/null
+++ b/libraries/networkx/library.json
@@ -0,0 +1,6 @@
+{
+ "name": "networkx",
+ "ghurl": "https://github.com/networkx/networkx",
+ "baseversion": "2.8.2",
+ "currentversion": "3.0"
+}
\ No newline at end of file
diff --git a/libraries/networkx/releasenotes/release_2.8.3.rst b/libraries/networkx/releasenotes/release_2.8.3.rst
new file mode 100644
index 0000000..96f5e0e
--- /dev/null
+++ b/libraries/networkx/releasenotes/release_2.8.3.rst
@@ -0,0 +1,56 @@
+NetworkX 2.8.3
+==============
+
+Release date: 4 June 2022
+
+Supports Python 3.8, 3.9, and 3.10.
+
+NetworkX is a Python package for the creation, manipulation, and study of the
+structure, dynamics, and functions of complex networks.
+
+For more information, please visit our `website `_
+and our :ref:`gallery of examples `.
+Please send comments and questions to the `networkx-discuss mailing list
+`_.
+
+Highlights
+----------
+
+Minor documentation and bug fixes.
+ +Merged PRs +---------- + +- Bump release version +- Update release process +- added example to closeness.py (#5645) +- Extract valid kwds from the function signature for draw_networkx_* (#5660) +- Error out when pydot fails to correctly parse node names (#5667) +- Remove redundant py2 numeric conversions (#5661) +- Correcting a typo in the references (#5677) +- Add workaround for pytest failures on 3.11-beta2 (#5680) +- Moved random_spanning_tree to public API (#5656) +- More tests for clustering (upstreaming from graphblas-algorithms) (#5673) +- Remove unused logic in nonisomorphic_trees (#5682) +- equitable_coloring: Get lazily first item instead of creating whole list (#5668) +- Update subgraph views tests to pass with out of order execution (#5683) +- Use isort with pre-commit to enforce import guidelines (#5659) +- ignore isort commit from git blame (#5684) +- Another catch by pytest-randomly (#5685) +- Remove unused file from utils.test (#5687) +- Update release requirements (#5690) +- Update developer requirements (#5689) +- Fix old release notes + +Contributors +------------ + +- Ross Barnowski +- Jon Crall +- Lukong123 +- Jarrod Millman +- RATCOinc +- Matt Schwennesen +- Mridul Seth +- Matus Valo +- Erik Welch diff --git a/libraries/networkx/releasenotes/release_2.8.4.rst b/libraries/networkx/releasenotes/release_2.8.4.rst new file mode 100644 index 0000000..5ab1011 --- /dev/null +++ b/libraries/networkx/releasenotes/release_2.8.4.rst @@ -0,0 +1,57 @@ +NetworkX 2.8.4 +============== + +Release date: 13 June 2022 + +Supports Python 3.8, 3.9, and 3.10. + +NetworkX is a Python package for the creation, manipulation, and study of the +structure, dynamics, and functions of complex networks. + +For more information, please visit our `website `_ +and our :ref:`gallery of examples `. +Please send comments and questions to the `networkx-discuss mailing list +`_. + +Highlights +---------- + +Minor documentation and bug fixes. + +Merged PRs +---------- + +- Bump release version +- Clean up maximal_independent_set tests (#5567) +- MAINT: Cleanup centrality module, remove unused variables (#5308) +- importorskip scipy instead of numpy for total spanning tree (#5693) +- Add initial_graph parameter to scale_free_graph and deprecate create_using (#5697) +- Add docstring example for attr transfer to linegraph. (#5698) +- Update ISMAGS.analyze_symmetry docstring. (#5696) +- Add default value p=2 for minkowski distance metric. (#5700) +- Update inline code to inline math in docstring (#5701) +- Update multigraph docstrings to reflect `remove_edges_from` behavior. (#5699) +- Update simple_cycles docstring w/ yields and examples (#5709) +- Chromatic polynomial (#5675) +- Catch ':' explicitly while working with pydot (#5710) +- Revert "Add workaround for pytest failures on 3.11b2" (#5717) +- Default to lightmode for documentation (#5715) +- Dont compute all biconnected components in `is_biconnected()` (#5688) +- Some more changes to make pytest-randomly happy (#5719) +- Add durations flag to coverage run on CI. (#5718) +- Recover order of layers in multipartite_layout when layers are sortable (#5705) +- Update doc requirements (#5711) +- Touchups to MG and MDG edges docstrings. (#5708) +- Add PendingDeprecation for pydot (#5721) +- Add example of topo_order kwarg to dag_longest_path (#5728) +- CI: add pytest-randomly workflow. (#4553) + +Contributors +------------ + +- Ross Barnowski +- Szabolcs Horvát +- Lucas H. 
McCabe +- Jarrod Millman +- Mridul Seth +- Matus Valo diff --git a/libraries/networkx/releasenotes/release_2.8.5.rst b/libraries/networkx/releasenotes/release_2.8.5.rst new file mode 100644 index 0000000..fc17923 --- /dev/null +++ b/libraries/networkx/releasenotes/release_2.8.5.rst @@ -0,0 +1,58 @@ +NetworkX 2.8.5 +============== + +Release date: 18 July 2022 + +Supports Python 3.8, 3.9, and 3.10. + +NetworkX is a Python package for the creation, manipulation, and study of the +structure, dynamics, and functions of complex networks. + +For more information, please visit our `website `_ +and our :ref:`gallery of examples `. +Please send comments and questions to the `networkx-discuss mailing list +`_. + +Highlights +---------- + +Minor documentation and bug fixes. + +Merged PRs +---------- + +- Bump release version +- Check that nodes have "pos" attribute in geometric_edges (#5707) +- Correct louvain formula, solve infinite loops (#5713) +- Add more comprehensive tests for pydot (#5792) +- Compute `is_strongly_connected` lazily (#5793) +- Compute `is_weakly_connected` lazily (#5795) +- Updated astar docstring (#5797) +- Fix typo in bipartite closeness_centrality and thought-o in tests (#5800) +- Fix pydot colon check node-to-str conversion (#5809) +- Temporary fix for failing tests w/ scipy1.9. (#5816) +- Update distance parameter description. (#5819) +- Fix #5817 (#5822) +- Attempt to reverse slowdown from hasattr needed for cached_property (#5836) +- Update tests in base class and simple rename in convert.py (#5848) +- Move factory attributes to the class instead of instance. (#5850) +- Point to the latest URL for the description. (#5852) +- Gallery example: Morse code alphabet as a prefix tree (#5867) +- make lazy_import private and remove its internal use (#5878) +- Run CI against v2.8 branch +- CI: add explicit path while installing pygraphviz wheels on macOS in GHA (#5805) +- Deploy docs on v2.8 branch + +Contributors +------------ + +- Ross Barnowski +- Shaked Brody +- Lior +- Jarrod Millman +- Tomoya Nishide +- Dimitrios Papageorgiou +- Dan Schult +- Matt Schwennesen +- Mridul Seth +- Matus Valo diff --git a/libraries/networkx/releasenotes/release_2.8.6.rst b/libraries/networkx/releasenotes/release_2.8.6.rst new file mode 100644 index 0000000..fbc43f2 --- /dev/null +++ b/libraries/networkx/releasenotes/release_2.8.6.rst @@ -0,0 +1,74 @@ +NetworkX 2.8.6 +============== + +Release date: 22 August 2022 + +Supports Python 3.8, 3.9, and 3.10. + +NetworkX is a Python package for the creation, manipulation, and study of the +structure, dynamics, and functions of complex networks. + +For more information, please visit our `website `_ +and our :ref:`gallery of examples `. +Please send comments and questions to the `networkx-discuss mailing list +`_. + +Highlights +---------- + +Minor documentation and bug fixes. + +Merged PRs +---------- + +- Add random_spanning_tree to documentation (#5810) +- DOC: Switch to enumerated list in quotient_graph docstring (#5837) +- Add warning to nx_agraph about layout nondeterminism. 
(#5832) +- Update docs to include description of the `return_seen` kwarg (#5891) +- Add cache reset for when G._node is changed (#5894) +- Allow classes to relabel nodes -- casting (#5903) +- Update lattice.py (#5914) +- Add to about_us.rst (#5919) +- Update precommit hooks (#5923) +- Remove old Appveyor cruft (#5924) +- signature change for `node_link` functions: for issue #5787 (#5899) +- Allow unsortable nodes in approximation.treewidth functions (#5921) +- Fix Louvain_partitions by yielding a copy of the sets in the partition gh-5901 (#5902) +- Adds ```nx.bfs_layers``` method (#5879) +- Add function bfs_layers to docs (#5932) +- Propose to make new node_link arguments keyword only. (#5928) +- Bump nodelink args deprecation expiration to v3.2 (#5933) +- Add examples to lowest common ancestors algorithms (#5531) +- Naive lowest common ancestor implementation (#5736) +- Add examples for the condensation function (#5452) +- Minor doc fixups (#5868) +- update all_pairs_lca docstrings (#5876) +- Improve LCA input validation (#5877) +- Replace LCA with naive implementations (#5883) +- Update release notes +- docstring update to lexicographical_topological_sort issue 5681 (#5930) +- Support matplotlb 3.6rc1 failure (#5937) + +Improvements +------------ + +- [`#5883 `_] + Replace the implementation of ``lowest_common_ancestor`` and + ``all_pairs_lowest_common_ancestor`` with a "naive" algorithm to fix + several bugs and improve performance. + +Contributors +------------ + +- Tanmay Aeron +- Ross Barnowski +- Kevin Brown +- Matthias Bussonnier +- Tigran Khachatryan +- Dhaval Kumar +- Jarrod Millman +- Sultan Orazbayev +- Dan Schult +- Matt Schwennesen +- Dilara Tekinoglu +- kpetridis diff --git a/libraries/networkx/releasenotes/release_2.8.7.rst b/libraries/networkx/releasenotes/release_2.8.7.rst new file mode 100644 index 0000000..71b5aec --- /dev/null +++ b/libraries/networkx/releasenotes/release_2.8.7.rst @@ -0,0 +1,52 @@ +NetworkX 2.8.7 +============== + +Release date: 1 October 2022 + +Supports Python 3.8, 3.9, and 3.10. + +NetworkX is a Python package for the creation, manipulation, and study of the +structure, dynamics, and functions of complex networks. + +For more information, please visit our `website `_ +and our :ref:`gallery of examples `. +Please send comments and questions to the `networkx-discuss mailing list +`_. + +Highlights +---------- + +Minor documentation and bug fixes. + +Merged PRs +---------- + +- Bump release version +- Fixed unused root argument in has_bridges (#5846) +- docstring updates for `union`, `disjoint_union`, and `compose` (#5892) +- Updated networkx/classes/function.py . Solves Issue #5463 (#5474) +- Improved documentation for all_simple_paths (#5944) +- Change is_path to return False when node not in G instead of raising exception (#5943) +- Minor docstring touchups and test refactor for `is_path` (#5967) +- Update documentation header links for latest pydata-sphinx-theme (#5966) +- Fix failing example due to mpl 3.6 colorbar. (#5994) +- Add Tidelift security vulnerability link (#6001) +- Update linters (#6006) + +Improvements +------------ + +- [`#5943 `_] + ``is_path`` used to raise a `KeyError` when the ``path`` argument contained + a node that was not in the Graph. The behavior has been updated so that + ``is_path`` returns `False` in this case rather than raising the exception. 
+ +Contributors +------------ + +- Juanita Gomez +- Kevin Brown +- 0ddoes +- pmlpm1986 +- Dan Schult +- Jarrod Millman diff --git a/libraries/networkx/releasenotes/release_2.8.8.rst b/libraries/networkx/releasenotes/release_2.8.8.rst new file mode 100644 index 0000000..b6e88e7 --- /dev/null +++ b/libraries/networkx/releasenotes/release_2.8.8.rst @@ -0,0 +1,70 @@ +NetworkX 2.8.8 +============== + +Release date: 1 November 2022 + +Supports Python 3.8, 3.9, 3.10, and 3.11. + +NetworkX is a Python package for the creation, manipulation, and study of the +structure, dynamics, and functions of complex networks. + +For more information, please visit our `website `_ +and our :ref:`gallery of examples `. +Please send comments and questions to the `networkx-discuss mailing list +`_. + +Highlights +---------- + +Minor documentation and bug fixes. + +Merged PRs +---------- + +- Bump release version +- Fix warnings from running tests in randomized order (#6014) +- Update pydata-sphinx-theme (#6012) +- update secutiry link to tidelift (#6019) +- Update numpydoc (#6022) +- Support Python 3.11 (#6023) +- Update linters (#6024) +- Minor updates to expanders generator tests (#6027) +- Add missing asserts to tests (#6039) +- fixes #6036 (#6080) +- Improve test coverage expanders line graph generators solved (PR for issue #6034) (#6071) +- Replace .A call with .toarray for sparse array in example. (#6106) +- Improve test coverage for algorithms/richclub.py (#6089) +- Tested boykov_kolmogorov and dinitz with cutoff (#6104) +- Improve test coverage for multigraph class (#6101) +- Improve test coverage for algorithms in dominating_set.py (PR for issue 6032) (#6068) +- Improve test coverage for graph class (#6105) +- added coverage in generators/tree.py (#6082) +- DOC: Specifically branch off main, instead of current branch (#6127) +- Improve test coverage for multidigraph class (#6131) +- Improve test coverage for digraph class (#6130) +- Improve test coverage for algorithms in dispersion.py (#6100) +- Test on Python 3.11 (#6159) +- Improve test coverage in algorithms shortest paths unweighted.py (#6121) +- Increased test coverage algorithms/matching.py (#6095) +- Renamed test functions in test_lowest_common_ancestors (#6110) +- Increase covering coverage (#6099) +- Add example for fiedler_vector (#6155) +- Improve test coverage for cycles.py (#6152) +- Added an example in all_pairs_node_connectivity (#6126) +- Amount of nodes and edges have mistakes when reading adjlist file (#6132) +- Update pytest (#6165) + +Contributors +------------ + +- Ross Barnowski +- Paula Pérez Bianchi +- DiamondJoseph +- Jarrod Millman +- Mjh9122 +- Alimi Qudirah +- Okite chimaobi Samuel +- Jefter Santiago +- Dan Schult +- Mridul Seth +- Tindi Sommers diff --git a/libraries/networkx/releasenotes/release_3.0.rst b/libraries/networkx/releasenotes/release_3.0.rst new file mode 100644 index 0000000..4694412 --- /dev/null +++ b/libraries/networkx/releasenotes/release_3.0.rst @@ -0,0 +1,329 @@ +NetworkX 3.0 +============ + +Release date: 7 January 2023 + +Supports Python 3.8, 3.9, 3.10, and 3.11. + +NetworkX is a Python package for the creation, manipulation, and study of the +structure, dynamics, and functions of complex networks. + +For more information, please visit our `website `_ +and our :ref:`gallery of examples `. +Please send comments and questions to the `networkx-discuss mailing list +`_. + +Highlights +---------- + +This release is the result of 8 months of work with over 180 changes by +41 contributors. 
We also have a `guide for people moving from NetworkX 2.X +to NetworkX 3.0 `_. Highlights include: + +- Better syncing between G._succ and G._adj for directed G. + And slightly better speed from all the core adjacency data structures. + G.adj is now a cached_property while still having the cache reset when + G._adj is set to a new dict (which doesn't happen very often). + Note: We have always assumed that G._succ and G._adj point to the same + object. But we did not enforce it well. If you have somehow worked + around our attempts and are relying on these private attributes being + allowed to be different from each other due to loopholes in our previous + code, you will have to look for other loopholes in our new code + (or subclass DiGraph to explicitly allow this). +- If your code sets G._succ or G._adj to new dictionary-like objects, you no longer + have to set them both. Setting either will ensure the other is set as well. + And the cached_properties G.adj and G.succ will be rest accordingly too. +- If you use the presence of the attribute `_adj` as a criteria for the object + being a Graph instance, that code may need updating. The graph classes + themselves now have an attribute `_adj`. So, it is possible that whatever you + are checking might be a class rather than an instance. We suggest you check + for attribute `_adj` to verify it is like a NetworkX graph object or type and + then `type(obj) is type` to check if it is a class. +- We have added an `experimental plugin feature `_, + which let users choose alternate backends like GraphBLAS, CuGraph for computation. This is an + opt-in feature and may change in future releases. +- Improved integration with the general `Scientific Python ecosystem `_. +- New drawing feature (module and tests) from NetworkX graphs to the TikZ library of TeX/LaTeX. + The basic interface is ``nx.to_latex(G, pos, **options)`` to construct a string of latex code or + ``nx.write_latex(G, filename, as_document=True, **options)`` to write the string to a file. +- Added an improved subgraph isomorphism algorithm called VF2++. + +Improvements +------------ +- [`#5663 `_] + Implements edge swapping for directed graphs. +- [`#5883 `_] + Replace the implementation of ``lowest_common_ancestor`` and + ``all_pairs_lowest_common_ancestor`` with a "naive" algorithm to fix + several bugs and improve performance. +- [`#5912 `_] + The ``mapping`` argument of the ``relabel_nodes`` function can be either a + mapping or a function that creates a mapping. ``relabel_nodes`` first checks + whether the ``mapping`` is callable - if so, then it is used as a function. + This fixes a bug related for ``mapping=str`` and may change the behavior for + other ``mapping`` arguments that implement both ``__getitem__`` and + ``__call__``. +- [`#5898 `_] + Implements computing and checking for minimal d-separators between two nodes. + Also adds functionality to DAGs for computing v-structures. +- [`#5943 `_] + ``is_path`` used to raise a `KeyError` when the ``path`` argument contained + a node that was not in the Graph. The behavior has been updated so that + ``is_path`` returns `False` in this case rather than raising the exception. +- [`#6003 `_] + ``avg_shortest_path_length`` now raises an exception if the provided + graph is directed but not strongly connected. The previous test (weak + connecting) was wrong; in that case, the returned value was nonsensical. 
+ +API Changes +----------- + +- [`#5813 `_] + OrderedGraph and other Ordered classes are replaced by Graph because + Python dicts (and thus networkx graphs) now maintain order. +- [`#5899 `_] + The `attrs` keyword argument will be replaced with keyword only arguments + `source`, `target`, `name`, `key` and `link` for `json_graph/node_link` functions. + +Deprecations +------------ + +- [`#5723 `_] + ``nx.nx_pydot.*`` will be deprecated in the future if pydot isn't being + actively maintained. Users are recommended to use pygraphviz instead. +- [`#5899 `_] + The `attrs` keyword argument will be replaced with keyword only arguments + `source`, `target`, `name`, `key` and `link` for `json_graph/node_link` functions. + +Merged PRs +---------- + +- Bump release version +- Add characteristic polynomial example to polynomials docs (#5730) +- Remove deprecated function is_string_like (#5738) +- Remove deprecated function make_str (#5739) +- Remove unused 'name' parameter from `union` (#5741) +- Remove deprecated function is_iterator (#5740) +- Remove deprecated `euclidean` from geometric.py (#5744) +- Remove deprecated function utils.consume (#5745) +- Rm `to_numpy_recarray` (#5737) +- Remove deprecated function utils.empty_generator (#5748) +- Rm jit.py (#5751) +- Remove deprecated context managers (#5752) +- Remove deprecated function utils.to_tuple (#5755) +- Remove deprecated display_pygraphviz (#5754) +- Remove to_numpy_matrix & from_numpy_matrix (#5746) +- Remove deprecated decorator preserve_random_state (#5768) +- Remove deprecated function is_list_of_ints (#5743) +- Remove decorator random_state (#5770) +- remove `adj_matrix` from `linalg/graphmatrix.py` (#5753) +- Remove betweenness_centrality_source (#5786) +- Remove deprecated simrank_similarity_numpy (#5783) +- Remove networkx.testing subpackage (#5782) +- Change PyDot PendingDeprecation to Deprecation (#5781) +- Remove deprecated numeric_mixing_matrix (#5777) +- Remove deprecated functions make_small_graph and make_small_undirected_graph (#5761) +- Remove _naive_greedy_modularity_communities (#5760) +- Make chordal_graph_cliques a generator (#5758) +- update cytoscape functions to drop old signature (#5784) +- Remove deprecated functions dict_to_numpy_array2 and dict_to_numpy_array1 (#5756) +- Remove deprecated function utils.default_opener (#5747) +- Remove deprecated function iterable (#5742) +- remove old attr keyword from json_graph/tree (#5785) +- Remove generate_unique_node (#5780) +- Replace node_classification subpackage with a module (#5774) +- Remove gpickle (#5773) +- Remove deprecated function extrema_bounding (#5757) +- Remove coverage and performance from quality (#5775) +- Update return type of google_matrix to numpy.ndarray (#5762) +- Remove deprecated k-nearest-neighbors (#5769) +- Remove gdal dependency (#5766) +- Update return type of attrmatrix (#5764) +- Remove unused deprecated argument from to_pandas_edgelist (#5778) +- Remove deprecated function edge_betweeness (#5765) +- Remove pyyaml dependency (#5763) +- Remove copy methods for Filter* coreviews (#5776) +- Remove deprecated function nx.info (#5759) +- Remove deprecated n_communities argument from greedy_modularity_communities (#5789) +- Remove deprecated functions hub_matrix and authority_matrix (#5767) +- Make HITS numpy and scipy private functions (#5771) +- Add Triad example plot (#5528) +- Add gallery example visualizing DAG with multiple layouts (#5432) +- Make pagerank numpy and scipy private functions (#5772) +- Implement directed edge swap (#5663) 
+- Update relabel.py to preserve node order (#5258) +- Modify DAG example to show topological layout. (#5835) +- Add docstring example for self-ancestors/descendants (#5802) +- Update precommit linters (#5839) +- remove to/from_scipy_sparse_matrix (#5779) +- Clean up from PR #5779 (#5841) +- Corona Product (#5223) +- Add direct link to github networkx org sponsorship (#5843) +- added examples to efficiency_measures.py (#5643) +- added examples to regular.py (#5642) +- added examples to degree_alg.py (#5644) +- Add docstring examples for triads functions (#5522) +- Fix docbuild warnings: is_string_like is removed and identation in corona product (#5845) +- Use py_random_state to control randomness of random_triad (#5847) +- Remove OrderedGraphs (#5813) +- Drop NumPy 1.19 (#5856) +- Speed up unionfind a bit by not adding root node in the path (#5844) +- Minor doc fixups (#5868) +- Attempt to reverse slowdown from hasattr needed for cached_property (#5836) +- make lazy_import private and remove its internal use (#5878) +- strategy_saturation_largest_first now accepts partial colorings (#5888) +- Add weight distance metrics (#5305) +- docstring updates for `union`, `disjoint_union`, and `compose` (#5892) +- Update precommit hooks (#5923) +- Remove old Appveyor cruft (#5924) +- signature change for `node_link` functions: for issue #5787 (#5899) +- Replace LCA with naive implementations (#5883) +- Bump nodelink args deprecation expiration to v3.2 (#5933) +- Update mapping logic in `relabel_nodes` (#5912) +- Update pygraphviz (#5934) +- Further improvements to strategy_saturation_largest_first (#5935) +- Arf layout (#5910) +- [ENH] Find and verify a minimal D-separating set in DAG (#5898) +- Add Mehlhorn Steiner approximations (#5629) +- Preliminary VF2++ Implementation (#5788) +- Minor docstring touchups and test refactor for `is_path` (#5967) +- Switch to relative import for vf2pp_helpers. (#5973) +- Add vf2pp_helpers subpackage to wheel (#5975) +- Enhance biconnected components to avoid indexing (#5974) +- Update mentored projects list (#5985) +- Add concurrency hook to cancel jobs on new push. (#5986) +- Make all.py generator friendly (#5984) +- Only run scheduled pytest-randomly job in main repo. (#5993) +- Fix steiner tree test (#5999) +- Update doc requirements (#6008) +- VF2++ for Directed Graphs (#5972) +- Fix defect and update docs for MappedQueue, related to gh-5681 (#5939) +- Update pydata-sphinx-theme (#6012) +- Update numpydoc (#6022) +- Fixed test for average shortest path in the case of directed graphs (#6003) +- Update deprecations after 3.0 dep sprint (#6031) +- Use scipy.sparse array datastructure (#6037) +- Designate 3.0b1 release +- Bump release version +- Use org funding.yml +- Update which flow functions support the cutoff argument (#6085) +- Update GML parsing/writing to allow empty lists/tuples as node attributes (#6093) +- Warn on unused visualization kwargs that only apply to FancyArrowPatch edges (#6098) +- Fix weighted MultiDiGraphs in DAG longest path algorithms + add additional tests (#5988) +- Circular center node layout (#6114) +- Fix doc inconsistencies related to cutoff in connectivity.py and disjoint_paths.py (#6113) +- Remove deprecated maxcardinality parameter from min_weight_matching (#6146) +- Remove deprecated `find_cores` (#6139) +- Remove deprecated project function from bipartite package. 
(#6147) +- Improve test coverage for voterank algorithm (#6161) +- plugin based backend infrastructure to use multiple computation backends (#6000) +- Undocumented parameters in dispersion (#6183) +- Swap.py coverage to 100 (#6176) +- Improve test coverage for current_flow_betweenness module (#6143) +- Completed Testing in community.py resolves issue #6184 (#6185) +- Added an example to algebraic_connectivity (#6153) +- Add ThinGraph example to Multi*Graph doc_strings (#6160) +- Fix defect in eulerize, replace reciprocal edge weights (#6145) +- For issue #6030 Add test coverage for algorithms in beamsearch.py (#6087) +- Improve test coverage expanders stochastic graph generators (#6073) +- Update developer requirements (#6194) +- Designate 3.0rc1 release +- Bump release version +- Tests added in test_centrality.py (#6200) +- add laplacian_spectrum example (#6169) +- PR for issue #6033 Improve test coverage for algorithms in betweenness_subset.py #6033 (#6083) +- Di graph edges doc fix (#6108) +- Improve coverage for core.py (#6116) +- Add clear edges method as a method to be frozen by nx.freeze (#6190) +- Adds LCA test case for self-ancestors from gh-4458. (#6218) +- Minor Python 2 cleanup (#6219) +- Add example laplacian matrix (#6168) +- Revert 6219 and delete comment. (#6222) +- fix wording in error message (#6228) +- Rm incorrect test case for connected edge swap (#6223) +- add missing `seed` to function called by `connected_double_edge_swap` (#6231) +- Hide edges with a weight of None in A*. (#5945) +- Add dfs_labeled_edges reporting of reverse edges due to depth_limit. (#6240) +- Warn users about duplicate nodes in generator function input (#6237) +- Reenable geospatial examples (#6252) +- Draft 3.0 release notes (#6232) +- Add 2.8.x release notes (#6255) +- doc: clarify allowed `alpha` when using nx.draw_networkx_edges (#6254) +- Add a contributor (#6256) +- Allow MultiDiGraphs for LCA (#6234) +- Update simple_paths.py to improve readability of the BFS. (#6273) +- doc: update documentation when providing an iterator over current graph to add/remove_edges_from. (#6268) +- Fix bug vf2pp is isomorphic issue 6257 (#6270) +- Improve test coverage for Eigenvector centrality (#6227) +- Bug fix in swap: directed_edge_swap and double_edge_swap (#6149) +- Adding a test to verify that a NetworkXError is raised when calling n… (#6265) +- Pin to sphinx 5.2.3 (#6277) +- Update pre-commit hooks (#6278) +- Update GH actions (#6280) +- Fix links in release notes (#6281) +- bug fix in smallworld.py: random_reference and lattice_reference (#6151) +- [DOC] Follow numpydoc standard in barbell_graph documentation (#6286) +- Update simple_paths.py: consistent behaviour for `is_simple_path` when path contains nodes not in the graph. 
(#6272) +- Correctly point towards 2.8.8 in release notes (#6298) +- Isomorphism improve documentation (#6295) +- Improvements and test coverage for `line.py` (#6215) +- Fix typo in Katz centrality comment (#6310) +- Broken link in isomorphism documentation (#6296) +- Update copyright years to 2023 (#6322) +- fix warnings for make doctest (#6323) +- fix whitespace issue in test_internet_as_graph (#6324) +- Create a Tikz latex drawing feature for networkx (#6238) +- Fix docstrings (#6329) +- Fix documentation deployment (#6330) +- Fix links to migration guide (#6331) +- Fix links to migration guide (#6331) +- Fix typo in readme file (#6312) +- Fix typos in the networkx codebase (#6335) +- Refactor vf2pp modules and test files (#6334) + +Contributors +------------ + +- 0ddoe_s +- Abangma Jessika +- Adam Li +- Adam Richardson +- Ali Faraji +- Alimi Qudirah +- Anurag Bhat +- Ben Heil +- Brian Hou +- Casper van Elteren +- danieleades +- Dan Schult +- ddelange +- Dilara Tekinoglu +- Dimitrios Papageorgiou +- Douglas K. G. Araujo +- Erik Welch +- George Watkins +- Guy Aglionby +- Isaac Western +- Jarrod Millman +- Jim Kitchen +- Juanita Gomez +- Kevin Brown +- Konstantinos Petridis +- ladykkk +- Lucas H. McCabe +- Ludovic Stephan +- Lukong123 +- Matt Schwennesen +- Michael Holtz +- Morrison Turnansky +- Mridul Seth +- nsengaw4c +- Okite chimaobi Samuel +- Paula Pérez Bianchi +- Radoslav Fulek +- reneechebbo +- Ross Barnowski +- Sebastiano Vigna +- stevenstrickler +- Sultan Orazbayev +- Tina Oberoi diff --git a/libraries/networkx/requirements.txt b/libraries/networkx/requirements.txt new file mode 100644 index 0000000..296d654 --- /dev/null +++ b/libraries/networkx/requirements.txt @@ -0,0 +1 @@ +numpy \ No newline at end of file diff --git a/libraries/numpy/examples/fastCopyAndTranspose.py b/libraries/numpy/examples/fastCopyAndTranspose.py new file mode 100644 index 0000000..133713e --- /dev/null +++ b/libraries/numpy/examples/fastCopyAndTranspose.py @@ -0,0 +1,5 @@ +import numpy as np + +a = np.array(2) +b = np.fastCopyAndTranspose(a) +print(b) \ No newline at end of file diff --git a/libraries/numpy/examples/msort.py b/libraries/numpy/examples/msort.py new file mode 100644 index 0000000..4325fd9 --- /dev/null +++ b/libraries/numpy/examples/msort.py @@ -0,0 +1,10 @@ +import numpy as np + +myArray = np.array([[0.434, 0.768, 0.54900530], + [0.36211, 0.3784, 0.2415], + [0.258, 0.52929049, 0.39172155]]) + +sorted = np.msort(myArray) + +print(f"Min element is {sorted[0][0]}") + diff --git a/libraries/numpy/library.json b/libraries/numpy/library.json new file mode 100644 index 0000000..5903ceb --- /dev/null +++ b/libraries/numpy/library.json @@ -0,0 +1,6 @@ +{ + "name": "numpy", + "ghurl": "https://github.com/numpy/numpy", + "baseversion": "v1.22.3", + "currentversion": "v1.24.2" +} \ No newline at end of file diff --git a/libraries/numpy/releasenotes/1.22.4-notes.rst b/libraries/numpy/releasenotes/1.22.4-notes.rst new file mode 100644 index 0000000..1f418ca --- /dev/null +++ b/libraries/numpy/releasenotes/1.22.4-notes.rst @@ -0,0 +1,62 @@ +.. currentmodule:: numpy + +========================== +NumPy 1.22.4 Release Notes +========================== + +NumPy 1.22.4 is a maintenance release that fixes bugs discovered after the +1.22.3 release. In addition, the wheels for this release are built using the +recently released Cython 0.29.30, which should fix the reported problems with +`debugging `_. + +The Python versions supported for this release are 3.8-3.10. 
Note that the Mac +wheels are based on OS X 10.15 rather than 10.9 that was used in previous +NumPy release cycles. + +Contributors +============ + +A total of 12 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* Alexander Shadchin +* Bas van Beek +* Charles Harris +* Hood Chatham +* Jarrod Millman +* John-Mark Gurney + +* Junyan Ou + +* Mariusz Felisiak + +* Ross Barnowski +* Sebastian Berg +* Serge Guelton +* Stefan van der Walt + +Pull requests merged +==================== + +A total of 22 pull requests were merged for this release. + +* `#21191 `__: TYP, BUG: Fix ``np.lib.stride_tricks`` re-exported under the... +* `#21192 `__: TST: Bump mypy from 0.931 to 0.940 +* `#21243 `__: MAINT: Explicitly re-export the types in ``numpy._typing`` +* `#21245 `__: MAINT: Specify sphinx, numpydoc versions for CI doc builds +* `#21275 `__: BUG: Fix typos +* `#21277 `__: ENH, BLD: Fix math feature detection for wasm +* `#21350 `__: MAINT: Fix failing simd and cygwin tests. +* `#21438 `__: MAINT: Fix failing Python 3.8 32-bit Windows test. +* `#21444 `__: BUG: add linux guard per #21386 +* `#21445 `__: BUG: Allow legacy dtypes to cast to datetime again +* `#21446 `__: BUG: Make mmap handling safer in frombuffer +* `#21447 `__: BUG: Stop using PyBytesObject.ob_shash deprecated in Python 3.11. +* `#21448 `__: ENH: Introduce numpy.core.setup_common.NPY_CXX_FLAGS +* `#21472 `__: BUG: Ensure compile errors are raised correclty +* `#21473 `__: BUG: Fix segmentation fault +* `#21474 `__: MAINT: Update doc requirements +* `#21475 `__: MAINT: Mark ``npy_memchr`` with ``no_sanitize("alignment")`` on clang +* `#21512 `__: DOC: Proposal - make the doc landing page cards more similar... +* `#21525 `__: MAINT: Update Cython version to 0.29.30. +* `#21536 `__: BUG: Fix GCC error during build configuration +* `#21541 `__: REL: Prepare for the NumPy 1.22.4 release. +* `#21547 `__: MAINT: Skip tests that fail on PyPy. + diff --git a/libraries/numpy/releasenotes/1.23.0-notes.rst b/libraries/numpy/releasenotes/1.23.0-notes.rst new file mode 100644 index 0000000..2301192 --- /dev/null +++ b/libraries/numpy/releasenotes/1.23.0-notes.rst @@ -0,0 +1,412 @@ +.. currentmodule:: numpy + +========================== +NumPy 1.23.0 Release Notes +========================== + +The NumPy 1.23.0 release continues the ongoing work to improve the handling and +promotion of dtypes, increase the execution speed, clarify the documentation, +and expire old deprecations. The highlights are: + +* Implementation of ``loadtxt`` in C, greatly improving its performance. +* Exposing DLPack at the Python level for easy data exchange. +* Changes to the promotion and comparisons of structured dtypes. +* Improvements to f2py. + +See below for the details, + + +New functions +============= + +* A masked array specialization of ``ndenumerate`` is now available as + ``numpy.ma.ndenumerate``. It provides an alternative to ``numpy.ndenumerate`` + and skips masked values by default. + + (`gh-20020 `__) + +* ``numpy.from_dlpack`` has been added to allow easy exchange of data using the + DLPack protocol. It accepts Python objects that implement the ``__dlpack__`` + and ``__dlpack_device__`` methods and returns a ndarray object which is + generally the view of the data of the input object. + + (`gh-21145 `__) + + +Deprecations +============ + +* Setting ``__array_finalize__`` to ``None`` is deprecated. 
It must now be + a method and may wish to call ``super().__array_finalize__(obj)`` after + checking for ``None`` or if the NumPy version is sufficiently new. + + (`gh-20766 `__) + +* Using ``axis=32`` (``axis=np.MAXDIMS``) in many cases had the + same meaning as ``axis=None``. This is deprecated and ``axis=None`` + must be used instead. + + (`gh-20920 `__) + +* The hook function ``PyDataMem_SetEventHook`` has been deprecated and the + demonstration of its use in tool/allocation_tracking has been removed. The + ability to track allocations is now built-in to python via ``tracemalloc``. + + (`gh-20394 `__) + +* ``numpy.distutils`` has been deprecated, as a result of ``distutils`` itself + being deprecated. It will not be present in NumPy for Python >= 3.12, and + will be removed completely 2 years after the release of Python 3.12 For more + details, see :ref:`distutils-status-migration`. + + (`gh-20875 `__) + +* ``numpy.loadtxt`` will now give a ``DeprecationWarning`` when an integer + ``dtype`` is requested but the value is formatted as a floating point number. + + (`gh-21663 `__) + + +Expired deprecations +==================== + +* The ``NpzFile.iteritems()`` and ``NpzFile.iterkeys()`` methods have been + removed as part of the continued removal of Python 2 compatibility. This + concludes the deprecation from 1.15. + + (`gh-16830 `__) + +* The ``alen`` and ``asscalar`` functions have been removed. + + (`gh-20414 `__) + +* The ``UPDATEIFCOPY`` array flag has been removed together with the enum + ``NPY_ARRAY_UPDATEIFCOPY``. The associated (and deprecated) + ``PyArray_XDECREF_ERR`` was also removed. These were all deprecated in 1.14. They + are replaced by ``NPY_ARRAY_WRITEBACKIFCOPY``, that requires calling + ``PyArray_ResolveWritebackIfCopy`` before the array is deallocated. + + (`gh-20589 `__) + +* Exceptions will be raised during array-like creation. When an object raised + an exception during access of the special attributes ``__array__`` or + ``__array_interface__``, this exception was usually ignored. This behaviour + was deprecated in 1.21, and the exception will now be raised. + + (`gh-20835 `__) + +* Multidimensional indexing with non-tuple values is not allowed. Previously, + code such as ``arr[ind]`` where ``ind = [[0, 1], [0, 1]]`` produced a + ``FutureWarning`` and was interpreted as a multidimensional index (i.e., + ``arr[tuple(ind)]``). Now this example is treated like an array index over a + single dimension (``arr[array(ind)]``). Multidimensional indexing with + anything but a tuple was deprecated in NumPy 1.15. + + (`gh-21029 `__) + +* Changing to a dtype of different size in F-contiguous arrays is no longer + permitted. Deprecated since Numpy 1.11.0. See below for an extended + explanation of the effects of this change. + + (`gh-20722 `__) + + +New Features +============ + +crackfortran has support for operator and assignment overloading +---------------------------------------------------------------- +``crackfortran`` parser now understands operator and assignment +definitions in a module. They are added in the ``body`` list of the +module which contains a new key ``implementedby`` listing the names +of the subroutines or functions implementing the operator or +assignment. + +(`gh-15006 `__) + +f2py supports reading access type attributes from derived type statements +------------------------------------------------------------------------- +As a result, one does not need to use ``public`` or ``private`` statements to +specify derived type access properties. 
+ +(`gh-15844 `__) + +New parameter ``ndmin`` added to ``genfromtxt`` +------------------------------------------------------------------------- +This parameter behaves the same as ``ndmin`` from ``numpy.loadtxt``. + +(`gh-20500 `__) + +``np.loadtxt`` now supports quote character and single converter function +------------------------------------------------------------------------- +``numpy.loadtxt`` now supports an additional ``quotechar`` keyword argument +which is not set by default. Using ``quotechar='"'`` will read quoted fields +as used by the Excel CSV dialect. + +Further, it is now possible to pass a single callable rather than a dictionary +for the ``converters`` argument. + +(`gh-20580 `__) + +Changing to dtype of a different size now requires contiguity of only the last axis +----------------------------------------------------------------------------------- +Previously, viewing an array with a dtype of a different item size required that +the entire array be C-contiguous. This limitation would unnecessarily force the +user to make contiguous copies of non-contiguous arrays before being able to +change the dtype. + +This change affects not only ``ndarray.view``, but other construction +mechanisms, including the discouraged direct assignment to ``ndarray.dtype``. + +This change expires the deprecation regarding the viewing of F-contiguous +arrays, described elsewhere in the release notes. + +(`gh-20722 `__) + +Deterministic output files for F2PY +----------------------------------- +For F77 inputs, ``f2py`` will generate ``modname-f2pywrappers.f`` +unconditionally, though these may be empty. For free-form inputs, +``modname-f2pywrappers.f``, ``modname-f2pywrappers2.f90`` will both be generated +unconditionally, and may be empty. This allows writing generic output rules in +``cmake`` or ``meson`` and other build systems. Older behavior can be restored +by passing ``--skip-empty-wrappers`` to ``f2py``. :ref:`f2py-meson` details usage. + +(`gh-21187 `__) + +``keepdims`` parameter for ``average`` +-------------------------------------- +The parameter ``keepdims`` was added to the functions ``numpy.average`` +and ``numpy.ma.average``. The parameter has the same meaning as it +does in reduction functions such as ``numpy.sum`` or ``numpy.mean``. + +(`gh-21485 `__) + +New parameter ``equal_nan`` added to ``np.unique`` +-------------------------------------------------- +``np.unique`` was changed in 1.21 to treat all ``NaN`` values as equal and return +a single ``NaN``. Setting ``equal_nan=False`` will restore pre-1.21 behavior +to treat ``NaNs`` as unique. Defaults to ``True``. + +(`gh-21623 `__) + + +Compatibility notes +=================== + +1D ``np.linalg.norm`` preserves float input types, even for scalar results +-------------------------------------------------------------------------- +Previously, this would promote to ``float64`` when the ``ord`` argument was +not one of the explicitly listed values, e.g. ``ord=3``:: + + >>> f32 = np.float32([1, 2]) + >>> np.linalg.norm(f32, 2).dtype + dtype('float32') + >>> np.linalg.norm(f32, 3) + dtype('float64') # numpy 1.22 + dtype('float32') # numpy 1.23 + +This change affects only ``float32`` and ``float16`` vectors with ``ord`` +other than ``-Inf``, ``0``, ``1``, ``2``, and ``Inf``. 
+ +(`gh-17709 `__) + +Changes to structured (void) dtype promotion and comparisons +------------------------------------------------------------ +In general, NumPy now defines correct, but slightly limited, promotion for +structured dtypes by promoting the subtypes of each field instead of raising +an exception:: + + >>> np.result_type(np.dtype("i,i"), np.dtype("i,d")) + dtype([('f0', '`__) + +``NPY_RELAXED_STRIDES_CHECKING`` has been removed +------------------------------------------------- +NumPy cannot be compiled with ``NPY_RELAXED_STRIDES_CHECKING=0`` +anymore. Relaxed strides have been the default for many years and +the option was initially introduced to allow a smoother transition. + +(`gh-20220 `__) + +``np.loadtxt`` has recieved several changes +------------------------------------------- + +The row counting of ``numpy.loadtxt`` was fixed. ``loadtxt`` ignores fully +empty lines in the file, but counted them towards ``max_rows``. +When ``max_rows`` is used and the file contains empty lines, these will now +not be counted. Previously, it was possible that the result contained fewer +than ``max_rows`` rows even though more data was available to be read. +If the old behaviour is required, ``itertools.islice`` may be used:: + + import itertools + lines = itertools.islice(open("file"), 0, max_rows) + result = np.loadtxt(lines, ...) + +While generally much faster and improved, ``numpy.loadtxt`` may now fail to +converter certain strings to numbers that were previously successfully read. +The most important cases for this are: + +* Parsing floating point values such as ``1.0`` into integers is now deprecated. +* Parsing hexadecimal floats such as ``0x3p3`` will fail +* An ``_`` was previously accepted as a thousands delimiter ``100_000``. + This will now result in an error. + +If you experience these limitations, they can all be worked around by passing +appropriate ``converters=``. NumPy now supports passing a single converter +to be used for all columns to make this more convenient. +For example, ``converters=float.fromhex`` can read hexadecimal float numbers +and ``converters=int`` will be able to read ``100_000``. + +Further, the error messages have been generally improved. However, this means +that error types may differ. In particularly, a ``ValueError`` is now always +raised when parsing of a single entry fails. + +(`gh-20580 `__) + + +Improvements +============ + +``ndarray.__array_finalize__`` is now callable +---------------------------------------------- +This means subclasses can now use ``super().__array_finalize__(obj)`` +without worrying whether ``ndarray`` is their superclass or not. +The actual call remains a no-op. + +(`gh-20766 `__) + +Add support for VSX4/Power10 +---------------------------------------------- +With VSX4/Power10 enablement, the new instructions available in +Power ISA 3.1 can be used to accelerate some NumPy operations, +e.g., floor_divide, modulo, etc. + +(`gh-20821 `__) + +``np.fromiter`` now accepts objects and subarrays +------------------------------------------------- +The ``numpy.fromiter`` function now supports object and +subarray dtypes. Please see he function documentation for +examples. + +(`gh-20993 `__) + +Math C library feature detection now uses correct signatures +------------------------------------------------------------ +Compiling is preceded by a detection phase to determine whether the +underlying libc supports certain math operations. Previously this code +did not respect the proper signatures. 
Fixing this enables compilation +for the ``wasm-ld`` backend (compilation for web assembly) and reduces +the number of warnings. + +(`gh-21154 `__) + +``np.kron`` now maintains subclass information +---------------------------------------------- +``np.kron`` maintains subclass information now such as masked arrays +while computing the Kronecker product of the inputs + +.. code-block:: python + + >>> x = ma.array([[1, 2], [3, 4]], mask=[[0, 1], [1, 0]]) + >>> np.kron(x,x) + masked_array( + data=[[1, --, --, --], + [--, 4, --, --], + [--, --, 4, --], + [--, --, --, 16]], + mask=[[False, True, True, True], + [ True, False, True, True], + [ True, True, False, True], + [ True, True, True, False]], + fill_value=999999) + +.. warning:: + ``np.kron`` output now follows ``ufunc`` ordering (``multiply``) + to determine the output class type + + .. code-block:: python + + >>> class myarr(np.ndarray): + >>> __array_priority__ = -1 + >>> a = np.ones([2, 2]) + >>> ma = myarray(a.shape, a.dtype, a.data) + >>> type(np.kron(a, ma)) == np.ndarray + False # Before it was True + >>> type(np.kron(a, ma)) == myarr + True + +(`gh-21262 `__) + + +Performance improvements and changes +==================================== + +Faster ``np.loadtxt`` +--------------------- +``numpy.loadtxt`` is now generally much faster than previously as most of it +is now implemented in C. + +(`gh-20580 `__) + +Faster reduction operators +-------------------------- +Reduction operations like ``numpy.sum``, ``numpy.prod``, ``numpy.add.reduce``, +``numpy.logical_and.reduce`` on contiguous integer-based arrays are now +much faster. + +(`gh-21001 `__) + +Faster ``np.where`` +------------------- +``numpy.where`` is now much faster than previously on unpredictable/random +input data. + +(`gh-21130 `__) + +Faster operations on NumPy scalars +---------------------------------- +Many operations on NumPy scalars are now significantly faster, although +rare operations (e.g. with 0-D arrays rather than scalars) may be slower +in some cases. +However, even with these improvements users who want the best performance +for their scalars, may want to convert a known NumPy scalar into a Python +one using ``scalar.item()``. + +(`gh-21188 `__) + +Faster ``np.kron`` +------------------ +``numpy.kron`` is about 80% faster as the product is now computed +using broadcasting. + +(`gh-21354 `__) diff --git a/libraries/numpy/releasenotes/1.23.1-notes.rst b/libraries/numpy/releasenotes/1.23.1-notes.rst new file mode 100644 index 0000000..3efc5dc --- /dev/null +++ b/libraries/numpy/releasenotes/1.23.1-notes.rst @@ -0,0 +1,45 @@ +.. currentmodule:: numpy + +========================== +NumPy 1.23.1 Release Notes +========================== + +NumPy 1.23.1 is a maintenance release that fixes bugs discovered after the +1.23.0 release. Notable fixes are: + +- Fix searchsorted for float16 NaNs +- Fix compilation on Apple M1 +- Fix KeyError in crackfortran operator support (Slycot) + +The Python version supported for this release are 3.8-3.10. + + +Contributors +============ + +A total of 7 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* Charles Harris +* Matthias Koeppe + +* Pranab Das + +* Rohit Goswami +* Sebastian Berg +* Serge Guelton +* Srimukh Sripada + + + +Pull requests merged +==================== + +A total of 8 pull requests were merged for this release. 
+ +* `#21866 `__: BUG: Fix discovered MachAr (still used within valgrind) +* `#21867 `__: BUG: Handle NaNs correctly for float16 during sorting +* `#21868 `__: BUG: Use ``keepdims`` during normalization in ``np.average`` and... +* `#21869 `__: DOC: mention changes to ``max_rows`` behaviour in ``np.loadtxt`` +* `#21870 `__: BUG: Reject non integer array-likes with size 1 in delete +* `#21949 `__: BLD: Make can_link_svml return False for 32bit builds on x86_64 +* `#21951 `__: BUG: Reorder extern "C" to only apply to function declarations... +* `#21952 `__: BUG: Fix KeyError in crackfortran operator support + diff --git a/libraries/numpy/releasenotes/1.23.2-notes.rst b/libraries/numpy/releasenotes/1.23.2-notes.rst new file mode 100644 index 0000000..1dc3bb7 --- /dev/null +++ b/libraries/numpy/releasenotes/1.23.2-notes.rst @@ -0,0 +1,50 @@ +.. currentmodule:: numpy + +========================== +NumPy 1.23.2 Release Notes +========================== + +NumPy 1.23.2 is a maintenance release that fixes bugs discovered after the +1.23.1 release. Notable features are: + +- Typing changes needed for Python 3.11 +- Wheels for Python 3.11.0rc1 + +The Python versions supported for this release are 3.8-3.11. + +Contributors +============ + +A total of 9 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* Alexander Grund + +* Bas van Beek +* Charles Harris +* Jon Cusick + +* Matti Picus +* Michael Osthege + +* Pal Barta + +* Ross Barnowski +* Sebastian Berg + +Pull requests merged +==================== + +A total of 15 pull requests were merged for this release. + +* `#22030 `__: ENH: Add ``__array_ufunc__`` typing support to the ``nin=1`` ufuncs +* `#22031 `__: MAINT, TYP: Fix ``np.angle`` dtype-overloads +* `#22032 `__: MAINT: Do not let ``_GenericAlias`` wrap the underlying classes'... +* `#22033 `__: TYP,MAINT: Allow ``einsum`` subscripts to be passed via integer... +* `#22034 `__: MAINT,TYP: Add object-overloads for the ``np.generic`` rich comparisons +* `#22035 `__: MAINT,TYP: Allow the ``squeeze`` and ``transpose`` method to... +* `#22036 `__: BUG: Fix subarray to object cast ownership details +* `#22037 `__: BUG: Use ``Popen`` to silently invoke f77 -v +* `#22038 `__: BUG: Avoid errors on NULL during deepcopy +* `#22039 `__: DOC: Add versionchanged for converter callable behavior. +* `#22057 `__: MAINT: Quiet the anaconda uploads. +* `#22078 `__: ENH: reorder includes for testing on top of system installations... +* `#22106 `__: TST: fix test_linear_interpolation_formula_symmetric +* `#22107 `__: BUG: Fix skip condition for test_loss_of_precision[complex256] +* `#22115 `__: BLD: Build python3.11.0rc1 wheels. diff --git a/libraries/numpy/releasenotes/1.23.3-notes.rst b/libraries/numpy/releasenotes/1.23.3-notes.rst new file mode 100644 index 0000000..882206f --- /dev/null +++ b/libraries/numpy/releasenotes/1.23.3-notes.rst @@ -0,0 +1,56 @@ +.. currentmodule:: numpy + +========================== +NumPy 1.23.3 Release Notes +========================== +NumPy 1.23.3 is a maintenance release that fixes bugs discovered after the +1.23.2 release. There is no major theme for this release, the main improvements +are for some downstream builds and some annotation corner cases. The Python +versions supported for this release are 3.8-3.11. + +Note that we will move to MacOS 11 for the NumPy 1.23.4 release, the 10.15 +version currently used will no longer be supported by our build infrastructure +at that point. 
+ +Contributors +============ + +A total of 16 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* Aaron Meurer +* Bas van Beek +* Charles Harris +* Ganesh Kathiresan +* Gavin Zhang + +* Iantra Solari+ +* Jyn Spring 琴春 + +* Matti Picus +* Rafael Cardoso Fernandes Sousa +* Rafael Sousa + +* Ralf Gommers +* Rin Cat (鈴猫) + +* Saransh Chopra + +* Sayed Adel +* Sebastian Berg +* Serge Guelton + +Pull requests merged +==================== + +A total of 14 pull requests were merged for this release. + +* `#22136 `__: BLD: Add Python 3.11 wheels to aarch64 build +* `#22148 `__: MAINT: Update setup.py for Python 3.11. +* `#22155 `__: CI: Test NumPy build against old versions of GCC(6, 7, 8) +* `#22156 `__: MAINT: support IBM i system +* `#22195 `__: BUG: Fix circleci build +* `#22214 `__: BUG: Expose heapsort algorithms in a shared header +* `#22215 `__: BUG: Support using libunwind for backtrack +* `#22216 `__: MAINT: fix an incorrect pointer type usage in f2py +* `#22220 `__: BUG: change overloads to play nice with pyright. +* `#22221 `__: TST,BUG: Use fork context to fix MacOS savez test +* `#22222 `__: TYP,BUG: Reduce argument validation in C-based ``__class_getitem__`` +* `#22223 `__: TST: ensure ``np.equal.reduce`` raises a ``TypeError`` +* `#22224 `__: BUG: Fix the implementation of numpy.array_api.vecdot +* `#22230 `__: BUG: Better report integer division overflow (backport) diff --git a/libraries/numpy/releasenotes/1.23.4-notes.rst b/libraries/numpy/releasenotes/1.23.4-notes.rst new file mode 100644 index 0000000..b92b242 --- /dev/null +++ b/libraries/numpy/releasenotes/1.23.4-notes.rst @@ -0,0 +1,48 @@ +.. currentmodule:: numpy + +========================== +NumPy 1.23.4 Release Notes +========================== +NumPy 1.23.4 is a maintenance release that fixes bugs discovered after the +1.23.3 release and keeps the build infrastructure current. The main +improvements are fixes for some annotation corner cases, a fix for a long time +``nested_iters`` memory leak, and a fix of complex vector dot for very large +arrays. The Python versions supported for this release are 3.8-3.11. + +Note that the mypy version needs to be 0.981+ if you test using Python 3.10.7, +otherwise the typing tests will fail. + +Contributors +============ + +A total of 8 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* Bas van Beek +* Charles Harris +* Matthew Barber +* Matti Picus +* Ralf Gommers +* Ross Barnowski +* Sebastian Berg +* Sicheng Zeng + + +Pull requests merged +==================== + +A total of 13 pull requests were merged for this release. + +* `#22368 `__: BUG: Add ``__array_api_version__`` to ``numpy.array_api`` namespace +* `#22370 `__: MAINT: update sde toolkit to 9.0, fix download link +* `#22382 `__: BLD: use macos-11 image on azure, macos-1015 is deprecated +* `#22383 `__: MAINT: random: remove ``get_info`` from "extending with Cython"... +* `#22384 `__: BUG: Fix complex vector dot with more than NPY_CBLAS_CHUNK elements +* `#22387 `__: REV: Loosen ``lookfor``'s import try/except again +* `#22388 `__: TYP,ENH: Mark ``numpy.typing`` protocols as runtime checkable +* `#22389 `__: TYP,MAINT: Change more overloads to play nice with pyright +* `#22390 `__: TST,TYP: Bump mypy to 0.981 +* `#22391 `__: DOC: Update delimiter param description. +* `#22392 `__: BUG: Memory leaks in numpy.nested_iters +* `#22413 `__: REL: Prepare for the NumPy 1.23.4 release. 
+* `#22424 `__: TST: Fix failing aarch64 wheel builds. + diff --git a/libraries/numpy/releasenotes/1.23.5-notes.rst b/libraries/numpy/releasenotes/1.23.5-notes.rst new file mode 100644 index 0000000..8e14794 --- /dev/null +++ b/libraries/numpy/releasenotes/1.23.5-notes.rst @@ -0,0 +1,39 @@ +.. currentmodule:: numpy + +========================== +NumPy 1.23.5 Release Notes +========================== +NumPy 1.23.5 is a maintenance release that fixes bugs discovered after the +1.23.4 release and keeps the build infrastructure current. +The Python versions supported for this release are 3.8-3.11. + +Contributors +============ + +A total of 7 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* @DWesl +* Aayush Agrawal + +* Adam Knapp + +* Charles Harris +* Navpreet Singh + +* Sebastian Berg +* Tania Allard + +Pull requests merged +==================== + +A total of 10 pull requests were merged for this release. + +* `#22489 `__: TST, MAINT: Replace most setup with setup_method (also teardown) +* `#22490 `__: MAINT, CI: Switch to cygwin/cygwin-install-action@v2 +* `#22494 `__: TST: Make test_partial_iteration_cleanup robust but require leak... +* `#22592 `__: MAINT: Ensure graceful handling of large header sizes +* `#22593 `__: TYP: Spelling alignment for array flag literal +* `#22594 `__: BUG: Fix bounds checking for ``random.logseries`` +* `#22595 `__: DEV: Update GH actions and Dockerfile for Gitpod +* `#22596 `__: CI: Only fetch in actions/checkout +* `#22597 `__: BUG: Decrement ref count in gentype_reduce if allocated memory... +* `#22625 `__: BUG: Histogramdd breaks on big arrays in Windows + diff --git a/libraries/numpy/releasenotes/1.24.0-notes.rst b/libraries/numpy/releasenotes/1.24.0-notes.rst new file mode 100644 index 0000000..bcccd8c --- /dev/null +++ b/libraries/numpy/releasenotes/1.24.0-notes.rst @@ -0,0 +1,515 @@ +.. currentmodule:: numpy + +======================== +NumPy 1.24 Release Notes +======================== +The NumPy 1.24.0 release continues the ongoing work to improve the handling and +promotion of dtypes, increase the execution speed, and clarify the +documentation. There are also a large number of new and expired deprecations +due to changes in promotion and cleanups. This might be called a deprecation +release. Highlights are + +* Many new deprecations, check them out. +* Many expired deprecations, +* New F2PY features and fixes. +* New "dtype" and "casting" keywords for stacking functions. + +See below for the details, + +This release supports Python versions 3.8-3.11. + + +Deprecations +============ + +Deprecate fastCopyAndTranspose and PyArray_CopyAndTranspose +----------------------------------------------------------- +The ``numpy.fastCopyAndTranspose`` function has been deprecated. Use the +corresponding copy and transpose methods directly:: + + arr.T.copy() + +The underlying C function ``PyArray_CopyAndTranspose`` has also been deprecated +from the NumPy C-API. + +(`gh-22313 `__) + +Conversion of out-of-bound Python integers +------------------------------------------ +Attempting a conversion from a Python integer to a NumPy value will now always +check whether the result can be represented by NumPy. This means the following +examples will fail in the future and give a ``DeprecationWarning`` now:: + + np.uint8(-1) + np.array([3000], dtype=np.int8) + +Many of these did succeed before. 
Such code was mainly useful for unsigned +integers with negative values such as ``np.uint8(-1)`` giving +``np.iinfo(np.uint8).max``. + +Note that conversion between NumPy integers is unaffected, so that +``np.array(-1).astype(np.uint8)`` continues to work and use C integer overflow +logic. For negative values, it will also work to view the array: +``np.array(-1, dtype=np.int8).view(np.uint8)``. +In some cases, using ``np.iinfo(np.uint8).max`` or ``val % 2**8`` may also +work well. + +In rare cases input data may mix both negative values and very large unsigned +values (i.e. ``-1`` and ``2**63``). There it is unfortunately necessary +to use ``%`` on the Python value or use signed or unsigned conversion +depending on whether negative values are expected. + +(`gh-22385 `__) + +Deprecate ``msort`` +------------------- +The ``numpy.msort`` function is deprecated. Use ``np.sort(a, axis=0)`` instead. + +(`gh-22456 `__) + +``np.str0`` and similar are now deprecated +------------------------------------------ +The scalar type aliases ending in a 0 bit size: ``np.object0``, ``np.str0``, +``np.bytes0``, ``np.void0``, ``np.int0``, ``np.uint0`` as well as ``np.bool8`` +are now deprecated and will eventually be removed. + +(`gh-22607 `__) + + +Expired deprecations +==================== + +* The ``normed`` keyword argument has been removed from + `np.histogram`, `np.histogram2d`, and `np.histogramdd`. + Use ``density`` instead. If ``normed`` was passed by + position, ``density`` is now used. + + (`gh-21645 `__) + +* Ragged array creation will now always raise a ``ValueError`` unless + ``dtype=object`` is passed. This includes very deeply nested sequences. + + (`gh-22004 `__) + +* Support for Visual Studio 2015 and earlier has been removed. + +* Support for the Windows Interix POSIX interop layer has been removed. + + (`gh-22139 `__) + +* Support for Cygwin < 3.3 has been removed. + + (`gh-22159 `__) + +* The mini() method of ``np.ma.MaskedArray`` has been removed. Use either + ``np.ma.MaskedArray.min()`` or ``np.ma.minimum.reduce()``. + +* The single-argument form of ``np.ma.minimum`` and ``np.ma.maximum`` has been + removed. Use ``np.ma.minimum.reduce()`` or ``np.ma.maximum.reduce()`` + instead. + + (`gh-22228 `__) + +* Passing dtype instances other than the canonical (mainly native byte-order) + ones to ``dtype=`` or ``signature=`` in ufuncs will now raise a + ``TypeError``. We recommend passing the strings ``"int8"`` or scalar types + ``np.int8`` since the byte-order, datetime/timedelta unit, etc. are never + enforced. (Initially deprecated in NumPy 1.21.) + + (`gh-22540 `__) + +* The ``dtype=`` argument to comparison ufuncs is now applied correctly. That + means that only ``bool`` and ``object`` are valid values and ``dtype=object`` + is enforced. + + (`gh-22541 `__) + +* The deprecation for the aliases ``np.object``, ``np.bool``, ``np.float``, + ``np.complex``, ``np.str``, and ``np.int`` is expired (introduces NumPy + 1.20). Some of these will now give a FutureWarning in addition to raising an + error since they will be mapped to the NumPy scalars in the future. 
+ + (`gh-22607 `__) + + +Compatibility notes +=================== + +``array.fill(scalar)`` may behave slightly different +---------------------------------------------------- +``numpy.ndarray.fill`` may in some cases behave slightly different now due to +the fact that the logic is aligned with item assignment:: + + arr = np.array([1]) # with any dtype/value + arr.fill(scalar) + # is now identical to: + arr[0] = scalar + +Previously casting may have produced slightly different answers when using +values that could not be represented in the target ``dtype`` or when the target +had ``object`` dtype. + +(`gh-20924 `__) + +Subarray to object cast now copies +---------------------------------- +Casting a dtype that includes a subarray to an object will now ensure a copy of +the subarray. Previously an unsafe view was returned:: + + arr = np.ones(3, dtype=[("f", "i", 3)]) + subarray_fields = arr.astype(object)[0] + subarray = subarray_fields[0] # "f" field + + np.may_share_memory(subarray, arr) + +Is now always false. While previously it was true for the specific cast. + +(`gh-21925 `__) + +Returned arrays respect uniqueness of dtype kwarg objects +--------------------------------------------------------- +When the ``dtype`` keyword argument is used with :py:func:`np.array()` or +:py:func:`asarray()`, the dtype of the returned array now always exactly +matches the dtype provided by the caller. + +In some cases this change means that a *view* rather than the input array is +returned. The following is an example for this on 64bit Linux where ``long`` +and ``longlong`` are the same precision but different ``dtypes``:: + + >>> arr = np.array([1, 2, 3], dtype="long") + >>> new_dtype = np.dtype("longlong") + >>> new = np.asarray(arr, dtype=new_dtype) + >>> new.dtype is new_dtype + True + >>> new is arr + False + +Before the change, the ``dtype`` did not match because ``new is arr`` was +``True``. + +(`gh-21995 `__) + +DLPack export raises ``BufferError`` +------------------------------------ +When an array buffer cannot be exported via DLPack a ``BufferError`` is now +always raised where previously ``TypeError`` or ``RuntimeError`` was raised. +This allows falling back to the buffer protocol or ``__array_interface__`` when +DLPack was tried first. + +(`gh-22542 `__) + +NumPy builds are no longer tested on GCC-6 +------------------------------------------ +Ubuntu 18.04 is deprecated for GitHub actions and GCC-6 is not available on +Ubuntu 20.04, so builds using that compiler are no longer tested. We still test +builds using GCC-7 and GCC-8. + +(`gh-22598 `__) + + +New Features +============ + +New attribute ``symbol`` added to polynomial classes +---------------------------------------------------- +The polynomial classes in the ``numpy.polynomial`` package have a new +``symbol`` attribute which is used to represent the indeterminate of the +polynomial. This can be used to change the value of the variable when +printing:: + + >>> P_y = np.polynomial.Polynomial([1, 0, -1], symbol="y") + >>> print(P_y) + 1.0 + 0.0·y¹ - 1.0·y² + +Note that the polynomial classes only support 1D polynomials, so operations +that involve polynomials with different symbols are disallowed when the result +would be multivariate:: + + >>> P = np.polynomial.Polynomial([1, -1]) # default symbol is "x" + >>> P_z = np.polynomial.Polynomial([1, 1], symbol="z") + >>> P * P_z + Traceback (most recent call last) + ... + ValueError: Polynomial symbols differ + +The symbol can be any valid Python identifier. 
The default is ``symbol=x``, +consistent with existing behavior. + +(`gh-16154 `__) + +F2PY support for Fortran ``character`` strings +---------------------------------------------- +F2PY now supports wrapping Fortran functions with: + +* character (e.g. ``character x``) +* character array (e.g. ``character, dimension(n) :: x``) +* character string (e.g. ``character(len=10) x``) +* and character string array (e.g. ``character(len=10), dimension(n, m) :: x``) + +arguments, including passing Python unicode strings as Fortran character string +arguments. + +(`gh-19388 `__) + +New function ``np.show_runtime`` +-------------------------------- +A new function ``numpy.show_runtime`` has been added to display the runtime +information of the machine in addition to ``numpy.show_config`` which displays +the build-related information. + +(`gh-21468 `__) + +``strict`` option for ``testing.assert_array_equal`` +---------------------------------------------------- +The ``strict`` option is now available for ``testing.assert_array_equal``. +Setting ``strict=True`` will disable the broadcasting behaviour for scalars and +ensure that input arrays have the same data type. + +(`gh-21595 `__) + +New parameter ``equal_nan`` added to ``np.unique`` +-------------------------------------------------- +``np.unique`` was changed in 1.21 to treat all ``NaN`` values as equal and +return a single ``NaN``. Setting ``equal_nan=False`` will restore pre-1.21 +behavior to treat ``NaNs`` as unique. Defaults to ``True``. + +(`gh-21623 `__) + +``casting`` and ``dtype`` keyword arguments for ``numpy.stack`` +--------------------------------------------------------------- +The ``casting`` and ``dtype`` keyword arguments are now available for +``numpy.stack``. To use them, write ``np.stack(..., dtype=None, +casting='same_kind')``. + +``casting`` and ``dtype`` keyword arguments for ``numpy.vstack`` +---------------------------------------------------------------- +The ``casting`` and ``dtype`` keyword arguments are now available for +``numpy.vstack``. To use them, write ``np.vstack(..., dtype=None, +casting='same_kind')``. + +``casting`` and ``dtype`` keyword arguments for ``numpy.hstack`` +---------------------------------------------------------------- +The ``casting`` and ``dtype`` keyword arguments are now available for +``numpy.hstack``. To use them, write ``np.hstack(..., dtype=None, +casting='same_kind')``. + +(`gh-21627 `__) + +The bit generator underlying the singleton RandomState can be changed +--------------------------------------------------------------------- +The singleton ``RandomState`` instance exposed in the ``numpy.random`` module +is initialized at startup with the ``MT19937`` bit generator. The new function +``set_bit_generator`` allows the default bit generator to be replaced with a +user-provided bit generator. This function has been introduced to provide a +method allowing seamless integration of a high-quality, modern bit generator in +new code with existing code that makes use of the singleton-provided random +variate generating functions. The companion function ``get_bit_generator`` +returns the current bit generator being used by the singleton ``RandomState``. +This is provided to simplify restoring the original source of randomness if +required. + +The preferred method to generate reproducible random numbers is to use a modern +bit generator in an instance of ``Generator``. 
The function ``default_rng`` +simplifies instantiation:: + + >>> rg = np.random.default_rng(3728973198) + >>> rg.random() + +The same bit generator can then be shared with the singleton instance so that +calling functions in the ``random`` module will use the same bit generator:: + + >>> orig_bit_gen = np.random.get_bit_generator() + >>> np.random.set_bit_generator(rg.bit_generator) + >>> np.random.normal() + +The swap is permanent (until reversed) and so any call to functions in the +``random`` module will use the new bit generator. The original can be restored +if required for code to run correctly:: + + >>> np.random.set_bit_generator(orig_bit_gen) + +(`gh-21976 `__) + +``np.void`` now has a ``dtype`` argument +---------------------------------------- +NumPy now allows constructing structured void scalars directly by +passing the ``dtype`` argument to ``np.void``. + +(`gh-22316 `__) + + +Improvements +============ + +F2PY Improvements +----------------- +* The generated extension modules don't use the deprecated NumPy-C API anymore +* Improved ``f2py`` generated exception messages +* Numerous bug and ``flake8`` warning fixes +* various CPP macros that one can use within C-expressions of signature files + are prefixed with ``f2py_``. For example, one should use ``f2py_len(x)`` + instead of ``len(x)`` +* A new construct ``character(f2py_len=...)`` is introduced to support + returning assumed length character strings (e.g. ``character(len=*)``) from + wrapper functions + +A hook to support rewriting ``f2py`` internal data structures after reading all +its input files is introduced. This is required, for instance, for BC of SciPy +support where character arguments are treated as character strings arguments in +``C`` expressions. + +(`gh-19388 `__) + +IBM zSystems Vector Extension Facility (SIMD) +--------------------------------------------- +Added support for SIMD extensions of zSystem (z13, z14, z15), through the +universal intrinsics interface. This support leads to performance improvements +for all SIMD kernels implemented using the universal intrinsics, including the +following operations: rint, floor, trunc, ceil, sqrt, absolute, square, +reciprocal, tanh, sin, cos, equal, not_equal, greater, greater_equal, less, +less_equal, maximum, minimum, fmax, fmin, argmax, argmin, add, subtract, +multiply, divide. + +(`gh-20913 `__) + +NumPy now gives floating point errors in casts +---------------------------------------------- +In most cases, NumPy previously did not give floating point warnings or errors +when these happened during casts. For examples, casts like:: + + np.array([2e300]).astype(np.float32) # overflow for float32 + np.array([np.inf]).astype(np.int64) + +Should now generally give floating point warnings. These warnings should warn +that floating point overflow occurred. For errors when converting floating +point values to integers users should expect invalid value warnings. + +Users can modify the behavior of these warnings using ``np.errstate``. + +Note that for float to int casts, the exact warnings that are given may +be platform dependent. For example:: + + arr = np.full(100, value=1000, dtype=np.float64) + arr.astype(np.int8) + +May give a result equivalent to (the intermediate cast means no warning is +given):: + + arr.astype(np.int64).astype(np.int8) + +May return an undefined result, with a warning set:: + + RuntimeWarning: invalid value encountered in cast + +The precise behavior is subject to the C99 standard and its implementation in +both software and hardware. 
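+
+As an editorial sketch (not part of the upstream notes, assuming NumPy >= 1.24),
+``np.errstate`` can be used to silence or escalate these new cast warnings:
+
+.. code-block:: python
+
+    import numpy as np
+
+    # Escalate the overflow warning from a lossy cast into an exception
+    with np.errstate(over="raise"):
+        try:
+            np.array([2e300]).astype(np.float32)
+        except FloatingPointError as exc:
+            print("caught:", exc)
+
+    # Or silence the warnings; the integer result of casting inf stays
+    # platform dependent, as described above
+    with np.errstate(invalid="ignore", over="ignore"):
+        np.array([np.inf]).astype(np.int64)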
+ +(`gh-21437 `__) + +F2PY supports the value attribute +--------------------------------- +The Fortran standard requires that variables declared with the ``value`` +attribute must be passed by value instead of reference. F2PY now supports this +use pattern correctly. So ``integer, intent(in), value :: x`` in Fortran codes +will have correct wrappers generated. + +(`gh-21807 `__) + +Added pickle support for third-party BitGenerators +-------------------------------------------------- +The pickle format for bit generators was extended to allow each bit generator +to supply its own constructor when during pickling. Previous versions of NumPy +only supported unpickling ``Generator`` instances created with one of the core +set of bit generators supplied with NumPy. Attempting to unpickle a +``Generator`` that used a third-party bit generators would fail since the +constructor used during the unpickling was only aware of the bit generators +included in NumPy. + +(`gh-22014 `__) + +arange() now explicitly fails with dtype=str +--------------------------------------------- +Previously, the ``np.arange(n, dtype=str)`` function worked for ``n=1`` and +``n=2``, but would raise a non-specific exception message for other values of +``n``. Now, it raises a `TypeError` informing that ``arange`` does not support +string dtypes:: + + >>> np.arange(2, dtype=str) + Traceback (most recent call last) + ... + TypeError: arange() not supported for inputs with DType . + +(`gh-22055 `__) + +``numpy.typing`` protocols are now runtime checkable +---------------------------------------------------- +The protocols used in ``numpy.typing.ArrayLike`` and ``numpy.typing.DTypeLike`` +are now properly marked as runtime checkable, making them easier to use for +runtime type checkers. + +(`gh-22357 `__) + + +Performance improvements and changes +==================================== + +Faster version of ``np.isin`` and ``np.in1d`` for integer arrays +---------------------------------------------------------------- +``np.in1d`` (used by ``np.isin``) can now switch to a faster algorithm (up to +>10x faster) when it is passed two integer arrays. This is often automatically +used, but you can use ``kind="sort"`` or ``kind="table"`` to force the old or +new method, respectively. + +(`gh-12065 `__) + +Faster comparison operators +---------------------------- +The comparison functions (``numpy.equal``, ``numpy.not_equal``, ``numpy.less``, +``numpy.less_equal``, ``numpy.greater`` and ``numpy.greater_equal``) are now +much faster as they are now vectorized with universal intrinsics. For a CPU +with SIMD extension AVX512BW, the performance gain is up to 2.57x, 1.65x and +19.15x for integer, float and boolean data types, respectively (with N=50000). 
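+
+For the ``np.isin``/``np.in1d`` speedup noted above, the new ``kind`` keyword
+can be exercised as in the following editorial sketch (not part of the upstream
+notes, assuming NumPy >= 1.24):
+
+.. code-block:: python
+
+    import numpy as np
+
+    rng = np.random.default_rng(0)
+    haystack = rng.integers(0, 100_000, size=1_000_000)
+    needles = rng.integers(0, 100_000, size=10_000)
+
+    auto = np.isin(needles, haystack)                    # NumPy picks the algorithm
+    by_sort = np.isin(needles, haystack, kind="sort")    # previous sort-based path
+    by_table = np.isin(needles, haystack, kind="table")  # new lookup-table path
+
+    # All strategies agree; "table" trades memory (proportional to the value
+    # range of the inputs) for speed on integer data.
+    assert (auto == by_sort).all() and (auto == by_table).all()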
+ +(`gh-21483 `__) + + +Changes +======= + +Better reporting of integer division overflow +--------------------------------------------- +Integer division overflow of scalars and arrays used to provide a +``RuntimeWarning`` and the return value was undefined leading to crashes at +rare occasions:: + + >>> np.array([np.iinfo(np.int32).min]*10, dtype=np.int32) // np.int32(-1) + :1: RuntimeWarning: divide by zero encountered in floor_divide + array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32) + +Integer division overflow now returns the input dtype's minimum value and raise +the following ``RuntimeWarning``:: + + >>> np.array([np.iinfo(np.int32).min]*10, dtype=np.int32) // np.int32(-1) + :1: RuntimeWarning: overflow encountered in floor_divide + array([-2147483648, -2147483648, -2147483648, -2147483648, -2147483648, + -2147483648, -2147483648, -2147483648, -2147483648, -2147483648], + dtype=int32) + +(`gh-21506 `__) + +``masked_invalid`` now modifies the mask in-place +------------------------------------------------- +When used with ``copy=False``, ``numpy.ma.masked_invalid`` now modifies the +input masked array in-place. This makes it behave identically to +``masked_where`` and better matches the documentation. + +(`gh-22046 `__) + +``nditer``/``NpyIter`` allows all allocating all operands +--------------------------------------------------------- +The NumPy iterator available through ``np.nditer`` in Python and as ``NpyIter`` +in C now supports allocating all arrays. The iterator shape defaults to ``()`` +in this case. The operands dtype must be provided, since a "common dtype" +cannot be inferred from the other inputs. + +(`gh-22457 `__) diff --git a/libraries/numpy/releasenotes/1.24.1-notes.rst b/libraries/numpy/releasenotes/1.24.1-notes.rst new file mode 100644 index 0000000..c346f6d --- /dev/null +++ b/libraries/numpy/releasenotes/1.24.1-notes.rst @@ -0,0 +1,50 @@ +.. currentmodule:: numpy + +========================== +NumPy 1.24.1 Release Notes +========================== +NumPy 1.24.1 is a maintenance release that fixes bugs and regressions discovered after the +1.24.0 release. The Python versions supported by this release are 3.8-3.11. + +Contributors +============ + +A total of 12 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* Andrew Nelson +* Ben Greiner + +* Charles Harris +* Clément Robert +* Matteo Raso +* Matti Picus +* Melissa Weber Mendonça +* Miles Cranmer +* Ralf Gommers +* Rohit Goswami +* Sayed Adel +* Sebastian Berg + +Pull requests merged +==================== + +A total of 18 pull requests were merged for this release. + +* `#22820 `__: BLD: add workaround in setup.py for newer setuptools +* `#22830 `__: BLD: CIRRUS_TAG redux +* `#22831 `__: DOC: fix a couple typos in 1.23 notes +* `#22832 `__: BUG: Fix refcounting errors found using pytest-leaks +* `#22834 `__: BUG, SIMD: Fix invalid value encountered in several ufuncs +* `#22837 `__: TST: ignore more np.distutils.log imports +* `#22839 `__: BUG: Do not use getdata() in np.ma.masked_invalid +* `#22847 `__: BUG: Ensure correct behavior for rows ending in delimiter in... 
+* `#22848 `__: BUG, SIMD: Fix the bitmask of the boolean comparison +* `#22857 `__: BLD: Help raspian arm + clang 13 about __builtin_mul_overflow +* `#22858 `__: API: Ensure a full mask is returned for masked_invalid +* `#22866 `__: BUG: Polynomials now copy properly (#22669) +* `#22867 `__: BUG, SIMD: Fix memory overlap in ufunc comparison loops +* `#22868 `__: BUG: Fortify string casts against floating point warnings +* `#22875 `__: TST: Ignore nan-warnings in randomized out tests +* `#22883 `__: MAINT: restore npymath implementations needed for freebsd +* `#22884 `__: BUG: Fix integer overflow in in1d for mixed integer dtypes #22877 +* `#22887 `__: BUG: Use whole file for encoding checks with ``charset_normalizer``. diff --git a/libraries/numpy/releasenotes/1.24.2-notes.rst b/libraries/numpy/releasenotes/1.24.2-notes.rst new file mode 100644 index 0000000..9e94122 --- /dev/null +++ b/libraries/numpy/releasenotes/1.24.2-notes.rst @@ -0,0 +1,51 @@ +.. currentmodule:: numpy + +========================== +NumPy 1.24.2 Release Notes +========================== +NumPy 1.24.2 is a maintenance release that fixes bugs and regressions discovered after the +1.24.1 release. The Python versions supported by this release are 3.8-3.11. + +Contributors +============ + +A total of 14 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* Bas van Beek +* Charles Harris +* Khem Raj + +* Mark Harfouche +* Matti Picus +* Panagiotis Zestanakis + +* Peter Hawkins +* Pradipta Ghosh +* Ross Barnowski +* Sayed Adel +* Sebastian Berg +* Syam Gadde + +* dmbelov + +* pkubaj + + +Pull requests merged +==================== + +A total of 17 pull requests were merged for this release. + +* `#22965 `__: MAINT: Update python 3.11-dev to 3.11. +* `#22966 `__: DOC: Remove dangling deprecation warning +* `#22967 `__: ENH: Detect CPU features on FreeBSD/powerpc64* +* `#22968 `__: BUG: np.loadtxt cannot load text file with quoted fields separated... +* `#22969 `__: TST: Add fixture to avoid issue with randomizing test order. +* `#22970 `__: BUG: Fix fill violating read-only flag. (#22959) +* `#22971 `__: MAINT: Add additional information to missing scalar AttributeError +* `#22972 `__: MAINT: Move export for scipy arm64 helper into main module +* `#22976 `__: BUG, SIMD: Fix spurious invalid exception for sin/cos on arm64/clang +* `#22989 `__: BUG: Ensure correct loop order in sin, cos, and arctan2 +* `#23030 `__: DOC: Add version added information for the strict parameter in... +* `#23031 `__: BUG: use ``_Alignof`` rather than ``offsetof()`` on most compilers +* `#23147 `__: BUG: Fix for npyv__trunc_s32_f32 (VXE) +* `#23148 `__: BUG: Fix integer / float scalar promotion +* `#23149 `__: BUG: Add missing header. +* `#23150 `__: TYP, MAINT: Add a missing explicit ``Any`` parameter to the ``npt.ArrayLike``... 
+* `#23161 `__: BLD: remove redundant definition of npy_nextafter [wheel build] diff --git a/libraries/pandas/examples/2.infer_datetime_format.py b/libraries/pandas/examples/2.infer_datetime_format.py new file mode 100644 index 0000000..c1d236f --- /dev/null +++ b/libraries/pandas/examples/2.infer_datetime_format.py @@ -0,0 +1,18 @@ +#https://github.com/autogluon/autogluon/blob/3cef80b27b87987fe5ecd0be8b4b5b2ca23e7427/tabular/src/autogluon/tabular/models/tab_transformer/tab_transformer_encoder.py#L600-L600 +import pandas as pd +from pandas import DataFrame +import numpy as np + +def make_date(df: DataFrame, date_field: str): + "Make sure `df[field_name]` is of the right date type." + field_dtype = df[date_field].dtype + if isinstance(field_dtype, pd.core.dtypes.dtypes.DatetimeTZDtype): + field_dtype = np.datetime64 + if not np.issubdtype(field_dtype, np.datetime64): + df[date_field] = pd.to_datetime(df[date_field], infer_datetime_format=True) + +df = pd.DataFrame({ + 'name': ['alice','bob','charlie'], + 'date_of_birth': ['10/25/2005','10/29/2002','01/01/2001'] +}) +make_date(df, 'date_of_birth') \ No newline at end of file diff --git a/libraries/pandas/examples/Categorical.to_dense().py b/libraries/pandas/examples/Categorical.to_dense().py new file mode 100644 index 0000000..43aae64 --- /dev/null +++ b/libraries/pandas/examples/Categorical.to_dense().py @@ -0,0 +1,5 @@ +import pandas + +cat = pandas.Categorical(["a", "b", "c", "a"], ordered=True) +dense_cat = cat.to_dense() +print(dense_cat) \ No newline at end of file diff --git a/libraries/pandas/examples/ExcelWriter.save().py b/libraries/pandas/examples/ExcelWriter.save().py new file mode 100644 index 0000000..1599491 --- /dev/null +++ b/libraries/pandas/examples/ExcelWriter.save().py @@ -0,0 +1,16 @@ +import pandas as pd + +# Create multiple lists +technologies = ['Spark','Pandas','Java','Python', 'PHP'] +fee = [25000,20000,15000,15000,18000] +duration = ['5o Days','35 Days','40 days','30 Days', '30 Days'] +discount = [2000,1000,800,500,800] +columns=['Courses','Fee','Duration','Discount'] + +# Create DataFrame from multiple lists +df = pd.DataFrame(list(zip(technologies,fee,duration,discount)), columns=columns) + +writer = pd.ExcelWriter('output.xlsx') +df.to_excel(writer, sheet_name='Sheet1') +writer.save() + diff --git a/libraries/pandas/examples/Index.is_boolean.py b/libraries/pandas/examples/Index.is_boolean.py new file mode 100644 index 0000000..f398570 --- /dev/null +++ b/libraries/pandas/examples/Index.is_boolean.py @@ -0,0 +1,6 @@ +import pandas as pd + +indx1 = pd.Index([[1, 2, 2, 3], [3, 3]]) + +if indx1.is_boolean(): + print("Provided index is boolean") \ No newline at end of file diff --git a/libraries/pandas/examples/Index.is_mixed.py b/libraries/pandas/examples/Index.is_mixed.py new file mode 100644 index 0000000..1924fea --- /dev/null +++ b/libraries/pandas/examples/Index.is_mixed.py @@ -0,0 +1,5 @@ +import pandas + +idx = pandas.Index([0,'1',3, 'fooo']) +if idx.is_mixed(): + print('mixed type') \ No newline at end of file diff --git a/libraries/pandas/examples/factorize.na_sentinel.py b/libraries/pandas/examples/factorize.na_sentinel.py new file mode 100644 index 0000000..1285552 --- /dev/null +++ b/libraries/pandas/examples/factorize.na_sentinel.py @@ -0,0 +1,7 @@ +import pandas as pd +import numpy as np + +ar2 = np.array(['Q', 'W', 'E', np.nan, 'Q', 'Y']) +codes, uniques = pd.factorize(ar2, na_sentinel=77) +print(codes) +print(uniques) diff --git a/libraries/pandas/examples/infer_datetime_format.py 
b/libraries/pandas/examples/infer_datetime_format.py new file mode 100644 index 0000000..5499207 --- /dev/null +++ b/libraries/pandas/examples/infer_datetime_format.py @@ -0,0 +1,4 @@ +import pandas as pd + +pd.to_datetime(['2023-11-26 12:00:00', '2023-11-02 13:00:15'], infer_datetime_format=False) + diff --git a/libraries/pandas/library.json b/libraries/pandas/library.json new file mode 100644 index 0000000..e66047f --- /dev/null +++ b/libraries/pandas/library.json @@ -0,0 +1,6 @@ +{ + "name": "pandas", + "ghurl": "https://github.com/pandas-dev/pandas/", + "baseversion": "v1.4.2", + "currentversion": "v2.0.0" +} \ No newline at end of file diff --git a/libraries/pandas/releasenotes/v1.4.3.rst b/libraries/pandas/releasenotes/v1.4.3.rst new file mode 100644 index 0000000..70b451a --- /dev/null +++ b/libraries/pandas/releasenotes/v1.4.3.rst @@ -0,0 +1,72 @@ +.. _whatsnew_143: + +What's new in 1.4.3 (June 23, 2022) +----------------------------------- + +These are the changes in pandas 1.4.3. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_143.concat: + +Behavior of ``concat`` with empty or all-NA DataFrame columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The behavior change in version 1.4.0 to stop ignoring the data type +of empty or all-NA columns with float or object dtype in :func:`concat` +(:ref:`whatsnew_140.notable_bug_fixes.concat_with_empty_or_all_na`) has been +reverted (:issue:`45637`). + + +.. _whatsnew_143.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`DataFrame.replace` when the replacement value was explicitly ``None`` when passed in a dictionary to ``to_replace`` also casting other columns to object dtype even when there were no values to replace (:issue:`46634`) +- Fixed regression in :meth:`DataFrame.to_csv` raising error when :class:`DataFrame` contains extension dtype categorical column (:issue:`46297`, :issue:`46812`) +- Fixed regression in representation of ``dtypes`` attribute of :class:`MultiIndex` (:issue:`46900`) +- Fixed regression when setting values with :meth:`DataFrame.loc` updating :class:`RangeIndex` when index was set as new column and column was updated afterwards (:issue:`47128`) +- Fixed regression in :meth:`DataFrame.fillna` and :meth:`DataFrame.update` creating a copy when updating inplace (:issue:`47188`) +- Fixed regression in :meth:`DataFrame.nsmallest` led to wrong results when the sorting column has ``np.nan`` values (:issue:`46589`) +- Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`) +- Fixed regression in :func:`concat` not sorting columns for mixed column names (:issue:`47127`) +- Fixed regression in :meth:`.Groupby.transform` and :meth:`.Groupby.agg` failing with ``engine="numba"`` when the index was a :class:`MultiIndex` (:issue:`46867`) +- Fixed regression in ``NaN`` comparison for :class:`Index` operations where the same object was compared (:issue:`47105`) +- Fixed regression is :meth:`.Styler.to_latex` and :meth:`.Styler.to_html` where ``buf`` failed in combination with ``encoding`` (:issue:`47053`) +- Fixed regression in :func:`read_csv` with ``index_col=False`` identifying first row as index names when ``header=None`` (:issue:`46955`) +- Fixed regression in :meth:`.DataFrameGroupBy.agg` when used with list-likes or dict-likes and ``axis=1`` that would give 
incorrect results; now raises ``NotImplementedError`` (:issue:`46995`) +- Fixed regression in :meth:`DataFrame.resample` and :meth:`DataFrame.rolling` when used with list-likes or dict-likes and ``axis=1`` that would raise an unintuitive error message; now raises ``NotImplementedError`` (:issue:`46904`) +- Fixed regression in :func:`testing.assert_index_equal` when ``check_order=False`` and :class:`Index` has extension or object dtype (:issue:`47207`) +- Fixed regression in :func:`read_excel` returning ints as floats on certain input sheets (:issue:`46988`) +- Fixed regression in :meth:`DataFrame.shift` when ``axis`` is ``columns`` and ``fill_value`` is absent, ``freq`` is ignored (:issue:`47039`) +- Fixed regression in :meth:`DataFrame.to_json` causing a segmentation violation when :class:`DataFrame` is created with an ``index`` parameter of the type :class:`PeriodIndex` (:issue:`46683`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_143.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in :func:`pandas.eval`, :meth:`DataFrame.eval` and :meth:`DataFrame.query` where passing empty ``local_dict`` or ``global_dict`` was treated as passing ``None`` (:issue:`47084`) +- Most I/O methods no longer suppress ``OSError`` and ``ValueError`` when closing file handles (:issue:`47136`) +- Improving error message raised by :meth:`DataFrame.from_dict` when passing an invalid ``orient`` parameter (:issue:`47450`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_143.other: + +Other +~~~~~ +- The minimum version of Cython needed to compile pandas is now ``0.29.30`` (:issue:`41935`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_143.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.4.2..v1.4.3 diff --git a/libraries/pandas/releasenotes/v1.4.4.rst b/libraries/pandas/releasenotes/v1.4.4.rst new file mode 100644 index 0000000..56b1254 --- /dev/null +++ b/libraries/pandas/releasenotes/v1.4.4.rst @@ -0,0 +1,65 @@ +.. _whatsnew_144: + +What's new in 1.4.4 (August 31, 2022) +------------------------------------- + +These are the changes in pandas 1.4.4. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_144.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`DataFrame.fillna` not working on a :class:`DataFrame` with a :class:`MultiIndex` (:issue:`47649`) +- Fixed regression in taking NULL :class:`objects` from a :class:`DataFrame` causing a segmentation violation. 
These NULL values are created by :meth:`numpy.empty_like` (:issue:`46848`) +- Fixed regression in :func:`concat` materializing the :class:`Index` during sorting even if the :class:`Index` was already sorted (:issue:`47501`) +- Fixed regression in :func:`concat` or :func:`merge` handling of all-NaN ExtensionArrays with custom attributes (:issue:`47762`) +- Fixed regression in calling bitwise numpy ufuncs (for example, ``np.bitwise_and``) on Index objects (:issue:`46769`) +- Fixed regression in :func:`cut` when using a ``datetime64`` IntervalIndex as bins (:issue:`46218`) +- Fixed regression in :meth:`DataFrame.select_dtypes` where ``include="number"`` included :class:`BooleanDtype` (:issue:`46870`) +- Fixed regression in :meth:`DataFrame.loc` raising error when indexing with a ``NamedTuple`` (:issue:`48124`) +- Fixed regression in :meth:`DataFrame.loc` not updating the cache correctly after values were set (:issue:`47867`) +- Fixed regression in :meth:`DataFrame.loc` not aligning index in some cases when setting a :class:`DataFrame` (:issue:`47578`) +- Fixed regression in :meth:`DataFrame.loc` setting a length-1 array like value to a single value in the DataFrame (:issue:`46268`) +- Fixed regression when slicing with :meth:`DataFrame.loc` with :class:`DatetimeIndex` with a :class:`.DateOffset` object for its ``freq`` (:issue:`46671`) +- Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`) +- Fixed regression in updating a DataFrame column through Series ``__setitem__`` (using chained assignment) not updating column values inplace and using too much memory (:issue:`47172`) +- Fixed regression in :meth:`DataFrame.select_dtypes` returning a view on the original DataFrame (:issue:`48090`) +- Fixed regression using custom Index subclasses (for example, used in xarray) with :meth:`~DataFrame.reset_index` or :meth:`Index.insert` (:issue:`47071`) +- Fixed regression in :meth:`~Index.intersection` when the :class:`DatetimeIndex` has dates crossing daylight savings time (:issue:`46702`) +- Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`) +- Fixed regression in :meth:`DataFrame.eval` creating a copy when updating inplace (:issue:`47449`) +- Fixed regression where getting a row using :meth:`DataFrame.iloc` with :class:`SparseDtype` would raise (:issue:`46406`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_144.bug_fixes: + +Bug fixes +~~~~~~~~~ +- The ``FutureWarning`` raised when passing arguments (other than ``filepath_or_buffer``) as positional in :func:`read_csv` is now raised at the correct stacklevel (:issue:`47385`) +- Bug in :meth:`DataFrame.to_sql` when ``method`` was a ``callable`` that did not return an ``int`` and would raise a ``TypeError`` (:issue:`46891`) +- Bug in :meth:`.DataFrameGroupBy.value_counts` where ``subset`` had no effect (:issue:`46383`) +- Bug when getting values with :meth:`DataFrame.loc` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` vs ``frame[y].loc[x]`` (:issue:`22372`) +- Bug in the :meth:`Series.dt.strftime` accessor return a float instead of object dtype Series for all-NaT input, which also causes a spurious deprecation warning (:issue:`45858`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_144.other: + +Other +~~~~~ +- The minimum version of Cython needed to compile pandas is now ``0.29.32`` (:issue:`47978`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_144.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.4.3..v1.4.4|HEAD diff --git a/libraries/pandas/releasenotes/v1.5.0.rst b/libraries/pandas/releasenotes/v1.5.0.rst new file mode 100644 index 0000000..badf3f0 --- /dev/null +++ b/libraries/pandas/releasenotes/v1.5.0.rst @@ -0,0 +1,1285 @@ +.. _whatsnew_150: + +What's new in 1.5.0 (September 19, 2022) +---------------------------------------- + +These are the changes in pandas 1.5.0. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- +.. _whatsnew_150.enhancements: + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_150.enhancements.pandas-stubs: + +``pandas-stubs`` +^^^^^^^^^^^^^^^^ + +The ``pandas-stubs`` library is now supported by the pandas development team, providing type stubs for the pandas API. Please visit +https://github.com/pandas-dev/pandas-stubs for more information. + +We thank VirtusLab and Microsoft for their initial, significant contributions to ``pandas-stubs`` + +.. _whatsnew_150.enhancements.arrow: + +Native PyArrow-backed ExtensionArray +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +With `Pyarrow `__ installed, users can now create pandas objects +that are backed by a ``pyarrow.ChunkedArray`` and ``pyarrow.DataType``. + +The ``dtype`` argument can accept a string of a `pyarrow data type `__ +with ``pyarrow`` in brackets e.g. ``"int64[pyarrow]"`` or, for pyarrow data types that take parameters, a :class:`ArrowDtype` +initialized with a ``pyarrow.DataType``. + +.. ipython:: python + + import pyarrow as pa + ser_float = pd.Series([1.0, 2.0, None], dtype="float32[pyarrow]") + ser_float + + list_of_int_type = pd.ArrowDtype(pa.list_(pa.int64())) + ser_list = pd.Series([[1, 2], [3, None]], dtype=list_of_int_type) + ser_list + + ser_list.take([1, 0]) + ser_float * 5 + ser_float.mean() + ser_float.dropna() + +Most operations are supported and have been implemented using `pyarrow compute `__ functions. +We recommend installing the latest version of PyArrow to access the most recently implemented compute functions. + +.. warning:: + + This feature is experimental, and the API can change in a future release without warning. + +.. _whatsnew_150.enhancements.dataframe_interchange: + +DataFrame interchange protocol implementation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas now implement the DataFrame interchange API spec. +See the full details on the API at https://data-apis.org/dataframe-protocol/latest/index.html + +The protocol consists of two parts: + +- New method :meth:`DataFrame.__dataframe__` which produces the interchange object. + It effectively "exports" the pandas dataframe as an interchange object so + any other library which has the protocol implemented can "import" that dataframe + without knowing anything about the producer except that it makes an interchange object. +- New function :func:`pandas.api.interchange.from_dataframe` which can take + an arbitrary interchange object from any conformant library and construct a + pandas DataFrame out of it. + +.. 
_whatsnew_150.enhancements.styler: + +Styler +^^^^^^ + +The most notable development is the new method :meth:`.Styler.concat` which +allows adding customised footer rows to visualise additional calculations on the data, +e.g. totals and counts etc. (:issue:`43875`, :issue:`46186`) + +Additionally there is an alternative output method :meth:`.Styler.to_string`, +which allows using the Styler's formatting methods to create, for example, CSVs (:issue:`44502`). + +A new feature :meth:`.Styler.relabel_index` is also made available to provide full customisation of the display of +index or column headers (:issue:`47864`) + +Minor feature improvements are: + + - Adding the ability to render ``border`` and ``border-{side}`` CSS properties in Excel (:issue:`42276`) + - Making keyword arguments consist: :meth:`.Styler.highlight_null` now accepts ``color`` and deprecates ``null_color`` although this remains backwards compatible (:issue:`45907`) + +.. _whatsnew_150.enhancements.resample_group_keys: + +Control of index with ``group_keys`` in :meth:`DataFrame.resample` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The argument ``group_keys`` has been added to the method :meth:`DataFrame.resample`. +As with :meth:`DataFrame.groupby`, this argument controls the whether each group is added +to the index in the resample when :meth:`.Resampler.apply` is used. + +.. warning:: + Not specifying the ``group_keys`` argument will retain the + previous behavior and emit a warning if the result will change + by specifying ``group_keys=False``. In a future version + of pandas, not specifying ``group_keys`` will default to + the same behavior as ``group_keys=False``. + +.. ipython:: python + + df = pd.DataFrame( + {'a': range(6)}, + index=pd.date_range("2021-01-01", periods=6, freq="8H") + ) + df.resample("D", group_keys=True).apply(lambda x: x) + df.resample("D", group_keys=False).apply(lambda x: x) + +Previously, the resulting index would depend upon the values returned by ``apply``, +as seen in the following example. + +.. code-block:: ipython + + In [1]: # pandas 1.3 + In [2]: df.resample("D").apply(lambda x: x) + Out[2]: + a + 2021-01-01 00:00:00 0 + 2021-01-01 08:00:00 1 + 2021-01-01 16:00:00 2 + 2021-01-02 00:00:00 3 + 2021-01-02 08:00:00 4 + 2021-01-02 16:00:00 5 + + In [3]: df.resample("D").apply(lambda x: x.reset_index()) + Out[3]: + index a + 2021-01-01 0 2021-01-01 00:00:00 0 + 1 2021-01-01 08:00:00 1 + 2 2021-01-01 16:00:00 2 + 2021-01-02 0 2021-01-02 00:00:00 3 + 1 2021-01-02 08:00:00 4 + 2 2021-01-02 16:00:00 5 + +.. _whatsnew_150.enhancements.from_dummies: + +from_dummies +^^^^^^^^^^^^ + +Added new function :func:`~pandas.from_dummies` to convert a dummy coded :class:`DataFrame` into a categorical :class:`DataFrame`. + +.. ipython:: python + + import pandas as pd + + df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0], + "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], + "col2_c": [0, 0, 1]}) + + pd.from_dummies(df, sep="_") + +.. _whatsnew_150.enhancements.orc: + +Writing to ORC files +^^^^^^^^^^^^^^^^^^^^ + +The new method :meth:`DataFrame.to_orc` allows writing to ORC files (:issue:`43864`). + +This functionality depends the `pyarrow `__ library. For more details, see :ref:`the IO docs on ORC `. + +.. warning:: + + * It is *highly recommended* to install pyarrow using conda due to some issues occurred by pyarrow. + * :func:`~pandas.DataFrame.to_orc` requires pyarrow>=7.0.0. 
+ * :func:`~pandas.DataFrame.to_orc` is not supported on Windows yet, you can find valid environments on :ref:`install optional dependencies `. + * For supported dtypes please refer to `supported ORC features in Arrow `__. + * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files. + +.. code-block:: python + + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.to_orc("./out.orc") + +.. _whatsnew_150.enhancements.tar: + +Reading directly from TAR archives +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +I/O methods like :func:`read_csv` or :meth:`DataFrame.to_json` now allow reading and writing +directly on TAR archives (:issue:`44787`). + +.. code-block:: python + + df = pd.read_csv("./movement.tar.gz") + # ... + df.to_csv("./out.tar.gz") + +This supports ``.tar``, ``.tar.gz``, ``.tar.bz`` and ``.tar.xz2`` archives. +The used compression method is inferred from the filename. +If the compression method cannot be inferred, use the ``compression`` argument: + +.. code-block:: python + + df = pd.read_csv(some_file_obj, compression={"method": "tar", "mode": "r:gz"}) # noqa F821 + +(``mode`` being one of ``tarfile.open``'s modes: https://docs.python.org/3/library/tarfile.html#tarfile.open) + + +.. _whatsnew_150.enhancements.read_xml_dtypes: + +read_xml now supports ``dtype``, ``converters``, and ``parse_dates`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Similar to other IO methods, :func:`pandas.read_xml` now supports assigning specific dtypes to columns, +apply converter methods, and parse dates (:issue:`43567`). + +.. ipython:: python + + xml_dates = """ + + + square + 00360 + 4.0 + 2020-01-01 + + + circle + 00360 + + 2021-01-01 + + + triangle + 00180 + 3.0 + 2022-01-01 + + """ + + df = pd.read_xml( + xml_dates, + dtype={'sides': 'Int64'}, + converters={'degrees': str}, + parse_dates=['date'] + ) + df + df.dtypes + + +.. _whatsnew_150.enhancements.read_xml_iterparse: + +read_xml now supports large XML using ``iterparse`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For very large XML files that can range in hundreds of megabytes to gigabytes, :func:`pandas.read_xml` +now supports parsing such sizeable files using `lxml's iterparse`_ and `etree's iterparse`_ +which are memory-efficient methods to iterate through XML trees and extract specific elements +and attributes without holding entire tree in memory (:issue:`45442`). + +.. code-block:: ipython + + In [1]: df = pd.read_xml( + ... "/path/to/downloaded/enwikisource-latest-pages-articles.xml", + ... iterparse = {"page": ["title", "ns", "id"]}) + ... ) + df + Out[2]: + title ns id + 0 Gettysburg Address 0 21450 + 1 Main Page 0 42950 + 2 Declaration by United Nations 0 8435 + 3 Constitution of the United States of America 0 8435 + 4 Declaration of Independence (Israel) 0 17858 + ... ... ... ... + 3578760 Page:Black cat 1897 07 v2 n10.pdf/17 104 219649 + 3578761 Page:Black cat 1897 07 v2 n10.pdf/43 104 219649 + 3578762 Page:Black cat 1897 07 v2 n10.pdf/44 104 219649 + 3578763 The History of Tom Jones, a Foundling/Book IX 0 12084291 + 3578764 Page:Shakespeare of Stratford (1926) Yale.djvu/91 104 21450 + + [3578765 rows x 3 columns] + + +.. _`lxml's iterparse`: https://lxml.de/3.2/parsing.html#iterparse-and-iterwalk +.. _`etree's iterparse`: https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse + +.. 
+
+.. _whatsnew_150.enhancements.copy_on_write:
+
+Copy on Write
+^^^^^^^^^^^^^
+
+A new feature ``copy_on_write`` was added (:issue:`46958`). Copy on write ensures that
+any DataFrame or Series derived from another in any way always behaves as a copy.
+Copy on write disallows updating any object other than the object the method
+was applied to.
+
+Copy on write can be enabled through:
+
+.. code-block:: python
+
+    pd.set_option("mode.copy_on_write", True)
+    pd.options.mode.copy_on_write = True
+
+Alternatively, copy on write can be enabled locally through:
+
+.. code-block:: python
+
+    with pd.option_context("mode.copy_on_write", True):
+        ...
+
+Without copy on write, the parent :class:`DataFrame` is updated when updating a child
+:class:`DataFrame` that was derived from this :class:`DataFrame`.
+
+.. ipython:: python
+
+    df = pd.DataFrame({"foo": [1, 2, 3], "bar": 1})
+    view = df["foo"]
+    view.iloc[0] = 10
+    df
+
+With copy on write enabled, ``df`` won't be updated anymore:
+
+.. ipython:: python
+
+    with pd.option_context("mode.copy_on_write", True):
+        df = pd.DataFrame({"foo": [1, 2, 3], "bar": 1})
+        view = df["foo"]
+        view.iloc[0] = 10
+        df
+
+A more detailed explanation can be found `here `_.
+
+.. _whatsnew_150.enhancements.other:
+
+Other enhancements
+^^^^^^^^^^^^^^^^^^
+- :meth:`Series.map` now raises when ``arg`` is a dict but ``na_action`` is not either ``None`` or ``'ignore'`` (:issue:`46588`)
+- :meth:`MultiIndex.to_frame` now supports the argument ``allow_duplicates`` and raises on duplicate labels if it is missing or False (:issue:`45245`)
+- :class:`.StringArray` now accepts array-likes containing nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`)
+- Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`)
+- :meth:`DataFrame.plot` will now allow the ``subplots`` parameter to be a list of iterables specifying column groups, so that columns may be grouped together in the same subplot (:issue:`29688`).
+- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`) +- :meth:`Series.reset_index` and :meth:`DataFrame.reset_index` now support the argument ``allow_duplicates`` (:issue:`44410`) +- :meth:`.DataFrameGroupBy.min`, :meth:`.SeriesGroupBy.min`, :meth:`.DataFrameGroupBy.max`, and :meth:`.SeriesGroupBy.max` now supports `Numba `_ execution with the ``engine`` keyword (:issue:`45428`) +- :func:`read_csv` now supports ``defaultdict`` as a ``dtype`` parameter (:issue:`41574`) +- :meth:`DataFrame.rolling` and :meth:`Series.rolling` now support a ``step`` parameter with fixed-length windows (:issue:`15354`) +- Implemented a ``bool``-dtype :class:`Index`, passing a bool-dtype array-like to ``pd.Index`` will now retain ``bool`` dtype instead of casting to ``object`` (:issue:`45061`) +- Implemented a complex-dtype :class:`Index`, passing a complex-dtype array-like to ``pd.Index`` will now retain complex dtype instead of casting to ``object`` (:issue:`45845`) +- :class:`Series` and :class:`DataFrame` with :class:`IntegerDtype` now supports bitwise operations (:issue:`34463`) +- Add ``milliseconds`` field support for :class:`.DateOffset` (:issue:`43371`) +- :meth:`DataFrame.where` tries to maintain dtype of :class:`DataFrame` if fill value can be cast without loss of precision (:issue:`45582`) +- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) +- :func:`concat` now raises when ``levels`` is given but ``keys`` is None (:issue:`46653`) +- :func:`concat` now raises when ``levels`` contains duplicate values (:issue:`46653`) +- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, :meth:`DataFrame.cov`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax`, :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.DataFrameGroupBy.var`, :meth:`.SeriesGroupBy.var`, :meth:`.DataFrameGroupBy.std`, :meth:`.SeriesGroupBy.std`, :meth:`.DataFrameGroupBy.sem`, :meth:`.SeriesGroupBy.sem`, and :meth:`.DataFrameGroupBy.quantile` (:issue:`46560`) +- A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`, :issue:`46725`) +- Added ``validate`` argument to :meth:`DataFrame.join` (:issue:`46622`) +- Added ``numeric_only`` argument to :meth:`.Resampler.sum`, :meth:`.Resampler.prod`, :meth:`.Resampler.min`, :meth:`.Resampler.max`, :meth:`.Resampler.first`, and :meth:`.Resampler.last` (:issue:`46442`) +- ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`) +- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, :class:`.IndexingError`, :class:`.PyperclipException`, :class:`.PyperclipWindowsException`, :class:`.CSSWarning`, :class:`.PossibleDataLossError`, :class:`.ClosedFileError`, :class:`.IncompatibilityWarning`, :class:`.AttributeConflictWarning`, :class:`.DatabaseError`, :class:`.PossiblePrecisionLoss`, :class:`.ValueLabelTypeMismatch`, :class:`.InvalidColumnName`, and :class:`.CategoricalConversionWarning` are now exposed in ``pandas.errors`` (:issue:`27656`) +- Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`) +- Add support for :meth:`.DataFrameGroupBy.ohlc` and :meth:`.SeriesGroupBy.ohlc` for extension 
array dtypes (:issue:`37493`) +- Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files) +- :func:`pandas.read_html` now supports extracting links from table cells (:issue:`13141`) +- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`) +- :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) +- :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) +- :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) +- :meth:`DataFrame.quantile` gained a ``method`` argument that can accept ``table`` to evaluate multi-column quantiles (:issue:`43881`) +- :class:`Interval` now supports checking whether one interval is contained by another interval (:issue:`46613`) +- Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`47932`) +- The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`) +- The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_150.notable_bug_fixes: + +Notable bug fixes +~~~~~~~~~~~~~~~~~ + +These are bug fixes that might have notable behavior changes. + +.. _whatsnew_150.notable_bug_fixes.groupby_transform_dropna: + +Using ``dropna=True`` with ``groupby`` transforms +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A transform is an operation whose result has the same size as its input. When the +result is a :class:`DataFrame` or :class:`Series`, it is also required that the +index of the result matches that of the input. In pandas 1.4, using +:meth:`.DataFrameGroupBy.transform` or :meth:`.SeriesGroupBy.transform` with null +values in the groups and ``dropna=True`` gave incorrect results. Demonstrated by the +examples below, the incorrect results either contained incorrect values, or the result +did not have the same index as the input. + +.. ipython:: python + + df = pd.DataFrame({'a': [1, 1, np.nan], 'b': [2, 3, 4]}) + +*Old behavior*: + +.. code-block:: ipython + + In [3]: # Value in the last row should be np.nan + df.groupby('a', dropna=True).transform('sum') + Out[3]: + b + 0 5 + 1 5 + 2 5 + + In [3]: # Should have one additional row with the value np.nan + df.groupby('a', dropna=True).transform(lambda x: x.sum()) + Out[3]: + b + 0 5 + 1 5 + + In [3]: # The value in the last row is np.nan interpreted as an integer + df.groupby('a', dropna=True).transform('ffill') + Out[3]: + b + 0 2 + 1 3 + 2 -9223372036854775808 + + In [3]: # Should have one additional row with the value np.nan + df.groupby('a', dropna=True).transform(lambda x: x) + Out[3]: + b + 0 2 + 1 3 + +*New behavior*: + +.. 
ipython:: python + + df.groupby('a', dropna=True).transform('sum') + df.groupby('a', dropna=True).transform(lambda x: x.sum()) + df.groupby('a', dropna=True).transform('ffill') + df.groupby('a', dropna=True).transform(lambda x: x) + +.. _whatsnew_150.notable_bug_fixes.to_json_incorrectly_localizing_naive_timestamps: + +Serializing tz-naive Timestamps with to_json() with ``iso_dates=True`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.to_json`, :meth:`Series.to_json`, and :meth:`Index.to_json` +would incorrectly localize DatetimeArrays/DatetimeIndexes with tz-naive Timestamps +to UTC. (:issue:`38760`) + +Note that this patch does not fix the localization of tz-aware Timestamps to UTC +upon serialization. (Related issue :issue:`12997`) + +*Old Behavior* + +.. ipython:: python + + index = pd.date_range( + start='2020-12-28 00:00:00', + end='2020-12-28 02:00:00', + freq='1H', + ) + a = pd.Series( + data=range(3), + index=index, + ) + +.. code-block:: ipython + + In [4]: a.to_json(date_format='iso') + Out[4]: '{"2020-12-28T00:00:00.000Z":0,"2020-12-28T01:00:00.000Z":1,"2020-12-28T02:00:00.000Z":2}' + + In [5]: pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index + Out[5]: array([False, False, False]) + +*New Behavior* + +.. ipython:: python + + a.to_json(date_format='iso') + # Roundtripping now works + pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index + +.. _whatsnew_150.notable_bug_fixes.groupby_value_counts_categorical: + +DataFrameGroupBy.value_counts with non-grouping categorical columns and ``observed=True`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Calling :meth:`.DataFrameGroupBy.value_counts` with ``observed=True`` would incorrectly drop non-observed categories of non-grouping columns (:issue:`46357`). + +.. code-block:: ipython + + In [6]: df = pd.DataFrame(["a", "b", "c"], dtype="category").iloc[0:2] + In [7]: df + Out[7]: + 0 + 0 a + 1 b + +*Old Behavior* + +.. code-block:: ipython + + In [8]: df.groupby(level=0, observed=True).value_counts() + Out[8]: + 0 a 1 + 1 b 1 + dtype: int64 + + +*New Behavior* + +.. code-block:: ipython + + In [9]: df.groupby(level=0, observed=True).value_counts() + Out[9]: + 0 a 1 + 1 a 0 + b 1 + 0 b 0 + c 0 + 1 c 0 + dtype: int64 + +.. --------------------------------------------------------------------------- +.. _whatsnew_150.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_150.api_breaking.deps: + +Increased minimum versions for dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Some minimum supported versions of dependencies were updated. 
+If installed, we now require: + ++-----------------+-----------------+----------+---------+ +| Package | Minimum Version | Required | Changed | ++=================+=================+==========+=========+ +| numpy | 1.20.3 | X | X | ++-----------------+-----------------+----------+---------+ +| mypy (dev) | 0.971 | | X | ++-----------------+-----------------+----------+---------+ +| beautifulsoup4 | 4.9.3 | | X | ++-----------------+-----------------+----------+---------+ +| blosc | 1.21.0 | | X | ++-----------------+-----------------+----------+---------+ +| bottleneck | 1.3.2 | | X | ++-----------------+-----------------+----------+---------+ +| fsspec | 2021.07.0 | | X | ++-----------------+-----------------+----------+---------+ +| hypothesis | 6.13.0 | | X | ++-----------------+-----------------+----------+---------+ +| gcsfs | 2021.07.0 | | X | ++-----------------+-----------------+----------+---------+ +| jinja2 | 3.0.0 | | X | ++-----------------+-----------------+----------+---------+ +| lxml | 4.6.3 | | X | ++-----------------+-----------------+----------+---------+ +| numba | 0.53.1 | | X | ++-----------------+-----------------+----------+---------+ +| numexpr | 2.7.3 | | X | ++-----------------+-----------------+----------+---------+ +| openpyxl | 3.0.7 | | X | ++-----------------+-----------------+----------+---------+ +| pandas-gbq | 0.15.0 | | X | ++-----------------+-----------------+----------+---------+ +| psycopg2 | 2.8.6 | | X | ++-----------------+-----------------+----------+---------+ +| pymysql | 1.0.2 | | X | ++-----------------+-----------------+----------+---------+ +| pyreadstat | 1.1.2 | | X | ++-----------------+-----------------+----------+---------+ +| pyxlsb | 1.0.8 | | X | ++-----------------+-----------------+----------+---------+ +| s3fs | 2021.08.0 | | X | ++-----------------+-----------------+----------+---------+ +| scipy | 1.7.1 | | X | ++-----------------+-----------------+----------+---------+ +| sqlalchemy | 1.4.16 | | X | ++-----------------+-----------------+----------+---------+ +| tabulate | 0.8.9 | | X | ++-----------------+-----------------+----------+---------+ +| xarray | 0.19.0 | | X | ++-----------------+-----------------+----------+---------+ +| xlsxwriter | 1.4.3 | | X | ++-----------------+-----------------+----------+---------+ + +For `optional libraries `_ the general recommendation is to use the latest version. +The following table lists the lowest version per library that is currently being tested throughout the development of pandas. +Optional libraries below the lowest tested version may still work, but are not considered supported. 
+ ++-----------------+-----------------+---------+ +| Package | Minimum Version | Changed | ++=================+=================+=========+ +| beautifulsoup4 |4.9.3 | X | ++-----------------+-----------------+---------+ +| blosc |1.21.0 | X | ++-----------------+-----------------+---------+ +| bottleneck |1.3.2 | X | ++-----------------+-----------------+---------+ +| brotlipy |0.7.0 | | ++-----------------+-----------------+---------+ +| fastparquet |0.4.0 | | ++-----------------+-----------------+---------+ +| fsspec |2021.08.0 | X | ++-----------------+-----------------+---------+ +| html5lib |1.1 | | ++-----------------+-----------------+---------+ +| hypothesis |6.13.0 | X | ++-----------------+-----------------+---------+ +| gcsfs |2021.08.0 | X | ++-----------------+-----------------+---------+ +| jinja2 |3.0.0 | X | ++-----------------+-----------------+---------+ +| lxml |4.6.3 | X | ++-----------------+-----------------+---------+ +| matplotlib |3.3.2 | | ++-----------------+-----------------+---------+ +| numba |0.53.1 | X | ++-----------------+-----------------+---------+ +| numexpr |2.7.3 | X | ++-----------------+-----------------+---------+ +| odfpy |1.4.1 | | ++-----------------+-----------------+---------+ +| openpyxl |3.0.7 | X | ++-----------------+-----------------+---------+ +| pandas-gbq |0.15.0 | X | ++-----------------+-----------------+---------+ +| psycopg2 |2.8.6 | X | ++-----------------+-----------------+---------+ +| pyarrow |1.0.1 | | ++-----------------+-----------------+---------+ +| pymysql |1.0.2 | X | ++-----------------+-----------------+---------+ +| pyreadstat |1.1.2 | X | ++-----------------+-----------------+---------+ +| pytables |3.6.1 | | ++-----------------+-----------------+---------+ +| python-snappy |0.6.0 | | ++-----------------+-----------------+---------+ +| pyxlsb |1.0.8 | X | ++-----------------+-----------------+---------+ +| s3fs |2021.08.0 | X | ++-----------------+-----------------+---------+ +| scipy |1.7.1 | X | ++-----------------+-----------------+---------+ +| sqlalchemy |1.4.16 | X | ++-----------------+-----------------+---------+ +| tabulate |0.8.9 | X | ++-----------------+-----------------+---------+ +| tzdata |2022a | | ++-----------------+-----------------+---------+ +| xarray |0.19.0 | X | ++-----------------+-----------------+---------+ +| xlrd |2.0.1 | | ++-----------------+-----------------+---------+ +| xlsxwriter |1.4.3 | X | ++-----------------+-----------------+---------+ +| xlwt |1.3.0 | | ++-----------------+-----------------+---------+ +| zstandard |0.15.2 | | ++-----------------+-----------------+---------+ + +See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. + +.. _whatsnew_150.api_breaking.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- BigQuery I/O methods :func:`read_gbq` and :meth:`DataFrame.to_gbq` default to + ``auth_local_webserver = True``. Google has deprecated the + ``auth_local_webserver = False`` `"out of band" (copy-paste) flow + `_. + The ``auth_local_webserver = False`` option is planned to stop working in + October 2022. (:issue:`46312`) +- :func:`read_json` now raises ``FileNotFoundError`` (previously ``ValueError``) when input is a string ending in ``.json``, ``.json.gz``, ``.json.bz2``, etc. but no such file exists. 
(:issue:`29102`) +- Operations with :class:`Timestamp` or :class:`Timedelta` that would previously raise ``OverflowError`` instead raise ``OutOfBoundsDatetime`` or ``OutOfBoundsTimedelta`` where appropriate (:issue:`47268`) +- When :func:`read_sas` previously returned ``None``, it now returns an empty :class:`DataFrame` (:issue:`47410`) +- :class:`DataFrame` constructor raises if ``index`` or ``columns`` arguments are sets (:issue:`47215`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_150.deprecations: + +Deprecations +~~~~~~~~~~~~ + +.. warning:: + + In the next major version release, 2.0, several larger API changes are being considered without a formal deprecation such as + making the standard library `zoneinfo `_ the default timezone implementation instead of ``pytz``, + having the :class:`Index` support all data types instead of having multiple subclasses (:class:`CategoricalIndex`, :class:`Int64Index`, etc.), and more. + The changes under consideration are logged in `this GitHub issue `_, and any + feedback or concerns are welcome. + +.. _whatsnew_150.deprecations.int_slicing_series: + +Label-based integer slicing on a Series with an Int64Index or RangeIndex +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In a future version, integer slicing on a :class:`Series` with a :class:`Int64Index` or :class:`RangeIndex` will be treated as *label-based*, not positional. This will make the behavior consistent with other :meth:`Series.__getitem__` and :meth:`Series.__setitem__` behaviors (:issue:`45162`). + +For example: + +.. ipython:: python + + ser = pd.Series([1, 2, 3, 4, 5], index=[2, 3, 5, 7, 11]) + +In the old behavior, ``ser[2:4]`` treats the slice as positional: + +*Old behavior*: + +.. code-block:: ipython + + In [3]: ser[2:4] + Out[3]: + 5 3 + 7 4 + dtype: int64 + +In a future version, this will be treated as label-based: + +*Future behavior*: + +.. code-block:: ipython + + In [4]: ser.loc[2:4] + Out[4]: + 2 1 + 3 2 + dtype: int64 + +To retain the old behavior, use ``series.iloc[i:j]``. To get the future behavior, +use ``series.loc[i:j]``. + +Slicing on a :class:`DataFrame` will not be affected. + +.. _whatsnew_150.deprecations.excel_writer_attributes: + +:class:`ExcelWriter` attributes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +All attributes of :class:`ExcelWriter` were previously documented as not +public. However some third party Excel engines documented accessing +``ExcelWriter.book`` or ``ExcelWriter.sheets``, and users were utilizing these +and possibly other attributes. Previously these attributes were not safe to use; +e.g. modifications to ``ExcelWriter.book`` would not update ``ExcelWriter.sheets`` +and conversely. In order to support this, pandas has made some attributes public +and improved their implementations so that they may now be safely used. (:issue:`45572`) + +The following attributes are now public and considered safe to access. + + - ``book`` + - ``check_extension`` + - ``close`` + - ``date_format`` + - ``datetime_format`` + - ``engine`` + - ``if_sheet_exists`` + - ``sheets`` + - ``supported_extensions`` + +The following attributes have been deprecated. They now raise a ``FutureWarning`` +when accessed and will be removed in a future version. Users should be aware +that their usage is considered unsafe, and can lead to unexpected results. + + - ``cur_sheet`` + - ``handles`` + - ``path`` + - ``save`` + - ``write_cells`` + +See the documentation of :class:`ExcelWriter` for further details. 
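+
+As a small sketch of working with the now-public attributes (assuming the
+optional ``openpyxl`` engine is installed; the file name and sheet name below
+are only examples):
+
+.. code-block:: python
+
+    import pandas as pd
+
+    df = pd.DataFrame({"a": [1, 2]})
+
+    with pd.ExcelWriter("example.xlsx", engine="openpyxl") as writer:
+        df.to_excel(writer, sheet_name="data")
+        # ``book`` exposes the engine's workbook object and ``sheets`` maps
+        # sheet names to the engine's worksheet objects.
+        workbook = writer.book
+        worksheet = writer.sheets["data"]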
+
+.. _whatsnew_150.deprecations.group_keys_in_apply:
+
+Using ``group_keys`` with transformers in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In previous versions of pandas, if it was inferred that the function passed to
+:meth:`.DataFrameGroupBy.apply` or :meth:`.SeriesGroupBy.apply` was a transformer (i.e. the resulting index was equal to
+the input index), the ``group_keys`` argument of :meth:`DataFrame.groupby` and
+:meth:`Series.groupby` was ignored and the group keys would never be added to
+the index of the result. In the future, the group keys will be added to the index
+when the user specifies ``group_keys=True``.
+
+As ``group_keys=True`` is the default value of :meth:`DataFrame.groupby` and
+:meth:`Series.groupby`, not specifying ``group_keys`` with a transformer will
+raise a ``FutureWarning``. This can be silenced and the previous behavior
+retained by specifying ``group_keys=False``.
+
+.. _whatsnew_150.deprecations.setitem_column_try_inplace:
+
+.. see also _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace
+
+Inplace operation when setting values with ``loc`` and ``iloc``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Most of the time setting values with :meth:`DataFrame.iloc` attempts to set values
+inplace, only falling back to inserting a new array if necessary. There are
+some cases where this rule is not followed, for example when setting an entire
+column from an array with a different dtype:
+
+.. ipython:: python
+
+    df = pd.DataFrame({'price': [11.1, 12.2]}, index=['book1', 'book2'])
+    original_prices = df['price']
+    new_prices = np.array([98, 99])
+
+*Old behavior*:
+
+.. code-block:: ipython
+
+    In [3]: df.iloc[:, 0] = new_prices
+
+    In [4]: df.iloc[:, 0]
+    Out[4]:
+    book1    98
+    book2    99
+    Name: price, dtype: int64
+
+    In [5]: original_prices
+    Out[5]:
+    book1    11.1
+    book2    12.2
+    Name: price, dtype: float64
+
+This behavior is deprecated. In a future version, setting an entire column with
+iloc will attempt to operate inplace.
+
+*Future behavior*:
+
+.. code-block:: ipython
+
+    In [3]: df.iloc[:, 0] = new_prices
+
+    In [4]: df.iloc[:, 0]
+    Out[4]:
+    book1    98.0
+    book2    99.0
+    Name: price, dtype: float64
+
+    In [5]: original_prices
+    Out[5]:
+    book1    98.0
+    book2    99.0
+    Name: price, dtype: float64
+
+To get the old behavior, use :meth:`DataFrame.__setitem__` directly:
+
+.. code-block:: ipython
+
+    In [3]: df[df.columns[0]] = new_prices
+
+    In [4]: df.iloc[:, 0]
+    Out[4]:
+    book1    98
+    book2    99
+    Name: price, dtype: int64
+
+    In [5]: original_prices
+    Out[5]:
+    book1    11.1
+    book2    12.2
+    Name: price, dtype: float64
+
+To get the old behavior when ``df.columns`` is not unique and you want to
+change a single column by index, you can use :meth:`DataFrame.isetitem`, which
+has been added in pandas 1.5:
+
+.. code-block:: ipython
+
+    In [3]: df_with_duplicated_cols = pd.concat([df, df], axis='columns')
+
+    In [4]: df_with_duplicated_cols.isetitem(0, new_prices)
+
+    In [5]: df_with_duplicated_cols.iloc[:, 0]
+    Out[5]:
+    book1    98
+    book2    99
+    Name: price, dtype: int64
+
+    In [6]: original_prices
+    Out[6]:
+    book1    11.1
+    book2    12.2
+    Name: 0, dtype: float64
+
+.. _whatsnew_150.deprecations.numeric_only_default:
+
+``numeric_only`` default value
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Across the :class:`DataFrame`, :class:`.DataFrameGroupBy`, and :class:`.Resampler` operations such as
+``min``, ``sum``, and ``idxmax``, the default
+value of the ``numeric_only`` argument, if it exists at all, was inconsistent.
+Furthermore, operations with the default value ``None`` can lead to surprising
+results. (:issue:`46560`)
+
+.. code-block:: ipython
+
+    In [1]: df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
+
+    In [2]: # Reading the next line without knowing the contents of df, one would
+       ...: # expect the result to contain the products for both columns a and b.
+       ...: df[["a", "b"]].prod()
+    Out[2]:
+    a    2
+    dtype: int64
+
+To avoid this behavior, specifying the value ``numeric_only=None`` has been
+deprecated, and will be removed in a future version of pandas. In the future,
+all operations with a ``numeric_only`` argument will default to ``False``. Users
+should either call the operation only with columns that can be operated on, or
+specify ``numeric_only=True`` to operate only on Boolean, integer, and float columns.
+
+In order to support the transition to the new behavior, the following methods have
+gained the ``numeric_only`` argument.
+
+- :meth:`DataFrame.corr`
+- :meth:`DataFrame.corrwith`
+- :meth:`DataFrame.cov`
+- :meth:`DataFrame.idxmin`
+- :meth:`DataFrame.idxmax`
+- :meth:`.DataFrameGroupBy.cummin`
+- :meth:`.DataFrameGroupBy.cummax`
+- :meth:`.DataFrameGroupBy.idxmin`
+- :meth:`.DataFrameGroupBy.idxmax`
+- :meth:`.DataFrameGroupBy.var`
+- :meth:`.DataFrameGroupBy.std`
+- :meth:`.DataFrameGroupBy.sem`
+- :meth:`.DataFrameGroupBy.quantile`
+- :meth:`.Resampler.mean`
+- :meth:`.Resampler.median`
+- :meth:`.Resampler.sem`
+- :meth:`.Resampler.std`
+- :meth:`.Resampler.var`
+- :meth:`DataFrame.rolling` operations
+- :meth:`DataFrame.expanding` operations
+- :meth:`DataFrame.ewm` operations
+
+.. _whatsnew_150.deprecations.other:
+
+Other Deprecations
+^^^^^^^^^^^^^^^^^^
+- Deprecated the keyword ``line_terminator`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead; this is for consistency with :func:`read_csv` and the standard library 'csv' module (:issue:`9568`)
+- Deprecated behavior of :meth:`SparseArray.astype`, :meth:`Series.astype`, and :meth:`DataFrame.astype` with :class:`SparseDtype` when passing a non-sparse ``dtype``. In a future version, this will cast to that non-sparse dtype instead of wrapping it in a :class:`SparseDtype` (:issue:`34457`)
+- Deprecated behavior of :meth:`DatetimeIndex.intersection` and :meth:`DatetimeIndex.symmetric_difference` (``union`` behavior was already deprecated in version 1.3.0) with mixed time zones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`, :issue:`45357`)
+- Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items` (:issue:`45321`)
+- Deprecated :meth:`Series.is_monotonic` and :meth:`Index.is_monotonic` in favor of :meth:`Series.is_monotonic_increasing` and :meth:`Index.is_monotonic_increasing` (:issue:`45422`, :issue:`21335`)
+- Deprecated behavior of :meth:`DatetimeIndex.astype`, :meth:`TimedeltaIndex.astype`, :meth:`PeriodIndex.astype` when converting to an integer dtype other than ``int64``.
In a future version, these will convert to exactly the specified dtype (instead of always ``int64``) and will raise if the conversion overflows (:issue:`45034`) +- Deprecated the ``__array_wrap__`` method of DataFrame and Series, rely on standard numpy ufuncs instead (:issue:`45451`) +- Deprecated treating float-dtype data as wall-times when passed with a timezone to :class:`Series` or :class:`DatetimeIndex` (:issue:`45573`) +- Deprecated the behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and incompatible fill value; in a future version this will cast to a common dtype (usually object) instead of raising, matching the behavior of other dtypes (:issue:`45746`) +- Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`) +- Deprecated allowing non-keyword arguments in :meth:`.ExtensionArray.argsort` (:issue:`46134`) +- Deprecated treating all-bool ``object``-dtype columns as bool-like in :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``, explicitly cast to bool instead (:issue:`46188`) +- Deprecated behavior of method :meth:`DataFrame.quantile`, attribute ``numeric_only`` will default False. Including datetime/timedelta columns in the result (:issue:`7308`). +- Deprecated :attr:`Timedelta.freq` and :attr:`Timedelta.is_populated` (:issue:`46430`) +- Deprecated :attr:`Timedelta.delta` (:issue:`46476`) +- Deprecated passing arguments as positional in :meth:`DataFrame.any` and :meth:`Series.any` (:issue:`44802`) +- Deprecated passing positional arguments to :meth:`DataFrame.pivot` and :func:`pivot` except ``data`` (:issue:`30228`) +- Deprecated the methods :meth:`DataFrame.mad`, :meth:`Series.mad`, and the corresponding groupby methods (:issue:`11787`) +- Deprecated positional arguments to :meth:`Index.join` except for ``other``, use keyword-only arguments instead of positional arguments (:issue:`46518`) +- Deprecated positional arguments to :meth:`StringMethods.rsplit` and :meth:`StringMethods.split` except for ``pat``, use keyword-only arguments instead of positional arguments (:issue:`47423`) +- Deprecated indexing on a timezone-naive :class:`DatetimeIndex` using a string representing a timezone-aware datetime (:issue:`46903`, :issue:`36148`) +- Deprecated allowing ``unit="M"`` or ``unit="Y"`` in :class:`Timestamp` constructor with a non-round float value (:issue:`47267`) +- Deprecated the ``display.column_space`` global configuration option (:issue:`7576`) +- Deprecated the argument ``na_sentinel`` in :func:`factorize`, :meth:`Index.factorize`, and :meth:`.ExtensionArray.factorize`; pass ``use_na_sentinel=True`` instead to use the sentinel ``-1`` for NaN values and ``use_na_sentinel=False`` instead of ``na_sentinel=None`` to encode NaN values (:issue:`46910`) +- Deprecated :meth:`.DataFrameGroupBy.transform` not aligning the result when the UDF returned DataFrame (:issue:`45648`) +- Clarified warning from :func:`to_datetime` when delimited dates can't be parsed in accordance to specified ``dayfirst`` argument (:issue:`46210`) +- Emit warning from :func:`to_datetime` when delimited dates can't be parsed in accordance to specified ``dayfirst`` argument even for dates where leading zero is omitted (e.g. ``31/1/2001``) (:issue:`47880`) +- Deprecated :class:`Series` and :class:`Resampler` reducers (e.g. 
``min``, ``max``, ``sum``, ``mean``) raising a ``NotImplementedError`` when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
+- Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
+- Deprecated argument ``errors`` for :meth:`Series.mask`, :meth:`Series.where`, :meth:`DataFrame.mask`, and :meth:`DataFrame.where` as ``errors`` had no effect on these methods (:issue:`47728`)
+- Deprecated arguments ``*args`` and ``**kwargs`` in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. (:issue:`47836`)
+- Deprecated the ``inplace`` keyword in :meth:`Categorical.set_ordered`, :meth:`Categorical.as_ordered`, and :meth:`Categorical.as_unordered` (:issue:`37643`)
+- Deprecated setting a categorical's categories with ``cat.categories = ['a', 'b', 'c']``, use :meth:`Categorical.rename_categories` instead (:issue:`37643`)
+- Deprecated unused arguments ``encoding`` and ``verbose`` in :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` (:issue:`47912`)
+- Deprecated the ``inplace`` keyword in :meth:`DataFrame.set_axis` and :meth:`Series.set_axis`, use ``obj = obj.set_axis(..., copy=False)`` instead (:issue:`48130`)
+- Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; a tuple of length one will be returned instead (:issue:`42795`)
+- Fixed up warning message of deprecation of :meth:`MultiIndex.lexsort_depth` as public method, as the message previously referred to :meth:`MultiIndex.is_lexsorted` instead (:issue:`38701`)
+- Deprecated the ``sort_columns`` argument in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`).
+- Deprecated positional arguments for all but the first argument of :meth:`DataFrame.to_stata` and :func:`read_stata`, use keyword arguments instead (:issue:`48128`).
+- Deprecated the ``mangle_dupe_cols`` argument in :func:`read_csv`, :func:`read_fwf`, :func:`read_table` and :func:`read_excel`. The argument was never implemented, and a new argument where the renaming pattern can be specified will be added instead (:issue:`47718`)
+- Deprecated allowing ``dtype='datetime64'`` or ``dtype=np.datetime64`` in :meth:`Series.astype`, use ``"datetime64[ns]"`` instead (:issue:`47844`)
+
+.. ---------------------------------------------------------------------------
+..
_whatsnew_150.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ +- Performance improvement in :meth:`DataFrame.corrwith` for column-wise (axis=0) Pearson and Spearman correlation when other is a :class:`Series` (:issue:`46174`) +- Performance improvement in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` for some user-defined DataFrame -> Series functions (:issue:`45387`) +- Performance improvement in :meth:`DataFrame.duplicated` when subset consists of only one column (:issue:`45236`) +- Performance improvement in :meth:`.DataFrameGroupBy.diff` and :meth:`.SeriesGroupBy.diff` (:issue:`16706`) +- Performance improvement in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` when broadcasting values for user-defined functions (:issue:`45708`) +- Performance improvement in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` for user-defined functions when only a single group exists (:issue:`44977`) +- Performance improvement in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` when grouping on a non-unique unsorted index (:issue:`46527`) +- Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`45681`, :issue:`46040`, :issue:`46330`) +- Performance improvement in :meth:`.DataFrameGroupBy.var` and :meth:`.SeriesGroupBy.var` with ``ddof`` other than one (:issue:`48152`) +- Performance improvement in :meth:`DataFrame.to_records` when the index is a :class:`MultiIndex` (:issue:`47263`) +- Performance improvement in :attr:`MultiIndex.values` when the MultiIndex contains levels of type DatetimeIndex, TimedeltaIndex or ExtensionDtypes (:issue:`46288`) +- Performance improvement in :func:`merge` when left and/or right are empty (:issue:`45838`) +- Performance improvement in :meth:`DataFrame.join` when left and/or right are empty (:issue:`46015`) +- Performance improvement in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when target is a :class:`MultiIndex` (:issue:`46235`) +- Performance improvement when setting values in a pyarrow backed string array (:issue:`46400`) +- Performance improvement in :func:`factorize` (:issue:`46109`) +- Performance improvement in :class:`DataFrame` and :class:`Series` constructors for extension dtype scalars (:issue:`45854`) +- Performance improvement in :func:`read_excel` when ``nrows`` argument provided (:issue:`32727`) +- Performance improvement in :meth:`.Styler.to_excel` when applying repeated CSS formats (:issue:`47371`) +- Performance improvement in :meth:`MultiIndex.is_monotonic_increasing` (:issue:`47458`) +- Performance improvement in :class:`BusinessHour` ``str`` and ``repr`` (:issue:`44764`) +- Performance improvement in datetime arrays string formatting when one of the default strftime formats ``"%Y-%m-%d %H:%M:%S"`` or ``"%Y-%m-%d %H:%M:%S.%f"`` is used. (:issue:`44764`) +- Performance improvement in :meth:`Series.to_sql` and :meth:`DataFrame.to_sql` (:class:`SQLiteTable`) when processing time arrays. (:issue:`44764`) +- Performance improvement to :func:`read_sas` (:issue:`47404`) +- Performance improvement in ``argmax`` and ``argmin`` for :class:`arrays.SparseArray` (:issue:`34197`) + +.. --------------------------------------------------------------------------- +.. 
_whatsnew_150.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ +- Bug in :meth:`.Categorical.view` not accepting integer dtypes (:issue:`25464`) +- Bug in :meth:`.CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`) +- Bug in :meth:`concat` when concatenating two (or more) unordered :class:`CategoricalIndex` variables, whose categories are permutations, yields incorrect index values (:issue:`24845`) + +Datetimelike +^^^^^^^^^^^^ +- Bug in :meth:`DataFrame.quantile` with datetime-like dtypes and no rows incorrectly returning ``float64`` dtype instead of retaining datetime-like dtype (:issue:`41544`) +- Bug in :func:`to_datetime` with sequences of ``np.str_`` objects incorrectly raising (:issue:`32264`) +- Bug in :class:`Timestamp` construction when passing datetime components as positional arguments and ``tzinfo`` as a keyword argument incorrectly raising (:issue:`31929`) +- Bug in :meth:`Index.astype` when casting from object dtype to ``timedelta64[ns]`` dtype incorrectly casting ``np.datetime64("NaT")`` values to ``np.timedelta64("NaT")`` instead of raising (:issue:`45722`) +- Bug in :meth:`.SeriesGroupBy.value_counts` index when passing categorical column (:issue:`44324`) +- Bug in :meth:`DatetimeIndex.tz_localize` localizing to UTC failing to make a copy of the underlying data (:issue:`46460`) +- Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`) +- Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`) +- Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`) +- Bug in :func:`to_datetime` where ``OutOfBoundsDatetime`` would be thrown even if ``errors=coerce`` if there were more than 50 rows (:issue:`45319`) +- Bug when adding a :class:`DateOffset` to a :class:`Series` would not add the ``nanoseconds`` field (:issue:`47856`) + +Timedelta +^^^^^^^^^ +- Bug in :func:`astype_nansafe` astype("timedelta64[ns]") fails when np.nan is included (:issue:`45798`) +- Bug in constructing a :class:`Timedelta` with a ``np.timedelta64`` object and a ``unit`` sometimes silently overflowing and returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`46827`) +- Bug in constructing a :class:`Timedelta` from a large integer or float with ``unit="W"`` silently overflowing and returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`47268`) + +Time Zones +^^^^^^^^^^ +- Bug in :class:`Timestamp` constructor raising when passed a ``ZoneInfo`` tzinfo object (:issue:`46425`) + +Numeric +^^^^^^^ +- Bug in operations with array-likes with ``dtype="boolean"`` and :attr:`NA` incorrectly altering the array in-place (:issue:`45421`) +- Bug in arithmetic operations with nullable types without :attr:`NA` values not matching the same operation with non-nullable types (:issue:`48223`) +- Bug in ``floordiv`` when dividing by ``IntegerDtype`` ``0`` would return ``0`` instead of ``inf`` (:issue:`48223`) +- Bug in division, ``pow`` and ``mod`` operations on array-likes with ``dtype="boolean"`` not being like their ``np.bool_`` counterparts (:issue:`46063`) +- Bug in multiplying a :class:`Series` with ``IntegerDtype`` or ``FloatingDtype`` by an 
array-like with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`45622`) +- Bug in :meth:`mean` where the optional dependency ``bottleneck`` causes precision loss linear in the length of the array. ``bottleneck`` has been disabled for :meth:`mean` improving the loss to log-linear but may result in a performance decrease. (:issue:`42878`) + +Conversion +^^^^^^^^^^ +- Bug in :meth:`DataFrame.astype` not preserving subclasses (:issue:`40810`) +- Bug in constructing a :class:`Series` from a float-containing list or a floating-dtype ndarray-like (e.g. ``dask.Array``) and an integer dtype raising instead of casting like we would with an ``np.ndarray`` (:issue:`40110`) +- Bug in :meth:`Float64Index.astype` to unsigned integer dtype incorrectly casting to ``np.int64`` dtype (:issue:`45309`) +- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`) +- Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`) +- Bug when comparing string and datetime64ns objects causing ``OverflowError`` exception. (:issue:`45506`) +- Bug in metaclass of generic abstract dtypes causing :meth:`DataFrame.apply` and :meth:`Series.apply` to raise for the built-in function ``type`` (:issue:`46684`) +- Bug in :meth:`DataFrame.to_records` returning inconsistent numpy types if the index was a :class:`MultiIndex` (:issue:`47263`) +- Bug in :meth:`DataFrame.to_dict` for ``orient="list"`` or ``orient="index"`` was not returning native types (:issue:`46751`) +- Bug in :meth:`DataFrame.apply` that returns a :class:`DataFrame` instead of a :class:`Series` when applied to an empty :class:`DataFrame` and ``axis=1`` (:issue:`39111`) +- Bug when inferring the dtype from an iterable that is *not* a NumPy ``ndarray`` consisting of all NumPy unsigned integer scalars did not result in an unsigned integer dtype (:issue:`47294`) +- Bug in :meth:`DataFrame.eval` when pandas objects (e.g. ``'Timestamp'``) were column names (:issue:`44603`) + +Strings +^^^^^^^ +- Bug in :meth:`str.startswith` and :meth:`str.endswith` when using other series as parameter _pat_. 
Now raises ``TypeError`` (:issue:`3485`) +- Bug in :meth:`Series.str.zfill` when strings contain leading signs, padding '0' before the sign character rather than after as ``str.zfill`` from standard library (:issue:`20868`) + +Interval +^^^^^^^^ +- Bug in :meth:`IntervalArray.__setitem__` when setting ``np.nan`` into an integer-backed array raising ``ValueError`` instead of ``TypeError`` (:issue:`45484`) +- Bug in :class:`IntervalDtype` when using datetime64[ns, tz] as a dtype string (:issue:`46999`) + +Indexing +^^^^^^^^ +- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`) +- Bug in :meth:`DataFrame.__getitem__` returning copy when :class:`DataFrame` has duplicated columns even if a unique column is selected (:issue:`45316`, :issue:`41062`) +- Bug in :meth:`Series.align` does not create :class:`MultiIndex` with union of levels when both MultiIndexes intersections are identical (:issue:`45224`) +- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`) +- Bug in indexing setting values into an ``ExtensionDtype`` column with ``df.iloc[:, i] = values`` with ``values`` having the same dtype as ``df.iloc[:, i]`` incorrectly inserting a new array instead of setting in-place (:issue:`33457`) +- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`) +- Bug in :meth:`DataFrame.loc` not casting ``None`` to ``NA`` when setting value as a list into :class:`DataFrame` (:issue:`47987`) +- Bug in :meth:`Series.__setitem__` when setting incompatible values into a ``PeriodDtype`` or ``IntervalDtype`` :class:`Series` raising when indexing with a boolean mask but coercing when indexing with otherwise-equivalent indexers; these now consistently coerce, along with :meth:`Series.mask` and :meth:`Series.where` (:issue:`45768`) +- Bug in :meth:`DataFrame.where` with multiple columns with datetime-like dtypes failing to downcast results consistent with other dtypes (:issue:`45837`) +- Bug in :func:`isin` upcasting to ``float64`` with unsigned integer dtype and list-like argument without a dtype (:issue:`46485`) +- Bug in :meth:`Series.loc.__setitem__` and :meth:`Series.loc.__getitem__` not raising when using multiple keys without using a :class:`MultiIndex` (:issue:`13831`) +- Bug in :meth:`Index.reindex` raising ``AssertionError`` when ``level`` was specified but no :class:`MultiIndex` was given; level is ignored now (:issue:`35132`) +- Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`) +- Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`) +- Bug in :meth:`DataFrame.__setitem__` casting extension array dtypes to object when setting with a scalar key and :class:`DataFrame` as value (:issue:`46896`) +- Bug in :meth:`Series.__setitem__` when setting a scalar to a nullable pandas dtype would not raise a ``TypeError`` if the scalar could not be cast (losslessly) to the nullable type (:issue:`45404`) +- Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of 
casting to ``boolean`` dtype (:issue:`45462`) +- Bug in :meth:`Series.loc` raising with boolean indexer containing ``NA`` when :class:`Index` did not match (:issue:`46551`) +- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`) +- Bug in :meth:`DataFrame.loc` when setting values to a column and right hand side is a dictionary (:issue:`47216`) +- Bug in :meth:`Series.__setitem__` with ``datetime64[ns]`` dtype, an all-``False`` boolean mask, and an incompatible value incorrectly casting to ``object`` instead of retaining ``datetime64[ns]`` dtype (:issue:`45967`) +- Bug in :meth:`Index.__getitem__` raising ``ValueError`` when indexer is from boolean dtype with ``NA`` (:issue:`45806`) +- Bug in :meth:`Series.__setitem__` losing precision when enlarging :class:`Series` with scalar (:issue:`32346`) +- Bug in :meth:`Series.mask` with ``inplace=True`` or setting values with a boolean mask with small integer dtypes incorrectly raising (:issue:`45750`) +- Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`) +- Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`) +- Bug in :meth:`DataFrame.__getattribute__` raising ``AttributeError`` if columns have ``"string"`` dtype (:issue:`46185`) +- Bug in :meth:`DataFrame.compare` returning all ``NaN`` column when comparing extension array dtype and numpy dtype (:issue:`44014`) +- Bug in :meth:`DataFrame.where` setting wrong values with ``"boolean"`` mask for numpy dtype (:issue:`44014`) +- Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`) +- Bug in :meth:`CategoricalIndex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`) +- Bug in setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`) +- Bug in :meth:`Series.asof` and :meth:`DataFrame.asof` incorrectly casting bool-dtype results to ``float64`` dtype (:issue:`16063`) +- Bug in :meth:`NDFrame.xs`, :meth:`DataFrame.iterrows`, :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` not always propagating metadata (:issue:`28283`) +- Bug in :meth:`DataFrame.sum` min_count changes dtype if input contains NaNs (:issue:`46947`) +- Bug in :class:`IntervalTree` that lead to an infinite recursion. (:issue:`46658`) +- Bug in :class:`PeriodIndex` raising ``AttributeError`` when indexing on ``NA``, rather than putting ``NaT`` in its place. 
(:issue:`46673`) +- Bug in :meth:`DataFrame.at` would allow the modification of multiple columns (:issue:`48296`) + +Missing +^^^^^^^ +- Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``downcast`` keyword not being respected in some cases where there are no NA values present (:issue:`45423`) +- Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with :class:`IntervalDtype` and incompatible value raising instead of casting to a common (usually object) dtype (:issue:`45796`) +- Bug in :meth:`Series.map` not respecting ``na_action`` argument if mapper is a ``dict`` or :class:`Series` (:issue:`47527`) +- Bug in :meth:`DataFrame.interpolate` with object-dtype column not returning a copy with ``inplace=False`` (:issue:`45791`) +- Bug in :meth:`DataFrame.dropna` allows to set both ``how`` and ``thresh`` incompatible arguments (:issue:`46575`) +- Bug in :meth:`DataFrame.fillna` ignored ``axis`` when :class:`DataFrame` is single block (:issue:`47713`) + +MultiIndex +^^^^^^^^^^ +- Bug in :meth:`DataFrame.loc` returning empty result when slicing a :class:`MultiIndex` with a negative step size and non-null start/stop values (:issue:`46156`) +- Bug in :meth:`DataFrame.loc` raising when slicing a :class:`MultiIndex` with a negative step size other than -1 (:issue:`46156`) +- Bug in :meth:`DataFrame.loc` raising when slicing a :class:`MultiIndex` with a negative step size and slicing a non-int labeled index level (:issue:`46156`) +- Bug in :meth:`Series.to_numpy` where multiindexed Series could not be converted to numpy arrays when an ``na_value`` was supplied (:issue:`45774`) +- Bug in :class:`MultiIndex.equals` not commutative when only one side has extension array dtype (:issue:`46026`) +- Bug in :meth:`MultiIndex.from_tuples` cannot construct Index of empty tuples (:issue:`45608`) + +I/O +^^^ +- Bug in :meth:`DataFrame.to_stata` where no error is raised if the :class:`DataFrame` contains ``-np.inf`` (:issue:`45350`) +- Bug in :func:`read_excel` results in an infinite loop with certain ``skiprows`` callables (:issue:`45585`) +- Bug in :meth:`DataFrame.info` where a new line at the end of the output is omitted when called on an empty :class:`DataFrame` (:issue:`45494`) +- Bug in :func:`read_csv` not recognizing line break for ``on_bad_lines="warn"`` for ``engine="c"`` (:issue:`41710`) +- Bug in :meth:`DataFrame.to_csv` not respecting ``float_format`` for ``Float64`` dtype (:issue:`45991`) +- Bug in :func:`read_csv` not respecting a specified converter to index columns in all cases (:issue:`40589`) +- Bug in :func:`read_csv` interpreting second row as :class:`Index` names even when ``index_col=False`` (:issue:`46569`) +- Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`) +- Bug in :func:`DataFrame.to_excel` and :class:`ExcelWriter` would raise when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`) +- Bug in :func:`read_csv` ignoring non-existing header row for ``engine="python"`` (:issue:`47400`) +- Bug in :func:`read_excel` raising uncontrolled ``IndexError`` when ``header`` references non-existing rows (:issue:`43143`) +- Bug in :func:`read_html` where elements surrounding ``
`` were joined without a space between them (:issue:`29528`) +- Bug in :func:`read_csv` when data is longer than header leading to issues with callables in ``usecols`` expecting strings (:issue:`46997`) +- Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`) +- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`) +- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`) +- :meth:`DataFrame.to_html` now excludes the ``border`` attribute from ```` elements when ``border`` keyword is set to ``False``. +- Bug in :func:`read_sas` with certain types of compressed SAS7BDAT files (:issue:`35545`) +- Bug in :func:`read_excel` not forward filling :class:`MultiIndex` when no names were given (:issue:`47487`) +- Bug in :func:`read_sas` returned ``None`` rather than an empty DataFrame for SAS7BDAT files with zero rows (:issue:`18198`) +- Bug in :meth:`DataFrame.to_string` using wrong missing value with extension arrays in :class:`MultiIndex` (:issue:`47986`) +- Bug in :class:`StataWriter` where value labels were always written with default encoding (:issue:`46750`) +- Bug in :class:`StataWriterUTF8` where some valid characters were removed from variable names (:issue:`47276`) +- Bug in :meth:`DataFrame.to_excel` when writing an empty dataframe with :class:`MultiIndex` (:issue:`19543`) +- Bug in :func:`read_sas` with RLE-compressed SAS7BDAT files that contain 0x40 control bytes (:issue:`31243`) +- Bug in :func:`read_sas` that scrambled column names (:issue:`31243`) +- Bug in :func:`read_sas` with RLE-compressed SAS7BDAT files that contain 0x00 control bytes (:issue:`47099`) +- Bug in :func:`read_parquet` with ``use_nullable_dtypes=True`` where ``float64`` dtype was returned instead of nullable ``Float64`` dtype (:issue:`45694`) +- Bug in :meth:`DataFrame.to_json` where ``PeriodDtype`` would not make the serialization roundtrip when read back with :meth:`read_json` (:issue:`44720`) +- Bug in :func:`read_xml` when reading XML files with Chinese character tags and would raise ``XMLSyntaxError`` (:issue:`47902`) + +Period +^^^^^^ +- Bug in subtraction of :class:`Period` from :class:`.PeriodArray` returning wrong results (:issue:`45999`) +- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`) +- Bug in inferring an incorrect ``freq`` when passing a string to :class:`Period` microseconds that are a multiple of 1000 (:issue:`46811`) +- Bug in constructing a :class:`Period` from a :class:`Timestamp` or ``np.datetime64`` object with non-zero nanoseconds and ``freq="ns"`` incorrectly truncating the nanoseconds (:issue:`46811`) +- Bug in adding ``np.timedelta64("NaT", "ns")`` to a :class:`Period` with a timedelta-like freq incorrectly raising ``IncompatibleFrequency`` instead of returning ``NaT`` (:issue:`47196`) +- Bug in adding an array of integers to an array with :class:`PeriodDtype` giving incorrect results when ``dtype.freq.n > 1`` (:issue:`47209`) +- Bug in subtracting a :class:`Period` from an array with :class:`PeriodDtype` returning incorrect results instead of raising ``OverflowError`` when the operation overflows (:issue:`47538`) + +Plotting +^^^^^^^^ +- Bug in :meth:`DataFrame.plot.barh` that prevented labeling the x-axis and ``xlabel`` updating the y-axis label (:issue:`45144`) +- Bug in :meth:`DataFrame.plot.box` that prevented labeling the x-axis (:issue:`45463`) +- 
Bug in :meth:`DataFrame.boxplot` that prevented passing in ``xlabel`` and ``ylabel`` (:issue:`45463`) +- Bug in :meth:`DataFrame.boxplot` that prevented specifying ``vert=False`` (:issue:`36918`) +- Bug in :meth:`DataFrame.plot.scatter` that prevented specifying ``norm`` (:issue:`45809`) +- Fix showing "None" as ylabel in :meth:`Series.plot` when not setting ylabel (:issue:`46129`) +- Bug in :meth:`DataFrame.plot` that led to xticks and vertical grids being improperly placed when plotting a quarterly series (:issue:`47602`) +- Bug in :meth:`DataFrame.plot` that prevented setting y-axis label, limits and ticks for a secondary y-axis (:issue:`47753`) + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ +- Bug in :meth:`DataFrame.resample` ignoring ``closed="right"`` on :class:`TimedeltaIndex` (:issue:`45414`) +- Bug in :meth:`.DataFrameGroupBy.transform` fails when ``func="size"`` and the input DataFrame has multiple columns (:issue:`27469`) +- Bug in :meth:`.DataFrameGroupBy.size` and :meth:`.DataFrameGroupBy.transform` with ``func="size"`` produced incorrect results when ``axis=1`` (:issue:`45715`) +- Bug in :meth:`.ExponentialMovingWindow.mean` with ``axis=1`` and ``engine='numba'`` when the :class:`DataFrame` has more columns than rows (:issue:`46086`) +- Bug when using ``engine="numba"`` would return the same jitted function when modifying ``engine_kwargs`` (:issue:`46086`) +- Bug in :meth:`.DataFrameGroupBy.transform` fails when ``axis=1`` and ``func`` is ``"first"`` or ``"last"`` (:issue:`45986`) +- Bug in :meth:`.DataFrameGroupBy.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`) +- Bug in :meth:`.DataFrameGroupBy.sum`, :meth:`.SeriesGroupBy.sum`, :meth:`.DataFrameGroupBy.prod`, :meth:`.SeriesGroupBy.prod, :meth:`.DataFrameGroupBy.cumsum`, and :meth:`.SeriesGroupBy.cumsum` with integer dtypes losing precision (:issue:`37493`) +- Bug in :meth:`.DataFrameGroupBy.cumsum` and :meth:`.SeriesGroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`) +- Bug in :meth:`.DataFrameGroupBy.cumsum` and :meth:`.SeriesGroupBy.cumsum` with integer dtypes causing overflows when sum was bigger than maximum of dtype (:issue:`37493`) +- Bug in :meth:`.DataFrameGroupBy.cummin`, :meth:`.SeriesGroupBy.cummin`, :meth:`.DataFrameGroupBy.cummax` and :meth:`.SeriesGroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`) +- Bug in :meth:`DataFrame.groupby` raising error when ``None`` is in first level of :class:`MultiIndex` (:issue:`47348`) +- Bug in :meth:`.DataFrameGroupBy.cummax` and :meth:`.SeriesGroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`) +- Bug in :meth:`.DataFrameGroupBy.cumprod` and :meth:`.SeriesGroupBy.cumprod` ``NaN`` influences calculation in different columns with ``skipna=False`` (:issue:`48064`) +- Bug in :meth:`.DataFrameGroupBy.max` and :meth:`.SeriesGroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`) +- Bug in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` would fail when ``func`` was a string and args or kwargs were supplied (:issue:`46479`) +- Bug in :meth:`SeriesGroupBy.apply` would incorrectly name its result when there was a unique group (:issue:`46369`) +- Bug in :meth:`.Rolling.sum` and :meth:`.Rolling.mean` would give incorrect result with window of same values (:issue:`42064`, :issue:`46431`) +- Bug in :meth:`.Rolling.var` and 
:meth:`.Rolling.std` would give non-zero result with window of same values (:issue:`42064`) +- Bug in :meth:`.Rolling.skew` and :meth:`.Rolling.kurt` would give NaN with window of same values (:issue:`30993`) +- Bug in :meth:`.Rolling.var` would segfault calculating weighted variance when window size was larger than data size (:issue:`46760`) +- Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`) +- Bug in :meth:`DataFrame.rolling` gives ValueError when center=True, axis=1 and win_type is specified (:issue:`46135`) +- Bug in :meth:`.DataFrameGroupBy.describe` and :meth:`.SeriesGroupBy.describe` produces inconsistent results for empty datasets (:issue:`41575`) +- Bug in :meth:`DataFrame.resample` reduction methods when used with ``on`` would attempt to aggregate the provided column (:issue:`47079`) +- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would not respect ``dropna=False`` when the input DataFrame/Series had a NaN values in a :class:`MultiIndex` (:issue:`46783`) +- Bug in :meth:`DataFrameGroupBy.resample` raises ``KeyError`` when getting the result from a key list which misses the resample key (:issue:`47362`) +- Bug in :meth:`DataFrame.groupby` would lose index columns when the DataFrame is empty for transforms, like fillna (:issue:`47787`) +- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` with ``dropna=False`` and ``sort=False`` would put any null groups at the end instead the order that they are encountered (:issue:`46584`) + +Reshaping +^^^^^^^^^ +- Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`) +- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`) +- Bug in :meth:`DataFrame.align` when aligning a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`) +- Bug in concatenation with ``IntegerDtype``, or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`) +- Bug in :func:`concat` losing dtype of columns when ``join="outer"`` and ``sort=True`` (:issue:`47329`) +- Bug in :func:`concat` not sorting the column names when ``None`` is included (:issue:`47331`) +- Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`) +- Bug in :func:`pivot_table` raising ``TypeError`` when ``dropna=True`` and aggregation column has extension array dtype (:issue:`47477`) +- Bug in :func:`merge` raising error for ``how="cross"`` when using ``FIPS`` mode in ssl library (:issue:`48024`) +- Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) +- Bug in :meth:`DataFrame.pivot_table` with ``sort=False`` results in sorted index (:issue:`17041`) +- Bug in :meth:`concat` when ``axis=1`` and ``sort=False`` where the resulting Index was a :class:`Int64Index` instead of a :class:`RangeIndex` (:issue:`46675`) +- Bug in :meth:`wide_to_long` raises when ``stubnames`` is missing in columns and ``i`` contains string dtype column (:issue:`46044`) +- Bug in :meth:`DataFrame.join` with categorical index results in unexpected reordering (:issue:`47812`) + +Sparse +^^^^^^ +- Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``SparseDtype`` failing to retain the array's ``fill_value`` 
(:issue:`45691`) +- Bug in :meth:`SparseArray.unique` fails to keep original elements order (:issue:`47809`) + +ExtensionArray +^^^^^^^^^^^^^^ +- Bug in :meth:`IntegerArray.searchsorted` and :meth:`FloatingArray.searchsorted` returning inconsistent results when acting on ``np.nan`` (:issue:`45255`) + +Styler +^^^^^^ +- Bug when attempting to apply styling functions to an empty DataFrame subset (:issue:`45313`) +- Bug in :class:`CSSToExcelConverter` leading to ``TypeError`` when border color provided without border style for ``xlsxwriter`` engine (:issue:`42276`) +- Bug in :meth:`Styler.set_sticky` leading to white text on white background in dark mode (:issue:`46984`) +- Bug in :meth:`Styler.to_latex` causing ``UnboundLocalError`` when ``clines="all;data"`` and the ``DataFrame`` has no rows. (:issue:`47203`) +- Bug in :meth:`Styler.to_excel` when using ``vertical-align: middle;`` with ``xlsxwriter`` engine (:issue:`30107`) +- Bug when applying styles to a DataFrame with boolean column labels (:issue:`47838`) + +Metadata +^^^^^^^^ +- Fixed metadata propagation in :meth:`DataFrame.melt` (:issue:`28283`) +- Fixed metadata propagation in :meth:`DataFrame.explode` (:issue:`28283`) + +Other +^^^^^ + +.. ***DO NOT USE THIS SECTION*** + +- Bug in :func:`.assert_index_equal` with ``names=True`` and ``check_order=False`` not checking names (:issue:`47328`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_150.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.4.4..v1.5.0 diff --git a/libraries/pandas/releasenotes/v1.5.1.rst b/libraries/pandas/releasenotes/v1.5.1.rst new file mode 100644 index 0000000..bcd8ddb --- /dev/null +++ b/libraries/pandas/releasenotes/v1.5.1.rst @@ -0,0 +1,122 @@ +.. _whatsnew_151: + +What's new in 1.5.1 (October 19, 2022) +-------------------------------------- + +These are the changes in pandas 1.5.1. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_151.groupby_categorical_regr: + +Behavior of ``groupby`` with categorical groupers (:issue:`48645`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In versions of pandas prior to 1.5, ``groupby`` with ``dropna=False`` would still drop +NA values when the grouper was a categorical dtype. A fix for this was attempted in +1.5, however it introduced a regression where passing ``observed=False`` and +``dropna=False`` to ``groupby`` would result in only observed categories. It was found +that the patch fixing the ``dropna=False`` bug is incompatible with ``observed=False``, +and decided that the best resolution is to restore the correct ``observed=False`` +behavior at the cost of reintroducing the ``dropna=False`` bug. + +.. ipython:: python + + df = pd.DataFrame( + { + "x": pd.Categorical([1, None], categories=[1, 2, 3]), + "y": [3, 4], + } + ) + df + +*1.5.0 behavior*: + +.. code-block:: ipython + + In [3]: # Correct behavior, NA values are not dropped + df.groupby("x", observed=True, dropna=False).sum() + Out[3]: + y + x + 1 3 + NaN 4 + + + In [4]: # Incorrect behavior, only observed categories present + df.groupby("x", observed=False, dropna=False).sum() + Out[4]: + y + x + 1 3 + NaN 4 + + +*1.5.1 behavior*: + +.. 
ipython:: python + + # Incorrect behavior, NA values are dropped + df.groupby("x", observed=True, dropna=False).sum() + + # Correct behavior, unobserved categories present (NA values still dropped) + df.groupby("x", observed=False, dropna=False).sum() + +.. _whatsnew_151.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed Regression in :meth:`Series.__setitem__` casting ``None`` to ``NaN`` for object dtype (:issue:`48665`) +- Fixed Regression in :meth:`DataFrame.loc` when setting values as a :class:`DataFrame` with all ``True`` indexer (:issue:`48701`) +- Regression in :func:`.read_csv` causing an ``EmptyDataError`` when using an UTF-8 file handle that was already read from (:issue:`48646`) +- Regression in :func:`to_datetime` when ``utc=True`` and ``arg`` contained timezone naive and aware arguments raised a ``ValueError`` (:issue:`48678`) +- Fixed regression in :meth:`DataFrame.loc` raising ``FutureWarning`` when setting an empty :class:`DataFrame` (:issue:`48480`) +- Fixed regression in :meth:`DataFrame.describe` raising ``TypeError`` when result contains ``NA`` (:issue:`48778`) +- Fixed regression in :meth:`DataFrame.plot` ignoring invalid ``colormap`` for ``kind="scatter"`` (:issue:`48726`) +- Fixed regression in :meth:`MultiIndex.values` resetting ``freq`` attribute of underlying :class:`Index` object (:issue:`49054`) +- Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`) +- Fixed regression causing an ``AttributeError`` during warning emitted if the provided table name in :meth:`DataFrame.to_sql` and the table name actually used in the database do not match (:issue:`48733`) +- Fixed regression in :func:`to_datetime` when ``arg`` was a date string with nanosecond and ``format`` contained ``%f`` would raise a ``ValueError`` (:issue:`48767`) +- Fixed regression in :func:`testing.assert_frame_equal` raising for :class:`MultiIndex` with :class:`Categorical` and ``check_like=True`` (:issue:`48975`) +- Fixed regression in :meth:`DataFrame.fillna` replacing wrong values for ``datetime64[ns]`` dtype and ``inplace=True`` (:issue:`48863`) +- Fixed :meth:`.DataFrameGroupBy.size` not returning a Series when ``axis=1`` (:issue:`48738`) +- Fixed Regression in :meth:`.DataFrameGroupBy.apply` when user defined function is called on an empty dataframe (:issue:`47985`) +- Fixed regression in :meth:`DataFrame.apply` when passing non-zero ``axis`` via keyword argument (:issue:`48656`) +- Fixed regression in :meth:`Series.groupby` and :meth:`DataFrame.groupby` when the grouper is a nullable data type (e.g. :class:`Int64`) or a PyArrow-backed string array, contains null values, and ``dropna=False`` (:issue:`48794`) +- Fixed performance regression in :meth:`Series.isin` with mismatching dtypes (:issue:`49162`) +- Fixed regression in :meth:`DataFrame.to_parquet` raising when file name was specified as ``bytes`` (:issue:`48944`) +- Fixed regression in :class:`ExcelWriter` where the ``book`` attribute could no longer be set; however setting this attribute is now deprecated and this ability will be removed in a future version of pandas (:issue:`48780`) +- Fixed regression in :meth:`DataFrame.corrwith` when computing correlation on tied data with ``method="spearman"`` (:issue:`48826`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_151.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in :meth:`Series.__getitem__` not falling back to positional for integer keys and boolean :class:`Index` (:issue:`48653`) +- Bug in :meth:`DataFrame.to_hdf` raising ``AssertionError`` with boolean index (:issue:`48667`) +- Bug in :func:`testing.assert_index_equal` for extension arrays with non matching ``NA`` raising ``ValueError`` (:issue:`48608`) +- Bug in :meth:`DataFrame.pivot_table` raising unexpected ``FutureWarning`` when setting datetime column as index (:issue:`48683`) +- Bug in :meth:`DataFrame.sort_values` emitting unnecessary ``FutureWarning`` when called on :class:`DataFrame` with boolean sparse columns (:issue:`48784`) +- Bug in :class:`.arrays.ArrowExtensionArray` with a comparison operator to an invalid object would not raise a ``NotImplementedError`` (:issue:`48833`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_151.other: + +Other +~~~~~ +- Avoid showing deprecated signatures when introspecting functions with warnings about arguments becoming keyword-only (:issue:`48692`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_151.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.5.0..v1.5.1 diff --git a/libraries/pandas/releasenotes/v1.5.2.rst b/libraries/pandas/releasenotes/v1.5.2.rst new file mode 100644 index 0000000..efc8ca6 --- /dev/null +++ b/libraries/pandas/releasenotes/v1.5.2.rst @@ -0,0 +1,46 @@ +.. _whatsnew_152: + +What's new in 1.5.2 (November 21, 2022) +--------------------------------------- + +These are the changes in pandas 1.5.2. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- +.. _whatsnew_152.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`MultiIndex.join` for extension array dtypes (:issue:`49277`) +- Fixed regression in :meth:`Series.replace` raising ``RecursionError`` with numeric dtype and when specifying ``value=None`` (:issue:`45725`) +- Fixed regression in arithmetic operations for :class:`DataFrame` with :class:`MultiIndex` columns with different dtypes (:issue:`49769`) +- Fixed regression in :meth:`DataFrame.plot` preventing :class:`~matplotlib.colors.Colormap` instance + from being passed using the ``colormap`` argument if Matplotlib 3.6+ is used (:issue:`49374`) +- Fixed regression in :func:`date_range` returning an invalid set of periods for ``CustomBusinessDay`` frequency and ``start`` date with timezone (:issue:`49441`) +- Fixed performance regression in groupby operations (:issue:`49676`) +- Fixed regression in :class:`Timedelta` constructor returning object of wrong type when subclassing ``Timedelta`` (:issue:`49579`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_152.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in the Copy-on-Write implementation losing track of views in certain chained indexing cases (:issue:`48996`) +- Fixed memory leak in :meth:`.Styler.to_excel` (:issue:`49751`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_152.other: + +Other +~~~~~ +- Reverted ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` in function :meth:`DataFrame.plot.scatter` (:issue:`49732`) + +.. --------------------------------------------------------------------------- +.. 
_whatsnew_152.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.5.1..v1.5.2 diff --git a/libraries/pandas/releasenotes/v1.5.3.rst b/libraries/pandas/releasenotes/v1.5.3.rst new file mode 100644 index 0000000..5358d45 --- /dev/null +++ b/libraries/pandas/releasenotes/v1.5.3.rst @@ -0,0 +1,58 @@ +.. _whatsnew_153: + +What's new in 1.5.3 (January 18, 2023) +-------------------------------------- + +These are the changes in pandas 1.5.3. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- +.. _whatsnew_153.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed performance regression in :meth:`Series.isin` when ``values`` is empty (:issue:`49839`) +- Fixed regression in :meth:`DataFrame.memory_usage` showing unnecessary ``FutureWarning`` when :class:`DataFrame` is empty (:issue:`50066`) +- Fixed regression in :meth:`.DataFrameGroupBy.transform` when used with ``as_index=False`` (:issue:`49834`) +- Enforced reversion of ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` in function :meth:`DataFrame.plot.scatter` (:issue:`49732`) +- Fixed regression in :meth:`.SeriesGroupBy.apply` setting a ``name`` attribute on the result if the result was a :class:`DataFrame` (:issue:`49907`) +- Fixed performance regression in setting with the :meth:`~DataFrame.at` indexer (:issue:`49771`) +- Fixed regression in :func:`to_datetime` raising ``ValueError`` when parsing array of ``float`` containing ``np.nan`` (:issue:`50237`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_153.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in the Copy-on-Write implementation losing track of views when indexing a :class:`DataFrame` with another :class:`DataFrame` (:issue:`50630`) +- Bug in :meth:`.Styler.to_excel` leading to error when unrecognized ``border-style`` (e.g. ``"hair"``) provided to Excel writers (:issue:`48649`) +- Bug in :meth:`Series.quantile` emitting warning from NumPy when :class:`Series` has only ``NA`` values (:issue:`50681`) +- Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`) +- Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on python 3.11 (:issue:`49649`) +- Bug in :func:`pivot_table` with NumPy 1.24 or greater when the :class:`DataFrame` columns has nested elements (:issue:`50342`) +- Bug in :func:`pandas.testing.assert_series_equal` (and equivalent ``assert_`` functions) when having nested data and using numpy >= 1.25 (:issue:`50360`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_153.other: + +Other +~~~~~ + +.. note:: + + If you are using :meth:`DataFrame.to_sql`, :func:`read_sql`, :func:`read_sql_table`, or :func:`read_sql_query` with SQLAlchemy 1.4.46 or greater, + you may see a ``sqlalchemy.exc.RemovedIn20Warning``. These warnings can be safely ignored for the SQLAlchemy 1.4.x releases + as pandas works toward compatibility with SQLAlchemy 2.0. 
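+
+    For example, a minimal way to silence these warnings explicitly (an optional
+    illustration, assuming the SQLAlchemy 1.4.x series is installed; the warnings
+    are harmless for pandas and may simply be left alone) is:
+
+    .. code-block:: python
+
+        import warnings
+
+        from sqlalchemy.exc import RemovedIn20Warning
+
+        # Hide the SQLAlchemy 2.0 migration warnings emitted by the 1.4.x series
+        warnings.filterwarnings("ignore", category=RemovedIn20Warning)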
+ +- Reverted deprecation (:issue:`45324`) of behavior of :meth:`Series.__getitem__` and :meth:`Series.__setitem__` slicing with an integer :class:`Index`; this will remain positional (:issue:`49612`) +- A ``FutureWarning`` raised when attempting to set values inplace with :meth:`DataFrame.loc` or :meth:`DataFrame.iloc` has been changed to a ``DeprecationWarning`` (:issue:`48673`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_153.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.5.2..v1.5.3 diff --git a/libraries/pandas/releasenotes/v2.0.0.rst b/libraries/pandas/releasenotes/v2.0.0.rst new file mode 100644 index 0000000..3ddc8b8 --- /dev/null +++ b/libraries/pandas/releasenotes/v2.0.0.rst @@ -0,0 +1,1416 @@ +.. _whatsnew_200: + +What's new in 2.0.0 (April 3, 2023) +----------------------------------- + +These are the changes in pandas 2.0.0. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- +.. _whatsnew_200.enhancements: + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_200.enhancements.optional_dependency_management_pip: + +Installing optional dependencies with pip extras +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +When installing pandas using pip, sets of optional dependencies can also be installed by specifying extras. + +.. code-block:: bash + + pip install "pandas[performance, aws]>=2.0.0" + +The available extras, found in the :ref:`installation guide`, are +``[all, performance, computation, fss, aws, gcp, excel, parquet, feather, hdf5, spss, postgresql, mysql, +sql-other, html, xml, plot, output_formatting, clipboard, compression, test]`` (:issue:`39164`). + +.. _whatsnew_200.enhancements.index_can_hold_numpy_numeric_dtypes: + +:class:`Index` can now hold numpy numeric dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It is now possible to use any numpy numeric dtype in a :class:`Index` (:issue:`42717`). + +Previously it was only possible to use ``int64``, ``uint64`` & ``float64`` dtypes: + +.. code-block:: ipython + + In [1]: pd.Index([1, 2, 3], dtype=np.int8) + Out[1]: Int64Index([1, 2, 3], dtype="int64") + In [2]: pd.Index([1, 2, 3], dtype=np.uint16) + Out[2]: UInt64Index([1, 2, 3], dtype="uint64") + In [3]: pd.Index([1, 2, 3], dtype=np.float32) + Out[3]: Float64Index([1.0, 2.0, 3.0], dtype="float64") + +:class:`Int64Index`, :class:`UInt64Index` & :class:`Float64Index` were deprecated in pandas +version 1.4 and have now been removed. Instead :class:`Index` should be used directly, and +can it now take all numpy numeric dtypes, i.e. +``int8``/ ``int16``/``int32``/``int64``/``uint8``/``uint16``/``uint32``/``uint64``/``float32``/``float64`` dtypes: + +.. ipython:: python + + pd.Index([1, 2, 3], dtype=np.int8) + pd.Index([1, 2, 3], dtype=np.uint16) + pd.Index([1, 2, 3], dtype=np.float32) + +The ability for :class:`Index` to hold the numpy numeric dtypes has meant some changes in Pandas +functionality. In particular, operations that previously were forced to create 64-bit indexes, +can now create indexes with lower bit sizes, e.g. 32-bit indexes. + +Below is a possibly non-exhaustive list of changes: + +1. Instantiating using a numpy numeric array now follows the dtype of the numpy array. + Previously, all indexes created from numpy numeric arrays were forced to 64-bit. 
Now, + for example, ``Index(np.array([1, 2, 3]))`` will be ``int32`` on 32-bit systems, where + it previously would have been ``int64`` even on 32-bit systems. + Instantiating :class:`Index` using a list of numbers will still return 64bit dtypes, + e.g. ``Index([1, 2, 3])`` will have a ``int64`` dtype, which is the same as previously. +2. The various numeric datetime attributes of :class:`DatetimeIndex` (:attr:`~DatetimeIndex.day`, + :attr:`~DatetimeIndex.month`, :attr:`~DatetimeIndex.year` etc.) were previously in of + dtype ``int64``, while they were ``int32`` for :class:`arrays.DatetimeArray`. They are now + ``int32`` on :class:`DatetimeIndex` also: + + .. ipython:: python + + idx = pd.date_range(start='1/1/2018', periods=3, freq='M') + idx.array.year + idx.year + +3. Level dtypes on Indexes from :meth:`Series.sparse.from_coo` are now of dtype ``int32``, + the same as they are on the ``rows``/``cols`` on a scipy sparse matrix. Previously they + were of dtype ``int64``. + + .. ipython:: python + + from scipy import sparse + A = sparse.coo_matrix( + ([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4) + ) + ser = pd.Series.sparse.from_coo(A) + ser.index.dtypes + +4. :class:`Index` cannot be instantiated using a float16 dtype. Previously instantiating + an :class:`Index` using dtype ``float16`` resulted in a :class:`Float64Index` with a + ``float64`` dtype. It now raises a ``NotImplementedError``: + + .. ipython:: python + :okexcept: + + pd.Index([1, 2, 3], dtype=np.float16) + + +.. _whatsnew_200.enhancements.io_dtype_backend: + +Argument ``dtype_backend``, to return pyarrow-backed or numpy-backed nullable dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following functions gained a new keyword ``dtype_backend`` (:issue:`36712`) + +* :func:`read_csv` +* :func:`read_clipboard` +* :func:`read_fwf` +* :func:`read_excel` +* :func:`read_html` +* :func:`read_xml` +* :func:`read_json` +* :func:`read_sql` +* :func:`read_sql_query` +* :func:`read_sql_table` +* :func:`read_parquet` +* :func:`read_orc` +* :func:`read_feather` +* :func:`read_spss` +* :func:`to_numeric` +* :meth:`DataFrame.convert_dtypes` +* :meth:`Series.convert_dtypes` + +When this option is set to ``"numpy_nullable"`` it will return a :class:`DataFrame` that is +backed by nullable dtypes. + +When this keyword is set to ``"pyarrow"``, then these functions will return pyarrow-backed nullable :class:`ArrowDtype` DataFrames (:issue:`48957`, :issue:`49997`): + +* :func:`read_csv` +* :func:`read_clipboard` +* :func:`read_fwf` +* :func:`read_excel` +* :func:`read_html` +* :func:`read_xml` +* :func:`read_json` +* :func:`read_sql` +* :func:`read_sql_query` +* :func:`read_sql_table` +* :func:`read_parquet` +* :func:`read_orc` +* :func:`read_feather` +* :func:`read_spss` +* :func:`to_numeric` +* :meth:`DataFrame.convert_dtypes` +* :meth:`Series.convert_dtypes` + +.. ipython:: python + + import io + data = io.StringIO("""a,b,c,d,e,f,g,h,i + 1,2.5,True,a,,,,, + 3,4.5,False,b,6,7.5,True,a, + """) + df = pd.read_csv(data, dtype_backend="pyarrow") + df.dtypes + + data.seek(0) + df_pyarrow = pd.read_csv(data, dtype_backend="pyarrow", engine="pyarrow") + df_pyarrow.dtypes + +Copy-on-Write improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- A new lazy copy mechanism that defers the copy until the object in question is modified + was added to the methods listed in + :ref:`Copy-on-Write optimizations `. 
+ These methods return views when Copy-on-Write is enabled, which provides a significant + performance improvement compared to the regular execution (:issue:`49473`). + +- Accessing a single column of a DataFrame as a Series (e.g. ``df["col"]``) now always + returns a new object every time it is constructed when Copy-on-Write is enabled (not + returning multiple times an identical, cached Series object). This ensures that those + Series objects correctly follow the Copy-on-Write rules (:issue:`49450`) + +- The :class:`Series` constructor will now create a lazy copy (deferring the copy until + a modification to the data happens) when constructing a Series from an existing + Series with the default of ``copy=False`` (:issue:`50471`) + +- The :class:`DataFrame` constructor will now create a lazy copy (deferring the copy until + a modification to the data happens) when constructing from an existing + :class:`DataFrame` with the default of ``copy=False`` (:issue:`51239`) + +- The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary + of Series objects and specifying ``copy=False``, will now use a lazy copy + of those Series objects for the columns of the DataFrame (:issue:`50777`) + +- The :class:`DataFrame` constructor, when constructing a DataFrame from a + :class:`Series` or :class:`Index` and specifying ``copy=False``, will + now respect Copy-on-Write. + +- The :class:`DataFrame` and :class:`Series` constructors, when constructing from + a NumPy array, will now copy the array by default to avoid mutating + the :class:`DataFrame` / :class:`Series` + when mutating the array. Specify ``copy=False`` to get the old behavior. + When setting ``copy=False`` pandas does not guarantee correct Copy-on-Write + behavior when the NumPy array is modified after creation of the + :class:`DataFrame` / :class:`Series`. + +- The :meth:`DataFrame.from_records` will now respect Copy-on-Write when called + with a :class:`DataFrame`. + +- Trying to set values using chained assignment (for example, ``df["a"][1:3] = 0``) + will now always raise a warning when Copy-on-Write is enabled. In this mode, + chained assignment can never work because we are always setting into a temporary + object that is the result of an indexing operation (getitem), which under + Copy-on-Write always behaves as a copy. Thus, assigning through a chain + can never update the original Series or DataFrame. Therefore, an informative + warning is raised to the user to avoid silently doing nothing (:issue:`49467`) + +- :meth:`DataFrame.replace` will now respect the Copy-on-Write mechanism + when ``inplace=True``. + +- :meth:`DataFrame.transpose` will now respect the Copy-on-Write mechanism. + +- Arithmetic operations that can be inplace, e.g. ``ser *= 2`` will now respect the + Copy-on-Write mechanism. + +- :meth:`DataFrame.__getitem__` will now respect the Copy-on-Write mechanism when the + :class:`DataFrame` has :class:`MultiIndex` columns. + +- :meth:`Series.__getitem__` will now respect the Copy-on-Write mechanism when the + :class:`Series` has a :class:`MultiIndex`. + +- :meth:`Series.view` will now respect the Copy-on-Write mechanism. + +Copy-on-Write can be enabled through one of + +.. code-block:: python + + pd.set_option("mode.copy_on_write", True) + + +.. code-block:: python + + pd.options.mode.copy_on_write = True + +Alternatively, copy on write can be enabled locally through: + +.. code-block:: python + + with pd.option_context("mode.copy_on_write", True): + ... + +.. 
_whatsnew_200.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ +- Added support for ``str`` accessor methods when using :class:`ArrowDtype` with a ``pyarrow.string`` type (:issue:`50325`) +- Added support for ``dt`` accessor methods when using :class:`ArrowDtype` with a ``pyarrow.timestamp`` type (:issue:`50954`) +- :func:`read_sas` now supports using ``encoding='infer'`` to correctly read and use the encoding specified by the sas file. (:issue:`48048`) +- :meth:`.DataFrameGroupBy.quantile`, :meth:`.SeriesGroupBy.quantile` and :meth:`.DataFrameGroupBy.std` now preserve nullable dtypes instead of casting to numpy dtypes (:issue:`37493`) +- :meth:`.DataFrameGroupBy.std`, :meth:`.SeriesGroupBy.std` now support datetime64, timedelta64, and :class:`DatetimeTZDtype` dtypes (:issue:`48481`) +- :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`) +- :func:`.testing.assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`) +- Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`) +- Added support for extension array dtypes in :func:`merge` (:issue:`44240`) +- Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`) +- Added ``cumsum``, ``cumprod``, ``cummin`` and ``cummax`` to the ``ExtensionArray`` interface via ``_accumulate`` (:issue:`28385`) +- :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`) +- Fix ``test`` optional_extra by adding missing test package ``pytest-asyncio`` (:issue:`48361`) +- :func:`DataFrame.astype` exception message thrown improved to include column name when type conversion is not possible. (:issue:`47571`) +- :func:`date_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49106`) +- :func:`timedelta_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49824`) +- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. 
(:issue:`35849`) +- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`) +- Improve exception message when using :func:`.testing.assert_frame_equal` on a :class:`DataFrame` to include the column that is compared (:issue:`50323`) +- Improved error message for :func:`merge_asof` when join-columns were duplicated (:issue:`50102`) +- Added support for extension array dtypes to :func:`get_dummies` (:issue:`32430`) +- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`) +- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`) +- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`) +- :meth:`Series.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`48304`) +- :meth:`Series.dropna` and :meth:`DataFrame.dropna` has gained ``ignore_index`` keyword to reset index (:issue:`31725`) +- Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`) +- Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`) +- Added support for :meth:`Index.min` and :meth:`Index.max` for pyarrow string dtypes (:issue:`51397`) +- Added :meth:`DatetimeIndex.as_unit` and :meth:`TimedeltaIndex.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`50616`) +- Added :meth:`Series.dt.unit` and :meth:`Series.dt.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`51223`) +- Added new argument ``dtype`` to :func:`read_sql` to be consistent with :func:`read_sql_query` (:issue:`50797`) +- :func:`read_csv`, :func:`read_table`, :func:`read_fwf` and :func:`read_excel` now accept ``date_format`` (:issue:`50601`) +- :func:`to_datetime` now accepts ``"ISO8601"`` as an argument to ``format``, which will match any ISO8601 string (but possibly not identically-formatted) (:issue:`50411`) +- :func:`to_datetime` now accepts ``"mixed"`` as an argument to ``format``, which will infer the format for each element individually (:issue:`50972`) +- Added new argument ``engine`` to :func:`read_json` to support parsing JSON with pyarrow by specifying ``engine="pyarrow"`` (:issue:`48893`) +- Added support for SQLAlchemy 2.0 (:issue:`40686`) +- Added support for ``decimal`` parameter when ``engine="pyarrow"`` in :func:`read_csv` (:issue:`51302`) +- :class:`Index` set operations :meth:`Index.union`, :meth:`Index.intersection`, :meth:`Index.difference`, and :meth:`Index.symmetric_difference` now support ``sort=True``, which will always return a sorted result, unlike the default ``sort=None`` which does not sort in some cases (:issue:`25151`) +- Added new escape mode "latex-math" to avoid escaping "$" in formatter (:issue:`50040`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_200.notable_bug_fixes: + +Notable bug fixes +~~~~~~~~~~~~~~~~~ + +These are bug fixes that might have notable behavior changes. + +.. 
_whatsnew_200.notable_bug_fixes.cumsum_cumprod_overflow: + +:meth:`.DataFrameGroupBy.cumsum` and :meth:`.DataFrameGroupBy.cumprod` overflow instead of lossy casting to float +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions we cast to float when applying ``cumsum`` and ``cumprod`` which +lead to incorrect results even if the result could be hold by ``int64`` dtype. +Additionally, the aggregation overflows consistent with numpy and the regular +:meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods when the limit of +``int64`` is reached (:issue:`37493`). + +*Old Behavior* + +.. code-block:: ipython + + In [1]: df = pd.DataFrame({"key": ["b"] * 7, "value": 625}) + In [2]: df.groupby("key")["value"].cumprod()[5] + Out[2]: 5.960464477539062e+16 + +We return incorrect results with the 6th value. + +*New Behavior* + +.. ipython:: python + + df = pd.DataFrame({"key": ["b"] * 7, "value": 625}) + df.groupby("key")["value"].cumprod() + +We overflow with the 7th value, but the 6th value is still correct. + +.. _whatsnew_200.notable_bug_fixes.groupby_nth_filter: + +:meth:`.DataFrameGroupBy.nth` and :meth:`.SeriesGroupBy.nth` now behave as filtrations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions of pandas, :meth:`.DataFrameGroupBy.nth` and +:meth:`.SeriesGroupBy.nth` acted as if they were aggregations. However, for most +inputs ``n``, they may return either zero or multiple rows per group. This means +that they are filtrations, similar to e.g. :meth:`.DataFrameGroupBy.head`. pandas +now treats them as filtrations (:issue:`13666`). + +.. ipython:: python + + df = pd.DataFrame({"a": [1, 1, 2, 1, 2], "b": [np.nan, 2.0, 3.0, 4.0, 5.0]}) + gb = df.groupby("a") + +*Old Behavior* + +.. code-block:: ipython + + In [5]: gb.nth(n=1) + Out[5]: + A B + 1 1 2.0 + 4 2 5.0 + +*New Behavior* + +.. ipython:: python + + gb.nth(n=1) + +In particular, the index of the result is derived from the input by selecting +the appropriate rows. Also, when ``n`` is larger than the group, no rows instead of +``NaN`` is returned. + +*Old Behavior* + +.. code-block:: ipython + + In [5]: gb.nth(n=3, dropna="any") + Out[5]: + B + A + 1 NaN + 2 NaN + +*New Behavior* + +.. ipython:: python + + gb.nth(n=3, dropna="any") + +.. --------------------------------------------------------------------------- +.. _whatsnew_200.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_200.api_breaking.unsupported_datetimelike_dtype_arg: + +Construction with datetime64 or timedelta64 dtype with unsupported resolution +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +In past versions, when constructing a :class:`Series` or :class:`DataFrame` and +passing a "datetime64" or "timedelta64" dtype with unsupported resolution +(i.e. anything other than "ns"), pandas would silently replace the given dtype +with its nanosecond analogue: + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: pd.Series(["2016-01-01"], dtype="datetime64[s]") + Out[5]: + 0 2016-01-01 + dtype: datetime64[ns] + + In [6] pd.Series(["2016-01-01"], dtype="datetime64[D]") + Out[6]: + 0 2016-01-01 + dtype: datetime64[ns] + +In pandas 2.0 we support resolutions "s", "ms", "us", and "ns". When passing +a supported dtype (e.g. "datetime64[s]"), the result now has exactly +the requested dtype: + +*New behavior*: + +.. 
ipython:: python + + pd.Series(["2016-01-01"], dtype="datetime64[s]") + +With an un-supported dtype, pandas now raises instead of silently swapping in +a supported dtype: + +*New behavior*: + +.. ipython:: python + :okexcept: + + pd.Series(["2016-01-01"], dtype="datetime64[D]") + +.. _whatsnew_200.api_breaking.value_counts: + +Value counts sets the resulting name to ``count`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +In past versions, when running :meth:`Series.value_counts`, the result would inherit +the original object's name, and the result index would be nameless. This would cause +confusion when resetting the index, and the column names would not correspond with the +column values. +Now, the result name will be ``'count'`` (or ``'proportion'`` if ``normalize=True`` was passed), +and the index will be named after the original object (:issue:`49497`). + +*Previous behavior*: + +.. code-block:: ipython + + In [8]: pd.Series(['quetzal', 'quetzal', 'elk'], name='animal').value_counts() + + Out[2]: + quetzal 2 + elk 1 + Name: animal, dtype: int64 + +*New behavior*: + +.. ipython:: python + + pd.Series(['quetzal', 'quetzal', 'elk'], name='animal').value_counts() + +Likewise for other ``value_counts`` methods (for example, :meth:`DataFrame.value_counts`). + +.. _whatsnew_200.api_breaking.astype_to_unsupported_datetimelike: + +Disallow astype conversion to non-supported datetime64/timedelta64 dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +In previous versions, converting a :class:`Series` or :class:`DataFrame` +from ``datetime64[ns]`` to a different ``datetime64[X]`` dtype would return +with ``datetime64[ns]`` dtype instead of the requested dtype. In pandas 2.0, +support is added for "datetime64[s]", "datetime64[ms]", and "datetime64[us]" dtypes, +so converting to those dtypes gives exactly the requested dtype: + +*Previous behavior*: + +.. ipython:: python + + idx = pd.date_range("2016-01-01", periods=3) + ser = pd.Series(idx) + +*Previous behavior*: + +.. code-block:: ipython + + In [4]: ser.astype("datetime64[s]") + Out[4]: + 0 2016-01-01 + 1 2016-01-02 + 2 2016-01-03 + dtype: datetime64[ns] + +With the new behavior, we get exactly the requested dtype: + +*New behavior*: + +.. ipython:: python + + ser.astype("datetime64[s]") + +For non-supported resolutions e.g. "datetime64[D]", we raise instead of silently +ignoring the requested dtype: + +*New behavior*: + +.. ipython:: python + :okexcept: + + ser.astype("datetime64[D]") + +For conversion from ``timedelta64[ns]`` dtypes, the old behavior converted +to a floating point format. + +*Previous behavior*: + +.. ipython:: python + + idx = pd.timedelta_range("1 Day", periods=3) + ser = pd.Series(idx) + +*Previous behavior*: + +.. code-block:: ipython + + In [7]: ser.astype("timedelta64[s]") + Out[7]: + 0 86400.0 + 1 172800.0 + 2 259200.0 + dtype: float64 + + In [8]: ser.astype("timedelta64[D]") + Out[8]: + 0 1.0 + 1 2.0 + 2 3.0 + dtype: float64 + +The new behavior, as for datetime64, either gives exactly the requested dtype or raises: + +*New behavior*: + +.. ipython:: python + :okexcept: + + ser.astype("timedelta64[s]") + ser.astype("timedelta64[D]") + +.. _whatsnew_200.api_breaking.default_to_stdlib_tzinfos: + +UTC and fixed-offset timezones default to standard-library tzinfo objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +In previous versions, the default ``tzinfo`` object used to represent UTC +was ``pytz.UTC``. 
In pandas 2.0, we default to ``datetime.timezone.utc`` instead. +Similarly, for timezones represent fixed UTC offsets, we use ``datetime.timezone`` +objects instead of ``pytz.FixedOffset`` objects. See (:issue:`34916`) + +*Previous behavior*: + +.. code-block:: ipython + + In [2]: ts = pd.Timestamp("2016-01-01", tz="UTC") + In [3]: type(ts.tzinfo) + Out[3]: pytz.UTC + + In [4]: ts2 = pd.Timestamp("2016-01-01 04:05:06-07:00") + In [3]: type(ts2.tzinfo) + Out[5]: pytz._FixedOffset + +*New behavior*: + +.. ipython:: python + + ts = pd.Timestamp("2016-01-01", tz="UTC") + type(ts.tzinfo) + + ts2 = pd.Timestamp("2016-01-01 04:05:06-07:00") + type(ts2.tzinfo) + +For timezones that are neither UTC nor fixed offsets, e.g. "US/Pacific", we +continue to default to ``pytz`` objects. + +.. _whatsnew_200.api_breaking.zero_len_indexes: + +Empty DataFrames/Series will now default to have a ``RangeIndex`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Before, constructing an empty (where ``data`` is ``None`` or an empty list-like argument) :class:`Series` or :class:`DataFrame` without +specifying the axes (``index=None``, ``columns=None``) would return the axes as empty :class:`Index` with object dtype. + +Now, the axes return an empty :class:`RangeIndex` (:issue:`49572`). + +*Previous behavior*: + +.. code-block:: ipython + + In [8]: pd.Series().index + Out[8]: + Index([], dtype='object') + + In [9] pd.DataFrame().axes + Out[9]: + [Index([], dtype='object'), Index([], dtype='object')] + +*New behavior*: + +.. ipython:: python + + pd.Series().index + pd.DataFrame().axes + +.. _whatsnew_200.api_breaking.to_latex: + +DataFrame to LaTeX has a new render engine +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The existing :meth:`DataFrame.to_latex` has been restructured to utilise the +extended implementation previously available under :meth:`.Styler.to_latex`. +The arguments signature is similar, albeit ``col_space`` has been removed since +it is ignored by LaTeX engines. This render engine also requires ``jinja2`` as a +dependency which needs to be installed, since rendering is based upon jinja2 templates. + +The pandas latex options below are no longer used and have been removed. The generic +max rows and columns arguments remain but for this functionality should be replaced +by the Styler equivalents. +The alternative options giving similar functionality are indicated below: + +- ``display.latex.escape``: replaced with ``styler.format.escape``, +- ``display.latex.longtable``: replaced with ``styler.latex.environment``, +- ``display.latex.multicolumn``, ``display.latex.multicolumn_format`` and + ``display.latex.multirow``: replaced with ``styler.sparse.rows``, + ``styler.sparse.columns``, ``styler.latex.multirow_align`` and + ``styler.latex.multicol_align``, +- ``display.latex.repr``: replaced with ``styler.render.repr``, +- ``display.max_rows`` and ``display.max_columns``: replace with + ``styler.render.max_rows``, ``styler.render.max_columns`` and + ``styler.render.max_elements``. + +Note that due to this change some defaults have also changed: + +- ``multirow`` now defaults to *True*. +- ``multirow_align`` defaults to *"r"* instead of *"l"*. +- ``multicol_align`` defaults to *"r"* instead of *"l"*. +- ``escape`` now defaults to *False*. + +Note that the behaviour of ``_repr_latex_`` is also changed. Previously +setting ``display.latex.repr`` would generate LaTeX only when using nbconvert for a +JupyterNotebook, and not when the user is running the notebook. 
Now the +``styler.render.repr`` option allows control of the specific output +within JupyterNotebooks for operations (not just on nbconvert). See :issue:`39911`. + +.. _whatsnew_200.api_breaking.deps: + +Increased minimum versions for dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Some minimum supported versions of dependencies were updated. +If installed, we now require: + ++-------------------+-----------------+----------+---------+ +| Package | Minimum Version | Required | Changed | ++===================+=================+==========+=========+ +| mypy (dev) | 1.0 | | X | ++-------------------+-----------------+----------+---------+ +| pytest (dev) | 7.0.0 | | X | ++-------------------+-----------------+----------+---------+ +| pytest-xdist (dev)| 2.2.0 | | X | ++-------------------+-----------------+----------+---------+ +| hypothesis (dev) | 6.34.2 | | X | ++-------------------+-----------------+----------+---------+ +| python-dateutil | 2.8.2 | X | X | ++-------------------+-----------------+----------+---------+ +| tzdata | 2022.1 | X | X | ++-------------------+-----------------+----------+---------+ + +For `optional libraries `_ the general recommendation is to use the latest version. +The following table lists the lowest version per library that is currently being tested throughout the development of pandas. +Optional libraries below the lowest tested version may still work, but are not considered supported. + ++-----------------+-----------------+---------+ +| Package | Minimum Version | Changed | ++=================+=================+=========+ +| pyarrow | 7.0.0 | X | ++-----------------+-----------------+---------+ +| matplotlib | 3.6.1 | X | ++-----------------+-----------------+---------+ +| fastparquet | 0.6.3 | X | ++-----------------+-----------------+---------+ +| xarray | 0.21.0 | X | ++-----------------+-----------------+---------+ + +See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. + +Datetimes are now parsed with a consistent format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In the past, :func:`to_datetime` guessed the format for each element independently. This was appropriate for some cases where elements had mixed date formats - however, it would regularly cause problems when users expected a consistent format but the function would switch formats between elements. As of version 2.0.0, parsing will use a consistent format, determined by the first non-NA value (unless the user specifies a format, in which case that is used). + +*Old behavior*: + +.. code-block:: ipython + + In [1]: ser = pd.Series(['13-01-2000', '12-01-2000']) + In [2]: pd.to_datetime(ser) + Out[2]: + 0 2000-01-13 + 1 2000-12-01 + dtype: datetime64[ns] + +*New behavior*: + +.. ipython:: python + :okwarning: + + ser = pd.Series(['13-01-2000', '12-01-2000']) + pd.to_datetime(ser) + +Note that this affects :func:`read_csv` as well. + +If you still need to parse dates with inconsistent formats, you can use +``format='mixed'`` (possibly alongside ``dayfirst``) :: + + ser = pd.Series(['13-01-2000', '12 January 2000']) + pd.to_datetime(ser, format='mixed', dayfirst=True) + +or, if your formats are all ISO8601 (but possibly not identically-formatted) :: + + ser = pd.Series(['2020-01-01', '2020-01-01 03:00']) + pd.to_datetime(ser, format='ISO8601') + +.. 
_whatsnew_200.api_breaking.other: + +Other API changes +^^^^^^^^^^^^^^^^^ +- The ``freq``, ``tz``, ``nanosecond``, and ``unit`` keywords in the :class:`Timestamp` constructor are now keyword-only (:issue:`45307`, :issue:`32526`) +- Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`) +- :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` of now raises ``ParserError`` instead of ``IndexError`` when using the c parser. +- Default value of ``dtype`` in :func:`get_dummies` is changed to ``bool`` from ``uint8`` (:issue:`45848`) +- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`) +- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`) +- :meth:`DatetimeIndex.astype`, :meth:`TimedeltaIndex.astype`, :meth:`PeriodIndex.astype` :meth:`Series.astype`, :meth:`DataFrame.astype` with ``datetime64``, ``timedelta64`` or :class:`PeriodDtype` dtypes no longer allow converting to integer dtypes other than "int64", do ``obj.astype('int64', copy=False).astype(dtype)`` instead (:issue:`49715`) +- :meth:`Index.astype` now allows casting from ``float64`` dtype to datetime-like dtypes, matching :class:`Series` behavior (:issue:`49660`) +- Passing data with dtype of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; timedelta64 data with lower resolution will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) +- Passing ``dtype`` of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; passing a dtype with lower resolution for :class:`Series` or :class:`DataFrame` will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) +- Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`) +- Passing ``datetime64`` values with resolution other than nanosecond to :func:`to_datetime` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`50369`) +- Passing integer values and a non-nanosecond datetime64 dtype (e.g. "datetime64[s]") :class:`DataFrame`, :class:`Series`, or :class:`Index` will treat the values as multiples of the dtype's unit, matching the behavior of e.g. 
``Series(np.array(values, dtype="M8[s]"))`` (:issue:`51092`) +- Passing a string in ISO-8601 format to :class:`Timestamp` will retain the resolution of the parsed input if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49737`) +- The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). (:issue:`49111`) +- Changed behavior of :meth:`Series.quantile` and :meth:`DataFrame.quantile` with :class:`SparseDtype` to retain sparse dtype (:issue:`49583`) +- When creating a :class:`Series` with a object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`) +- :func:`pandas.testing.assert_index_equal` with parameter ``exact="equiv"`` now considers two indexes equal when both are either a :class:`RangeIndex` or :class:`Index` with an ``int64`` dtype. Previously it meant either a :class:`RangeIndex` or a :class:`Int64Index` (:issue:`51098`) +- :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`) +- :func:`to_datetime` and :class:`DatetimeIndex` now allow sequences containing both ``datetime`` objects and numeric entries, matching :class:`Series` behavior (:issue:`49037`, :issue:`50453`) +- :func:`pandas.api.types.is_string_dtype` now only returns ``True`` for array-likes with ``dtype=object`` when the elements are inferred to be strings (:issue:`15585`) +- Passing a sequence containing ``datetime`` objects and ``date`` objects to :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`) +- Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`) +- Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`) +- Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`) +- :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`) +- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations, use ``result.infer_objects(copy=False)`` to do type inference on the result (:issue:`49999`, :issue:`49714`) +- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` 
behavior (:issue:`49594`) +- Changed behavior of :meth:`Series.astype` from object-dtype containing ``bytes`` objects to string dtypes; this now does ``val.decode()`` on bytes objects instead of ``str(val)``, matching :meth:`Index.astype` behavior (:issue:`45326`) +- Added ``"None"`` to default ``na_values`` in :func:`read_csv` (:issue:`50286`) +- Changed behavior of :class:`Series` and :class:`DataFrame` constructors when given an integer dtype and floating-point data that is not round numbers, this now raises ``ValueError`` instead of silently retaining the float dtype; do ``Series(data)`` or ``DataFrame(data)`` to get the old behavior, and ``Series(data).astype(dtype)`` or ``DataFrame(data).astype(dtype)`` to get the specified dtype (:issue:`49599`) +- Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`) +- Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`) +- Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex`, when no index is specified. Previously the index would be a :class:`Index` with dtype ``object`` if the new DataFrame/Series has length 0 (:issue:`49572`) +- :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`) +- Creating a new DataFrame using a full slice on both axes with :attr:`~DataFrame.loc` + or :attr:`~DataFrame.iloc` (thus, ``df.loc[:, :]`` or ``df.iloc[:, :]``) now returns a + new DataFrame (shallow copy) instead of the original DataFrame, consistent with other + methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`) +- The :class:`Series` and :class:`DataFrame` constructors will now return a shallow copy + (i.e. share data, but not attributes) when passed a Series and DataFrame, + respectively, and with the default of ``copy=False`` (and if no other keyword triggers + a copy). Previously, the new Series or DataFrame would share the index attribute (e.g. + ``df.index = ...`` would also update the index of the parent or child) (:issue:`49523`) +- Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`) +- :class:`DataFrame` objects read from a :class:`HDFStore` file without an index now have a :class:`RangeIndex` instead of an ``int64`` index (:issue:`51076`) +- Instantiating an :class:`Index` with an numeric numpy dtype with data containing :class:`NA` and/or :class:`NaT` now raises a ``ValueError``. Previously a ``TypeError`` was raised (:issue:`51050`) +- Loading a JSON file with duplicate columns using ``read_json(orient='split')`` renames columns to avoid duplicates, as :func:`read_csv` and the other readers do (:issue:`50370`) +- The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. 
Previously they had dtype ``int64`` (:issue:`50926`)
+- :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
+- The methods :meth:`Series.round`, :meth:`DataFrame.__invert__`, :meth:`Series.__invert__`, :meth:`DataFrame.swapaxes`, :meth:`DataFrame.first`, :meth:`DataFrame.last`, :meth:`Series.first`, :meth:`Series.last` and :meth:`DataFrame.align` will now always return new objects (:issue:`51032`)
+- :class:`DataFrame` and :class:`DataFrameGroupBy` aggregations (e.g. "sum") with object-dtype columns no longer infer non-object dtypes for their results; explicitly call ``result.infer_objects(copy=False)`` on the result to obtain the old behavior (:issue:`51205`, :issue:`49603`)
+- Division by zero with :class:`ArrowDtype` dtypes returns ``-inf``, ``nan``, or ``inf`` depending on the numerator, instead of raising (:issue:`51541`)
+- Added :func:`pandas.api.types.is_any_real_numeric_dtype` to check for real numeric dtypes (:issue:`51152`)
+- :meth:`~arrays.ArrowExtensionArray.value_counts` now returns data with :class:`ArrowDtype` with ``pyarrow.int64`` type instead of ``"Int64"`` type (:issue:`51462`)
+- :func:`factorize` and :func:`unique` preserve the original dtype when passed numpy timedelta64 or datetime64 with non-nanosecond resolution (:issue:`48670`); see the sketch after this list
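+
+As an illustration of the last item, a minimal sketch (assuming pandas 2.0 with NumPy available; the commented results are indicative only):
+
+.. code-block:: python
+
+    import numpy as np
+    import pandas as pd
+
+    # datetime64 data with second resolution, not nanoseconds
+    arr = np.array(["2023-01-01", "2023-01-02", "2023-01-01"], dtype="datetime64[s]")
+
+    codes, uniques = pd.factorize(arr)
+    uniques.dtype         # datetime64[s] -- the input resolution is preserved
+
+    pd.unique(arr).dtype  # datetime64[s] as well, no longer cast to nanoseconds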
+
+.. note::
+
+    A current PDEP proposes the deprecation and removal of the keywords ``inplace`` and ``copy``
+    for all but a small subset of methods from the pandas API. The current discussion takes place
+    `here `_. The keywords won't be necessary
+    anymore in the context of Copy-on-Write. If this proposal is accepted, both
+    keywords would be deprecated in the next release of pandas and removed in pandas 3.0.
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_200.deprecations:
+
+Deprecations
+~~~~~~~~~~~~
+- Deprecated parsing datetime strings with system-local timezone to ``tzlocal``, pass a ``tz`` keyword or explicitly call ``tz_localize`` instead (:issue:`50791`)
+- Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`)
+- Deprecated behavior of :func:`to_datetime` with ``unit`` when parsing strings, in a future version these will be parsed as datetimes (matching unit-less behavior) instead of cast to floats. To retain the old behavior, cast strings to numeric types before calling :func:`to_datetime` (:issue:`50735`)
+- Deprecated :func:`pandas.io.sql.execute` (:issue:`50185`)
+- :meth:`Index.is_boolean` has been deprecated. Use :func:`pandas.api.types.is_bool_dtype` instead (:issue:`50042`)
+- :meth:`Index.is_integer` has been deprecated. Use :func:`pandas.api.types.is_integer_dtype` instead (:issue:`50042`)
+- :meth:`Index.is_floating` has been deprecated. Use :func:`pandas.api.types.is_float_dtype` instead (:issue:`50042`)
+- :meth:`Index.holds_integer` has been deprecated. Use :func:`pandas.api.types.infer_dtype` instead (:issue:`50243`)
+- :meth:`Index.is_numeric` has been deprecated. Use :func:`pandas.api.types.is_any_real_numeric_dtype` instead (:issue:`50042`, :issue:`51152`)
+- :meth:`Index.is_categorical` has been deprecated. Use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`50042`)
+- :meth:`Index.is_object` has been deprecated. Use :func:`pandas.api.types.is_object_dtype` instead (:issue:`50042`)
+- :meth:`Index.is_interval` has been deprecated. Use :func:`pandas.api.types.is_interval_dtype` instead (:issue:`50042`)
+- Deprecated argument ``date_parser`` in :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_excel` in favor of ``date_format`` (:issue:`50601`)
+- Deprecated ``all`` and ``any`` reductions with ``datetime64`` and :class:`DatetimeTZDtype` dtypes, use e.g. ``(obj != pd.Timestamp(0, tz=obj.tz)).all()`` instead (:issue:`34479`)
+- Deprecated unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` (:issue:`50977`)
+- Deprecated calling ``float`` or ``int`` on a single element :class:`Series` to return a ``float`` or ``int`` respectively. Extract the element before calling ``float`` or ``int`` instead (:issue:`51101`)
+- Deprecated :meth:`Grouper.groups`, use :meth:`Groupby.groups` instead (:issue:`51182`)
+- Deprecated :meth:`Grouper.grouper`, use :meth:`Groupby.grouper` instead (:issue:`51182`)
+- Deprecated :meth:`Grouper.obj`, use :meth:`Groupby.obj` instead (:issue:`51206`)
+- Deprecated :meth:`Grouper.indexer`, use :meth:`Resampler.indexer` instead (:issue:`51206`)
+- Deprecated :meth:`Grouper.ax`, use :meth:`Resampler.ax` instead (:issue:`51206`)
+- Deprecated keyword ``use_nullable_dtypes`` in :func:`read_parquet`, use ``dtype_backend`` instead (:issue:`51853`)
+- Deprecated :meth:`Series.pad` in favor of :meth:`Series.ffill` (:issue:`33396`)
+- Deprecated :meth:`Series.backfill` in favor of :meth:`Series.bfill` (:issue:`33396`)
+- Deprecated :meth:`DataFrame.pad` in favor of :meth:`DataFrame.ffill` (:issue:`33396`)
+- Deprecated :meth:`DataFrame.backfill` in favor of :meth:`DataFrame.bfill` (:issue:`33396`)
+- Deprecated :meth:`~pandas.io.stata.StataReader.close`. Use :class:`~pandas.io.stata.StataReader` as a context manager instead (:issue:`49228`)
+- Deprecated producing a scalar when iterating over a :class:`.DataFrameGroupBy` or a :class:`.SeriesGroupBy` that has been grouped by a ``level`` parameter that is a list of length 1; a tuple of length one will be returned instead (:issue:`51583`)
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_200.prior_deprecations:
+
+Removal of prior version deprecations/changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+- Removed :class:`Int64Index`, :class:`UInt64Index` and :class:`Float64Index`.
See also :ref:`here ` for more information (:issue:`42717`) +- Removed deprecated :attr:`Timestamp.freq`, :attr:`Timestamp.freqstr` and argument ``freq`` from the :class:`Timestamp` constructor and :meth:`Timestamp.fromordinal` (:issue:`14146`) +- Removed deprecated :class:`CategoricalBlock`, :meth:`Block.is_categorical`, require datetime64 and timedelta64 values to be wrapped in :class:`DatetimeArray` or :class:`TimedeltaArray` before passing to :meth:`Block.make_block_same_class`, require ``DatetimeTZBlock.values`` to have the correct ndim when passing to the :class:`BlockManager` constructor, and removed the "fastpath" keyword from the :class:`SingleBlockManager` constructor (:issue:`40226`, :issue:`40571`) +- Removed deprecated global option ``use_inf_as_null`` in favor of ``use_inf_as_na`` (:issue:`17126`) +- Removed deprecated module ``pandas.core.index`` (:issue:`30193`) +- Removed deprecated alias ``pandas.core.tools.datetimes.to_time``, import the function directly from ``pandas.core.tools.times`` instead (:issue:`34145`) +- Removed deprecated alias ``pandas.io.json.json_normalize``, import the function directly from ``pandas.json_normalize`` instead (:issue:`27615`) +- Removed deprecated :meth:`Categorical.to_dense`, use ``np.asarray(cat)`` instead (:issue:`32639`) +- Removed deprecated :meth:`Categorical.take_nd` (:issue:`27745`) +- Removed deprecated :meth:`Categorical.mode`, use ``Series(cat).mode()`` instead (:issue:`45033`) +- Removed deprecated :meth:`Categorical.is_dtype_equal` and :meth:`CategoricalIndex.is_dtype_equal` (:issue:`37545`) +- Removed deprecated :meth:`CategoricalIndex.take_nd` (:issue:`30702`) +- Removed deprecated :meth:`Index.is_type_compatible` (:issue:`42113`) +- Removed deprecated :meth:`Index.is_mixed`, check ``index.inferred_type`` directly instead (:issue:`32922`) +- Removed deprecated :func:`pandas.api.types.is_categorical`; use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`33385`) +- Removed deprecated :meth:`Index.asi8` (:issue:`37877`) +- Enforced deprecation changing behavior when passing ``datetime64[ns]`` dtype data and timezone-aware dtype to :class:`Series`, interpreting the values as wall-times instead of UTC times, matching :class:`DatetimeIndex` behavior (:issue:`41662`) +- Enforced deprecation changing behavior when applying a numpy ufunc on multiple non-aligned (on the index or columns) :class:`DataFrame` that will now align the inputs first (:issue:`39239`) +- Removed deprecated :meth:`DataFrame._AXIS_NUMBERS`, :meth:`DataFrame._AXIS_NAMES`, :meth:`Series._AXIS_NUMBERS`, :meth:`Series._AXIS_NAMES` (:issue:`33637`) +- Removed deprecated :meth:`Index.to_native_types`, use ``obj.astype(str)`` instead (:issue:`36418`) +- Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems`, use ``obj.items`` instead (:issue:`45321`) +- Removed deprecated :meth:`DataFrame.lookup` (:issue:`35224`) +- Removed deprecated :meth:`Series.append`, :meth:`DataFrame.append`, use :func:`concat` instead (:issue:`35407`) +- Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems` and :meth:`HDFStore.iteritems` use ``obj.items`` instead (:issue:`45321`) +- Removed deprecated :meth:`DatetimeIndex.union_many` (:issue:`45018`) +- Removed deprecated ``weekofyear`` and ``week`` attributes of :class:`DatetimeArray`, :class:`DatetimeIndex` and ``dt`` accessor in favor of ``isocalendar().week`` (:issue:`33595`) +- Removed deprecated :meth:`RangeIndex._start`, :meth:`RangeIndex._stop`, :meth:`RangeIndex._step`, use ``start``, 
``stop``, ``step`` instead (:issue:`30482`)
+- Removed deprecated :meth:`DatetimeIndex.to_perioddelta`; use ``dtindex - dtindex.to_period(freq).to_timestamp()`` instead (:issue:`34853`)
+- Removed deprecated :meth:`.Styler.hide_index` and :meth:`.Styler.hide_columns` (:issue:`49397`)
+- Removed deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` (:issue:`49397`)
+- Removed deprecated :meth:`.Styler.where` (:issue:`49397`)
+- Removed deprecated :meth:`.Styler.render` (:issue:`49397`)
+- Removed deprecated argument ``col_space`` in :meth:`DataFrame.to_latex` (:issue:`47970`)
+- Removed deprecated argument ``null_color`` in :meth:`.Styler.highlight_null` (:issue:`49397`)
+- Removed deprecated argument ``check_less_precise`` in :meth:`.testing.assert_frame_equal`, :meth:`.testing.assert_extension_array_equal`, :meth:`.testing.assert_series_equal`, :meth:`.testing.assert_index_equal` (:issue:`30562`)
+- Removed deprecated ``null_counts`` argument in :meth:`DataFrame.info`. Use ``show_counts`` instead (:issue:`37999`)
+- Removed deprecated :meth:`Index.is_monotonic` and :meth:`Series.is_monotonic`; use ``obj.is_monotonic_increasing`` instead (:issue:`45422`)
+- Removed deprecated :meth:`Index.is_all_dates` (:issue:`36697`)
+- Enforced deprecation disallowing passing a timezone-aware :class:`Timestamp` and ``dtype="datetime64[ns]"`` to :class:`Series` or :class:`DataFrame` constructors (:issue:`41555`)
+- Enforced deprecation disallowing passing a sequence of timezone-aware values and ``dtype="datetime64[ns]"`` to :class:`Series` or :class:`DataFrame` constructors (:issue:`41555`)
+- Enforced deprecation disallowing ``numpy.ma.mrecords.MaskedRecords`` in the :class:`DataFrame` constructor; pass ``{name: data[name] for name in data.dtype.names}`` instead (:issue:`40363`)
+- Enforced deprecation disallowing unit-less "datetime64" dtype in :meth:`Series.astype` and :meth:`DataFrame.astype` (:issue:`47844`)
+- Enforced deprecation disallowing using ``.astype`` to convert a ``datetime64[ns]`` :class:`Series`, :class:`DataFrame`, or :class:`DatetimeIndex` to timezone-aware dtype, use ``obj.tz_localize`` or ``ser.dt.tz_localize`` instead (:issue:`39258`)
+- Enforced deprecation disallowing using ``.astype`` to convert a timezone-aware :class:`Series`, :class:`DataFrame`, or :class:`DatetimeIndex` to timezone-naive ``datetime64[ns]`` dtype, use ``obj.tz_localize(None)`` or ``obj.tz_convert("UTC").tz_localize(None)`` instead (:issue:`39258`)
+- Enforced deprecation disallowing passing a non-boolean argument to ``sort`` in :func:`concat` (:issue:`44629`)
+- Removed date parser functions :func:`~pandas.io.date_converters.parse_date_time`,
+  :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields`
+  and :func:`~pandas.io.date_converters.generic_parser` (:issue:`24518`)
+- Removed argument ``index`` from the :class:`core.arrays.SparseArray` constructor (:issue:`43523`)
+- Removed argument ``squeeze`` from :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`32380`)
+- Removed deprecated ``apply``, ``apply_index``, ``__call__``, ``onOffset``, and ``isAnchored`` attributes from :class:`DateOffset` (:issue:`34171`)
+- Removed ``keep_tz`` argument in :meth:`DatetimeIndex.to_series` (:issue:`29731`)
+- Removed arguments ``names`` and ``dtype`` from :meth:`Index.copy` and ``levels`` and ``codes`` from :meth:`MultiIndex.copy` (:issue:`35853`, :issue:`36685`)
+- Removed argument ``inplace`` from :meth:`MultiIndex.set_levels` and
:meth:`MultiIndex.set_codes` (:issue:`35626`) +- Removed arguments ``verbose`` and ``encoding`` from :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` (:issue:`47912`) +- Removed argument ``line_terminator`` from :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead (:issue:`45302`) +- Removed argument ``inplace`` from :meth:`DataFrame.set_axis` and :meth:`Series.set_axis`, use ``obj = obj.set_axis(..., copy=False)`` instead (:issue:`48130`) +- Disallow passing positional arguments to :meth:`MultiIndex.set_levels` and :meth:`MultiIndex.set_codes` (:issue:`41485`) +- Disallow parsing to Timedelta strings with components with units "Y", "y", or "M", as these do not represent unambiguous durations (:issue:`36838`) +- Removed :meth:`MultiIndex.is_lexsorted` and :meth:`MultiIndex.lexsort_depth` (:issue:`38701`) +- Removed argument ``how`` from :meth:`PeriodIndex.astype`, use :meth:`PeriodIndex.to_timestamp` instead (:issue:`37982`) +- Removed argument ``try_cast`` from :meth:`DataFrame.mask`, :meth:`DataFrame.where`, :meth:`Series.mask` and :meth:`Series.where` (:issue:`38836`) +- Removed argument ``tz`` from :meth:`Period.to_timestamp`, use ``obj.to_timestamp(...).tz_localize(tz)`` instead (:issue:`34522`) +- Removed argument ``sort_columns`` in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`) +- Removed argument ``is_copy`` from :meth:`DataFrame.take` and :meth:`Series.take` (:issue:`30615`) +- Removed argument ``kind`` from :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer` and :meth:`Index.slice_locs` (:issue:`41378`) +- Removed arguments ``prefix``, ``squeeze``, ``error_bad_lines`` and ``warn_bad_lines`` from :func:`read_csv` (:issue:`40413`, :issue:`43427`) +- Removed arguments ``squeeze`` from :func:`read_excel` (:issue:`43427`) +- Removed argument ``datetime_is_numeric`` from :meth:`DataFrame.describe` and :meth:`Series.describe` as datetime data will always be summarized as numeric data (:issue:`34798`) +- Disallow passing list ``key`` to :meth:`Series.xs` and :meth:`DataFrame.xs`, pass a tuple instead (:issue:`41789`) +- Disallow subclass-specific keywords (e.g. 
"freq", "tz", "names", "closed") in the :class:`Index` constructor (:issue:`38597`) +- Removed argument ``inplace`` from :meth:`Categorical.remove_unused_categories` (:issue:`37918`) +- Disallow passing non-round floats to :class:`Timestamp` with ``unit="M"`` or ``unit="Y"`` (:issue:`47266`) +- Remove keywords ``convert_float`` and ``mangle_dupe_cols`` from :func:`read_excel` (:issue:`41176`) +- Remove keyword ``mangle_dupe_cols`` from :func:`read_csv` and :func:`read_table` (:issue:`48137`) +- Removed ``errors`` keyword from :meth:`DataFrame.where`, :meth:`Series.where`, :meth:`DataFrame.mask` and :meth:`Series.mask` (:issue:`47728`) +- Disallow passing non-keyword arguments to :func:`read_excel` except ``io`` and ``sheet_name`` (:issue:`34418`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.drop` and :meth:`Series.drop` except ``labels`` (:issue:`41486`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.fillna` and :meth:`Series.fillna` except ``value`` (:issue:`41485`) +- Disallow passing non-keyword arguments to :meth:`StringMethods.split` and :meth:`StringMethods.rsplit` except for ``pat`` (:issue:`47448`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.set_index` except ``keys`` (:issue:`41495`) +- Disallow passing non-keyword arguments to :meth:`Resampler.interpolate` except ``method`` (:issue:`41699`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` except ``level`` (:issue:`41496`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.dropna` and :meth:`Series.dropna` (:issue:`41504`) +- Disallow passing non-keyword arguments to :meth:`ExtensionArray.argsort` (:issue:`46134`) +- Disallow passing non-keyword arguments to :meth:`Categorical.sort_values` (:issue:`47618`) +- Disallow passing non-keyword arguments to :meth:`Index.drop_duplicates` and :meth:`Series.drop_duplicates` (:issue:`41485`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.drop_duplicates` except for ``subset`` (:issue:`41485`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` (:issue:`41506`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` except for ``method`` (:issue:`41510`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.any` and :meth:`Series.any` (:issue:`44896`) +- Disallow passing non-keyword arguments to :meth:`Index.set_names` except for ``names`` (:issue:`41551`) +- Disallow passing non-keyword arguments to :meth:`Index.join` except for ``other`` (:issue:`46518`) +- Disallow passing non-keyword arguments to :func:`concat` except for ``objs`` (:issue:`41485`) +- Disallow passing non-keyword arguments to :func:`pivot` except for ``data`` (:issue:`48301`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.pivot` (:issue:`48301`) +- Disallow passing non-keyword arguments to :func:`read_html` except for ``io`` (:issue:`27573`) +- Disallow passing non-keyword arguments to :func:`read_json` except for ``path_or_buf`` (:issue:`27573`) +- Disallow passing non-keyword arguments to :func:`read_sas` except for ``filepath_or_buffer`` (:issue:`47154`) +- Disallow passing non-keyword arguments to :func:`read_stata` except for ``filepath_or_buffer`` (:issue:`48128`) +- Disallow passing non-keyword arguments to :func:`read_csv` except ``filepath_or_buffer`` (:issue:`41485`) +- Disallow passing non-keyword arguments to :func:`read_table` except ``filepath_or_buffer`` 
(:issue:`41485`) +- Disallow passing non-keyword arguments to :func:`read_fwf` except ``filepath_or_buffer`` (:issue:`44710`) +- Disallow passing non-keyword arguments to :func:`read_xml` except for ``path_or_buffer`` (:issue:`45133`) +- Disallow passing non-keyword arguments to :meth:`Series.mask` and :meth:`DataFrame.mask` except ``cond`` and ``other`` (:issue:`41580`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.to_stata` except for ``path`` (:issue:`48128`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.where` and :meth:`Series.where` except for ``cond`` and ``other`` (:issue:`41523`) +- Disallow passing non-keyword arguments to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` except for ``labels`` (:issue:`41491`) +- Disallow passing non-keyword arguments to :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` except for ``mapper`` (:issue:`47587`) +- Disallow passing non-keyword arguments to :meth:`Series.clip` and :meth:`DataFrame.clip` (:issue:`41511`) +- Disallow passing non-keyword arguments to :meth:`Series.bfill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill` and :meth:`DataFrame.ffill` (:issue:`41508`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.replace`, :meth:`Series.replace` except for ``to_replace`` and ``value`` (:issue:`47587`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.sort_values` except for ``by`` (:issue:`41505`) +- Disallow passing non-keyword arguments to :meth:`Series.sort_values` (:issue:`41505`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.reindex` except for ``labels`` (:issue:`17966`) +- Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`) +- Disallowed constructing :class:`Categorical` with scalar ``data`` (:issue:`38433`) +- Disallowed constructing :class:`CategoricalIndex` without passing ``data`` (:issue:`38944`) +- Removed :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`) +- Removed :attr:`Rolling.win_type` returning ``"freq"`` (:issue:`38963`) +- Removed :attr:`Rolling.is_datetimelike` (:issue:`38963`) +- Removed the ``level`` keyword in :class:`DataFrame` and :class:`Series` aggregations; use ``groupby`` instead (:issue:`39983`) +- Removed deprecated :meth:`Timedelta.delta`, :meth:`Timedelta.is_populated`, and :attr:`Timedelta.freq` (:issue:`46430`, :issue:`46476`) +- Removed deprecated :attr:`NaT.freq` (:issue:`45071`) +- Removed deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`) +- Removed the ``numeric_only`` keyword from :meth:`Categorical.min` and :meth:`Categorical.max` in favor of ``skipna`` (:issue:`48821`) +- Changed behavior of :meth:`DataFrame.median` and :meth:`DataFrame.mean` with ``numeric_only=None`` to not exclude datetime-like columns THIS NOTE WILL BE IRRELEVANT ONCE ``numeric_only=None`` DEPRECATION IS ENFORCED (:issue:`29941`) +- Removed :func:`is_extension_type` in favor of :func:`is_extension_array_dtype` (:issue:`29457`) +- Removed ``.ExponentialMovingWindow.vol`` (:issue:`39220`) +- Removed :meth:`Index.get_value` and :meth:`Index.set_value` (:issue:`33907`, :issue:`28621`) +- Removed :meth:`Series.slice_shift` and :meth:`DataFrame.slice_shift` (:issue:`37601`) +- Remove :meth:`DataFrameGroupBy.pad` and :meth:`DataFrameGroupBy.backfill` (:issue:`45076`) +- Remove ``numpy`` argument from :func:`read_json` (:issue:`30636`) +- Disallow passing abbreviations for ``orient`` in 
:meth:`DataFrame.to_dict` (:issue:`32516`)
+- Disallow partial slicing on a non-monotonic :class:`DatetimeIndex` with keys that are not in the index. This now raises a ``KeyError`` (:issue:`18531`)
+- Removed ``get_offset`` in favor of :func:`to_offset` (:issue:`30340`)
+- Removed the ``warn`` keyword in :func:`infer_freq` (:issue:`45947`)
+- Removed the ``include_start`` and ``include_end`` arguments in :meth:`DataFrame.between_time` in favor of ``inclusive`` (:issue:`43248`)
+- Removed the ``closed`` argument in :meth:`date_range` and :meth:`bdate_range` in favor of ``inclusive`` argument (:issue:`40245`)
+- Removed the ``center`` keyword in :meth:`DataFrame.expanding` (:issue:`20647`)
+- Removed the ``truediv`` keyword from :func:`eval` (:issue:`29812`)
+- Removed the ``method`` and ``tolerance`` arguments in :meth:`Index.get_loc`. Use ``index.get_indexer([label], method=..., tolerance=...)`` instead (:issue:`42269`)
+- Removed the ``pandas.datetime`` submodule (:issue:`30489`)
+- Removed the ``pandas.np`` submodule (:issue:`30296`)
+- Removed ``pandas.util.testing`` in favor of ``pandas.testing`` (:issue:`30745`)
+- Removed :meth:`Series.str.__iter__` (:issue:`28277`)
+- Removed ``pandas.SparseArray`` in favor of :class:`arrays.SparseArray` (:issue:`30642`)
+- Removed ``pandas.SparseSeries`` and ``pandas.SparseDataFrame``, including pickle support (:issue:`30642`)
+- Enforced disallowing passing an integer ``fill_value`` to :meth:`DataFrame.shift` and :meth:`Series.shift` with datetime64, timedelta64, or period dtypes (:issue:`32591`)
+- Enforced disallowing a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
+- Enforced disallowing passing ``True`` and ``False`` into ``inclusive`` in :meth:`Series.between` in favor of ``"both"`` and ``"neither"`` respectively (:issue:`40628`)
+- Enforced disallowing using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
+- Enforced disallowing the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`)
+- Enforced disallowing a tuple of column labels into :meth:`.DataFrameGroupBy.__getitem__` (:issue:`30546`)
+- Enforced disallowing missing labels when indexing with a sequence of labels on a level of a :class:`MultiIndex`. This now raises a ``KeyError`` (:issue:`42351`)
+- Enforced disallowing setting values with ``.loc`` using a positional slice. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
+- Enforced disallowing positional indexing with a ``float`` key even if that key is a round number, manually cast to integer instead (:issue:`34193`)
+- Enforced disallowing using a :class:`DataFrame` indexer with ``.iloc``, use ``.loc`` instead for automatic alignment (:issue:`39022`)
+- Enforced disallowing ``set`` or ``dict`` indexers in ``__getitem__`` and ``__setitem__`` methods (:issue:`42825`)
+- Enforced disallowing indexing on a :class:`Index` or positional indexing on a :class:`Series` producing multi-dimensional objects e.g.
``obj[:, None]``, convert to numpy before indexing instead (:issue:`35141`) +- Enforced disallowing ``dict`` or ``set`` objects in ``suffixes`` in :func:`merge` (:issue:`34810`) +- Enforced disallowing :func:`merge` to produce duplicated columns through the ``suffixes`` keyword and already existing columns (:issue:`22818`) +- Enforced disallowing using :func:`merge` or :func:`join` on a different number of levels (:issue:`34862`) +- Enforced disallowing ``value_name`` argument in :func:`DataFrame.melt` to match an element in the :class:`DataFrame` columns (:issue:`35003`) +- Enforced disallowing passing ``showindex`` into ``**kwargs`` in :func:`DataFrame.to_markdown` and :func:`Series.to_markdown` in favor of ``index`` (:issue:`33091`) +- Removed setting Categorical._codes directly (:issue:`41429`) +- Removed setting Categorical.categories directly (:issue:`47834`) +- Removed argument ``inplace`` from :meth:`Categorical.add_categories`, :meth:`Categorical.remove_categories`, :meth:`Categorical.set_categories`, :meth:`Categorical.rename_categories`, :meth:`Categorical.reorder_categories`, :meth:`Categorical.set_ordered`, :meth:`Categorical.as_ordered`, :meth:`Categorical.as_unordered` (:issue:`37981`, :issue:`41118`, :issue:`41133`, :issue:`47834`) +- Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`) +- Renamed ``fname`` to ``path`` in :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata` and :meth:`DataFrame.to_feather` (:issue:`30338`) +- Enforced disallowing indexing a :class:`Series` with a single item list with a slice (e.g. ``ser[[slice(0, 2)]]``). Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`) +- Changed behavior indexing on a :class:`DataFrame` with a :class:`DatetimeIndex` index using a string indexer, previously this operated as a slice on rows, now it operates like any other column key; use ``frame.loc[key]`` for the old behavior (:issue:`36179`) +- Enforced the ``display.max_colwidth`` option to not accept negative integers (:issue:`31569`) +- Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`) +- Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`) +- Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`) +- Changed behavior of empty data passed into :class:`Series`; the default dtype will be ``object`` instead of ``float64`` (:issue:`29405`) +- Changed the behavior of :meth:`DatetimeIndex.union`, :meth:`DatetimeIndex.intersection`, and :meth:`DatetimeIndex.symmetric_difference` with mismatched timezones to convert to UTC instead of casting to object dtype (:issue:`39328`) +- Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`) +- Changed the behavior of indexing on a timezone-aware :class:`DatetimeIndex` with a timezone-naive ``datetime`` object or vice-versa; these now behave like any other non-comparable type by raising ``KeyError`` (:issue:`36148`) +- Changed the behavior of :meth:`Index.reindex`, :meth:`Series.reindex`, and :meth:`DataFrame.reindex` with a ``datetime64`` dtype and a ``datetime.date`` object for ``fill_value``; these are no longer considered equivalent to ``datetime.datetime`` objects so the reindex casts to object dtype (:issue:`39767`) +- Changed behavior of :meth:`SparseArray.astype` when given a dtype that is not explicitly ``SparseDtype``, cast to the exact requested dtype rather 
than silently using a ``SparseDtype`` instead (:issue:`34457`) +- Changed behavior of :meth:`Index.ravel` to return a view on the original :class:`Index` instead of a ``np.ndarray`` (:issue:`36900`) +- Changed behavior of :meth:`Series.to_frame` and :meth:`Index.to_frame` with explicit ``name=None`` to use ``None`` for the column name instead of the index's name or default ``0`` (:issue:`45523`) +- Changed behavior of :func:`concat` with one array of ``bool``-dtype and another of integer dtype, this now returns ``object`` dtype instead of integer dtype; explicitly cast the bool object to integer before concatenating to get the old behavior (:issue:`45101`) +- Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`) +- Changed behavior of :class:`Index` constructor when given a ``np.ndarray`` with object-dtype containing numeric entries; this now retains object dtype rather than inferring a numeric dtype, consistent with :class:`Series` behavior (:issue:`42870`) +- Changed behavior of :meth:`Index.__and__`, :meth:`Index.__or__` and :meth:`Index.__xor__` to behave as logical operations (matching :class:`Series` behavior) instead of aliases for set operations (:issue:`37374`) +- Changed behavior of :class:`DataFrame` constructor when passed a list whose first element is a :class:`Categorical`, this now treats the elements as rows casting to ``object`` dtype, consistent with behavior for other types (:issue:`38845`) +- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`) +- Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`) +- Changed behavior of :class:`Timestamp` constructor with a ``np.datetime64`` object and a ``tz`` passed to interpret the input as a wall-time as opposed to a UTC time (:issue:`42288`) +- Changed behavior of :meth:`Timestamp.utcfromtimestamp` to return a timezone-aware object satisfying ``Timestamp.utcfromtimestamp(val).timestamp() == val`` (:issue:`45083`) +- Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`) +- Changed behavior of setitem-like operations (``__setitem__``, ``fillna``, ``where``, ``mask``, ``replace``, ``insert``, fill_value for ``shift``) on an object with :class:`DatetimeTZDtype` when using a value with a non-matching timezone, the value will be cast to the object's timezone instead of casting both to object-dtype (:issue:`44243`) +- Changed behavior of :class:`Index`, :class:`Series`, :class:`DataFrame` constructors with floating-dtype data and a :class:`DatetimeTZDtype`, the data are now interpreted as UTC-times instead of wall-times, consistent with how integer-dtype data are treated (:issue:`45573`) +- Changed behavior of :class:`Series` and :class:`DataFrame` constructors with integer dtype and floating-point data containing ``NaN``, this now raises ``IntCastingNaNError`` (:issue:`40110`) +- Changed behavior of :class:`Series` and :class:`DataFrame` constructors with an integer ``dtype`` and values that are too large to losslessly cast to this dtype, this now raises ``ValueError`` (:issue:`41734`) +- Changed behavior 
of :class:`Series` and :class:`DataFrame` constructors with an integer ``dtype`` and values having either ``datetime64`` or ``timedelta64`` dtypes, this now raises ``TypeError``, use ``values.view("int64")`` instead (:issue:`41770`) +- Removed the deprecated ``base`` and ``loffset`` arguments from :meth:`pandas.DataFrame.resample`, :meth:`pandas.Series.resample` and :class:`pandas.Grouper`. Use ``offset`` or ``origin`` instead (:issue:`31809`) +- Changed behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and an incompatible ``fill_value``; this now casts to ``object`` dtype instead of raising, consistent with the behavior with other dtypes (:issue:`45746`) +- Change the default argument of ``regex`` for :meth:`Series.str.replace` from ``True`` to ``False``. Additionally, a single character ``pat`` with ``regex=True`` is now treated as a regular expression instead of a string literal. (:issue:`36695`, :issue:`24804`) +- Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`) +- Changed behavior of :meth:`DataFrame.max`, :class:`DataFrame.min`, :class:`DataFrame.mean`, :class:`DataFrame.median`, :class:`DataFrame.skew`, :class:`DataFrame.kurt` with ``axis=None`` to return a scalar applying the aggregation across both axes (:issue:`45072`) +- Changed behavior of comparison of a :class:`Timestamp` with a ``datetime.date`` object; these now compare as un-equal and raise on inequality comparisons, matching the ``datetime.datetime`` behavior (:issue:`36131`) +- Changed behavior of comparison of ``NaT`` with a ``datetime.date`` object; these now raise on inequality comparisons (:issue:`39196`) +- Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`) +- Changed behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`) +- Changed behaviour of :meth:`DataFrame.to_latex` to now use the Styler implementation via :meth:`.Styler.to_latex` (:issue:`47970`) +- Changed behavior of :meth:`Series.__setitem__` with an integer key and a :class:`Float64Index` when the key is not present in the index; previously we treated the key as positional (behaving like ``series.iloc[key] = val``), now we treat it is a label (behaving like ``series.loc[key] = val``), consistent with :meth:`Series.__getitem__`` behavior (:issue:`33469`) +- Removed ``na_sentinel`` argument from :func:`factorize`, :meth:`.Index.factorize`, and :meth:`.ExtensionArray.factorize` (:issue:`47157`) +- Changed behavior of :meth:`Series.diff` and :meth:`DataFrame.diff` with :class:`ExtensionDtype` dtypes whose arrays do not implement ``diff``, these now raise ``TypeError`` rather than casting to numpy (:issue:`31025`) +- Enforced deprecation of calling numpy "ufunc"s on :class:`DataFrame` with ``method="outer"``; this now raises ``NotImplementedError`` (:issue:`36955`) +- Enforced deprecation disallowing passing ``numeric_only=True`` to :class:`Series` reductions (``rank``, ``any``, ``all``, ...) 
with non-numeric dtype (:issue:`47500`) +- Changed behavior of :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` so that ``group_keys`` is respected even if a transformer is detected (:issue:`34998`) +- Comparisons between a :class:`DataFrame` and a :class:`Series` where the frame's columns do not match the series's index raise ``ValueError`` instead of automatically aligning, do ``left, right = left.align(right, axis=1, copy=False)`` before comparing (:issue:`36795`) +- Enforced deprecation ``numeric_only=None`` (the default) in DataFrame reductions that would silently drop columns that raised; ``numeric_only`` now defaults to ``False`` (:issue:`41480`) +- Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`) +- Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`) +- Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`) +- Enforced deprecation of silently dropping nuisance columns in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. This will now raise a :class:`.errors.DataError` (:issue:`42834`) +- Changed behavior in setting values with ``df.loc[:, foo] = bar`` or ``df.iloc[:, foo] = bar``, these now always attempt to set values inplace before falling back to casting (:issue:`45333`) +- Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`) +- Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`) +- Using the method :meth:`.DataFrameGroupBy.transform` with a callable that returns DataFrames will align to the input's index (:issue:`47244`) +- When providing a list of columns of length one to :meth:`DataFrame.groupby`, the keys that are returned by iterating over the resulting :class:`DataFrameGroupBy` object will now be tuples of length one (:issue:`47761`) +- Removed deprecated methods :meth:`ExcelWriter.write_cells`, :meth:`ExcelWriter.save`, :meth:`ExcelWriter.cur_sheet`, :meth:`ExcelWriter.handles`, :meth:`ExcelWriter.path` (:issue:`45795`) +- The :class:`ExcelWriter` attribute ``book`` can no longer be set; it is still available to be accessed and mutated (:issue:`48943`) +- Removed unused ``*args`` and ``**kwargs`` in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops (:issue:`47851`) +- Removed the deprecated argument ``line_terminator`` from :meth:`DataFrame.to_csv` (:issue:`45302`) +- Removed the deprecated argument ``label`` from :func:`lreshape` (:issue:`30219`) +- Arguments after ``expr`` in :meth:`DataFrame.eval` and :meth:`DataFrame.query` are keyword-only (:issue:`47587`) +- Removed :meth:`Index._get_attributes_dict` (:issue:`50648`) +- Removed :meth:`Series.__array_wrap__` (:issue:`50648`) +- Changed behavior of :meth:`.DataFrame.value_counts` to return a :class:`Series` with :class:`MultiIndex` for any list-like(one element or not) but an :class:`Index` for a single label (:issue:`50829`) + +.. --------------------------------------------------------------------------- +.. 
_whatsnew_200.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ +- Performance improvement in :meth:`.DataFrameGroupBy.median` and :meth:`.SeriesGroupBy.median` and :meth:`.DataFrameGroupBy.cumprod` for nullable dtypes (:issue:`37493`) +- Performance improvement in :meth:`.DataFrameGroupBy.all`, :meth:`.DataFrameGroupBy.any`, :meth:`.SeriesGroupBy.all`, and :meth:`.SeriesGroupBy.any` for object dtype (:issue:`50623`) +- Performance improvement in :meth:`MultiIndex.argsort` and :meth:`MultiIndex.sort_values` (:issue:`48406`) +- Performance improvement in :meth:`MultiIndex.size` (:issue:`48723`) +- Performance improvement in :meth:`MultiIndex.union` without missing values and without duplicates (:issue:`48505`, :issue:`48752`) +- Performance improvement in :meth:`MultiIndex.difference` (:issue:`48606`) +- Performance improvement in :class:`MultiIndex` set operations with sort=None (:issue:`49010`) +- Performance improvement in :meth:`.DataFrameGroupBy.mean`, :meth:`.SeriesGroupBy.mean`, :meth:`.DataFrameGroupBy.var`, and :meth:`.SeriesGroupBy.var` for extension array dtypes (:issue:`37493`) +- Performance improvement in :meth:`MultiIndex.isin` when ``level=None`` (:issue:`48622`, :issue:`49577`) +- Performance improvement in :meth:`MultiIndex.putmask` (:issue:`49830`) +- Performance improvement in :meth:`Index.union` and :meth:`MultiIndex.union` when index contains duplicates (:issue:`48900`) +- Performance improvement in :meth:`Series.rank` for pyarrow-backed dtypes (:issue:`50264`) +- Performance improvement in :meth:`Series.searchsorted` for pyarrow-backed dtypes (:issue:`50447`) +- Performance improvement in :meth:`Series.fillna` for extension array dtypes (:issue:`49722`, :issue:`50078`) +- Performance improvement in :meth:`Index.join`, :meth:`Index.intersection` and :meth:`Index.union` for masked and arrow dtypes when :class:`Index` is monotonic (:issue:`50310`, :issue:`51365`) +- Performance improvement for :meth:`Series.value_counts` with nullable dtype (:issue:`48338`) +- Performance improvement for :class:`Series` constructor passing integer numpy array with nullable dtype (:issue:`48338`) +- Performance improvement for :class:`DatetimeIndex` constructor passing a list (:issue:`48609`) +- Performance improvement in :func:`merge` and :meth:`DataFrame.join` when joining on a sorted :class:`MultiIndex` (:issue:`48504`) +- Performance improvement in :func:`to_datetime` when parsing strings with timezone offsets (:issue:`50107`) +- Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`48384`) +- Performance improvement for :meth:`Series.replace` with categorical dtype (:issue:`49404`) +- Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`) +- Performance improvement for indexing operations with nullable and arrow dtypes (:issue:`49420`, :issue:`51316`) +- Performance improvement for :func:`concat` with extension array backed indexes (:issue:`49128`, :issue:`49178`) +- Performance improvement for :func:`api.types.infer_dtype` (:issue:`51054`) +- Reduce memory usage of :meth:`DataFrame.to_pickle`/:meth:`Series.to_pickle` when using BZ2 or LZMA (:issue:`49068`) +- Performance improvement for :class:`~arrays.StringArray` constructor passing a numpy array with type ``np.str_`` (:issue:`49109`) +- Performance improvement in :meth:`~arrays.IntervalArray.from_tuples` (:issue:`50620`) +- Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`) +- 
Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` (:issue:`50248`, :issue:`50632`)
+- Performance improvement in :class:`~arrays.ArrowExtensionArray` comparison methods when array contains NA (:issue:`50524`)
+- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`, :issue:`51227`)
+- Performance improvement when parsing strings to :class:`BooleanDtype` (:issue:`50613`)
+- Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
+- Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)
+- Performance improvement in :meth:`DataFrame.__setitem__` (:issue:`46267`)
+- Performance improvement in ``var`` and ``std`` for nullable dtypes (:issue:`48379`)
+- Performance improvement when iterating over pyarrow and nullable dtypes (:issue:`49825`, :issue:`49851`)
+- Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`)
+- Memory improvement in :meth:`RangeIndex.sort_values` (:issue:`48801`)
+- Performance improvement in :meth:`Series.to_numpy` if ``copy=True`` by avoiding copying twice (:issue:`24345`)
+- Performance improvement in :meth:`Series.rename` with :class:`MultiIndex` (:issue:`21055`)
+- Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``sort=False`` (:issue:`48976`)
+- Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``observed=False`` (:issue:`49596`)
+- Performance improvement in :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default). Now the index will be a :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49745`)
+- Performance improvement in :func:`merge` when not merging on the index - the new index will now be :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49478`)
+- Performance improvement in :meth:`DataFrame.to_dict` and :meth:`Series.to_dict` when using any non-object dtypes (:issue:`46470`)
+- Performance improvement in :func:`read_html` when there are multiple tables (:issue:`49929`)
+- Performance improvement in :class:`Period` constructor when constructing from a string or integer (:issue:`38312`)
+- Performance improvement in :func:`to_datetime` when using ``'%Y%m%d'`` format (:issue:`17410`)
+- Performance improvement in :func:`to_datetime` when format is given or can be inferred (:issue:`50465`); see the sketch after this list
+- Performance improvement in :meth:`Series.median` for nullable dtypes (:issue:`50838`)
+- Performance improvement in :func:`read_csv` when passing a :func:`to_datetime` lambda-function to ``date_parser`` and inputs have mixed timezone offsets (:issue:`35296`)
+- Performance improvement in :func:`isna` and :func:`isnull` (:issue:`50658`)
+- Performance improvement in :meth:`.SeriesGroupBy.value_counts` with categorical dtype (:issue:`46202`)
+- Fixed a reference leak in :func:`read_hdf` (:issue:`37441`)
+- Fixed a memory leak in :meth:`DataFrame.to_json` and :meth:`Series.to_json` when serializing datetimes and timedeltas (:issue:`40443`)
+- Decreased memory usage in many :class:`DataFrameGroupBy` methods (:issue:`51090`)
+- Performance improvement in :meth:`DataFrame.round` for an integer ``decimal`` parameter (:issue:`17254`)
+- Performance improvement in :meth:`DataFrame.replace` and :meth:`Series.replace` when using a large dict for ``to_replace`` (:issue:`6697`)
+- Memory improvement in :class:`StataReader` when reading seekable files (:issue:`48922`)
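+
+To illustrate the ``format``-related improvement above, a minimal sketch (assuming pandas 2.0; the actual speedup depends on the data):
+
+.. code-block:: python
+
+    import pandas as pd
+
+    s = pd.Series(["20230101", "20230102", "20230103"])
+
+    # Supplying an explicit format avoids per-element format guessing
+    pd.to_datetime(s, format="%Y%m%d")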
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_200.bug_fixes:
+
+Bug fixes
+~~~~~~~~~
+
+Categorical
+^^^^^^^^^^^
+- Bug in :meth:`Categorical.set_categories` losing dtype information (:issue:`48812`)
+- Bug in :meth:`Series.replace` with categorical dtype when ``to_replace`` values overlap with new values (:issue:`49404`)
+- Bug in :meth:`Series.replace` with categorical dtype losing nullable dtypes of underlying categories (:issue:`49404`)
+- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would reorder categories when used as a grouper (:issue:`48749`)
+- Bug in :class:`Categorical` constructor when constructing from a :class:`Categorical` object and ``dtype="category"`` losing ordered-ness (:issue:`49309`)
+- Bug in :meth:`.SeriesGroupBy.min`, :meth:`.SeriesGroupBy.max`, :meth:`.DataFrameGroupBy.min`, and :meth:`.DataFrameGroupBy.max` with unordered :class:`CategoricalDtype` with no groups failing to raise ``TypeError`` (:issue:`51034`)
+
+Datetimelike
+^^^^^^^^^^^^
+- Bug in :func:`pandas.infer_freq`, raising ``TypeError`` when inferred on :class:`RangeIndex` (:issue:`47084`)
+- Bug in :func:`to_datetime` incorrectly raising ``OverflowError`` with string arguments corresponding to large integers (:issue:`50533`)
+- Bug in :func:`to_datetime` was raising on invalid offsets with ``errors='coerce'`` and ``infer_datetime_format=True`` (:issue:`48633`)
+- Bug in :class:`DatetimeIndex` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``dtype`` or data (:issue:`48659`)
+- Bug in subtracting a ``datetime`` scalar from :class:`DatetimeIndex` failing to retain the original ``freq`` attribute (:issue:`48818`)
+- Bug in ``pandas.tseries.holiday.Holiday`` where a half-open date interval causes inconsistent return types from :meth:`USFederalHolidayCalendar.holidays` (:issue:`49075`)
+- Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`)
+- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp`, ``datetime.datetime``, ``datetime.date``, or ``np.datetime64`` objects when non-ISO8601 ``format`` was passed (:issue:`49298`, :issue:`50036`)
+- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing empty string and non-ISO8601 format was passed. Now, empty strings will be parsed as :class:`NaT`, for compatibility with how it is done for ISO8601 formats (:issue:`50251`)
+- Bug in :class:`Timestamp` was showing ``UserWarning``, which was not actionable by users, when parsing non-ISO8601 delimited date strings (:issue:`50232`)
+- Bug in :func:`to_datetime` was showing misleading ``ValueError`` when parsing dates with format containing ISO week directive and ISO weekday directive (:issue:`50308`)
+- Bug in :meth:`Timestamp.round` when the ``freq`` argument has zero-duration (e.g.
"0ns") returning incorrect results instead of raising (:issue:`49737`) +- Bug in :func:`to_datetime` was not raising ``ValueError`` when invalid format was passed and ``errors`` was ``'ignore'`` or ``'coerce'`` (:issue:`50266`) +- Bug in :class:`DateOffset` was throwing ``TypeError`` when constructing with milliseconds and another super-daily argument (:issue:`49897`) +- Bug in :func:`to_datetime` was not raising ``ValueError`` when parsing string with decimal date with format ``'%Y%m%d'`` (:issue:`50051`) +- Bug in :func:`to_datetime` was not converting ``None`` to ``NaT`` when parsing mixed-offset date strings with ISO8601 format (:issue:`50071`) +- Bug in :func:`to_datetime` was not returning input when parsing out-of-bounds date string with ``errors='ignore'`` and ``format='%Y%m%d'`` (:issue:`14487`) +- Bug in :func:`to_datetime` was converting timezone-naive ``datetime.datetime`` to timezone-aware when parsing with timezone-aware strings, ISO8601 format, and ``utc=False`` (:issue:`50254`) +- Bug in :func:`to_datetime` was throwing ``ValueError`` when parsing dates with ISO8601 format where some values were not zero-padded (:issue:`21422`) +- Bug in :func:`to_datetime` was giving incorrect results when using ``format='%Y%m%d'`` and ``errors='ignore'`` (:issue:`26493`) +- Bug in :func:`to_datetime` was failing to parse date strings ``'today'`` and ``'now'`` if ``format`` was not ISO8601 (:issue:`50359`) +- Bug in :func:`Timestamp.utctimetuple` raising a ``TypeError`` (:issue:`32174`) +- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing mixed-offset :class:`Timestamp` with ``errors='ignore'`` (:issue:`50585`) +- Bug in :func:`to_datetime` was incorrectly handling floating-point inputs within 1 ``unit`` of the overflow boundaries (:issue:`50183`) +- Bug in :func:`to_datetime` with unit of "Y" or "M" giving incorrect results, not matching pointwise :class:`Timestamp` results (:issue:`50870`) +- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with datetime or timedelta dtypes incorrectly raising ``ValueError`` (:issue:`11312`) +- Bug in :func:`to_datetime` was not returning input with ``errors='ignore'`` when input was out-of-bounds (:issue:`50587`) +- Bug in :func:`DataFrame.from_records` when given a :class:`DataFrame` input with timezone-aware datetime64 columns incorrectly dropping the timezone-awareness (:issue:`51162`) +- Bug in :func:`to_datetime` was raising ``decimal.InvalidOperation`` when parsing date strings with ``errors='coerce'`` (:issue:`51084`) +- Bug in :func:`to_datetime` with both ``unit`` and ``origin`` specified returning incorrect results (:issue:`42624`) +- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` when converting an object-dtype object containing timezone-aware datetimes or strings to ``datetime64[ns]`` incorrectly localizing as UTC instead of raising ``TypeError`` (:issue:`50140`) +- Bug in :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` with datetime or timedelta dtypes giving incorrect results for groups containing ``NaT`` (:issue:`51373`) +- Bug in :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` incorrectly raising with :class:`PeriodDtype` or :class:`DatetimeTZDtype` (:issue:`51373`) + +Timedelta +^^^^^^^^^ +- Bug in :func:`to_timedelta` raising error when input has nullable dtype ``Float64`` (:issue:`48796`) +- Bug in :class:`Timedelta` constructor incorrectly raising instead of returning ``NaT`` when given a ``np.timedelta64("nat")`` (:issue:`48898`) +- Bug in 
:class:`Timedelta` constructor failing to raise when passed both a :class:`Timedelta` object and keywords (e.g. days, seconds) (:issue:`48898`)
+- Bug in :class:`Timedelta` comparisons with very large ``datetime.timedelta`` objects incorrectly raising ``OutOfBoundsTimedelta`` (:issue:`49021`)
+
+Timezones
+^^^^^^^^^
+- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` with object-dtype containing multiple timezone-aware ``datetime`` objects with heterogeneous timezones to a :class:`DatetimeTZDtype` incorrectly raising (:issue:`32581`)
+- Bug in :func:`to_datetime` was failing to parse date strings with timezone name when ``format`` was specified with ``%Z`` (:issue:`49748`)
+- Better error message when passing invalid values to ``ambiguous`` parameter in :meth:`Timestamp.tz_localize` (:issue:`49565`)
+- Bug in string parsing incorrectly allowing a :class:`Timestamp` to be constructed with an invalid timezone, which would raise when trying to print (:issue:`50668`)
+- Corrected TypeError message in :func:`objects_to_datetime64ns` to inform that DatetimeIndex has mixed timezones (:issue:`50974`)
+
+Numeric
+^^^^^^^
+- Bug in :meth:`DataFrame.add` failing to apply a ufunc when inputs contain mixed DataFrame type and Series type (:issue:`39853`)
+- Bug in arithmetic operations on :class:`Series` not propagating mask when combining masked dtypes and numpy dtypes (:issue:`45810`, :issue:`42630`)
+- Bug in :meth:`DataFrame.sem` and :meth:`Series.sem` where an erroneous ``TypeError`` would always raise when using data backed by an :class:`ArrowDtype` (:issue:`49759`)
+- Bug in :meth:`Series.__add__` casting to object for list and masked :class:`Series` (:issue:`22962`)
+- Bug in :meth:`~arrays.ArrowExtensionArray.mode` where ``dropna=False`` was not respected when there were ``NA`` values (:issue:`50982`)
+- Bug in :meth:`DataFrame.query` with ``engine="numexpr"`` raising a ``TypeError`` when column names are ``min`` or ``max`` (:issue:`50937`)
+- Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` with tz-aware data containing ``pd.NaT`` and ``axis=1`` returning incorrect results (:issue:`51242`)
+
+Conversion
+^^^^^^^^^^
+- Bug in constructing :class:`Series` with ``int64`` dtype from a string list raising instead of casting (:issue:`44923`)
+- Bug in constructing :class:`Series` with masked dtype and boolean values with ``NA`` raising (:issue:`42137`)
+- Bug in :meth:`DataFrame.eval` incorrectly raising an ``AttributeError`` when there are negative values in a function call (:issue:`46471`)
+- Bug in :meth:`Series.convert_dtypes` not converting dtype to nullable dtype when :class:`Series` contains ``NA`` and has dtype ``object`` (:issue:`48791`)
+- Bug where any :class:`ExtensionDtype` subclass with ``kind="M"`` would be interpreted as a timezone type (:issue:`34986`)
+- Bug in :class:`.arrays.ArrowExtensionArray` that would raise ``NotImplementedError`` when passed a sequence of strings or binary (:issue:`49172`)
+- Bug in :meth:`Series.astype` raising ``pyarrow.ArrowInvalid`` when converting from a non-pyarrow string dtype to a pyarrow numeric type (:issue:`50430`)
+- Bug in :meth:`DataFrame.astype` modifying input array inplace when converting to ``string`` and ``copy=False`` (:issue:`51073`)
+- Bug in :meth:`Series.to_numpy` converting to NumPy array before applying ``na_value`` (:issue:`48951`)
+- Bug in :meth:`DataFrame.astype` not copying data when converting to pyarrow dtype (:issue:`50984`)
+- Bug in :func:`to_datetime` was not respecting ``exact`` argument when ``format`` was an
ISO8601 format (:issue:`12649`) +- Bug in :meth:`TimedeltaArray.astype` raising ``TypeError`` when converting to a pyarrow duration type (:issue:`49795`) +- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` raising for extension array dtypes (:issue:`29618`, :issue:`50261`, :issue:`31913`) +- Bug in :meth:`Series` not copying data when created from :class:`Index` and ``dtype`` is equal to ``dtype`` from :class:`Index` (:issue:`52008`) + +Strings +^^^^^^^ +- Bug in :func:`pandas.api.types.is_string_dtype` that would not return ``True`` for :class:`StringDtype` or :class:`ArrowDtype` with ``pyarrow.string()`` (:issue:`15585`) +- Bug in converting string dtypes to "datetime64[ns]" or "timedelta64[ns]" incorrectly raising ``TypeError`` (:issue:`36153`) +- Bug in setting values in a string-dtype column with an array, mutating the array as side effect when it contains missing values (:issue:`51299`) + +Interval +^^^^^^^^ +- Bug in :meth:`IntervalIndex.is_overlapping` incorrect output if interval has duplicate left boundaries (:issue:`49581`) +- Bug in :meth:`Series.infer_objects` failing to infer :class:`IntervalDtype` for an object series of :class:`Interval` objects (:issue:`50090`) +- Bug in :meth:`Series.shift` with :class:`IntervalDtype` and invalid null ``fill_value`` failing to raise ``TypeError`` (:issue:`51258`) + +Indexing +^^^^^^^^ +- Bug in :meth:`DataFrame.__setitem__` raising when indexer is a :class:`DataFrame` with ``boolean`` dtype (:issue:`47125`) +- Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`) +- Bug in :meth:`DataFrame.loc` when setting :class:`DataFrame` with different dtypes coercing values to single dtype (:issue:`50467`) +- Bug in :meth:`DataFrame.sort_values` where ``None`` was not returned when ``by`` is empty list and ``inplace=True`` (:issue:`50643`) +- Bug in :meth:`DataFrame.loc` coercing dtypes when setting values with a list indexer (:issue:`49159`) +- Bug in :meth:`Series.loc` raising error for out of bounds end of slice indexer (:issue:`50161`) +- Bug in :meth:`DataFrame.loc` raising ``ValueError`` with all ``False`` ``bool`` indexer and empty object (:issue:`51450`) +- Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`) +- Bug in :meth:`DataFrame.loc` raising ``IndexError`` when setting values for a pyarrow-backed column with a non-scalar indexer (:issue:`50085`) +- Bug in :meth:`DataFrame.__getitem__`, :meth:`Series.__getitem__`, :meth:`DataFrame.__setitem__` and :meth:`Series.__setitem__` + when indexing on indexes with extension float dtypes (:class:`Float64` & :class:`Float64`) or complex dtypes using integers (:issue:`51053`) +- Bug in :meth:`DataFrame.loc` modifying object when setting incompatible value with an empty indexer (:issue:`45981`) +- Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when right hand side is :class:`DataFrame` with :class:`MultiIndex` columns (:issue:`49121`) +- Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`) +- Bug in :meth:`DataFrame.iloc` raising ``IndexError`` when indexer is a :class:`Series` with numeric extension array dtype (:issue:`49521`) +- Bug in :func:`~DataFrame.describe` when formatting percentiles in the resulting index showed more decimals than needed (:issue:`46362`) +- Bug in :meth:`DataFrame.compare` does not recognize 
differences when comparing ``NA`` with value in nullable dtypes (:issue:`48939`) +- Bug in :meth:`Series.rename` with :class:`MultiIndex` losing extension array dtypes (:issue:`21055`) +- Bug in :meth:`DataFrame.isetitem` coercing extension array dtypes in :class:`DataFrame` to object (:issue:`49922`) +- Bug in :meth:`Series.__getitem__` returning corrupt object when selecting from an empty pyarrow backed object (:issue:`51734`) +- Bug in :class:`BusinessHour` would cause creation of :class:`DatetimeIndex` to fail when no opening hour was included in the index (:issue:`49835`) + +Missing +^^^^^^^ +- Bug in :meth:`Index.equals` raising ``TypeError`` when :class:`Index` consists of tuples that contain ``NA`` (:issue:`48446`) +- Bug in :meth:`Series.map` caused incorrect result when data has NaNs and defaultdict mapping was used (:issue:`48813`) +- Bug in :class:`NA` raising a ``TypeError`` instead of return :class:`NA` when performing a binary operation with a ``bytes`` object (:issue:`49108`) +- Bug in :meth:`DataFrame.update` with ``overwrite=False`` raising ``TypeError`` when ``self`` has column with ``NaT`` values and column not present in ``other`` (:issue:`16713`) +- Bug in :meth:`Series.replace` raising ``RecursionError`` when replacing value in object-dtype :class:`Series` containing ``NA`` (:issue:`47480`) +- Bug in :meth:`Series.replace` raising ``RecursionError`` when replacing value in numeric :class:`Series` with ``NA`` (:issue:`50758`) + +MultiIndex +^^^^^^^^^^ +- Bug in :meth:`MultiIndex.get_indexer` not matching ``NaN`` values (:issue:`29252`, :issue:`37222`, :issue:`38623`, :issue:`42883`, :issue:`43222`, :issue:`46173`, :issue:`48905`) +- Bug in :meth:`MultiIndex.argsort` raising ``TypeError`` when index contains :attr:`NA` (:issue:`48495`) +- Bug in :meth:`MultiIndex.difference` losing extension array dtype (:issue:`48606`) +- Bug in :class:`MultiIndex.set_levels` raising ``IndexError`` when setting empty level (:issue:`48636`) +- Bug in :meth:`MultiIndex.unique` losing extension array dtype (:issue:`48335`) +- Bug in :meth:`MultiIndex.intersection` losing extension array (:issue:`48604`) +- Bug in :meth:`MultiIndex.union` losing extension array (:issue:`48498`, :issue:`48505`, :issue:`48900`) +- Bug in :meth:`MultiIndex.union` not sorting when sort=None and index contains missing values (:issue:`49010`) +- Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`) +- Bug in :meth:`MultiIndex.symmetric_difference` losing extension array (:issue:`48607`) +- Bug in :meth:`MultiIndex.join` losing dtypes when :class:`MultiIndex` has duplicates (:issue:`49830`) +- Bug in :meth:`MultiIndex.putmask` losing extension array (:issue:`49830`) +- Bug in :meth:`MultiIndex.value_counts` returning a :class:`Series` indexed by flat index of tuples instead of a :class:`MultiIndex` (:issue:`49558`) + +I/O +^^^ +- Bug in :func:`read_sas` caused fragmentation of :class:`DataFrame` and raised :class:`.errors.PerformanceWarning` (:issue:`48595`) +- Improved error message in :func:`read_excel` by including the offending sheet name when an exception is raised while reading a file (:issue:`48706`) +- Bug when a pickling a subset PyArrow-backed data that would serialize the entire data instead of the subset (:issue:`42600`) +- Bug in :func:`read_sql_query` ignoring ``dtype`` argument when ``chunksize`` is specified and result is empty (:issue:`50245`) +- Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with 
``engine="c"`` (:issue:`47566`) +- Bug in :func:`read_json` raising with ``orient="table"`` and ``NA`` value (:issue:`40255`) +- Bug in displaying ``string`` dtypes not showing storage option (:issue:`50099`) +- Bug in :meth:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`) +- Bug in :meth:`DataFrame.to_string` ignoring float formatter for extension arrays (:issue:`39336`) +- Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`) +- Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`) +- Bug in :func:`read_csv` unnecessarily overflowing for extension array dtype when containing ``NA`` (:issue:`32134`) +- Bug in :meth:`DataFrame.to_dict` not converting ``NA`` to ``None`` (:issue:`50795`) +- Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`) +- Bug in :meth:`DataFrame.to_html` with ``na_rep`` set when the :class:`DataFrame` contains non-scalar data (:issue:`47103`) +- Bug in :func:`read_xml` where file-like objects failed when iterparse is used (:issue:`50641`) +- Bug in :func:`read_csv` when ``engine="pyarrow"`` where ``encoding`` parameter was not handled correctly (:issue:`51302`) +- Bug in :func:`read_xml` ignored repeated elements when iterparse is used (:issue:`51183`) +- Bug in :class:`ExcelWriter` leaving file handles open if an exception occurred during instantiation (:issue:`51443`) +- Bug in :meth:`DataFrame.to_parquet` where non-string index or columns were raising a ``ValueError`` when ``engine="pyarrow"`` (:issue:`52036`) + +Period +^^^^^^ +- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, raising ``UnicodeDecodeError`` when a locale-specific directive was passed (:issue:`46319`) +- Bug in adding a :class:`Period` object to an array of :class:`DateOffset` objects incorrectly raising ``TypeError`` (:issue:`50162`) +- Bug in :class:`Period` where passing a string with finer resolution than nanosecond would result in a ``KeyError`` instead of dropping the extra precision (:issue:`50417`) +- Bug in parsing strings representing Week-periods e.g. 
"2017-01-23/2017-01-29" as minute-frequency instead of week-frequency (:issue:`50803`) +- Bug in :meth:`.DataFrameGroupBy.sum`, :meth:`.DataFrameGroupByGroupBy.cumsum`, :meth:`.DataFrameGroupByGroupBy.prod`, :meth:`.DataFrameGroupByGroupBy.cumprod` with :class:`PeriodDtype` failing to raise ``TypeError`` (:issue:`51040`) +- Bug in parsing empty string with :class:`Period` incorrectly raising ``ValueError`` instead of returning ``NaT`` (:issue:`51349`) + +Plotting +^^^^^^^^ +- Bug in :meth:`DataFrame.plot.hist`, not dropping elements of ``weights`` corresponding to ``NaN`` values in ``data`` (:issue:`48884`) +- ``ax.set_xlim`` was sometimes raising ``UserWarning`` which users couldn't address due to ``set_xlim`` not accepting parsing arguments - the converter now uses :func:`Timestamp` instead (:issue:`49148`) + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ +- Bug in :class:`.ExponentialMovingWindow` with ``online`` not raising a ``NotImplementedError`` for unsupported operations (:issue:`48834`) +- Bug in :meth:`.DataFrameGroupBy.sample` raises ``ValueError`` when the object is empty (:issue:`48459`) +- Bug in :meth:`Series.groupby` raises ``ValueError`` when an entry of the index is equal to the name of the index (:issue:`48567`) +- Bug in :meth:`.DataFrameGroupBy.resample` produces inconsistent results when passing empty DataFrame (:issue:`47705`) +- Bug in :class:`.DataFrameGroupBy` and :class:`.SeriesGroupBy` would not include unobserved categories in result when grouping by categorical indexes (:issue:`49354`) +- Bug in :class:`.DataFrameGroupBy` and :class:`.SeriesGroupBy` would change result order depending on the input index when grouping by categoricals (:issue:`49223`) +- Bug in :class:`.DataFrameGroupBy` and :class:`.SeriesGroupBy` when grouping on categorical data would sort result values even when used with ``sort=False`` (:issue:`42482`) +- Bug in :meth:`.DataFrameGroupBy.apply` and :class:`.SeriesGroupBy.apply` with ``as_index=False`` would not attempt the computation without using the grouping keys when using them failed with a ``TypeError`` (:issue:`49256`) +- Bug in :meth:`.DataFrameGroupBy.describe` would describe the group keys (:issue:`49256`) +- Bug in :meth:`.SeriesGroupBy.describe` with ``as_index=False`` would have the incorrect shape (:issue:`49256`) +- Bug in :class:`.DataFrameGroupBy` and :class:`.SeriesGroupBy` with ``dropna=False`` would drop NA values when the grouper was categorical (:issue:`36327`) +- Bug in :meth:`.SeriesGroupBy.nunique` would incorrectly raise when the grouper was an empty categorical and ``observed=True`` (:issue:`21334`) +- Bug in :meth:`.SeriesGroupBy.nth` would raise when grouper contained NA values after subsetting from a :class:`DataFrameGroupBy` (:issue:`26454`) +- Bug in :meth:`DataFrame.groupby` would not include a :class:`.Grouper` specified by ``key`` in the result when ``as_index=False`` (:issue:`50413`) +- Bug in :meth:`.DataFrameGroupBy.value_counts` would raise when used with a :class:`.TimeGrouper` (:issue:`50486`) +- Bug in :meth:`.Resampler.size` caused a wide :class:`DataFrame` to be returned instead of a :class:`Series` with :class:`MultiIndex` (:issue:`46826`) +- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`) +- Bug in :class:`.DataFrameGroupBy` would raise when used with an empty DataFrame, categorical grouper, and ``dropna=False`` (:issue:`50634`) +- Bug in 
:meth:`.SeriesGroupBy.value_counts` did not respect ``sort=False`` (:issue:`50482`) +- Bug in :meth:`.DataFrameGroupBy.resample` raises ``KeyError`` when getting the result from a key list when resampling on time index (:issue:`50840`) +- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"ngroup"`` argument (:issue:`45986`) +- Bug in :meth:`.DataFrameGroupBy.describe` produced incorrect results when data had duplicate columns (:issue:`50806`) +- Bug in :meth:`.DataFrameGroupBy.agg` with ``engine="numba"`` failing to respect ``as_index=False`` (:issue:`51228`) +- Bug in :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, and :meth:`.Resampler.agg` would ignore arguments when passed a list of functions (:issue:`50863`) +- Bug in :meth:`.DataFrameGroupBy.ohlc` ignoring ``as_index=False`` (:issue:`51413`) +- Bug in :meth:`DataFrameGroupBy.agg` after subsetting columns (e.g. ``.groupby(...)[["a", "b"]]``) would not include groupings in the result (:issue:`51186`) + +Reshaping +^^^^^^^^^ +- Bug in :meth:`DataFrame.pivot_table` raising ``TypeError`` for nullable dtype and ``margins=True`` (:issue:`48681`) +- Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` unstacking wrong level of :class:`MultiIndex` when :class:`MultiIndex` has mixed names (:issue:`48763`) +- Bug in :meth:`DataFrame.melt` losing extension array dtype (:issue:`41570`) +- Bug in :meth:`DataFrame.pivot` not respecting ``None`` as column name (:issue:`48293`) +- Bug in :meth:`DataFrame.join` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`) +- Bug in :meth:`DataFrame.pivot_table` raising ``ValueError`` with parameter ``margins=True`` when result is an empty :class:`DataFrame` (:issue:`49240`) +- Clarified error message in :func:`merge` when passing invalid ``validate`` option (:issue:`49417`) +- Bug in :meth:`DataFrame.explode` raising ``ValueError`` on multiple columns with ``NaN`` values or empty lists (:issue:`46084`) +- Bug in :meth:`DataFrame.transpose` with ``IntervalDtype`` column with ``timedelta64[ns]`` endpoints (:issue:`44917`) +- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` would ignore arguments when passed a list of functions (:issue:`50863`) + +Sparse +^^^^^^ +- Bug in :meth:`Series.astype` when converting a ``SparseDtype`` with ``datetime64[ns]`` subtype to ``int64`` dtype raising, inconsistent with the non-sparse behavior (:issue:`49631`,:issue:`50087`) +- Bug in :meth:`Series.astype` when converting a from ``datetime64[ns]`` to ``Sparse[datetime64[ns]]`` incorrectly raising (:issue:`50082`) +- Bug in :meth:`Series.sparse.to_coo` raising ``SystemError`` when :class:`MultiIndex` contains a ``ExtensionArray`` (:issue:`50996`) + +ExtensionArray +^^^^^^^^^^^^^^ +- Bug in :meth:`Series.mean` overflowing unnecessarily with nullable integers (:issue:`48378`) +- Bug in :meth:`Series.tolist` for nullable dtypes returning numpy scalars instead of python scalars (:issue:`49890`) +- Bug in :meth:`Series.round` for pyarrow-backed dtypes raising ``AttributeError`` (:issue:`50437`) +- Bug when concatenating an empty DataFrame with an ExtensionDtype to another DataFrame with the same ExtensionDtype, the resulting dtype turned into object (:issue:`48510`) +- Bug in :meth:`array.PandasArray.to_numpy` raising with ``NA`` value when ``na_value`` is specified (:issue:`40638`) +- Bug in :meth:`api.types.is_numeric_dtype` where a custom 
:class:`ExtensionDtype` would not return ``True`` if ``_is_numeric`` returned ``True`` (:issue:`50563`) +- Bug in :meth:`api.types.is_integer_dtype`, :meth:`api.types.is_unsigned_integer_dtype`, :meth:`api.types.is_signed_integer_dtype`, :meth:`api.types.is_float_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``kind`` returned the corresponding NumPy type (:issue:`50667`) +- Bug in :class:`Series` constructor unnecessarily overflowing for nullable unsigned integer dtypes (:issue:`38798`, :issue:`25880`) +- Bug in setting non-string value into ``StringArray`` raising ``ValueError`` instead of ``TypeError`` (:issue:`49632`) +- Bug in :meth:`DataFrame.reindex` not honoring the default ``copy=True`` keyword in case of columns with ExtensionDtype (and as a result also selecting multiple columns with getitem (``[]``) didn't correctly result in a copy) (:issue:`51197`) +- Bug in :class:`~arrays.ArrowExtensionArray` logical operations ``&`` and ``|`` raising ``KeyError`` (:issue:`51688`) + +Styler +^^^^^^ +- Fix :meth:`~pandas.io.formats.style.Styler.background_gradient` for nullable dtype :class:`Series` with ``NA`` values (:issue:`50712`) + +Metadata +^^^^^^^^ +- Fixed metadata propagation in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` (:issue:`28283`) + +Other +^^^^^ +- Bug in incorrectly accepting dtype strings containing "[pyarrow]" more than once (:issue:`51548`) +- Bug in :meth:`Series.searchsorted` inconsistent behavior when accepting :class:`DataFrame` as parameter ``value`` (:issue:`49620`) +- Bug in :func:`array` failing to raise on :class:`DataFrame` inputs (:issue:`51167`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_200.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v1.5.0rc0..v2.0.0|HEAD diff --git a/libraries/pandas/requirements.txt b/libraries/pandas/requirements.txt new file mode 100644 index 0000000..794cc3d --- /dev/null +++ b/libraries/pandas/requirements.txt @@ -0,0 +1 @@ +openpyxl diff --git a/libraries/scipy/examples/central_diff_weights.py b/libraries/scipy/examples/central_diff_weights.py new file mode 100644 index 0000000..ec84d57 --- /dev/null +++ b/libraries/scipy/examples/central_diff_weights.py @@ -0,0 +1,11 @@ +from scipy.misc import central_diff_weights + +def g(x): + return 2 * x**2 + 3 + +point = 10.0 +step = 0.1 +point_number = 3 +weights = central_diff_weights(point_number) +vals = [g(point + (i - point_number/2) * step) for i in range(point_number)] +sum(w * v for (w, v) in zip(weights, vals))/step \ No newline at end of file diff --git a/libraries/scipy/examples/minimize.py b/libraries/scipy/examples/minimize.py new file mode 100644 index 0000000..d27d8fa --- /dev/null +++ b/libraries/scipy/examples/minimize.py @@ -0,0 +1,11 @@ +import numpy as np +from scipy.optimize import minimize + +def rosen(x): + """The Rosenbrock function""" + return sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0) + +x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2]) +res = minimize(rosen, x0, method='TNC', options={'maxiter': 10}) + +print(res.x) \ No newline at end of file diff --git a/libraries/scipy/library.json b/libraries/scipy/library.json new file mode 100644 index 0000000..698d9b6 --- /dev/null +++ b/libraries/scipy/library.json @@ -0,0 +1,6 @@ +{ + "name": "scipy", + "ghurl": "https://github.com/scipy/scipy", + "baseversion": "1.8.1", + "currentversion": "1.10.1" +} \ No newline at end of file diff --git a/libraries/scipy/releasenotes/1.10.0-notes.rst b/libraries/scipy/releasenotes/1.10.0-notes.rst new file mode 100644 index 0000000..b9c0da1 --- /dev/null +++ b/libraries/scipy/releasenotes/1.10.0-notes.rst @@ -0,0 +1,1211 @@ +========================== +SciPy 1.10.0 Release Notes +========================== + +.. contents:: + +SciPy 1.10.0 is the culmination of 6 months of hard work. It contains +many new features, numerous bug-fixes, improved test coverage and better +documentation. There have been a number of deprecations and API changes +in this release, which are documented below. All users are encouraged to +upgrade to this release, as there are a large number of bug-fixes and +optimizations. Before upgrading, we recommend that users check that +their own code does not use deprecated SciPy functionality (to do so, +run your code with ``python -Wd`` and check for ``DeprecationWarning`` s). +Our development attention will now shift to bug-fix releases on the +1.10.x branch, and on adding new features on the main branch. + +This release requires Python 3.8+ and NumPy 1.19.5 or greater. + +For running on PyPy, PyPy3 6.0+ is required. + + +************************** +Highlights of this release +************************** + +- A new dedicated datasets submodule (`scipy.datasets`) has been added, and is + now preferred over usage of `scipy.misc` for dataset retrieval. +- A new `scipy.interpolate.make_smoothing_spline` function was added. This + function constructs a smoothing cubic spline from noisy data, using the + generalized cross-validation (GCV) criterion to find the tradeoff between + smoothness and proximity to data points. 
+- `scipy.stats` has three new distributions, two new hypothesis tests, three + new sample statistics, a class for greater control over calculations + involving covariance matrices, and many other enhancements. + +************ +New features +************ + +`scipy.datasets` introduction +============================= +- A new dedicated ``datasets`` submodule has been added. The submodules + is meant for datasets that are relevant to other SciPy submodules ands + content (tutorials, examples, tests), as well as contain a curated + set of datasets that are of wider interest. As of this release, all + the datasets from `scipy.misc` have been added to `scipy.datasets` + (and deprecated in `scipy.misc`). +- The submodule is based on [Pooch](https://www.fatiando.org/pooch/latest/) + (a new optional dependency for SciPy), a Python package to simplify fetching + data files. This move will, in a subsequent release, facilitate SciPy + to trim down the sdist/wheel sizes, by decoupling the data files and + moving them out of the SciPy repository, hosting them externally and + downloading them when requested. After downloading the datasets once, + the files are cached to avoid network dependence and repeated usage. +- Added datasets from ``scipy.misc``: `scipy.datasets.face`, + `scipy.datasets.ascent`, `scipy.datasets.electrocardiogram` +- Added download and caching functionality: + + - `scipy.datasets.download_all`: a function to download all the `scipy.datasets` + associated files at once. + - `scipy.datasets.clear_cache`: a simple utility function to clear cached dataset + files from the file system. + - ``scipy/datasets/_download_all.py`` can be run as a standalone script for + packaging purposes to avoid any external dependency at build or test time. + This can be used by SciPy packagers (e.g., for Linux distros) which may + have to adhere to rules that forbid downloading sources from external + repositories at package build time. + +`scipy.integrate` improvements +============================== +- Added parameter ``complex_func`` to `scipy.integrate.quad`, which can be set + ``True`` to integrate a complex integrand. + + +`scipy.interpolate` improvements +================================ +- `scipy.interpolate.interpn` now supports tensor-product interpolation methods + (``slinear``, ``cubic``, ``quintic`` and ``pchip``) +- Tensor-product interpolation methods (``slinear``, ``cubic``, ``quintic`` and + ``pchip``) in `scipy.interpolate.interpn` and + `scipy.interpolate.RegularGridInterpolator` now allow values with trailing + dimensions. +- `scipy.interpolate.RegularGridInterpolator` has a new fast path for + ``method="linear"`` with 2D data, and ``RegularGridInterpolator`` is now + easier to subclass +- `scipy.interpolate.interp1d` now can take a single value for non-spline + methods. +- A new ``extrapolate`` argument is available to `scipy.interpolate.BSpline.design_matrix`, + allowing extrapolation based on the first and last intervals. +- A new function `scipy.interpolate.make_smoothing_spline` has been added. It is an + implementation of the generalized cross-validation spline smoothing + algorithm. The ``lam=None`` (default) mode of this function is a clean-room + reimplementation of the classic ``gcvspl.f`` Fortran algorithm for + constructing GCV splines. +- A new ``method="pchip"`` mode was aded to + `scipy.interpolate.RegularGridInterpolator`. 
This mode constructs an + interpolator using tensor products of C1-continuous monotone splines + (essentially, a `scipy.interpolate.PchipInterpolator` instance per + dimension). + + + +`scipy.sparse.linalg` improvements +================================== +- The spectral 2-norm is now available in `scipy.sparse.linalg.norm`. +- The performance of `scipy.sparse.linalg.norm` for the default case (Frobenius + norm) has been improved. +- LAPACK wrappers were added for ``trexc`` and ``trsen``. +- The `scipy.sparse.linalg.lobpcg` algorithm was rewritten, yielding + the following improvements: + + - a simple tunable restart potentially increases the attainable + accuracy for edge cases, + - internal postprocessing runs one final exact Rayleigh-Ritz method + giving more accurate and orthonormal eigenvectors, + - output the computed iterate with the smallest max norm of the residual + and drop the history of subsequent iterations, + - remove the check for ``LinearOperator`` format input and thus allow + a simple function handle of a callable object as an input, + - better handling of common user errors with input data, rather + than letting the algorithm fail. + + +`scipy.linalg` improvements +=========================== +- `scipy.linalg.lu_factor` now accepts rectangular arrays instead of being restricted + to square arrays. + + +`scipy.ndimage` improvements +============================ +- The new `scipy.ndimage.value_indices` function provides a time-efficient method to + search for the locations of individual values with an array of image data. +- A new ``radius`` argument is supported by `scipy.ndimage.gaussian_filter1d` and + `scipy.ndimage.gaussian_filter` for adjusting the kernel size of the filter. + + +`scipy.optimize` improvements +============================= +- `scipy.optimize.brute` now coerces non-iterable/single-value ``args`` into a + tuple. +- `scipy.optimize.least_squares` and `scipy.optimize.curve_fit` now accept + `scipy.optimize.Bounds` for bounds constraints. +- Added a tutorial for `scipy.optimize.milp`. +- Improved the pretty-printing of `scipy.optimize.OptimizeResult` objects. +- Additional options (``parallel``, ``threads``, ``mip_rel_gap``) can now + be passed to `scipy.optimize.linprog` with ``method='highs'``. + + +`scipy.signal` improvements +=========================== +- The new window function `scipy.signal.windows.lanczos` was added to compute a + Lanczos window, also known as a sinc window. + + +`scipy.sparse.csgraph` improvements +=================================== +- the performance of `scipy.sparse.csgraph.dijkstra` has been improved, and + star graphs in particular see a marked performance improvement + + +`scipy.special` improvements +============================ +- The new function `scipy.special.powm1`, a ufunc with signature + ``powm1(x, y)``, computes ``x**y - 1``. The function avoids the loss of + precision that can result when ``y`` is close to 0 or when ``x`` is close to + 1. +- `scipy.special.erfinv` is now more accurate as it leverages the Boost equivalent under + the hood. + + +`scipy.stats` improvements +========================== +- Added `scipy.stats.goodness_of_fit`, a generalized goodness-of-fit test for + use with any univariate distribution, any combination of known and unknown + parameters, and several choices of test statistic (Kolmogorov-Smirnov, + Cramer-von Mises, and Anderson-Darling). +- Improved `scipy.stats.bootstrap`: Default method ``'BCa'`` now supports + multi-sample statistics. 
Also, the bootstrap distribution is returned in the + result object, and the result object can be passed into the function as + parameter ``bootstrap_result`` to add additional resamples or change the + confidence interval level and type. +- Added maximum spacing estimation to `scipy.stats.fit`. +- Added the Poisson means test ("E-test") as `scipy.stats.poisson_means_test`. +- Added new sample statistics. + + - Added `scipy.stats.contingency.odds_ratio` to compute both the conditional + and unconditional odds ratios and corresponding confidence intervals for + 2x2 contingency tables. + - Added `scipy.stats.directional_stats` to compute sample statistics of + n-dimensional directional data. + - Added `scipy.stats.expectile`, which generalizes the expected value in the + same way as quantiles are a generalization of the median. + +- Added new statistical distributions. + + - Added `scipy.stats.uniform_direction`, a multivariate distribution to + sample uniformly from the surface of a hypersphere. + - Added `scipy.stats.random_table`, a multivariate distribution to sample + uniformly from m x n contingency tables with provided marginals. + - Added `scipy.stats.truncpareto`, the truncated Pareto distribution. + +- Improved the ``fit`` method of several distributions. + + - `scipy.stats.skewnorm` and `scipy.stats.weibull_min` now use an analytical + solution when ``method='mm'``, which also serves a starting guess to + improve the performance of ``method='mle'``. + - `scipy.stats.gumbel_r` and `scipy.stats.gumbel_l`: analytical maximum + likelihood estimates have been extended to the cases in which location or + scale are fixed by the user. + - Analytical maximum likelihood estimates have been added for + `scipy.stats.powerlaw`. + +- Improved random variate sampling of several distributions. + + - Drawing multiple samples from `scipy.stats.matrix_normal`, + `scipy.stats.ortho_group`, `scipy.stats.special_ortho_group`, and + `scipy.stats.unitary_group` is faster. + - The ``rvs`` method of `scipy.stats.vonmises` now wraps to the interval + ``[-np.pi, np.pi]``. + - Improved the reliability of `scipy.stats.loggamma` ``rvs`` method for small + values of the shape parameter. + +- Improved the speed and/or accuracy of functions of several statistical + distributions. + + - Added `scipy.stats.Covariance` for better speed, accuracy, and user control + in multivariate normal calculations. + - `scipy.stats.skewnorm` methods ``cdf``, ``sf``, ``ppf``, and ``isf`` + methods now use the implementations from Boost, improving speed while + maintaining accuracy. The calculation of higher-order moments is also + faster and more accurate. + - `scipy.stats.invgauss` methods ``ppf`` and ``isf`` methods now use the + implementations from Boost, improving speed and accuracy. + - `scipy.stats.invweibull` methods ``sf`` and ``isf`` are more accurate for + small probability masses. + - `scipy.stats.nct` and `scipy.stats.ncx2` now rely on the implementations + from Boost, improving speed and accuracy. + - Implemented the ``logpdf`` method of `scipy.stats.vonmises` for reliability + in extreme tails. + - Implemented the ``isf`` method of `scipy.stats.levy` for speed and + accuracy. + - Improved the robustness of `scipy.stats.studentized_range` for large ``df`` + by adding an infinite degree-of-freedom approximation. + - Added a parameter ``lower_limit`` to `scipy.stats.multivariate_normal`, + allowing the user to change the integration limit from -inf to a desired + value. 
+ - Improved the robustness of ``entropy`` of `scipy.stats.vonmises` for large + concentration values. + +- Enhanced `scipy.stats.gaussian_kde`. + + - Added `scipy.stats.gaussian_kde.marginal`, which returns the desired + marginal distribution of the original kernel density estimate distribution. + - The ``cdf`` method of `scipy.stats.gaussian_kde` now accepts a + ``lower_limit`` parameter for integrating the PDF over a rectangular region. + - Moved calculations for `scipy.stats.gaussian_kde.logpdf` to Cython, + improving speed. + - The global interpreter lock is released by the ``pdf`` method of + `scipy.stats.gaussian_kde` for improved multithreading performance. + - Replaced explicit matrix inversion with Cholesky decomposition for speed + and accuracy. + +- Enhanced the result objects returned by many `scipy.stats` functions + + - Added a ``confidence_interval`` method to the result object returned by + `scipy.stats.ttest_1samp` and `scipy.stats.ttest_rel`. + - The `scipy.stats` functions ``combine_pvalues``, ``fisher_exact``, + ``chi2_contingency``, ``median_test`` and ``mood`` now return + bunch objects rather than plain tuples, allowing attributes to be + accessed by name. + - Attributes of the result objects returned by ``multiscale_graphcorr``, + ``anderson_ksamp``, ``binomtest``, ``crosstab``, ``pointbiserialr``, + ``spearmanr``, ``kendalltau``, and ``weightedtau`` have been renamed to + ``statistic`` and ``pvalue`` for consistency throughout `scipy.stats`. + Old attribute names are still allowed for backward compatibility. + - `scipy.stats.anderson` now returns the parameters of the fitted + distribution in a `scipy.stats._result_classes.FitResult` object. + - The ``plot`` method of `scipy.stats._result_classes.FitResult` now accepts + a ``plot_type`` parameter; the options are ``'hist'`` (histogram, default), + ``'qq'`` (Q-Q plot), ``'pp'`` (P-P plot), and ``'cdf'`` (empirical CDF + plot). + - Kolmogorov-Smirnov tests (e.g. `scipy.stats.kstest`) now return the + location (argmax) at which the statistic is calculated and the variant + of the statistic used. + +- Improved the performance of several `scipy.stats` functions. + + - Improved the performance of `scipy.stats.cramervonmises_2samp` and + `scipy.stats.ks_2samp` with ``method='exact'``. + - Improved the performance of `scipy.stats.siegelslopes`. + - Improved the performance of `scipy.stats.mstats.hdquantile_sd`. + - Improved the performance of `scipy.stats.binned_statistic_dd` for several + NumPy statistics, and binned statistics methods now support complex data. + +- Added the ``scramble`` optional argument to `scipy.stats.qmc.LatinHypercube`. + It replaces ``centered``, which is now deprecated. +- Added a parameter ``optimization`` to all `scipy.stats.qmc.QMCEngine` + subclasses to improve characteristics of the quasi-random variates. +- Added tie correction to `scipy.stats.mood`. +- Added tutorials for resampling methods in `scipy.stats`. +- `scipy.stats.bootstrap`, `scipy.stats.permutation_test`, and + `scipy.stats.monte_carlo_test` now automatically detect whether the provided + ``statistic`` is vectorized, so passing the ``vectorized`` argument + explicitly is no longer required to take advantage of vectorized statistics. +- Improved the speed of `scipy.stats.permutation_test` for permutation types + ``'samples'`` and ``'pairings'``. +- Added ``axis``, ``nan_policy``, and masked array support to + `scipy.stats.jarque_bera`. +- Added the ``nan_policy`` optional argument to `scipy.stats.rankdata`. 
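+
+A minimal sketch of two of the stats additions listed above, assuming SciPy >= 1.10
+and NumPy are available (the sample data, the seed, and the choice of ``np.std`` as
+the statistic are illustrative only, not taken from the SciPy documentation):
+
+.. code-block:: python
+
+    import numpy as np
+    from scipy import stats
+
+    # ``nan_policy`` is new in scipy.stats.rankdata: NaNs can now be omitted
+    # from the ranking instead of propagating through the result.
+    print(stats.rankdata([3.0, np.nan, 1.0], nan_policy="omit"))
+
+    # scipy.stats.bootstrap now detects that the statistic (here np.std)
+    # accepts an ``axis`` argument, so ``vectorized=True`` no longer has to
+    # be passed explicitly to get the vectorized fast path.
+    rng = np.random.default_rng(12345)          # arbitrary seed
+    sample = (rng.normal(loc=2.0, size=200),)   # one-sample data, as a sequence
+    res = stats.bootstrap(sample, np.std, random_state=rng)
+    print(res.confidence_interval)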
+ + +******************* +Deprecated features +******************* +- `scipy.misc` module and all the methods in ``misc`` are deprecated in v1.10 + and will be completely removed in SciPy v2.0.0. Users are suggested to + utilize the `scipy.datasets` module instead for the dataset methods. +- `scipy.stats.qmc.LatinHypercube` parameter ``centered`` has been deprecated. + It is replaced by the ``scramble`` argument for more consistency with other + QMC engines. +- `scipy.interpolate.interp2d` class has been deprecated. The docstring of the + deprecated routine lists recommended replacements. + +******************** +Expired Deprecations +******************** +- There is an ongoing effort to follow through on long-standing deprecations. +- The following previously deprecated features are affected: + + - Removed ``cond`` & ``rcond`` kwargs in ``linalg.pinv`` + - Removed wrappers ``scipy.linalg.blas.{clapack, flapack}`` + - Removed ``scipy.stats.NumericalInverseHermite`` and removed ``tol`` & ``max_intervals`` kwargs from ``scipy.stats.sampling.NumericalInverseHermite`` + - Removed ``local_search_options`` kwarg frrom ``scipy.optimize.dual_annealing``. + + +************* +Other changes +************* +- `scipy.stats.bootstrap`, `scipy.stats.permutation_test`, and + `scipy.stats.monte_carlo_test` now automatically detect whether the provided + ``statistic`` is vectorized by looking for an ``axis`` parameter in the + signature of ``statistic``. If an ``axis`` parameter is present in + ``statistic`` but should not be relied on for vectorized calls, users must + pass option ``vectorized==False`` explicitly. +- `scipy.stats.multivariate_normal` will now raise a ``ValueError`` when the + covariance matrix is not positive semidefinite, regardless of which method + is called. + + + +******* +Authors +******* + +* Name (commits) +* h-vetinari (10) +* Jelle Aalbers (1) +* Oriol Abril-Pla (1) + +* Alan-Hung (1) + +* Tania Allard (7) +* Oren Amsalem (1) + +* Sven Baars (10) +* Balthasar (1) + +* Ross Barnowski (1) +* Christoph Baumgarten (2) +* Peter Bell (2) +* Sebastian Berg (1) +* Aaron Berk (1) + +* boatwrong (1) + +* boeleman (1) + +* Jake Bowhay (50) +* Matthew Brett (4) +* Evgeni Burovski (93) +* Matthias Bussonnier (6) +* Dominic C (2) +* Mingbo Cai (1) + +* James Campbell (2) + +* CJ Carey (4) +* cesaregarza (1) + +* charlie0389 (1) + +* Hood Chatham (5) +* Andrew Chin (1) + +* Daniel Ching (1) + +* Leo Chow (1) + +* chris (3) + +* John Clow (1) + +* cm7S (1) + +* cmgodwin (1) + +* Christopher Cowden (2) + +* Henry Cuzco (2) + +* Anirudh Dagar (12) +* Hans Dembinski (2) + +* Jaiden di Lanzo (24) + +* Felipe Dias (1) + +* Dieter Werthmüller (1) +* Giuseppe Dilillo (1) + +* dpoerio (1) + +* drpeteb (1) + +* Christopher Dupuis (1) + +* Jordan Edmunds (1) + +* Pieter Eendebak (1) + +* Jérome Eertmans (1) + +* Fabian Egli (2) + +* Sebastian Ehlert (2) + +* Kian Eliasi (1) + +* Tomohiro Endo (1) + +* Stefan Endres (1) +* Zeb Engberg (4) + +* Jonas Eschle (1) + +* Thomas J. Fan (9) +* fiveseven (1) + +* Neil Flood (1) + +* Franz Forstmayr (1) +* Sara Fridovich-Keil (1) +* David Gilbertson (1) + +* Ralf Gommers (251) +* Marco Gorelli (2) + +* Matt Haberland (387) +* Andrew Hawryluk (2) + +* Christoph Hohnerlein (2) + +* Loïc Houpert (2) + +* Shamus Husheer (1) + +* ideasrule (1) + +* imoiwm (1) + +* Lakshaya Inani (1) + +* Joseph T. 
Iosue (1) +* iwbc-mzk (1) + +* Nathan Jacobi (3) + +* Julien Jerphanion (5) +* He Jia (1) +* jmkuebler (1) + +* Johannes Müller (1) + +* Vedant Jolly (1) + +* Juan Luis Cano Rodríguez (2) +* Justin (1) + +* jvavrek (1) + +* jyuv (2) +* Kai Mühlbauer (1) + +* Nikita Karetnikov (3) + +* Reinert Huseby Karlsen (1) + +* kaspar (2) + +* Toshiki Kataoka (1) +* Robert Kern (3) +* Joshua Klein (1) + +* Andrew Knyazev (7) +* Jozsef Kutas (16) + +* Eric Larson (4) +* Lechnio (1) + +* Antony Lee (2) +* Aditya Limaye (1) + +* Xingyu Liu (2) +* Christian Lorentzen (4) +* Loïc Estève (2) +* Thibaut Lunet (2) + +* Peter Lysakovski (1) +* marianasalamoni (2) + +* mariprudencio (1) + +* Paige Martin (1) + +* Arno Marty (1) + +* matthewborish (3) + +* Damon McDougall (1) +* Nicholas McKibben (22) +* McLP (1) + +* mdmahendri (1) + +* Melissa Weber Mendonça (9) +* Jarrod Millman (1) +* Naoto Mizuno (2) +* Shashaank N (1) +* Pablo S Naharro (1) + +* nboudrie (2) + +* Andrew Nelson (52) +* Nico Schlömer (1) +* NiMlr (1) + +* o-alexandre-felipe (1) + +* Maureen Ononiwu (1) + +* Dimitri Papadopoulos (2) + +* partev (1) + +* Tirth Patel (10) +* Paulius Šarka (1) + +* Josef Perktold (1) +* Giacomo Petrillo (3) + +* Matti Picus (1) +* Rafael Pinto (1) + +* PKNaveen (1) + +* Ilhan Polat (6) +* Akshita Prasanth (2) + +* Sean Quinn (1) +* Tyler Reddy (155) +* Martin Reinecke (1) +* Ned Richards (1) +* Marie Roald (1) + +* Sam Rosen (4) + +* Pamphile Roy (105) +* sabonerune (2) + +* Atsushi Sakai (94) +* Daniel Schmitz (27) +* Anna Scholtz (1) + +* Eli Schwartz (11) +* serge-sans-paille (2) +* JEEVANSHI SHARMA (1) + +* ehsan shirvanian (2) + +* siddhantwahal (2) +* Mathieu Dutour Sikiric (1) + +* Sourav Singh (1) +* Alexander Soare (1) + +* Bjørge Solli (2) + +* Scott Staniewicz (1) +* Ethan Steinberg (3) + +* Albert Steppi (3) +* Thomas Stoeger (1) + +* Kai Striega (4) +* Tartopohm (1) + +* Mamoru TASAKA (2) + +* Ewout ter Hoeven (5) +* TianyiQ (1) + +* Tiger (1) + +* Will Tirone (1) +* Ajay Shanker Tripathi (1) + +* Edgar Andrés Margffoy Tuay (1) + +* Dmitry Ulyumdzhiev (1) + +* Hari Vamsi (1) + +* VitalyChait (1) + +* Rik Voorhaar (1) + +* Samuel Wallan (4) +* Stefan van der Walt (2) +* Warren Weckesser (145) +* wei2222 (1) + +* windows-server-2003 (3) + +* Marek Wojciechowski (2) + +* Niels Wouda (1) + +* WRKampi (1) + +* Yeonjoo Yoo (1) + +* Rory Yorke (1) +* Xiao Yuan (2) + +* Meekail Zain (2) + +* Fabio Zanini (1) + +* Steffen Zeile (1) + +* Egor Zemlyanoy (19) +* Gavin Zhang (3) + + +A total of 184 people contributed to this release. +People with a "+" by their names contributed a patch for the first time. +This list of names is automatically generated, and may not be fully complete. + + +************************ +Issues closed for 1.10.0 +************************ + +* `#1261 `__: errors in fmin_bfgs and some improvements (Trac #734) +* `#2167 `__: BivariateSpline errors with kx=ky=1 (Trac #1642) +* `#2304 `__: funm gives incorrect results for non-diagonalizable inputs (Trac... +* `#3421 `__: Rename information theory functions? +* `#3854 `__: KroghInterpolator doesn't pass through points +* `#4043 `__: scipy.interpolate.interp1d should be able to take a single value +* `#4555 `__: leastsq should use cholesky not inv for hessian inversion +* `#4598 `__: von Mises random variate sampling broken for non-zero location... 
+* `#4975 `__: Documentation for s in UnivariateSpline is confusing +* `#6173 `__: scipy.interpolate.lagrange implemented through coefficients +* `#6688 `__: ENH: optimize.basinhopping: call an acceptance test before local... +* `#7104 `__: scipy.stats.nct - wrong values in tails +* `#7268 `__: scipy.sparse.linalg.norm does not implement spectral norm +* `#7521 `__: scipy.UnivariateSpline smoothing condition documentation inaccuracy +* `#7857 `__: griddata sensible to size of original grid when it should not +* `#8376 `__: InterpolatedUnivariateSpline.roots() seems to miss roots sometimes +* `#9119 `__: documentation issues of functions in scipy.stats.mstats +* `#9389 `__: Kolmogorov Smirnov 2 samples returning max distance location... +* `#9440 `__: Unexpected successful optimization with minimize when number... +* `#9451 `__: Add shgo to optimize benchmarks +* `#10737 `__: Goodness of fit tests for distributions with unknown parameters +* `#10911 `__: scipy.optimize.minimize_scalar does not automatically select... +* `#11026 `__: rv_discrete.interval returning wrong values for alpha = 1 +* `#11053 `__: scipy.stats: Allow specifying inverse-variance matrix to multivariate_normal +* `#11131 `__: DOC: stats.fisher_exact does not match R functionality for \`oddsratio\`... +* `#11406 `__: scipy.sparse.linalg.svds (v1.4.1) on singular matrix does not... +* `#11475 `__: Filter radius as optional argument for gaussian_filter1d/gaussian_filter +* `#11772 `__: Cache covariance matrix decomposition in frozen multivariate_normal +* `#11777 `__: non-central chi2 (scipy.stats.ncx2.pdf) gets clipped to zero... +* `#11790 `__: NaN handling of stats.rankdata +* `#11860 `__: Occurrence of nan values when using multinomial.pmf from scipy.stats? +* `#11916 `__: Improve documentation for smoothing in interpolate.UnivariateSpline... +* `#12041 `__: Spherical mean/variance +* `#12246 `__: Interpolation 2D with SmoothBivariateSpline +* `#12621 `__: Scalar minimization functions have no references +* `#12632 `__: curve_fit algorithm try to transform xdata in an array of floats +* `#12963 `__: shgo is not correctly passing jac to minimizer +* `#13021 `__: 2D Interpolation Scaling Issues +* `#13049 `__: Examples missing import numpy as np? +* `#13452 `__: Calling \`len()\` on the \`scipy.spatial.transform.rotation.Rotation\`... +* `#13529 `__: signal.decimate doesn't use sosfilters and sosfiltfilt +* `#14098 `__: DOC-Update for InterpolatedUnivariateSpline and LSQUnivariateSpline +* `#14198 `__: better description of solveh_banded limitations +* `#14348 `__: Extract spline coefficient from splprep: tck +* `#14386 `__: Let CloughTocher2DInterpolator fit "nearest" for points outside... +* `#14472 `__: scipy.interpolate.CubicSpline boundary conditions appear to be... +* `#14533 `__: optimize.shgo gives unexpected TypeError +* `#14541 `__: Raspberry Pi 4 aarch64: ModuleNotFoundError: No module named... +* `#14584 `__: scipy.signal.filter_design.zpk2sos doctests fail (values different... +* `#14809 `__: BUG: scipy.signal.periodogram window parameter +* `#14853 `__: BUG: sqrtm dtype +* `#14922 `__: Question: Seemingly unused, non-working script \`isolve/tests/demo_lgres.py\` +* `#15049 `__: BUG: Visualization of CWT matrix in signal.cwt example code +* `#15072 `__: BUG: signal.decimate returns NaN with large float32 arrays +* `#15393 `__: BUG: signal.decimate returns unexpected values with float32 arrays +* `#15473 `__: ENH: \`skewnorm.cdf\` is very slow. Consider a much more efficient... 
+* `#15618 `__: ENH: Generation of random 2D tables with given marginal totals +* `#15675 `__: ENH: \`multivariate_normal\` should accept eigendecomposition... +* `#15685 `__: ENH: The exact p-value calculation in \`stats.cramervonmises_2samp\`... +* `#15733 `__: DEP: remove quiet parameter from fitpack +* `#15749 `__: DEP: remove tol from \`NumericalInverseHermite\` +* `#15792 `__: MAINT: There is no unittest and documentation of Improper integral... +* `#15807 `__: DEP: remove dual_annealing argument 'local_search_options' +* `#15844 `__: It's not that obvious that \`firls\` requires an even number... +* `#15883 `__: BUG: stats.bootstrap bca implementation triggers ValueError for... +* `#15936 `__: Please add citations to the papers for COLAMD +* `#15996 `__: Symbol hiding when using GNU linker in the Meson build should... +* `#16148 `__: Documentation in spearmanr +* `#16235 `__: BUG: Memory leak in function \`Py_FindObjects\` due to new reference... +* `#16236 `__: BUG: Memory leak in function \`py_filter2d\` due to new reference... +* `#16251 `__: DEP: Execute deprecation of scipy.linalg.blas.{clapack, flapack} +* `#16252 `__: DEP: add deprecation warnings to kwargs \`turbo\` / \`eigvals\`... +* `#16253 `__: DEP: add deprecation warning for kwargs \`nyq\` / \`Hz\` in firwin\* +* `#16256 `__: DEP: add deprecation warning for binom_test +* `#16272 `__: BUG: unclear error for invalid bracketing +* `#16291 `__: BUG: lambertw returns nan's on small values +* `#16297 `__: DOC: minor release procedure adjustment +* `#16319 `__: ENH: improved accuracy and orthonormality of output eigenvectors... +* `#16333 `__: DOC: rvalue description is missing in stats.probplot +* `#16334 `__: BUG: CLI help is not accessible using light themes +* `#16338 `__: ENH: Add option to clip out of bounds input values to minimum... +* `#16342 `__: BUG: IIRdesign function ftype='bessel' not recognized +* `#16344 `__: ENH: improved \`stats.ortho_group\` +* `#16364 `__: ENH: stats: return bunches rather than plain tuples +* `#16380 `__: BUG: RegularGridInterpolator error message is wrong +* `#16386 `__: TST: sparse/linalg/tests/test_expm_multiply.py::test_expm_multiply_dtype... +* `#16399 `__: \`test_mio.py::test_recarray\` failure due to dtype handling... +* `#16413 `__: DOC: rvs method docstrings refer to seed argument instead of... +* `#16433 `__: ENH: scipy.stats.bootstrap() should do BCa for multivariate statistics... +* `#16472 `__: handle spline interpolation methods in \`interpn\` +* `#16476 `__: dev.py does not propagate error codes, thus hides errors on CI +* `#16490 `__: DOC: err on example for \`scipy.signal.upfirdn\` +* `#16558 `__: BUG: leaves_color_list incorrect when distance=0 +* `#16580 `__: Typo in scipy/optimize/tests/test_optimize.py, logit instead... +* `#16582 `__: TST: RegularGridInterpolator tests should be parameterised +* `#16603 `__: ENH, DOC: Add policy on typo and small docs fixes +* `#16663 `__: BUG: \`bool(rotation)\` leads to error +* `#16673 `__: Test failure for \`TestPoisson.test_mindist\` in Azure CI job +* `#16713 `__: BUG/DOC: spatial: docstrings of \`Rotation\` methods are missing... +* `#16726 `__: CI: Python 3.11 tests are failing because a dependency is using... +* `#16741 `__: BUG: DOC: editing docstring example in svds +* `#16759 `__: DOC: Add 'import numpy as np' to the 'Examples' section of docstrings. 
+* `#16763 `__: BUG: numpy version requirement mismatch docs vs setup.py +* `#16773 `__: BUG: indexing error in scipy.spatial.Voronoi in 3D +* `#16796 `__: DOC: Method "bisect" for root_scalar lacks correct argument list +* `#16819 `__: BUG: stats.binned_statistic_2d is ~8x slower when using \`statistic=np.mean\`... +* `#16833 `__: Runtime performance in BSpline.design_matrix is inferior to BSpline().__call__() +* `#16892 `__: Add legend to \`rv_histogram\` plot in docs +* `#16912 `__: MAINT: stats: optimize: Move \`_contains_nan\` function to more... +* `#16914 `__: BUG: documentation of scipy.stats.truncnorm could be clearer +* `#17031 `__: BUG: stats: Intermittent failure of the test 'test_plot_iv' +* `#17033 `__: New CI failures in \`sparse\` with nightly numpy +* `#17047 `__: BUG: Documentation error in scipy.signal +* `#17056 `__: Mypy failure in CI for \`numpy/__init__.pyi\` positional-only... +* `#17065 `__: BUG: minimize(method=’L-BFGS-B’) documentation is contradictory +* `#17070 `__: Using Meson-built 1.10.0.dev0 nightly wheel in a conda environment... +* `#17074 `__: BUG: scipy.optimize.linprog does not fulfill integer constraints... +* `#17078 `__: DOC: "These are not universal functions" difficult to understand... +* `#17089 `__: ENH: Documentation on test behind p-values of .spearmanr +* `#17129 `__: DOC: inconsistency in when a new feature was added +* `#17155 `__: BUG: stats: Bug in XSLOW tests in TestNumericalInverseHermite +* `#17167 `__: BUG: bernoulli.pmf returns non-zero values with non-integer arguments +* `#17168 `__: \`test_powm1\` failing in CI on Windows +* `#17174 `__: MAINT, REL: wheels not uploaded to staging on push to maintenance +* `#17241 `__: BUG: CubicSpline segfaults when passing empty values for \`y\`with... +* `#17336 `__: BUG: Meson build unconditionally probes for pythran, despite... +* `#17375 `__: BUG: resample_poly() freezes with large data and specific samplerate... +* `#17380 `__: BUG: optimize: using \`integrality\` prevents \`linprog\` from... +* `#17382 `__: BUG/DOC: optimize: \`minimize\` doc should reflect tnc's deprecation... +* `#17412 `__: BUG: Meson error:compiler for language "cpp", not specified for... +* `#17444 `__: BUG: beta.ppf causes segfault +* `#17468 `__: Weird errors with running the tests \`scipy.stats.tests.test_distributions\`... +* `#17518 `__: ENH: stats.pearsonr: support complex data +* `#17523 `__: BUG: \`[source]\` button in the docs sending to the wrong place +* `#17578 `__: TST, BLD, CI: 1.10.0rc1 wheel build/test failures +* `#17619 `__: BUG: core dump when calling scipy.optimize.linprog +* `#17644 `__: BUG: 1.10.0rc2 Windows wheel tests runs all segfault +* `#17650 `__: BUG: Assertion failed when using HiGHS + +************************ +Pull requests for 1.10.0 +************************ + +* `#9072 `__: ENH: Added rectangular integral to multivariate_normal +* `#9932 `__: ENH: stats.gaussian_kde: add method that returns marginal distribution +* `#11712 `__: BUG: trust-constr evaluates function out of bounds +* `#12211 `__: DOC: Dice similiarity index +* `#12312 `__: ENH: Accelerate matrix normal sampling using matmul +* `#12594 `__: BUG: fixed indexing error when using bounds in Powell's method... +* `#13053 `__: ENH: add MLE for stats.powerlaw.fit +* `#13265 `__: ENH: Kstest exact performance improvements +* `#13340 `__: ENH: stats: Add the function odds_ratio. +* `#13663 `__: ENH: linalg: Add LAPACK wrappers for trexc and trsen. 
+* `#13753 `__: DOC: optimize: update Powell docs to reflect API +* `#13957 `__: ENH: stats.ks_2samp: Pythranize remaining exact p-value calculations +* `#14248 `__: MAINT:linalg: Make lu_factor accept rectangular arrays +* `#14317 `__: ENH: Optimize sparse frobenius norm +* `#14402 `__: DOC: Clarify argument documentation for \`solve\` +* `#14430 `__: ENH: improve siegelslopes via pythran +* `#14563 `__: WIP: stats: bins=auto in docstrings +* `#14579 `__: BENCH: optimize: add DFO CUTEST benchmark +* `#14638 `__: DOC: added mention of the limitations of Thomas' algorithm +* `#14840 `__: ENH: Addition of Poisson Means Test (E-test). +* `#15097 `__: ENH: add radius to gaussian_filter1d and gaussian_filter +* `#15444 `__: ENH: Infinite df approximation for Studentized Range PDF +* `#15493 `__: ENH: Convert gaussian_kde logpdf to Cython +* `#15607 `__: ENH: Add \`scipy.datasets\` submodule +* `#15709 `__: ENH: improve the computation time of stats.cramervonmises_2samp() +* `#15770 `__: ENH: stats: replace ncx2 stats distribution with Boost non_central_chi_squared +* `#15878 `__: DEP: remove local_search_options of dual_annealing +* `#15892 `__: BUG: stats: use mean behavior for percentileofscore in bootstrap +* `#15901 `__: DEP: Deprecate scipy.misc in favour of scipy.datasets +* `#15967 `__: TST/DOC: stats: explain/check 100% interval for discrete distributions +* `#15972 `__: DOC: length of \`bands\` param. specified in \`firls\` +* `#16002 `__: ENH: Allow specyfing inverse covariance of a multivariate normal... +* `#16017 `__: ENH: special: Use boost for a couple ufuncs. +* `#16069 `__: ENH: add additional MLE for fixed parameters in gumbel_r.fit +* `#16096 `__: BUG: use SOS filters in decimate for numerical stability +* `#16109 `__: ENH: add \`optimization\` to \`QMCEngine\` +* `#16140 `__: ENH: stats: Add \`nan_policy\` optional argument for \`stats.rankdata\` +* `#16224 `__: Add a \`pchip\` mode to RegularGridInterpolator. +* `#16227 `__: BUG: special: Fix a couple issues with the 'double-double' code... +* `#16238 `__: MAINT: stats: support string array for _contains_nan and add... +* `#16268 `__: DOC: optimize: add marginals/slack example to \`linprog\` +* `#16294 `__: BUG: linalg: Add precision preservation for \`sqrtm\` +* `#16298 `__: REL: set version to 1.10.0.dev0 +* `#16299 `__: DEP: Execute deprecation of scipy.linalg.blas.{clapack, flapack} +* `#16307 `__: DEP: add deprecation warning for binom_test +* `#16315 `__: DEP: add deprecation warning for kwargs nyq / Hz in firwin +* `#16317 `__: ENH: stats: add truncated (i.e. upper bounded) Pareto distribution... +* `#16320 `__: ENH: improved accuracy and orthonormality of output eigenvectors... +* `#16327 `__: DOC: BLD: remove \`-scipyopt\` from html Make command and build... +* `#16328 `__: MAINT: retry openblas download in CI +* `#16332 `__: BLD: ensure we get understandable messages when git submodules... +* `#16335 `__: BLD: update NumPy to >=1.19.5 +* `#16336 `__: MAINT: forward port git scoping +* `#16340 `__: DEP: remove tol & max_intervals from NumericalInverseHermite +* `#16346 `__: DEV: add meson-python to environment.yml +* `#16351 `__: Added "import numpy as np" statement to filter examples +* `#16354 `__: DOC: optimize: remove callback doc from the options in \`_minimize_lbfgsb\`... 
+* `#16355 `__: DEP: add deprecation warnings to kwargs turbo / eigvals of linalg.eigh +* `#16356 `__: DOC: add examples to \`signal.medfilt2d\` +* `#16357 `__: BENCH: Add SHGO and DIRECT to optimization benchmark +* `#16362 `__: ENH: Provide more information when a value is out of bounds in... +* `#16367 `__: BUG: unclear error for invalid bracketing +* `#16371 `__: MAINT: remove last (already safe) usage of \`mktemp\` +* `#16372 `__: MAINT: rename \`do.py\` to \`dev.py\` +* `#16373 `__: DOC: added rvalue description in \`stats.probplot\` +* `#16377 `__: ENH: stats.bootstrap: update warning to mention np.min +* `#16383 `__: BUG: fix error message of RegularGridInterpolator +* `#16387 `__: ENH: stats.combine_pvalues: convert output tuple to Bunch +* `#16388 `__: DEP: deprecate \`stats.kendalltau\` kwarg \`initial_lexsort\` +* `#16389 `__: DEP: sharpen stats deprecations +* `#16392 `__: DEP: add warning to \`sparse.gmres\` deprecated kwarg \`restrt\` +* `#16397 `__: MAINT: fix two refcounting issues in \`ndimage\` +* `#16398 `__: MAINT: Replace find_common_types +* `#16406 `__: MAINT: stats.rankdata: change default to nan_policy='propagate' +* `#16407 `__: ENH: stats.fisher_exact: convert output tuple to Bunch +* `#16411 `__: MAINT: optimize.brute should coerce non-tuple args to tuple +* `#16415 `__: DOC: stats: fix seed -> random_state in \`rvs\` docstring +* `#16423 `__: MAINT: stats: not using nested TypeErrors in _contains_nan +* `#16424 `__: MAINT: future-proof \`stats.kde\` for changes in numpy casting... +* `#16425 `__: DOC: Procedure adjustment in file doc/source/dev/core-dev/releasing.rst.inc +* `#16428 `__: MAINT: fix up \`_sputils.get_index_dtype\` for NEP 50 casting... +* `#16431 `__: CI: fix Gitpod build after dev.py update to the new CLI +* `#16432 `__: Docstring fixes in lobpcg.py +* `#16434 `__: DOC: stats.mstats.sen_seasonal_slopes: add docstring +* `#16435 `__: ENH: directional mean +* `#16438 `__: MAINT: remove unused \`DeprecatedImport\` +* `#16439 `__: ENH: stats.chi2_contingency: convert output tuple to Bunch +* `#16440 `__: ENH: stats.median_test: convert output tuple to Bunch +* `#16441 `__: ENH: stats.mood: convert output tuple to Bunch +* `#16442 `__: MAINT: fix issues with Python scalar related casting behavior... +* `#16447 `__: BLD: make it easier to build with AddressSanitizer +* `#16449 `__: ENH: improve scipy.interpolate.RegularGridInterpolator performance +* `#16450 `__: BUG: Fix CLI Help in light themes +* `#16454 `__: ENH: stats.bootstrap: return bootstrap distribution +* `#16455 `__: ENH: stats.bootstrap: add BCa method for multi-sample statistic +* `#16462 `__: CI: Update Python 3.8-dbg job to ubuntu-20.04 +* `#16463 `__: ENH: stats.jarque_bera: add axis, nan_policy, masked array support +* `#16470 `__: DOC: stats.spearmanr: add information about p-value calculation +* `#16471 `__: MAINT: interpolate/RGI: only call \`find_indices\` when needed +* `#16474 `__: DOC: Add more information to entropy docstring +* `#16475 `__: BLD: build the f2py shared source file once and link to each... +* `#16481 `__: BUG: Change (n+1) to n for correct jackknife calculation of hd... +* `#16486 `__: DOC: special.entr: add context +* `#16487 `__: MAINT: Improve test speed, add timeouts +* `#16496 `__: add notes for x and y array sorted in decreasing order +* `#16497 `__: DOC: special: Add 'Examples' section to spence docstring. 
+* `#16498 `__: ENH: Speed up hdquantile_sd via cumulative sums +* `#16501 `__: DOC: Fix typo in spatial.Delaunay +* `#16502 `__: DOC: Minor Rst syntax update. +* `#16503 `__: ENH: stats: Implement _munp() for the skewnorm distribution. +* `#16505 `__: DOC: correct errs on examples for scipy.signal.upfirdn +* `#16508 `__: BUG/ENH: handle spline interpolation methods in \`interpn\` and... +* `#16511 `__: add reference to regulargridinterpolator +* `#16513 `__: MAINT: skip complex128 propack tests on windows (& module clean-up) +* `#16516 `__: DOC: add a hint on what to use in case of matlab v7.3 +* `#16518 `__: CI: pip and conda caching in all workflows +* `#16524 `__: TST: stats.permutation_test: strengthen test against \`ks_2samp\` +* `#16529 `__: CI: clean up scikit-umfpack and scikit-sparse usage in CI +* `#16532 `__: Deprecated imports in docstring examples in \`io.harwell_boeing\`... +* `#16533 `__: ENH: signal: add Lanczos window function +* `#16534 `__: CI: fix scikit-umfpack and scikit-sparse install in Azure job +* `#16535 `__: MAINT: signal: Fix matplotlib deprecation warning in the chirp... +* `#16543 `__: DOC: update cwt doc examples +* `#16544 `__: DOC: add better example for \`MultinomialQMC\`. +* `#16546 `__: DOC: Add alt-text to tutorial images +* `#16547 `__: ENH: correct bounds warnings in \`minimize\` +* `#16550 `__: TST: fix flaky sparse.linalg.exmp test +* `#16552 `__: CI: test distro Python install on Ubuntu Jammy (22.04 LTS) +* `#16554 `__: TST: add timeout to \`test_kappa4_array_gh13582\` +* `#16557 `__: BUG: fix \`interpolate.RegularGridInterpolator\` \`out_of_bounds\`... +* `#16559 `__: ENH: adding a logpdf function to von-mises distribution +* `#16560 `__: vectorize ortho_group.rvs +* `#16561 `__: DOC: optimize: Fix warning in differential_evolution docstring +* `#16565 `__: [DOC] improper type syntax in basinhopping docstring. +* `#16566 `__: fix window function doc string for Window length +* `#16567 `__: DOC: Add note about inaccuracies in matrix functions +* `#16571 `__: DOC: sparse.linalg: add references for UMFPACK. +* `#16574 `__: ENH: vectorize along samples \`stats.ortho_group.rvs\` and \`stats.unitary_group.rvs\` +* `#16576 `__: testing documentation broken link fix +* `#16587 `__: DOC: add import NumPy in QMC examples. +* `#16589 `__: DOC: update toolchain.rst after EOL of manylinux_2_24; allow... 
+* `#16591 `__: ENH: stats.nct: replace with boost implementation +* `#16592 `__: DOC: interpolate: document the .roots() workaround +* `#16594 `__: MAINT: Better pytest-timeout support +* `#16596 `__: MAINT: stats.rv_continuous: consistently return NumPy scalars +* `#16607 `__: MAINT: remove unnecessary \`__future__\` imports +* `#16608 `__: TST: stats.rv_continuous: more direct test for numpy scalar output +* `#16612 `__: ENH: vectorize along samples \`stats.special_ortho_group.rvs\` +* `#16614 `__: DOC: add import NumPy in linalg decomposition function examples +* `#16615 `__: DOC: Adding import numpy to several files +* `#16616 `__: DOC: Adding import numpy to examples in some stats files +* `#16617 `__: DOC: Update instructions for debugging using dev.py +* `#16618 `__: DOC: add import NumPy in bsplines examples +* `#16619 `__: DOC: add import numpy in some stats examples +* `#16620 `__: DOC: Add numpy import to examples +* `#16621 `__: FIX: upstream fix for binomial distribution divide-by-zero +* `#16624 `__: DOC: add NumPy imports in \`_mstats_basic.py\` examples +* `#16625 `__: DOC: add \`import numpy as np\` to examples +* `#16626 `__: BUG: cluster: fix \`leaves_color_list\` issue +* `#16627 `__: TST: spatial.directed_hausdorff: Parametrized test_random_state_None_int +* `#16629 `__: DOC: Modifiy the scipy.stats.mode example to be nontrivial. +* `#16631 `__: MAINT: stats.gaussian_kde: raise informative message with degenerate... +* `#16632 `__: MAINT: signal:corrected peak_finding example +* `#16633 `__: DOC: update benchmarking docs to use dev.py user interface +* `#16634 `__: DOC: Add example to fft.fht +* `#16635 `__: DOC: fix default_rng namespace and linestyle of an example +* `#16639 `__: DOC: better links in readme for newcomers +* `#16640 `__: MAINT: optimize: always return a float from goal functional wrapper +* `#16641 `__: DOC: optimize: fix doc that \`curve_fit\` xdata should be float... +* `#16644 `__: DOC: io: Add Examples section for mminfo, mmread and mmwrite. +* `#16646 `__: MAINT: have get_index_dtype follow its documentation and return... +* `#16647 `__: MAINT: Fix expit function name typo in test_optimize.py +* `#16650 `__: DOC: io: Add 'Examples' to the 'whosmat' docstring. +* `#16651 `__: ENH: stats.resampling: automatically detect whether statistic... +* `#16652 `__: MAINT: Remove unused imports. +* `#16653 `__: DEV: generalized cross-validation smoothing spline +* `#16654 `__: ENH: stats: add aliases to results objects +* `#16658 `__: BUG: signal: Compare window_length to correct axis in savgol_filter +* `#16659 `__: DOC: replace \`sphinx_panels\` and \`sphinx_tabs\` with \`sphinx_design\` +* `#16666 `__: MAINT: remove unused \`__main__\` code from \`optimize\` submodule +* `#16667 `__: DOC: spatial: Correct barycentric description in Delaunay +* `#16668 `__: DOC: signal: Update values in zpk2sos docstring examples. +* `#16670 `__: MAINT: fix a compiler warning in \`signal/_firfilter.c\` +* `#16672 `__: BLD: update minimum \`meson\` and \`meson-python\` versions +* `#16675 `__: TST: sparse.linalg: increase \`lobpcg\` solve tolerance in test +* `#16676 `__: MAINT: stats.mstats.mode: refactor to keep \`kwargs\` out of... +* `#16677 `__: TST: speed up mindist test +* `#16678 `__: DOC: remove custom colours in css +* `#16680 `__: MAINT: stats.gmean: corrections with \`axis=None\` when masked-array... 
+* `#16683 `__: DEV: add \`--durations\` argument to dev.py interface +* `#16685 `__: BLD: implement compiler version checks for GCC and MSVC +* `#16687 `__: DOC: signal: Update the examples in the remez docstring. +* `#16689 `__: MAINT: sparse.linalg: remove LGMRES demo +* `#16690 `__: random uniform -> normal to initiate lobpcg and arpack in svds +* `#16691 `__: ENH: stats: Implement isf for the levy distribution. +* `#16692 `__: ENH: stats.gaussian_kde: replace use of inv_cov in pdf +* `#16696 `__: ENH: Speed up sparse.csgraph.dijkstra +* `#16699 `__: DOC: stats: resampling and Monte Carlo methods tutorial +* `#16703 `__: BLD: upgrade meson(-python) min versions and remove explicit... +* `#16704 `__: DOC: improve some MSVC links in toolchain.rst +* `#16705 `__: MAINT: add \`__bool__\` method to spatial.transform.Rotation +* `#16706 `__: CI: add Meson version number in environment.yml to rebuild Docker... +* `#16707 `__: DOC: expand the \`scipy.interpolate\` tutorial +* `#16712 `__: BUG: Update _svds.py: orthogonalize eigenvectors from arpack... +* `#16714 `__: ENH: stats.bootstrap: extend previous bootstrap result +* `#16715 `__: DOC: interpolate: add an example of splPrep/PPoly.from_spline... +* `#16717 `__: DOC: reformat seed docstrings +* `#16722 `__: MAINT: additional test truthiness and length the empty Rotation +* `#16730 `__: MAINT: interpolate: use _fitpack_impl in fitpack2 +* `#16731 `__: ENH: interpolate.KroghInterpolator: raise warning about numerical... +* `#16732 `__: DOC: Replace runtests.py with dev.py where appropriate +* `#16733 `__: DOC: Add link to development workflow +* `#16735 `__: DOC: forward port 1.9.0 relnotes +* `#16738 `__: REL: DOC: update version switcher +* `#16739 `__: CI: move the py311-dev job over to Meson +* `#16740 `__: DOC: Fix Sphinx markup. +* `#16742 `__: CI: move test_numpy_main to linux_meson +* `#16743 `__: DEP: interpolate: revert docstring only deprecation of fitpack... +* `#16747 `__: DOC: sparse.linalg: Fix output in an example in the lobpcg docstring. +* `#16753 `__: DOC: Integrate: Add improper integral examples for \`dblquad\`... +* `#16754 `__: DOC: optimize: Fix mistake in a linprog example. +* `#16755 `__: TST: sparse.linalg: Loosen tolerance for the lobpcg test 'test_tolerance_float32' +* `#16756 `__: TST: test fixes for pypy +* `#16758 `__: ENH: Release the GIL while computing KDE kernel estimate +* `#16761 `__: DOC: add logo to readme. +* `#16762 `__: MAINT: stats: mark slow tests +* `#16766 `__: DOC: toolchain: fix numpy dependency for 1.7.2/3 +* `#16770 `__: ENH: stats: use Boost implementation of skewnorm cdf/ppf +* `#16772 `__: DOC: add one :math: to docstring for consistency +* `#16776 `__: BUG: Set nperseg size to the size of an already-initialized window... +* `#16778 `__: MAINT: fix a couple of Mypy errors that appeared recently +* `#16779 `__: TST: Interpolate: Move incorrectly located NDInterpolator tests +* `#16788 `__: DOC, TST: clarify Voronoi Qz +* `#16790 `__: ENH: stats.invgauss: use Boost implementation of ppf/isf +* `#16791 `__: MAINT: stats.skewnorm: fix fit when data skewness is greater... +* `#16793 `__: DOC: optimize: add tutorial for milp +* `#16795 `__: DOC: Embed method signatures of \`spatial.transform.Rotation\` +* `#16797 `__: ENH add extrapolate to BSpline.design_matrix +* `#16799 `__: DOC: optimize.root_scalar: improve parametrization of methods +* `#16800 `__: MAINT: remove \`_lib/_c99compat.h\` and use C99 rather than \`npy_math.h\`... 
+* `#16801 `__: ENH: added the spectral 2-norm to _norm.py +* `#16804 `__: ENH: stats.weibull_min: override fit +* `#16806 `__: DEV: update pydevtool version to propagate exit codes +* `#16809 `__: Doc: Added missing "import numpy as np" to docstring examples... +* `#16811 `__: DOC: fix broken links +* `#16816 `__: MAINT: special: remove one \`libnpymath\` dependency; more \`NPY_\`... +* `#16817 `__: MAINT: remove \`NPY_INLINE\`, use \`inline\` instead +* `#16818 `__: MAINT: update PROPACK git submodule to get rid of prints in test... +* `#16826 `__: MAINT: fix some build warnings from \`special/ellip_harm.pxd\` +* `#16828 `__: DOC: add NumPy import in scipy.io examples +* `#16829 `__: Interpn nonscalar followup +* `#16830 `__: DOC: Add plot to circmean docstring +* `#16831 `__: DOC: special: Several docstring updates. +* `#16832 `__: DOC: add NumPy import in scipy.optimize examples +* `#16834 `__: DOC: Improve circular stats doc +* `#16835 `__: ENH: stats.ttest_1samp: add confidence_interval and df +* `#16837 `__: DOC: interpolate: small example code improvement for \`BSpline.basis_element\` +* `#16840 `__: ENH: BSplines.design_matrix performance improvement +* `#16843 `__: ENH: Handle np array methods in stats.binned_statistic_dd +* `#16847 `__: DOC: interpolate.{RegularGridInterpolator, interpn} add note... +* `#16848 `__: ENH: stats.anderson: add fit parameters to result +* `#16853 `__: DOC: interpolate: improve \`interpolate.make_interp.spline\`... +* `#16854 `__: MAINT: Delay \`pooch\` import error for \`scipy.datasets\` +* `#16855 `__: Roadmap update: scipy.interpolate and Fortran libs +* `#16856 `__: DOC: interpolate: add default spline degree value for \`InterpolatedUnivariateSpline\` +* `#16857 `__: ENH : remove an expected warning in BarycentricInterpolator +* `#16858 `__: ENH: Modify scipy.optimize.least_squares to accept bounds of... +* `#16860 `__: DOC: interpolate: improve spline smoothing parameter docs. +* `#16863 `__: DOC: Adding docs contribution guidelines +* `#16864 `__: DOC: stats: Some updates: +* `#16865 `__: DOC: interpolate: improve \`make_lsq_spline\` docs +* `#16866 `__: DEP, DOC: Show deprecated methods in docs and fix overwriting... +* `#16867 `__: DOC: fix an accuracy issue in the docstring of \`Rotation.align_vectors\` +* `#16869 `__: DOC: Added missing 'import numpy as np' to docstring examples... +* `#16873 `__: MAINT: stats.multinomial: don't alter p[-1] when p[:-1].sum()... +* `#16874 `__: DOC: signal: Add 'Examples' to the 'normalize' docstring. +* `#16884 `__: DOC: improve installing from source instructions +* `#16885 `__: TST: Interpolate: Parameterise RegularGridInterpolator tests +* `#16886 `__: CI: wheels only on scipy [skip azp][skip github] +* `#16887 `__: DOC: optimize.linprog: adjust tutorial to address gh16531 +* `#16888 `__: DOC: outline how cibuildwheel is triggered and runs in CI +* `#16889 `__: MAINT: interpolate: Remove a couple unused imports. +* `#16890 `__: ENH: optimize.OptimizeResult: improve pretty-printing +* `#16891 `__: TST: Interpolate: rename test so that is executed +* `#16893 `__: DOC: add diagram explaining how Docker images get built and used... +* `#16896 `__: DOC: Fix broken link in the "Additional Git Resources" page. 
+* `#16897 `__: Pass down mip_rel_gap to the HiGHS optimizer +* `#16899 `__: DOC: add legend to rv_histogram plot +* `#16902 `__: ENH: stats.ttest_rel: add confidence_interval to result +* `#16903 `__: DOC: interpolate: add actual smoothing condition for \`UnivariateSpline\` +* `#16906 `__: DOC: fixes for refguide check issues +* `#16907 `__: BUG: stats: expect method of the vonmises distribution +* `#16910 `__: MAINT: forward port 1.9.1 relnotes +* `#16913 `__: ENH:interpolate: allow interp1d to take single value +* `#16916 `__: DOC: add note about using interpn for data on a regular grid +* `#16923 `__: MAINT: integrate.qmc_quad: add QMC quadrature +* `#16924 `__: Fix compilation with -Wincompatible-function-pointer-types +* `#16931 `__: DOC: add details on Meson build debugging and introspection +* `#16933 `__: MAINT : interpolate: added test for DivideByZero warning silencing... +* `#16937 `__: MAINT: refer to python3 in refguide_check +* `#16939 `__: MAINT: stats: move \`_contains_nan\` function to \`_lib._util.py\` +* `#16940 `__: DOC: Documentation note update for truncnorm +* `#16941 `__: MAINT: support logpdf in NumericalInverseHermite (stats.sampling) +* `#16948 `__: DOC: sparse.linalg.svds: fix intermittent refguide check failure +* `#16950 `__: DOC: Add examples for common Bessel functions +* `#16951 `__: ENH: stats.fit: add plot_types to FitResult.plot +* `#16953 `__: DEV: update dev.py to only install changed files +* `#16955 `__: BLD: fix up or suppress Fortran build warnings +* `#16956 `__: BLD: fix meson version checks for MSVC +* `#16958 `__: ENH: stats.crosstab: convert output tuple to bunch +* `#16959 `__: DOC: Add example for morlet in scipy.signal +* `#16960 `__: DOC: Fix indentation in benchmarking.rst +* `#16963 `__: DOC: Update 2 links to point to stable. +* `#16967 `__: ENH: stats.goodness_of_fit: a general goodness of fit test +* `#16968 `__: ENH: Close parenthesis in numpy version warning +* `#16976 `__: DOC: stats.qmc: fix description of seed parameter +* `#16980 `__: DOC: fix duplicate word typos. +* `#16986 `__: DOC: Fix link to rendered docs in documentation guide +* `#16987 `__: ENH: stats.gaussian_kde: replace use of inv_cov in logpdf +* `#16989 `__: DOC: edited t_span parameter description in integrate.solve_ivp +* `#16990 `__: CI: enable uploads for (weekly) nightlies and update how action... +* `#16992 `__: CI: upgrade CI image to run on Ubuntu 22.04 instead of 20.04 +* `#16995 `__: DOC: stats: fix incorrectly documented statistic attribute for... +* `#17003 `__: DOC: Add examples for a few Bessel functions +* `#17005 `__: CI: pin OpenBLAS to specific build in macOS job to avoid gges... +* `#17006 `__: ENH: stats.spearmanr: add statistic attribute to result object... +* `#17007 `__: ENH: stats.kendalltau: add statistic attribute to result object... +* `#17008 `__: ENH: stats.weightedtau: add statistic attribute to result object +* `#17009 `__: Revert "CI: pin OpenBLAS to specific build in macOS job to avoid... +* `#17014 `__: MAINT: remove unused variables and imports +* `#17016 `__: ENH: stats.pearsonr, stats.pointbiserialr: add statistic/correlation... +* `#17017 `__: ENH: stats.somersd: add correlation attribute to result object +* `#17021 `__: FIX: \`dev.py build\` parallelism behaviour and fixed typos +* `#17022 `__: Explain where LIL comes from +* `#17027 `__: Fix explanation of LIst of List sparse matrix +* `#17029 `__: CI: cirrus for building aarch64 +* `#17030 `__: ENH: stats.permutation_test: improve performance of samples/pairings... 
+* `#17032 `__: TST: stats.fit: fix random state +* `#17034 `__: TST: stats.jarque_bera: fix test failure due to NumPy update +* `#17036 `__: DEV: Update GPG key in Docker [Gitpod] +* `#17038 `__: deduplicate \`splint\` in FITPACK wrappers; take 3 +* `#17039 `__: ENH: add a \`stats.expectile\` function +* `#17041 `__: DOC: Add examples for integrals of Bessel functions +* `#17048 `__: DOC:signal: Fix typo in TransferFunction +* `#17049 `__: TST: stats.jarque_bera: fix test failure due to NumPy update +* `#17051 `__: ENH: support complex functions in integrate.quad +* `#17052 `__: BLD: implement symbol hiding for Meson through a linker version... +* `#17057 `__: Fix or avoid various test failures that are showing up in CI +* `#17062 `__: Add location and sign to KS test result +* `#17063 `__: CI: fix uploading of nightly wheels +* `#17068 `__: MAINT: Removed unused imports. +* `#17071 `__: DOC: update maxfun in scipy.optimize.minimize(method=’L-BFGS-B’)... +* `#17073 `__: DOC: examples for derivatives of Bessel functions +* `#17076 `__: DOC: spatial: Copy-edit the voronoi_plot_2d example. +* `#17079 `__: BUG: fix \`signal.sosfilt\` issue with complex dtypes and Intel... +* `#17081 `__: DOC: Fix formatting in svds docstrings +* `#17083 `__: DOC: Fix broken link for environment variables NumPy doc +* `#17085 `__: DOC: optimize: add link to SciPy cookbooks milp tutorials +* `#17091 `__: MAINT: interpolate remove duplication of FITPACK interface \`sproot\`. +* `#17093 `__: ENH: Improves behavior of scipy.optimize.linprog (#17074) +* `#17094 `__: DOC: examples for roots of Bessel functions +* `#17099 `__: BLD: turn off fast-math for Intel compilers +* `#17103 `__: ENH: stats.Covariance: add CovViaDiagonal +* `#17106 `__: CI: fix testing of \`SCIPY_USE_PYTHRAN=0\`, and upgrade to pythran... +* `#17108 `__: DOC: Reformulate ufunc description in special doc page +* `#17109 `__: BLD: Ensure Intel Fortran handles negative 0 as expected. +* `#17110 `__: DOC: add Numpy import to scipy.sparse examples +* `#17112 `__: ENH: Add support for bounds class in curve_fit +* `#17115 `__: DOC: add Numpy import to examples +* `#17117 `__: ENH: stats.logistic: override fit for remaining cases +* `#17118 `__: ENH: Support for complex functions in binned_statistic_dd +* `#17122 `__: ENH: remove duplicate function call +* `#17126 `__: MAINT, ENH: scipy.stats: Refactor \`directionalmean\` to return... +* `#17128 `__: ENH: stats.covariance: add CovViaCholesky +* `#17130 `__: DOC: remove inconsistent messages +* `#17135 `__: ENH: stats.Covariance: specifying covariance matrix by its eigendecomposition +* `#17138 `__: CI: add permission to GH actions. +* `#17140 `__: BUG: Fix issue with shgo not correctly passing jac to minimizer +* `#17141 `__: ENH: stats.fit: add maximum spacing estimation +* `#17144 `__: DOC: replace \`set_tight_layout\` with \`set_layout_engine\`... +* `#17147 `__: BENCH: remove \`--quick\` flag to \`asv run\` in dev.py +* `#17149 `__: MAINT: remove certifi py3.11 warning filter +* `#17152 `__: ENH/MAINT: \`qmc.LatinHypercube\`: deprecate centered with scramble +* `#17157 `__: ENH: Added value_indices() function to scipy.ndimage +* `#17159 `__: MAINT: spatial: Skip \`test_massive_arr_overflow\` on systems... +* `#17161 `__: MAINT: stats.sampling.NumericalInverseHermite: private distribution... +* `#17163 `__: ENH: Add \`download_all\` utility method & script +* `#17169 `__: MAINT: special: Loosen the tolerance for a test of powm1. 
+* `#17170 `__: MAINT: better handling of mode/center outside of the domain in... +* `#17175 `__: MAINT: forward port 1.9.2 relnotes +* `#17177 `__: DOC: stats: Fix versionadded markup for odds_ratio +* `#17178 `__: DOC: interpolate: discuss failure modes of SmoothBivariateSpline +* `#17180 `__: DEP: interpolate: deprecate interp2d +* `#17181 `__: CI: Fix when wheels are built for staging +* `#17182 `__: MAINT: fix typo "mat[r]ix" +* `#17183 `__: DOC: examples for \`ive\` and \`kve\` +* `#17184 `__: DOC: stats: Fix the 1.9.0 release note about the 'weights' parameter... +* `#17188 `__: DOC: update version switcher for 1.9.2 +* `#17198 `__: MAINT: stats: remove use of interp2d from levy_stable._fitstart +* `#17199 `__: DOC: Fix typos in IIR design argument documentation +* `#17215 `__: MAINT: remove code for old numpy versions +* `#17217 `__: MAINT: interpolate/RGI: make all _evaluate_YYY methods use self.values +* `#17223 `__: DOC: linalg: Expand the qz example. +* `#17227 `__: TST: stats.sampling.NumericalInverseHermite: filter all RuntimeWarnings +* `#17230 `__: ENH: subclass-friendly refactor RegularGridInterpolator +* `#17233 `__: DOC: examples for Struve functions +* `#17236 `__: stats/distributions: make rv_sample public, allow subclassing +* `#17237 `__: ENH: add conditional_table to SciPy.stats. +* `#17238 `__: DOC: linalg: Several docstring updates. +* `#17243 `__: DOC: special: Updates for smirnov and smirnovi +* `#17247 `__: MAINT: optimize.leastsq: fix covariance not SPD +* `#17256 `__: doc/RegularizedIncompleteBetaFunction +* `#17258 `__: MAINT: stats.multivariate_normal: frozen rvs should pass cov_object... +* `#17259 `__: DOC: CI: Add note about skipping Cirrus CI. +* `#17262 `__: MAINT: forward port 1.9.3 relnotes +* `#17264 `__: DOC: update version switcher for 1.9.3 +* `#17273 `__: TST: linalg: temporarily silence failure in test_solve_discrete_are +* `#17276 `__: MAINT/ENH: stats.multivariate_normal.rvs: fix shape and speed... +* `#17277 `__: ENH: Random unit vector distribution +* `#17279 `__: TST: mark no_segmentation fault test for DIRECT as xslow +* `#17280 `__: DOC: example for voigt_profile +* `#17283 `__: STY: stats.Covariance: fix lint issue in \`main\` +* `#17284 `__: MAINT: special: Loosen tolerance in test_sinpi() and test_cospi(). +* `#17291 `__: Cythonize 2D linear code path in RegularGridInterpolator +* `#17296 `__: Fix test fails caused by pytest 7.1.3 +* `#17298 `__: DOC: Add examples to Stats Anderson +* `#17299 `__: DOC: interpolate: Extrapolation tips and tricks +* `#17301 `__: DOC, MAINT: remove use of inspect.formatargspec during doc build +* `#17302 `__: MAINT: special: Use boost for special.hyp1f1 with real inputs. +* `#17303 `__: Remove handwritten \`_fitpack.spalde\` : a rebase of pr/17145 +* `#17304 `__: ENH: stats: implement _sf and _isf for invweibull. +* `#17305 `__: BUG: interpolate: allow zero-sized data arrays +* `#17313 `__: DOC: interpolate: add a note on data with different scales +* `#17314 `__: DOC: interpolate/tutorial: add a length-1 example +* `#17315 `__: MAINT: special: Remove tests of numpy functions arccosh, arcsinh... +* `#17317 `__: DOC: interpolate/tutorial: add an example for equally-spaced... +* `#17319 `__: DOC: references and examples for huber/pseudo_huber +* `#17331 `__: CI: On Azure, pin pytest-xdist to version 2.5.0 +* `#17340 `__: DOC: clarify use of bounds with basinhopping +* `#17345 `__: ENH: commit to close #1261 (trac #734) by adding xtol argument. 
+* `#17346 `__: BLD: fix \`SCIPY_USE_PYTHRAN=0\` usage for the Meson build +* `#17349 `__: DOC: Fix signal docstrings; finish adding 'import numpy as np' +* `#17351 `__: CI: Pin ninja==1.10.2.4 to avoid bug in 1.11.1 that breaks meson. +* `#17355 `__: DOC: spatial: Fix some docstrings. +* `#17359 `__: CI: ninja packages are repaired, so unpin. +* `#17361 `__: DOC: examples for gdtr and gdtrc +* `#17363 `__: DOC: adjust the deprecation notice for interp2d +* `#17366 `__: DOC/MAINT: clean doctests namespace +* `#17367 `__: DOC: Add missing \`build\` parameter to \`dev.py\` +* `#17369 `__: DOC: consistent use of \`=\` for argument documentation +* `#17371 `__: DOC: update RBF tutorial with new \`RBFInterpolator\` +* `#17372 `__: BLD: update to Meson 0.64.0, remove \`pure: false\` lines +* `#17374 `__: DOC: \`special.itairy\` example +* `#17376 `__: DOC: Add examples to stats.mstats.find_repeats +* `#17395 `__: DOC: optimize: minimize doc to reflect tnc's deprecation of maxiter +* `#17397 `__: BUG: signal: Change types in the upfirdn utility function _output_len() +* `#17399 `__: DOC: signal.iirdesign: remove \`bessel\` from supported filter... +* `#17400 `__: TST: use norm in signal.TestBessel.test_fs_param +* `#17409 `__: DOC: Examples for special functions related to F distribution +* `#17415 `__: MAINT: Python 3.8 typing simplify +* `#17416 `__: BLD: fix a lot of configuration warnings by using \`fs.copyfile\` +* `#17417 `__: BUG: integrate: simpson didn't handle integer n-d arrays. +* `#17418 `__: DOC: special: Remove duplicate imports from special examples. +* `#17423 `__: Documentation to fix #17089 +* `#17426 `__: BLD: fix for propack and boost submodules - don't ask for native... +* `#17427 `__: DOC: optimize.linprog: adjust HiGHS URL +* `#17430 `__: BLD: define NDEBUG to mimic cmake release build +* `#17433 `__: MAINT/TST: improved test coverage for DIRECT optimizer +* `#17439 `__: DOC: Improve example for uniform_direction distribution +* `#17446 `__: MAINT: stats.gaussian_kde: error early if n_features > n_data +* `#17447 `__: MAINT: optimize.fminbound/minimize_scalar: add references, distinguish... +* `#17448 `__: MAINT: optimize.minimize_scalar: always acknowledge 'bounds'... +* `#17449 `__: MAINT: remove remaining occurrences of unicode +* `#17457 `__: DOC: Double Integral Example Typo +* `#17466 `__: BUG: stats: Fix for gh-17444. +* `#17467 `__: BUG: ndimage: Don't use np.int0 (it is the same as np.intp) +* `#17469 `__: BUG: stats: Random parameters in \`pytest.mark.parametrize()\`... +* `#17471 `__: MAINT: stats.rv_count: revert gh-17236 +* `#17472 `__: Getting rid of _make_points_and_values_ascending and its unnecessary... +* `#17478 `__: ENH: Add clear_cache utility for \`scipy.datasets\` +* `#17481 `__: MAINT: special: remove more \`npy_math.h\` usage +* `#17482 `__: MAINT: stats: Unconditionally disable boost double promotion. +* `#17484 `__: DOC: remove hard-coded value from PoissonDisk example +* `#17485 `__: ENH: increase range of vonmises entropy +* `#17487 `__: CI: pin setuptools for musllinux +* `#17489 `__: BUG: ndimage: Work around gh-17270 +* `#17496 `__: DEV: dev.py: make lint task consistent with CI +* `#17500 `__: MAINT: special: Remove references to nonexistent function exp1m. 
+* `#17501 `__: Minor: Misspelling typos fixed in _svds.py +* `#17504 `__: CI: PRs run against merged main [skip circle][skip gh][skip azp] +* `#17512 `__: TST: interpolate: stop skipping a test with zero-sized arrays +* `#17513 `__: BUG: optimize: fixed issue 17380 +* `#17526 `__: BUG, DOC: stats: fix \`[source]\` button redicting to the wrong... +* `#17534 `__: DOC: 1.10.0 release notes +* `#17536 `__: DOC: Examples for \`yve\` and \`jve\` +* `#17540 `__: DOC: fix documentation of \`make_smoothing_spline\` +* `#17543 `__: CI: fix gh17539 failures of the alpine linux run +* `#17545 `__: BUG: special: Fix handling of subnormal input for lambertw. +* `#17551 `__: BUG Fix: Update lobpcg.py to turn history arrays into lists for... +* `#17569 `__: MAINT: version bounds for 1.10.0rc1/relnotes fixes +* `#17579 `__: Revert "ENH: stats.ks_2samp: Pythranize remaining exact p-value... +* `#17580 `__: CI: native cp38-macosx_arm64 [wheel build][skip azp][skip circle][ski… +* `#17583 `__: MAINT: 1.10.0rc1 backports round 2 +* `#17591 `__: MAINT: stats.pearsonr: raise error for complex input +* `#17600 `__: DOC: update version switcher for 1.10 +* `#17611 `__: MAINT: Update ascent.dat file hash +* `#17614 `__: MAINT: optimize.milp: don't warn about \`mip_rel_gap\` option +* `#17627 `__: MAINT: Cast \`datasets.ascent\` image to float64 +* `#17634 `__: MAINT: casting errstate for NumPy 1.24 +* `#17638 `__: MAINT, TST: alpine/musl segfault shim +* `#17640 `__: MAINT: prepare for SciPy 1.10.0rc2 +* `#17645 `__: MAINT: stats.rankdata: ensure consistent shape handling +* `#17653 `__: MAINT: pybind11 win exclusion +* `#17656 `__: MAINT: 1.10.0rc2 backports, round two +* `#17662 `__: Fix undefined behavior within scipy.fft +* `#17686 `__: REV: integrate.qmc_quad: delay release to SciPy 1.11.0 +* `#17689 `__: REL: integrate.qmc_quad: remove from release notes diff --git a/libraries/scipy/releasenotes/1.10.1-notes.rst b/libraries/scipy/releasenotes/1.10.1-notes.rst new file mode 100644 index 0000000..7d47ef5 --- /dev/null +++ b/libraries/scipy/releasenotes/1.10.1-notes.rst @@ -0,0 +1,99 @@ +========================== +SciPy 1.10.1 Release Notes +========================== + +.. contents:: + +SciPy 1.10.1 is a bug-fix release with no new features +compared to 1.10.0. + + + +Authors +======= +* Name (commits) +* alice (1) + +* Matt Borland (2) + +* Evgeni Burovski (2) +* CJ Carey (1) +* Ralf Gommers (9) +* Brett Graham (1) + +* Matt Haberland (5) +* Alex Herbert (1) + +* Ganesh Kathiresan (2) + +* Rishi Kulkarni (1) + +* Loïc Estève (1) +* Michał Górny (1) + +* Jarrod Millman (1) +* Andrew Nelson (4) +* Tyler Reddy (50) +* Pamphile Roy (2) +* Eli Schwartz (2) +* Tomer Sery (1) + +* Kai Striega (1) +* Jacopo Tissino (1) + +* windows-server-2003 (1) + +A total of 21 people contributed to this release. +People with a "+" by their names contributed a patch for the first time. +This list of names is automatically generated, and may not be fully complete. + + +Issues closed for 1.10.1 +------------------------ + +* `#14980 `__: BUG: Johnson's algorithm fails without negative cycles +* `#17670 `__: Failed to install on Raspberry Pi (ARM) 32bit in 3.11.1 +* `#17715 `__: scipy.stats.bootstrap broke with statistic returning multiple... +* `#17716 `__: BUG: interpolate.interpn fails with read only input +* `#17718 `__: BUG: RegularGridInterpolator 2D mixed precision crashes +* `#17727 `__: BUG: RegularGridInterpolator does not work on non-native byteorder... 
+* `#17736 `__: BUG: SciPy requires OpenBLAS even when building against a different... +* `#17775 `__: BUG: Asymptotic computation of ksone.sf has intermediate overflow +* `#17782 `__: BUG: Segfault in scipy.sparse.csgraph.shortest_path() with v1.10.0 +* `#17795 `__: BUG: stats.pearsonr one-sided hypothesis yields incorrect p-value... +* `#17801 `__: BUG: stats.powerlaw.fit: raises OverflowError +* `#17808 `__: BUG: name of cython executable is hardcoded in _build_utils/cythoner.py +* `#17811 `__: CI job with numpy nightly build failing on missing \`_ArrayFunctionDispatcher.__code__\` +* `#17839 `__: BUG: 1.10.0 tests fail on i386 and other less common arches +* `#17896 `__: DOC: publicly expose \`multivariate_normal\` attributes \`mean\`... +* `#17934 `__: BUG: meson \`__config__\` generation - truncated unicode characters +* `#17938 `__: BUG: \`scipy.stats.qmc.LatinHypercube\` with \`optimization="random-cd"\`... + + +Pull requests for 1.10.1 +------------------------ + +* `#17712 `__: REL, MAINT: prepare for 1.10.1 +* `#17717 `__: BUG: allow readonly input to interpolate.interpn +* `#17721 `__: MAINT: update \`meson-python\` upper bound to <0.13.0 +* `#17726 `__: BUG: interpolate/RGI: upcast float32 to float64 +* `#17735 `__: MAINT: stats.bootstrap: fix BCa with vector-valued statistics +* `#17743 `__: DOC: improve the docs on using BLAS/LAPACK libraries with Meson +* `#17777 `__: BLD: link to libatomic if necessary +* `#17783 `__: BUG: Correct intermediate overflow in KS one asymptotic in SciPy.stats +* `#17790 `__: BUG: signal: fix check_malloc extern declaration type +* `#17797 `__: MAINT: stats.pearsonr: correct p-value with negative correlation... +* `#17800 `__: [sparse.csgraph] Fix a bug in dijkstra and johnson algorithm +* `#17803 `__: MAINT: add missing \`__init__.py\` in test folder +* `#17806 `__: MAINT: stats.powerlaw.fit: fix overflow when np.min(data)==0 +* `#17810 `__: BLD: use Meson's found cython instead of a wrapper script +* `#17831 `__: MAINT, CI: GHA MacOS setup.py update +* `#17850 `__: MAINT: remove use of \`__code__\` in \`scipy.integrate\` +* `#17854 `__: TST: mark test for \`stats.kde.marginal\` as xslow +* `#17855 `__: BUG: Fix handling of \`powm1\` overflow errors +* `#17859 `__: TST: fix test failures on i386, s390x, ppc64, riscv64 (Debian) +* `#17862 `__: BLD: Meson \`__config__\` generation +* `#17863 `__: BUG: fix Johnson's algorithm +* `#17872 `__: BUG: fix powm1 overflow handling +* `#17904 `__: ENH: \`multivariate_normal_frozen\`: restore \`cov\` attribute +* `#17910 `__: CI: use nightly numpy musllinux_x86_64 wheel +* `#17931 `__: TST: test_location_scale proper 32bit Linux skip +* `#17932 `__: TST: 32-bit tol for test_pdist_jensenshannon_iris +* `#17936 `__: BUG: Use raw strings for paths in \`__config__.py.in\` +* `#17940 `__: BUG: \`rng_integers\` in \`_random_cd\` now samples on a closed... +* `#17942 `__: BLD: update classifiers for Python 3.11 +* `#17963 `__: MAINT: backports/prep for SciPy 1.10.1 +* `#17981 `__: BLD: make sure macosx_x86_64 10.9 tags are being made on maintenance/1.10.x +* `#17984 `__: DOC: update link of the logo in the readme +* `#17997 `__: BUG: at least one entry from trial should be used in exponential... diff --git a/libraries/scipy/releasenotes/1.8.1-notes.rst b/libraries/scipy/releasenotes/1.8.1-notes.rst new file mode 100644 index 0000000..d0f19dc --- /dev/null +++ b/libraries/scipy/releasenotes/1.8.1-notes.rst @@ -0,0 +1,77 @@ +========================== +SciPy 1.8.1 Release Notes +========================== + +.. 
contents:: + +SciPy 1.8.1 is a bug-fix release with no new features +compared to 1.8.0. Notably, usage of Pythran has been +restored for Windows builds/binaries. + +Authors +======= + +* Henry Schreiner +* Maximilian Nöthe +* Sebastian Berg (1) +* Sameer Deshmukh (1) + +* Niels Doucet (1) + +* DWesl (4) +* Isuru Fernando (1) +* Ralf Gommers (4) +* Matt Haberland (1) +* Andrew Nelson (1) +* Dimitri Papadopoulos Orfanos (1) + +* Tirth Patel (3) +* Tyler Reddy (46) +* Pamphile Roy (7) +* Niyas Sait (1) + +* H. Vetinari (2) +* Warren Weckesser (1) + +A total of 17 people contributed to this release. +People with a "+" by their names contributed a patch for the first time. +This list of names is automatically generated, and may not be fully complete. + +Issues closed for 1.8.1 +----------------------- + +* `#15258 `__: BUG: sparse \`dot\` method should accept scalars +* `#15433 `__: BUG: optimize: minimize: \`ValueError\` when \`np.all(lb==ub)\` +* `#15539 `__: BUG: Questionable macOS wheel contents +* `#15543 `__: REL: list contributors using GitHub handles +* `#15552 `__: BUG: MacOS universal2 wheels have two gfortran shared libraries,... +* `#15636 `__: BUG: DOCS incorrect \`source\` link on docs +* `#15678 `__: BUG: scipy.stats.skew does not work with scipy.stats.bootstrap +* `#16174 `__: Failure of \`TestCorrelateComplex.test_rank0\` in CI with NumPy... + + +Pull requests for 1.8.1 +----------------------- + +* `#15167 `__: CI: make sure CI stays on VS2019 unless changed explicitly +* `#15306 `__: Revert "BLD Respect the --skip-build flag in setup.py" +* `#15504 `__: MAINT: np.all(lb == ub) for optimize.minimize +* `#15530 `__: REL: prep for SciPy 1.8.1 +* `#15531 `__: [BUG] Fix importing scipy.lib._pep440 +* `#15558 `__: CI: re-enable Pythran in Azure Windows CI jobs +* `#15566 `__: BUG: fix error message +* `#15580 `__: BUG: Avoid C Preprocessor symbol in _hypotests_pythran.py. +* `#15614 `__: REL: filter out @ in authors name and add count +* `#15637 `__: DOC, MAINT: fix links to wrapped functions and SciPy's distributions +* `#15669 `__: BUG: stats: fix a bug in UNU.RAN error handler +* `#15691 `__: MAINT: stats: bootstrap: fix bug with \`method="BCa"\` when \`statistic\`... +* `#15798 `__: MAINT,BUG: stats: update to UNU.RAN 1.9.0 +* `#15870 `__: TST: signal: Convert a test with 'assert_array_less' to 'less... +* `#15910 `__: make sure CI stays on VS2019 unless changed explicitly +* `#15926 `__: MAINT: 1.8.1 backports/prep +* `#16035 `__: BUG: allow scalar input to the \`.dot\` method of sparse matrices +* `#16041 `__: MAINT: add include dir explicitly for PROPACK to build with classic... +* `#16139 `__: WIP, BLD, MAINT: git security/version shim +* `#16152 `__: TST: Fortify invalid-value warning filters to small changes in... +* `#16155 `__: MAINT: correct wrong license of Biasedurn +* `#16158 `__: MAINT: better UNU.RAN licensing information +* `#16163 `__: MAINT: update UNU.RAN copyright information +* `#16172 `__: CI: pin Pip to 22.0.4 to avoid issues with \`--no-build-isolation\` +* `#16175 `__: TST: fix test failure due to changes in numpy scalar behavior. + diff --git a/libraries/scipy/releasenotes/1.9.0-notes.rst b/libraries/scipy/releasenotes/1.9.0-notes.rst new file mode 100644 index 0000000..f982efd --- /dev/null +++ b/libraries/scipy/releasenotes/1.9.0-notes.rst @@ -0,0 +1,1242 @@ +========================== +SciPy 1.9.0 Release Notes +========================== + +.. contents:: + +SciPy 1.9.0 is the culmination of 6 months of hard work. 
It contains +many new features, numerous bug-fixes, improved test coverage and better +documentation. There have been a number of deprecations and API changes +in this release, which are documented below. All users are encouraged to +upgrade to this release, as there are a large number of bug-fixes and +optimizations. Before upgrading, we recommend that users check that +their own code does not use deprecated SciPy functionality (to do so, +run your code with ``python -Wd`` and check for ``DeprecationWarning`` s). +Our development attention will now shift to bug-fix releases on the +1.9.x branch, and on adding new features on the main branch. + +This release requires Python 3.8-3.11 and NumPy 1.18.5 or greater. + +For running on PyPy, PyPy3 6.0+ is required. + + +************************** +Highlights of this release +************************** + +- We have modernized our build system to use ``meson``, substantially improving + our build performance, and providing better build-time configuration and + cross-compilation support, +- Added `scipy.optimize.milp`, new function for mixed-integer linear + programming, +- Added `scipy.stats.fit` for fitting discrete and continuous distributions + to data, +- Tensor-product spline interpolation modes were added to + `scipy.interpolate.RegularGridInterpolator`, +- A new global optimizer (DIviding RECTangles algorithm) + `scipy.optimize.direct`. + + +************ +New features +************ + + +`scipy.interpolate` improvements +================================ +- Speed up the ``RBFInterpolator`` evaluation with high dimensional + interpolants. +- Added new spline based interpolation methods for + `scipy.interpolate.RegularGridInterpolator` and its tutorial. +- `scipy.interpolate.RegularGridInterpolator` and `scipy.interpolate.interpn` + now accept descending ordered points. +- ``RegularGridInterpolator`` now handles length-1 grid axes. +- The ``BivariateSpline`` subclasses have a new method ``partial_derivative`` + which constructs a new spline object representing a derivative of an + original spline. This mirrors the corresponding functionality for univariate + splines, ``splder`` and ``BSpline.derivative``, and can substantially speed + up repeated evaluation of derivatives. + +`scipy.linalg` improvements +=========================== +- `scipy.linalg.expm` now accepts nD arrays. Its speed is also improved. +- Minimum required LAPACK version is bumped to ``3.7.1``. + + +`scipy.fft` improvements +======================== +- Added ``uarray`` multimethods for `scipy.fft.fht` and `scipy.fft.ifht` + to allow provision of third party backend implementations such as those + recently added to CuPy. + +`scipy.optimize` improvements +============================= +- A new global optimizer, `scipy.optimize.direct` (DIviding RECTangles algorithm) + was added. For problems with inexpensive function evaluations, like the ones + in the SciPy benchmark suite, ``direct`` is competitive with the best other + solvers in SciPy (``dual_annealing`` and ``differential_evolution``) in terms + of execution time. See + `gh-14300 `__ for more details. + +- Add a ``full_output`` parameter to `scipy.optimize.curve_fit` to output + additional solution information. +- Add a ``integrality`` parameter to `scipy.optimize.differential_evolution`, + enabling integer constraints on parameters. +- Add a ``vectorized`` parameter to call a vectorized objective function only + once per iteration. 
This can improve minimization speed by reducing
+  interpreter overhead from the multiple objective function calls.
+- The default method of `scipy.optimize.linprog` is now ``'highs'``.
+- Added `scipy.optimize.milp`, new function for mixed-integer linear
+  programming.
+- Added Newton-TFQMR method to ``newton_krylov``.
+- Added support for the ``Bounds`` class in ``shgo`` and ``dual_annealing`` for
+  a more uniform API across `scipy.optimize`.
+- Added the ``vectorized`` keyword to ``differential_evolution``.
+- ``approx_fprime`` now works with vector-valued functions.
+
+`scipy.signal` improvements
+===========================
+- The new window function `scipy.signal.windows.kaiser_bessel_derived` was
+  added to compute the Kaiser-Bessel derived window.
+- Single-precision ``hilbert`` operations are now faster as a result of more
+  consistent ``dtype`` handling.
+
+`scipy.sparse` improvements
+===========================
+- Add a ``copy`` parameter to `scipy.sparse.csgraph.laplacian`. Using in-place
+  computation with ``copy=False`` reduces the memory footprint.
+- Add a ``dtype`` parameter to `scipy.sparse.csgraph.laplacian` for type casting.
+- Add a ``symmetrized`` parameter to `scipy.sparse.csgraph.laplacian` to produce
+  a symmetric Laplacian for directed graphs.
+- Add a ``form`` parameter to `scipy.sparse.csgraph.laplacian` taking one of the
+  three values ``array``, ``function``, or ``lo``, determining the format of
+  the output Laplacian:
+
+  * ``array`` is a numpy array (backward compatible default);
+  * ``function`` is a pointer to a lambda-function evaluating the
+    Laplacian-vector or Laplacian-matrix product;
+  * ``lo`` results in the format of the ``LinearOperator``.
+
+`scipy.sparse.linalg` improvements
+==================================
+- ``lobpcg`` performance improvements for small input cases.
+
+`scipy.spatial` improvements
+============================
+- Add an ``order`` parameter to `scipy.spatial.transform.Rotation.from_quat`
+  and `scipy.spatial.transform.Rotation.as_quat` to specify quaternion format.
+
+
+`scipy.stats` improvements
+==========================
+- `scipy.stats.monte_carlo_test` performs one-sample Monte Carlo hypothesis
+  tests to assess whether a sample was drawn from a given distribution. Besides
+  reproducing the results of hypothesis tests like `scipy.stats.ks_1samp`,
+  `scipy.stats.normaltest`, and `scipy.stats.cramervonmises` without small sample
+  size limitations, it makes it possible to perform similar tests using arbitrary
+  statistics and distributions.
+
+- Several `scipy.stats` functions support new ``axis`` (integer or tuple of
+  integers) and ``nan_policy`` ('raise', 'omit', or 'propagate'), and
+  ``keepdims`` arguments.
+  These functions also support masked arrays as inputs, even if they do not have
+  a `scipy.stats.mstats` counterpart. Edge cases for multidimensional arrays,
+  such as when axis-slices have no unmasked elements or entire inputs are of
+  size zero, are handled consistently.
+
+- Add a ``weights`` parameter to `scipy.stats.hmean`.
+
+- Several improvements have been made to `scipy.stats.levy_stable`. Substantial
+  improvement has been made for numerical evaluation of the pdf and cdf,
+  resolving [#12658](https://github.com/scipy/scipy/issues/12658) and
+  [#14994](https://github.com/scipy/scipy/issues/14994). The improvement is
+  particularly dramatic for stability parameter ``alpha`` close to or equal to 1
+  and for ``alpha`` below but approaching its maximum value of 2. The alternative
+  fast Fourier transform based method for pdf calculation has also been updated
+  to use the approach of Wang and Zhang from their 2008 conference paper
+  *Simpson's rule based FFT method to compute densities of stable distribution*,
+  making this method more competitive with the default method. In addition,
+  users now have the option to change the parametrization of the Levy Stable
+  distribution to Nolan's "S0" parametrization, which is used internally by
+  SciPy's pdf and cdf implementations. The "S0" parametrization is described in
+  Nolan's paper [*Numerical calculation of stable densities and distribution
+  functions*](https://doi.org/10.1080/15326349708807450) upon which SciPy's
+  implementation is based. "S0" has the advantage that ``delta`` and ``gamma``
+  are proper location and scale parameters. With ``delta`` and ``gamma`` fixed,
+  the location and scale of the resulting distribution remain unchanged as
+  ``alpha`` and ``beta`` change. This is not the case for the default "S1"
+  parametrization. Finally, more options have been exposed to allow users to
+  trade off between runtime and accuracy for both the default and FFT methods of
+  pdf and cdf calculation. More information can be found in the documentation
+  here (to be linked).
+
+- Added `scipy.stats.fit` for fitting discrete and continuous distributions to
+  data.
+
+- The methods ``"pearson"`` and ``"tippett"`` from `scipy.stats.combine_pvalues`
+  have been fixed to return the correct p-values, resolving
+  [#15373](https://github.com/scipy/scipy/issues/15373). In addition, the
+  documentation for `scipy.stats.combine_pvalues` has been expanded and improved.
+
+- Unlike other reduction functions, ``stats.mode`` didn't consume the axis
+  being operated on and failed for negative axis inputs. Both bugs have been
+  fixed. Note that ``stats.mode`` will now consume the input axis and return an
+  ndarray with the ``axis`` dimension removed.
+
+- Replaced implementation of `scipy.stats.ncf` with the implementation from
+  Boost for improved reliability.
+
+- Add a `bits` parameter to `scipy.stats.qmc.Sobol`. It allows using from 0
+  to 64 bits to compute the sequence. Default is ``None``, which corresponds to
+  30 for backward compatibility. Using a higher value allows sampling more
+  points. Note: ``bits`` does not affect the output dtype.
+
+- Add an `integers` method to `scipy.stats.qmc.QMCEngine`. It allows sampling
+  integers using any QMC sampler.
+
+- Improved the fit speed and accuracy of ``stats.pareto``.
+
+- Added ``qrvs`` method to ``NumericalInversePolynomial`` to match the
+  situation for ``NumericalInverseHermite``.
+
+- Faster random variate generation for ``gennorm`` and ``nakagami``.
+
+- ``lloyd_centroidal_voronoi_tessellation`` has been added to allow improved
+  sample distributions via iterative application of Voronoi diagrams and
+  centering operations.
+
+- Add `scipy.stats.qmc.PoissonDisk` to sample using the Poisson disk sampling
+  method. It guarantees that samples are separated from each other by a
+  given ``radius``.
+
+- Add `scipy.stats.pmean` to calculate the weighted power mean, also called the
+  generalized mean.
+
+
+*******************
+Deprecated features
+*******************
+
+- Due to collision with the shape parameter ``n`` of several distributions,
+  use of the distribution ``moment`` method with keyword argument ``n`` is
+  deprecated. Keyword ``n`` is replaced with keyword ``order``.
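+  For example, a minimal sketch of the new spelling (an illustration only,
+  assuming SciPy >= 1.9)::
+
+      from scipy import stats
+
+      # formerly spelled stats.expon.moment(n=2)
+      second_moment = stats.expon.moment(order=2)
+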
+- Similarly, use of the distribution ``interval`` method with keyword arguments + ``alpha`` is deprecated. Keyword ``alpha`` is replaced with keyword + ``confidence``. +- The ``'simplex'``, ``'revised simplex'``, and ``'interior-point'`` methods + of `scipy.optimize.linprog` are deprecated. Methods ``highs``, ``highs-ds``, + or ``highs-ipm`` should be used in new code. +- Support for non-numeric arrays has been deprecated from ``stats.mode``. + ``pandas.DataFrame.mode`` can be used instead. +- The function `spatial.distance.kulsinski` has been deprecated in favor + of `spatial.distance.kulczynski1`. +- The ``maxiter`` keyword of the truncated Newton (TNC) algorithm has been + deprecated in favour of ``maxfun``. +- The ``vertices`` keyword of ``Delauney.qhull`` now raises a + DeprecationWarning, after having been deprecated in documentation only + for a long time. +- The ``extradoc`` keyword of ``rv_continuous``, ``rv_discrete`` and + ``rv_sample`` now raises a DeprecationWarning, after having been deprecated in + documentation only for a long time. + +******************** +Expired Deprecations +******************** +There is an ongoing effort to follow through on long-standing deprecations. +The following previously deprecated features are affected: + +- Object arrays in sparse matrices now raise an error. +- Inexact indices into sparse matrices now raise an error. +- Passing ``radius=None`` to `scipy.spatial.SphericalVoronoi` now raises an + error (not adding ``radius`` defaults to 1, as before). +- Several BSpline methods now raise an error if inputs have ``ndim > 1``. +- The ``_rvs`` method of statistical distributions now requires a ``size`` + parameter. +- Passing a ``fillvalue`` that cannot be cast to the output type in + `scipy.signal.convolve2d` now raises an error. +- `scipy.spatial.distance` now enforces that the input vectors are + one-dimensional. +- Removed ``stats.itemfreq``. +- Removed ``stats.median_absolute_deviation``. +- Removed ``n_jobs`` keyword argument and use of ``k=None`` from + ``kdtree.query``. +- Removed ``right`` keyword from ``interpolate.PPoly.extend``. +- Removed ``debug`` keyword from ``scipy.linalg.solve_*``. +- Removed class ``_ppform`` ``scipy.interpolate``. +- Removed BSR methods ``matvec`` and ``matmat``. +- Removed ``mlab`` truncation mode from ``cluster.dendrogram``. +- Removed ``cluster.vq.py_vq2``. +- Removed keyword arguments ``ftol`` and ``xtol`` from + ``optimize.minimize(method='Nelder-Mead')``. +- Removed ``signal.windows.hanning``. +- Removed LAPACK ``gegv`` functions from ``linalg``; this raises the minimally + required LAPACK version to 3.7.1. +- Removed ``spatial.distance.matching``. +- Removed the alias ``scipy.random`` for ``numpy.random``. +- Removed docstring related functions from ``scipy.misc`` (``docformat``, + ``inherit_docstring_from``, ``extend_notes_in_docstring``, + ``replace_notes_in_docstring``, ``indentcount_lines``, ``filldoc``, + ``unindent_dict``, ``unindent_string``). +- Removed ``linalg.pinv2``. + +****************************** +Backwards incompatible changes +****************************** + +- Several `scipy.stats` functions now convert ``np.matrix`` to ``np.ndarray``s + before the calculation is performed. In this case, the output will be a scalar + or ``np.ndarray`` of appropriate shape rather than a 2D ``np.matrix``. + Similarly, while masked elements of masked arrays are still ignored, the + output will be a scalar or ``np.ndarray`` rather than a masked array with + ``mask=False``. 
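+  A small sketch of the converted output (an illustration only, assuming
+  SciPy >= 1.9 and an ``np.matrix`` input)::
+
+      import numpy as np
+      from scipy import stats
+
+      m = np.matrix([[1.0, 4.0], [9.0, 16.0]])
+      res = stats.gmean(m)  # plain ndarray([3., 8.]), not an np.matrix
+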
+- The default method of `scipy.optimize.linprog` is now ``'highs'``, not
+  ``'interior-point'`` (which is now deprecated), so callback functions and
+  some options are no longer supported with the default method. With the
+  default method, the ``x`` attribute of the returned ``OptimizeResult`` is
+  now ``None`` (instead of a non-optimal array) when an optimal solution
+  cannot be found (e.g. infeasible problem).
+- For `scipy.stats.combine_pvalues`, the sign of the test statistic returned
+  for the method ``"pearson"`` has been flipped so that higher values of the
+  statistic now correspond to lower p-values, making the statistic more
+  consistent with those of the other methods and with the majority of the
+  literature.
+- `scipy.linalg.expm`, for historical reasons, used the sparse implementation
+  and thus accepted sparse arrays. Now it only works with nD arrays. For
+  sparse usage, `scipy.sparse.linalg.expm` needs to be used explicitly.
+- The definition of `scipy.stats.circvar` has reverted to the one that is
+  standard in the literature; note that this is not the same as the square of
+  `scipy.stats.circstd`.
+- Removed inheritance from `QMCEngine` in `MultinomialQMC` and
+  `MultivariateNormalQMC`. This removes the methods `fast_forward` and `reset`.
+- Initialization of `MultinomialQMC` now requires the number of trials via
+  `n_trials`. Hence, the `MultinomialQMC.random` output now has the correct
+  shape ``(n, pvals)``.
+- Several function-specific warnings (``F_onewayConstantInputWarning``,
+  ``F_onewayBadInputSizesWarning``, ``PearsonRConstantInputWarning``,
+  ``PearsonRNearConstantInputWarning``, ``SpearmanRConstantInputWarning``, and
+  ``BootstrapDegenerateDistributionWarning``) have been replaced with more
+  general warnings.
+
+
+*************
+Other changes
+*************
+
+- A draft developer CLI is available for SciPy, leveraging the ``doit``,
+  ``click`` and ``rich-click`` tools. For more details, see
+  [gh-15959](https://github.com/scipy/scipy/pull/15959).
+
+- The SciPy contributor guide has been reorganized and updated
+  (see [#15947](https://github.com/scipy/scipy/pull/15947) for details).
+
+- QUADPACK Fortran routines in `scipy.integrate`, which power
+  `scipy.integrate.quad`, have been marked as `recursive`. This should fix rare
+  issues in multivariate integration (`nquad` and friends) and obviate the need
+  for compiler-specific compile flags (`/recursive` for ifort etc). Please file
+  an issue if this change turns out problematic for you. This is also true for
+  ``FITPACK`` routines in `scipy.interpolate`, which power ``splrep``,
+  ``splev`` etc., and ``*UnivariateSpline`` and ``*BivariateSpline`` classes.
+
+- The ``USE_PROPACK`` environment variable has been renamed to
+  ``SCIPY_USE_PROPACK``; setting it to a non-zero value will enable
+  the usage of the ``PROPACK`` library as before.
+
+- Building SciPy on Windows with MSVC now requires at least the vc142
+  toolset (available in Visual Studio 2019 and higher).
+
+Lazy access to subpackages
+==========================
+
+Before this release, all subpackages of SciPy (`cluster`, `fft`, `ndimage`,
+etc.) had to be explicitly imported. Now, these subpackages are lazily loaded
+as soon as they are accessed, so that the following is possible (if desired
+for interactive use, it's not actually recommended for code,
+see :ref:`scipy-api`):
+``import scipy as sp; sp.fft.dct([1, 2, 3])``.
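+A short interactive sketch of the lazy behavior (assuming SciPy >= 1.9)::
+
+    import scipy as sp
+
+    # ``scipy.fft`` is imported automatically on first attribute access;
+    # no explicit ``import scipy.fft`` is needed
+    sp.fft.dct([1.0, 2.0, 3.0])
+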
Advantages include: making it +easier to navigate SciPy in interactive terminals, reducing subpackage import +conflicts (which before required +``import networkx.linalg as nla; import scipy.linalg as sla``), +and avoiding repeatedly having to update imports during teaching & +experimentation. Also see +[the related community specification document](https://scientific-python.org/specs/spec-0001/). + +SciPy switched to Meson as its build system +=========================================== + +This is the first release that ships with [Meson](https://mesonbuild.com) as +the build system. When installing with ``pip`` or ``pypa/build``, Meson will be +used (invoked via the ``meson-python`` build hook). This change brings +significant benefits - most importantly much faster build times, but also +better support for cross-compilation and cleaner build logs. + +.. note:: + + This release still ships with support for ``numpy.distutils``-based builds + as well. Those can be invoked through the ``setup.py`` command-line + interface (e.g., ``python setup.py install``). It is planned to remove + ``numpy.distutils`` support before the 1.10.0 release. + +When building from source, a number of things have changed compared to building +with ``numpy.distutils``: + +- New build dependencies: ``meson``, ``ninja``, and ``pkg-config``. + ``setuptools`` and ``wheel`` are no longer needed. +- BLAS and LAPACK libraries that are supported haven't changed, however the + discovery mechanism has: that is now using ``pkg-config`` instead of hardcoded + paths or a ``site.cfg`` file. +- The build defaults to using OpenBLAS. See :ref:`blas-lapack-selection` for + details. + +The two CLIs that can be used to build wheels are ``pip`` and ``build``. In +addition, the SciPy repo contains a ``python dev.py`` CLI for any kind of +development task (see its ``--help`` for details). For a comparison between old +(``distutils``) and new (``meson``) build commands, see :ref:`meson-faq`. + +For more information on the introduction of Meson support in SciPy, see +`gh-13615 `__ and +`this blog post `__. + + +******* +Authors +******* + +* endolith (12) +* h-vetinari (11) +* Caio Agiani (2) + +* Emmy Albert (1) + +* Joseph Albert (1) +* Tania Allard (3) +* Carsten Allefeld (1) + +* Kartik Anand (1) + +* Virgile Andreani (2) + +* Weh Andreas (1) + +* Francesco Andreuzzi (5) + +* Kian-Meng Ang (2) + +* Gerrit Ansmann (1) +* Ar-Kareem (1) + +* Shehan Atukorala (1) + +* avishai231 (1) + +* Blair Azzopardi (1) +* Sayantika Banik (2) + +* Ross Barnowski (9) +* Christoph Baumgarten (3) +* Nickolai Belakovski (1) +* Peter Bell (9) +* Sebastian Berg (3) +* Bharath (1) + +* bobcatCA (2) + +* boussoffara (2) + +* Islem BOUZENIA (1) + +* Jake Bowhay (41) + +* Matthew Brett (11) +* Dietrich Brunn (2) + +* Michael Burkhart (2) + +* Evgeni Burovski (96) +* Matthias Bussonnier (20) +* Dominic C (1) +* Cameron (1) + +* CJ Carey (3) +* Thomas A Caswell (2) +* Ali Cetin (2) + +* Hood Chatham (5) + +* Klesk Chonkin (1) +* Craig Citro (1) + +* Dan Cogswell (1) + +* Luigi Cruz (1) + +* Anirudh Dagar (5) +* Brandon David (1) +* deepakdinesh1123 (1) + +* Denton DeLoss (1) + +* derbuihan (2) + +* Sameer Deshmukh (13) + +* Niels Doucet (1) + +* DWesl (8) +* eytanadler (30) + +* Thomas J. 
Fan (5) +* Isuru Fernando (3) +* Joseph Fox-Rabinovitz (1) +* Ryan Gibson (4) + +* Ralf Gommers (327) +* Srinivas Gorur-Shandilya (1) + +* Alex Griffing (2) +* Matt Haberland (461) +* Tristan Hearn (1) + +* Jonathan Helgert (1) + +* Samuel Hinton (1) + +* Jake (1) + +* Stewart Jamieson (1) + +* Jan-Hendrik Müller (1) +* Yikun Jiang (1) + +* JuliaMelle01 (1) + +* jyuv (12) + +* Toshiki Kataoka (1) +* Chris Keefe (1) + +* Robert Kern (4) +* Andrew Knyazev (11) +* Matthias Koeppe (4) + +* Sergey Koposov (1) +* Volodymyr Kozachynskyi (1) + +* Yotaro Kubo (2) + +* Jacob Lapenna (1) + +* Peter Mahler Larsen (8) +* Eric Larson (4) +* Laurynas Mikšys (1) + +* Antony Lee (1) +* Gregory R. Lee (2) +* lerichi (1) + +* Tim Leslie (2) +* P. L. Lim (1) +* Smit Lunagariya (43) +* lutefiskhotdish (1) + +* Cong Ma (12) +* Syrtis Major (1) +* Nicholas McKibben (18) +* Melissa Weber Mendonça (10) +* Mark Mikofski (1) +* Jarrod Millman (13) +* Harsh Mishra (6) +* ML-Nielsen (3) + +* Matthew Murray (1) + +* Andrew Nelson (50) +* Dimitri Papadopoulos Orfanos (1) + +* Evgueni Ovtchinnikov (2) + +* Sambit Panda (1) +* Nick Papior (2) +* Tirth Patel (43) +* Petar Mlinarić (1) +* petroselo (1) + +* Ilhan Polat (64) +* Anthony Polloreno (1) +* Amit Portnoy (1) + +* Quentin Barthélemy (9) +* Patrick N. Raanes (1) + +* Tyler Reddy (185) +* Pamphile Roy (199) +* Vivek Roy (2) + +* sabonerune (1) + +* Niyas Sait (2) + +* Atsushi Sakai (25) +* Mazen Sayed (1) + +* Eduardo Schettino (5) + +* Daniel Schmitz (6) + +* Eli Schwartz (4) + +* SELEE (2) + +* Namami Shanker (4) +* siddhantwahal (1) + +* Gagandeep Singh (8) +* Soph (1) + +* Shivnaren Srinivasan (1) + +* Scott Staniewicz (1) + +* Leo C. Stein (4) +* Albert Steppi (7) +* Christopher Strickland (1) + +* Kai Striega (4) +* Søren Fuglede Jørgensen (1) +* Aleksandr Tagilov (1) + +* Masayuki Takagi (1) + +* Sai Teja (1) + +* Ewout ter Hoeven (2) + +* Will Tirone (2) +* Bas van Beek (7) +* Dhruv Vats (1) +* Arthur Volant (1) +* Samuel Wallan (5) +* Stefan van der Walt (8) +* Warren Weckesser (84) +* Anreas Weh (1) +* Nils Werner (1) +* Aviv Yaish (1) + +* Dowon Yi (1) +* Rory Yorke (1) +* Yosshi999 (1) + +* yuanx749 (2) + +* Gang Zhao (23) +* ZhihuiChen0903 (1) +* Pavel Zun (1) + +* David Zwicker (1) + + +A total of 154 people contributed to this release. +People with a "+" by their names contributed a patch for the first time. +This list of names is automatically generated, and may not be fully complete. + + +*********************** +Issues closed for 1.9.0 +*********************** + +* `#1884 `__: stats distributions fit problems (Trac #1359) +* `#2047 `__: derivatives() method is missing in BivariateSpline (Trac #1522) +* `#2071 `__: TST: stats: \`check_sample_var\` should be two-sided (Trac #1546) +* `#2414 `__: stats binom at non-integer n (Trac #1895) +* `#2623 `__: stats.distributions statistical power of test suite +* `#2625 `__: wilcoxon() function does not return z-statistic +* `#2650 `__: (2D) Interpolation functions should work with complex numbers +* `#2834 `__: ksone fitting +* `#2868 `__: nan and stats.percentileofscore +* `#2877 `__: distributions.ncf numerical issues +* `#2993 `__: optimize.approx_fprime & jacobians +* `#3214 `__: stats distributions ppf-cdf roundtrip +* `#3758 `__: discrete distribution defined by \`values\` with non-integer... 
+* `#4130 `__: BUG: stats: fisher_exact returns incorrect p-value +* `#4897 `__: expm is 10x as slow as matlab according to http://stackoverflow.com/questions/30048315 +* `#5103 `__: Docs suggest scipy.sparse.linalg.expm_multiply supports LinearOperator... +* `#5266 `__: Deprecated routines in Netlib LAPACK >3.5.0 +* `#5890 `__: Undefined behavior when using scipy.interpolate.RegularGridInterpolator... +* `#5982 `__: Keyword collision in scipy.stats.levy_stable.interval +* `#6472 `__: scipy.stats.invwishart does not check if scale matrix is symmetric +* `#6551 `__: BUG: stats: inconsistency in docs and behavior of gmean and hmean +* `#6624 `__: incorrect handling of nan by RegularGridInterpolator +* `#6882 `__: Certain recursive scipy.integrate.quad (e.g. dblquad and nquad)... +* `#7469 `__: Misleading interp2d documentation +* `#7560 `__: Should RegularGridInterpolator support length 1 dimensions? +* `#8850 `__: Scipy.interpolate.griddata Error : Exception ignored in: 'scipy.spatial.qhull._Qhull.__dealloc__' +* `#8928 `__: BUG: scipy.stats.norm wrong expected value of function when loc... +* `#9213 `__: __STDC_VERSION__ check in C++ code +* `#9231 `__: infinite loop in stats.fisher_exact +* `#9313 `__: geometric distribution stats.geom returns negative values if... +* `#9524 `__: interpn returns nan with perfectly valid data +* `#9591 `__: scipy.interpolate.interp1d with kind=“previous” doesn't extrapolate... +* `#9815 `__: stats.mode's nan_policy 'propagate' not working? +* `#9944 `__: documentation for \`scipy.interpolate.RectBivariateSpline\` is... +* `#9999 `__: BUG: malloc() calls in Cython and C that are not checked for... +* `#10096 `__: Add literature reference for circstd (and circvar?) +* `#10446 `__: RuntimeWarning: invalid value encountered in stats.genextreme +* `#10577 `__: Additional discussion for scipy.stats roadmap +* `#10821 `__: Errors with the Yeo-Johnson Transform that also Appear in Scikit-Learn +* `#10983 `__: LOBPCG inefficinet when computing > 20% of eigenvalues +* `#11145 `__: unexpected SparseEfficiencyWarning at scipy.sparse.linalg.splu +* `#11406 `__: scipy.sparse.linalg.svds (v1.4.1) on singular matrix does not... +* `#11447 `__: scipy.interpolate.interpn: Handle ValueError('The points in dimension... +* `#11673 `__: intlinprog: integer linear program solver +* `#11742 `__: MAINT: stats: getting skewness alone takes 34000x longer than... +* `#11806 `__: Unexpectedly poor results when distribution fitting with \`weibull_min\`... +* `#11828 `__: UnivariateSpline gives varying results when multithreaded on... +* `#11948 `__: fitting discrete distributions +* `#12073 `__: Add note in documentation +* `#12370 `__: truncnorm.rvs is painfully slow on version 1.5.0rc2 +* `#12456 `__: Add generalized mean calculation +* `#12480 `__: RectBivariateSpline derivative evaluator is slow +* `#12485 `__: linprog returns an incorrect message +* `#12506 `__: ENH: stats: one-sided p-values for statistical tests +* `#12545 `__: stats.pareto.fit raises RuntimeWarning +* `#12548 `__: scipy.stats.skew returning MaskedArray +* `#12633 `__: Offer simpler development workflow? +* `#12658 `__: scipy.stats.levy_stable.pdf can be inaccurate and return nan +* `#12733 `__: scipy.stats.truncnorm.cdf slow +* `#12838 `__: Accept multiple matrices in \`scipy.linalg.expm\` +* `#12848 `__: DOC: stats: multivariate distribution documentation issues +* `#12870 `__: Levy Stable Random Variates Code has a typo +* `#12871 `__: Levy Stable distribution uses parameterisation that is not location... 
+* `#13200 `__: Errors made by scipy.optimize.linprog +* `#13462 `__: Too many warnings and results objects in public API for scipy.stats +* `#13582 `__: ENH: stats: \`rv_continuous.stats\` with array shapes: use \`_stats\`... +* `#13615 `__: RFC: switch to Meson as a build system +* `#13632 `__: stats.rv_discrete is not checking that xk values are integers +* `#13655 `__: MAINT: stats.rv_generic: \`moment\` method falls back to \`_munp\`... +* `#13689 `__: Wilcoxon does not appropriately detect ties when mode=exact. +* `#13835 `__: Change name of \`alpha\` parameter in \`interval()\` method +* `#13872 `__: Add method details or reference to \`scipy.integrate.dblquad\` +* `#13912 `__: Adding Poisson Disc sampling to QMC +* `#13996 `__: Fisk distribution documentation typo +* `#14035 `__: \`roots_jacobi\` support for large parameter values +* `#14081 `__: \`scipy.optimize._linprog_simplex._apply_pivot\` relies on asymmetric... +* `#14095 `__: scipy.stats.norm.pdf takes too much time and memory +* `#14162 `__: Thread safety RectBivariateSpline +* `#14267 `__: BUG: online doc returns 404 - wrong \`reference\` in url +* `#14313 `__: ks_2samp: example description does not match example output +* `#14418 `__: \`ttest_ind\` for two sampled distributions with the same single... +* `#14455 `__: Adds Mixed Integer Linear Programming from highs +* `#14462 `__: Shapiro test returning negative p-value +* `#14471 `__: methods 'revised simplex' and 'interior-point' are extremely... +* `#14505 `__: \`Optimization converged to parameters that are outside the range\`... +* `#14527 `__: Segmentation fault with KDTree +* `#14548 `__: Add convention flag to quanternion in \`Scipy.spatial.transform.rotation.Rotation\` +* `#14565 `__: optimize.minimize: Presence of callback causes method TNC to... +* `#14622 `__: BUG: (sort of) mannwhitneyu hits max recursion limit with imbalanced... +* `#14645 `__: ENH: MemoryError when trying to bootstrap with large amounts... +* `#14716 `__: BUG: stats: The \`loguniform\` distribution is overparametrized. +* `#14731 `__: BUG: Incorrect residual graph in scipy.sparse.csgraph.maximum_flow +* `#14745 `__: BUG: scipy.ndimage.convolve documentation is incorrect +* `#14750 `__: ENH: Add one more derivative-free optimization method +* `#14753 `__: Offer to collaborate on truncated normal estimation by minimax... +* `#14777 `__: BUG: Wrong limit and no warning in stats.t for df=np.inf +* `#14793 `__: BUG: Missing pairs in cKDTree.query_pairs when coordinates contain... +* `#14861 `__: BUG: unclear error message when all bounds are all equal for... +* `#14889 `__: BUG: NumPy's \`random\` module should not be in the \`scipy\`... +* `#14914 `__: CI job with code coverage is failing (yet again) +* `#14926 `__: RegularGridInterpolator should be called RectilinearGridInterpolator +* `#14986 `__: Prevent new Python versions from trying to install older releases... +* `#14994 `__: BUG: Levy stable +* `#15009 `__: BUG: scipy.stats.multiscale_graphcorr p-values are computed differently... +* `#15059 `__: BUG: documentation inconsistent with code for find_peaks_cwt +* `#15082 `__: DOC: Sampling from the truncated normal +* `#15110 `__: BUG: truncnorm.cdf returns incorrect values at tail +* `#15125 `__: Deprecate \`scipy.spatial.distance.kulsinski\` +* `#15133 `__: BUG: Log_norm description is incorrect and produces incorrect... 
+* `#15150 `__: BUG: RBFInterpolator is much slower than Rbf for vector data +* `#15172 `__: BUG: special: High relative error in \`log_ndtr\` +* `#15195 `__: BUGS: stats: Tracking issue for distributions that warn and/or... +* `#15199 `__: BUG: Error occured \`spsolve_triangular\` +* `#15210 `__: BUG: A sparse matrix raises a ValueError when \`__rmul__\` with... +* `#15245 `__: MAINT: scipy.stats._levy_stable should be treated as subpackage... +* `#15252 `__: DOC: Multivariate normal CDF docstring typo +* `#15296 `__: BUG: SciPy 1.7.x build failure on Cygwin +* `#15308 `__: BUG: OpenBLAS 0.3.18 support +* `#15338 `__: DOC: Rename \`\*args\` param in \`f_oneway\` to \`\*samples\` +* `#15345 `__: BUG: boschloo_exact gives pvalue > 1 (and sometimes nan) +* `#15368 `__: build warnings for \`unuran_wrapper.pyx\` +* `#15373 `__: BUG: Tippett’s and Pearson’s method for combine_pvalues are not... +* `#15415 `__: \`integrate.quad_vec\` missing documentation for \`limit\` parameter +* `#15456 `__: Segfault in HiGHS code when building with Mingw-w64 on Windows +* `#15458 `__: DOC: Documentation inaccuracy of scipy.interpolate.bisplev +* `#15488 `__: ENH: missing examples for scipy.optimize in docs +* `#15507 `__: BUG: scipy.optimize.linprog: the algorithm determines the problem... +* `#15508 `__: BUG: Incorrect error message in multivariate_normal +* `#15541 `__: BUG: scipy.stats.powerlaw, why should x ∈ (0,1)? x can exceed... +* `#15551 `__: MAINT: stats: deprecating non-numeric array support in \`stats.mode\` +* `#15568 `__: BENCH/CI: Benchmark timeout +* `#15572 `__: BUG: \`scipy.spatial.transform.rotation\`, wrong deprecation... +* `#15575 `__: BUG: Tests failing for initial build [arm64 machine] +* `#15589 `__: BUG: scipy.special.factorialk docstring inconsistent with behaviour +* `#15601 `__: BUG: Scalefactors for \`signal.csd\` with \`average=='median'\`... +* `#15617 `__: ENH: stats: all multivariate distributions should be freezable +* `#15631 `__: BUG: stats.fit: intermittent failure in doctest +* `#15635 `__: CI:ASK: Remove LaTeX doc builds? +* `#15638 `__: DEV: \`dev.py\` missing PYTHONPATH when building doc +* `#15644 `__: DOC: stats.ks_1samp: incorrect commentary in examples +* `#15666 `__: CI: CircleCI build_docs failure on main +* `#15670 `__: BUG: AssertionError in test__dual_annealing.py in test_bounds_class +* `#15689 `__: BUG: default value of shape parameter in fit method of rv_continuous... +* `#15692 `__: CI: scipy.scipy (Main refguide_asv_check) failure in main +* `#15696 `__: DOC: False information in docs - scipy.stats.ttest_1samp +* `#15700 `__: BUG: AssertionError in test_propack.py +* `#15730 `__: BUG: "terminate called after throwing an instance of 'std::out_of_range'"... 
+* `#15732 `__: DEP: execute deprecation of inexact indices into sparse matrices +* `#15734 `__: DEP: deal with deprecation of ndim >1 in bspline +* `#15735 `__: DEP: add actual DeprecationWarning for sym_pos-keyword of scipy.linalg.solve +* `#15736 `__: DEP: Remove \`debug\` keyword from \`scipy.linalg.solve_\*\` +* `#15737 `__: DEP: Execute deprecation of pinv2 +* `#15739 `__: DEP: sharpen deprecation for >1-dim inputs in optimize.minimize +* `#15740 `__: DEP: Execute deprecation for squeezing input vectors in spatial.distance +* `#15741 `__: DEP: remove spatial.distance.matching +* `#15742 `__: DEP: raise if fillvalue cannot be cast to output type in \`signal.convolve2d\` +* `#15743 `__: DEP: enforce radius for \`spatial.SphericalVoronoi\` +* `#15744 `__: DEP: sharpen deprecation of dual_annealing argument 'local_search_options' +* `#15745 `__: DEP: remove signal.windows.hanning +* `#15746 `__: DEP: remove k=None from KDTree.query +* `#15747 `__: DEP: stats: remove support for \`_rvs\` without \`size\` parameter +* `#15750 `__: DEP: remove \`n_jobs\` from kdtree +* `#15751 `__: DEP: remove ftol/xtol from neldermead +* `#15752 `__: DEP: remove right keyword from interpolate.PPoly.extend +* `#15753 `__: DEP: remove \`_ppform\` +* `#15754 `__: DEP: Remove mlab mode from dendrogram +* `#15757 `__: DEP: docstring-related deprecations +* `#15758 `__: DEP: remove LAPACK \*gegv functions +* `#15759 `__: DEP: remove old BSR methods +* `#15760 `__: DEP: remove py_vq2 +* `#15761 `__: DEP: remove stats.itemfreq +* `#15762 `__: DEP: remove stats.median_absolute_deviation +* `#15773 `__: BUG: iirfilter allows Wn[1] < Wn[0] for band-pass and band-stop... +* `#15780 `__: BUG: CI on Azure broken with PyTest 7.1 +* `#15843 `__: BUG: scipy.stats.brunnermunzel incorrectly returns nan for undocumented... +* `#15854 `__: CI: Windows Meson job failing sometimes on OpenBLAS binary download +* `#15866 `__: BUG/CI: Wrong python version used for tests labeled "Linux Tests... +* `#15871 `__: BUG: stats: Test failure of \`TestTruncnorm.test_moments\` on... +* `#15899 `__: BUG: _calc_uniform_order_statistic_medians documentation example... +* `#15927 `__: BUG: Inconsistent handling of INF and NAN in signal.convolve +* `#15931 `__: BUG: scipy/io/arff/tests/test_arffread.py::TestNoData::test_nodata... +* `#15960 `__: BUG: Documentation Error in scipy.signal.lfilter +* `#15961 `__: BUG: scipy.stats.beta and bernoulli fails with float32 inputs +* `#15962 `__: Race condition in macOS Meson build between \`_matfuncs_expm\`... +* `#15987 `__: CI: \`np.matrix\` deprecation warning +* `#16007 `__: BUG: Confusing documentation in \`ttest_ind_from_stats\` +* `#16011 `__: BUG: typo in documentation for scipy.optimize.basinhopping +* `#16020 `__: BUG: dev.py FileNotFoundError +* `#16027 `__: jc should be (n-1)/2 +* `#16031 `__: BUG: scipy.sparse.linalg.norm does not work on sparse arrays +* `#16036 `__: Missing \`f\` prefix on f-strings +* `#16054 `__: Bug: Meson build with dev.py fails to detect SciPy with debian... +* `#16065 `__: BUG: Gitpod build with \`python runtests.py\` fails; move to... +* `#16074 `__: BUG: refguide check fails with \`numpydoc==1.3\` +* `#16081 `__: CI, MAINT: minor refguide failure with stats.describe +* `#16121 `__: DOC: scipy.interpolate.RegularGridInterpolator and interpn works... +* `#16162 `__: BUG: curve_fit gives wrong results with Pandas float32 +* `#16171 `__: BUG: scipy.stats.multivariate_hypergeom.rvs raises ValueError... 
+* `#16219 `__: \`TestSobol.test_0dim\` failure on 32-bit Linux job +* `#16233 `__: BUG: Memory leak in function \`sf_error\` due to new reference... +* `#16254 `__: DEP: add deprecation warning to \`maxiter\` kwarg in \`_minimize_tnc\` +* `#16292 `__: BUG: compilation error: no matching constructor for initialization... +* `#16300 `__: BLD: pip install build issue with meson in Ubuntu virtualenv +* `#16337 `__: TST: stats/tests/test_axis_nan_policy.py::test_axis_nan_policy_full... +* `#16347 `__: TST, MAINT: 32-bit Linux test failures in wheels repo +* `#16358 `__: TST, MAINT: test_theilslopes_warnings fails on 32-bit Windows +* `#16378 `__: DOC: pydata-sphinx-theme v0.9 defaults to darkmode depending... +* `#16381 `__: BUG: bootstrap get ValueError for paired statistic +* `#16382 `__: BUG: truncnorm.fit does not fit correctly +* `#16403 `__: MAINT: NumPy main will require a few updates due to new floating... +* `#16409 `__: BUG: SIGSEGV in qhull when array type is wrong +* `#16418 `__: BUG: breaking change: scipy.stats.mode returned value has changed... +* `#16419 `__: BUG: scipy.stats.nbinom.logcdf returns wrong results when some... +* `#16426 `__: BUG: stats.shapiro inplace modification of user array +* `#16446 `__: BUG: Issue with stripping on macOS Monterey + xcode 13.2 +* `#16465 `__: BLD: new sdist has some metadata issues +* `#16466 `__: BUG: linprog failure - OptimizeResult.x returns NoneType +* `#16495 `__: HiGHS does not compile on windows (on conda-forge infra) +* `#16523 `__: BUG: test failure in pre-release job: \`TestFactorized.test_singular_with_umfpack\` +* `#16540 `__: BLD: meson 0.63.0 and new CI testing failures on Linux +* `#16555 `__: Building 1.9.x branch from source requires fix in meson-python... +* `#16609 `__: BUG: \`scipy.optimize.linprog\` reports optimal for trivially... +* `#16681 `__: BUG: linprog integrality only accepts list, not array +* `#16718 `__: BUG: memoryview error with Cython 0.29.31 + +*********************** +Pull requests for 1.9.0 +*********************** + +* `#9523 `__: ENH: improvements to the Stable distribution +* `#11829 `__: Fixes safe handling of small singular values in svds. +* `#13490 `__: DEV: stats: check for distribution/method keyword name collisions +* `#13572 `__: ENH: n-D and nan_policy support for scipy.stats.percentileofscore +* `#13918 `__: ENH: Poisson Disk sampling for QMC +* `#13955 `__: DOC: SciPy extensions for code style and docstring guidelines. +* `#14003 `__: DOC: clarify the definition of the pdf of \`stats.fisk\` +* `#14036 `__: ENH: fix numerical issues in roots_jacobi and related special... +* `#14087 `__: DOC: explain null hypotheses in ttest functions +* `#14142 `__: DOC: Add better error message for unpacking issue +* `#14143 `__: Support LinearOperator in expm_multiply +* `#14300 `__: ENH: Adding DIRECT algorithm to \`\`scipy.optimize\`\` +* `#14576 `__: ENH: stats: add one-sample Monte Carlo hypothesis test +* `#14642 `__: ENH: add Lloyd's algorithm to \`scipy.spatial\` to improve a... +* `#14718 `__: DOC: stats: adjust bootstrap doc to emphasize that batch controls... +* `#14781 `__: BUG: stats: handle infinite \`df\` in \`t\` distribution +* `#14847 `__: ENH: BLD: enable building SciPy with Meson +* `#14877 `__: DOC: ndimage convolve origin documentation (#14745) +* `#15001 `__: ENH: sparse.linalg: More comprehensive tests (Not only for 1-D... 
+* `#15026 `__: ENH: allow approx_fprime to work with vector-valued func +* `#15079 `__: ENH:linalg: expm overhaul and ndarray processing +* `#15140 `__: ENH: Make \`stats.kappa3\` work with array inputs +* `#15154 `__: DOC: a small bug in docstring example of \`lobpcg\` +* `#15165 `__: MAINT: Avoid using del to remove numpy symbols in scipy.__init__.py +* `#15168 `__: REL: set version to 1.9.0.dev0 +* `#15169 `__: DOC: fix formatting of Methods in multivariate distributions +* `#15171 `__: \`AttrDict\` raises \`AttributeError\` on missing attributes,... +* `#15176 `__: BUG: special: Clean up some private namespaces and fix \`special.__all__\` +* `#15182 `__: MAINT: fix typos principle -> principal +* `#15184 `__: MAINT: CI: Rename 'Nightly CPython' job to 'NumPy main' +* `#15187 `__: BUG: special: Fix numerical precision issue of log_ndtr +* `#15188 `__: MAINT: sparse.linalg: Using more concise and user-friendly f-string... +* `#15190 `__: MAINT: interpolate: speed up the RBFInterpolator evaluation with... +* `#15196 `__: BUG: stats: Fix handling of support endpoints in two distributions. +* `#15197 `__: MAINT: build dependency updates +* `#15202 `__: MAINT: special: Don't use macro for 'extern "C"' in strictly... +* `#15205 `__: BUG: stats: Fix spurious warnings generated by several distributions. +* `#15207 `__: MAINT: sparse.linalg: Using the interface with the trace of sparse... +* `#15219 `__: DOC: Corrected docstring of ndimage.sum_labels +* `#15223 `__: DOC: x0->x for finite_diff_rel_step docstring closes #15208 +* `#15230 `__: ENH: expose submodules via \`__getattr__\` to allow lazy access +* `#15234 `__: TST: stats: mark very slow tests as \`xslow\` +* `#15235 `__: BUG: Fix rmul dispatch of spmatrix +* `#15243 `__: DOC: stats: add reference for gstd +* `#15244 `__: Added example for morphology: binary_dilation and erosion +* `#15250 `__: ENH: Make \`stats.kappa4\` work with array +* `#15251 `__: [MRG] ENH: Update \`laplacian\` function introducing the new... +* `#15255 `__: MAINT: Remove \`distutils\` usage in \`runtests.py\` to fix deprecation... +* `#15259 `__: MAINT: optimize, special, signal: Use custom warnings instead... +* `#15261 `__: DOC: Add inline comment in Hausdorff distance calculation +* `#15265 `__: DOC: update .mailmap +* `#15266 `__: CI: remove coverage usage from Windows jobs +* `#15269 `__: BLD: add setup.py for \`stats/_levy_stable\` +* `#15272 `__: BUG: Fix owens_t function when a tends to infinity +* `#15274 `__: DOC: fix docstring in _cdf() function of _multivariate.py +* `#15284 `__: TST: silence RuntimeWarning from \`np.det\` in \`signal.place_poles\`... +* `#15285 `__: CI: simplify 32-bit Linux testing +* `#15286 `__: MAINT: Highs submodule CI issue - use shallow cloning +* `#15289 `__: DOC: Misc numpydoc formatting. +* `#15291 `__: DOC: some more docstring/numpydoc formatting. +* `#15294 `__: ENH: add integrality constraints for linprog +* `#15300 `__: DOC: Misc manual docs updates. +* `#15302 `__: DOC: More docstring reformatting. +* `#15304 `__: CI: fix Gitpod build by adding HiGHS submodule checkout +* `#15305 `__: BLD: update NumPy to >=1.18.5, setuptools to <60.0 +* `#15309 `__: CI: update OpenBLAS to 0.3.18 in Azure jobs +* `#15310 `__: ENH: signal: Add Kaiser-Bessel derived window function +* `#15312 `__: BUG: special: Fix loss of precision in pseudo_huber when r/delta... 
+* `#15314 `__: MAINT: changed needed after renaming \`master\` branch to \`main\` +* `#15315 `__: MAINT: account for NumPy master -> main renaming +* `#15325 `__: CI: reshuffle two Windows Azure CI jobs, and don't run 'full'... +* `#15330 `__: ENH: optimize: support undocumented option \`full_output\` for... +* `#15336 `__: DOC: update detailed roadmap +* `#15344 `__: MAINT:stats: Renamed \`\*args\` param to \`\*samples\` +* `#15347 `__: ENH: stats: add weights in harmonic mean +* `#15352 `__: BLD: put upper bound \`setuptools<60.0\` in conda environment... +* `#15357 `__: ENH: interpolate: add new methods for RegularGridInterpolator. +* `#15360 `__: MAINT: speed up rvs of nakagami in scipy.stats +* `#15361 `__: MAINT: sparse.linalg: Remove unnecessary operations +* `#15366 `__: Make signal functions respect input dtype. +* `#15370 `__: DOC: governance members moved to scipy.org +* `#15371 `__: MAINT: stats: fix unuran compile-time warnings +* `#15378 `__: MAINT: remove version pinning on gmpy2 +* `#15380 `__: ENH/MAINT: Version switcher from the sphinx theme +* `#15385 `__: DOC: fix typo +* `#15387 `__: MAINT: Fix a couple build warnings. +* `#15388 `__: DOC: interpolate: improve \`RectBivariateSpline\` doc +* `#15391 `__: ENH: graph Laplacian as LinearOperator, add dtype and symmetrized... +* `#15392 `__: ENH: integrality constraints for differential_evolution +* `#15394 `__: ENH: optimize: improvements to \`LinearConstraint\` class +* `#15396 `__: DOC: Git:// protocol on github pending removal. +* `#15399 `__: ENH: stats: add \`axis\` tuple and \`nan_policy\` to \`hmean\` +* `#15400 `__: MAINT: sparse.linalg: Move the test function of GMRES to the... +* `#15401 `__: MAINT: DOC: analytics from analytics.scientific-python +* `#15402 `__: DOC: update pip_quickstart (submodules) +* `#15406 `__: MAINT: use \`Rotation.Random\` instead of manual generation +* `#15407 `__: BLD: meson: split pyx->c and Python extension build +* `#15408 `__: MAINT: check for negative weights in \`Rotation.align_vectors\` +* `#15410 `__: ENH: add \`order\` parameter to specify quaternion format +* `#15413 `__: ENH: stats: add \`rvs\` method for \`gennorm\` +* `#15424 `__: ENH: bypass LinearOperator in lobpcg for small-size cases +* `#15427 `__: MAINT: Manage imports in \`sparse.linalg\` +* `#15431 `__: Revert "ENH: add \`order\` parameter to specify quaternion format" +* `#15436 `__: ENH: stats: fit: function for fitting discrete and continuous... +* `#15439 `__: ENH: differential_evolution vectorized kwd +* `#15440 `__: MAINT: Try to detect scipy path in \`runtests.py\` while not... +* `#15442 `__: MAINT: Fix meson build warnings on windows +* `#15443 `__: DOC, BUG: Fix error in heading remapping for custom \`scipy.optimize:function\` domain directive +* `#15445 `__: ENH: stats: add \`nnlf\` method for discrete distributions +* `#15451 `__: BLD: further refinement of Cython dependencies +* `#15452 `__: BUG/DOC/TST: combine_pvalues: fix Tippett and Pearson +* `#15453 `__: ENH: Make dual_annealing work with Bounds class +* `#15454 `__: BLD: remove dependency on libnpymath from \`spatial._distance_wrap\` +* `#15455 `__: ENH: Support Bounds class in shgo +* `#15459 `__: DOC: documents parameter \`limit\` for function \`integrate.quad_vec\`. 
+* `#15460 `__: ENH: optimize: milp: mixed integer linear programming +* `#15462 `__: CI: switch one macOS CI job from distutils to meson +* `#15464 `__: ENH: Performance improvements for \`linear_sum_assignment\` +* `#15465 `__: DOC: stats: add weights in formulas and examples for gmean and... +* `#15466 `__: MAINT: fix compile errors with CPython 3.11 +* `#15469 `__: MAINT: Remove \`distutils\` usage +* `#15470 `__: ENH: \`stats.qmc\`: faster hypercube point comparison and scrambling... +* `#15472 `__: ENH: stats: add \`axis\` tuple and \`nan_policy\` to \`skew\` +* `#15485 `__: BLD: updates to Meson build files for more correct linking and... +* `#15487 `__: MAINT: typo in bsplines.py +* `#15496 `__: DOC: signal: fixed parameter 'order' for butter bandpass +* `#15497 `__: MAINT: update vendored uarray +* `#15499 `__: CI: remove matplotlib from 32-bit linux job, it fails to build +* `#15501 `__: MAINT: Remove unused variable warnings +* `#15502 `__: DEV: meson: allow specifying build directory and install prefix +* `#15512 `__: MAINT: optimize.linprog: make HiGHS default and deprecate old... +* `#15523 `__: DOC: fixed the link for fluiddyn's transonic vision in dev/roadmap.html. +* `#15526 `__: MAINT: add qrvs method to NumericalInversePolynomial in scipy.stats +* `#15529 `__: DOC: forward port 1.8.0 relnotes +* `#15532 `__: TST: parametrize test_ldl_type_size_combinations +* `#15546 `__: DOC: missing section for metrics +* `#15555 `__: MAINT: make unuran clone shallow +* `#15557 `__: DOC: fixes inaccuracy in bisplev documentation +* `#15559 `__: BENCH: selection of linalg solvers to facilitate expansion +* `#15560 `__: DOC: types and return values for Bessel Functions +* `#15561 `__: MAINT: update HiGHS submodule to include fix for Windows segfault +* `#15563 `__: CI: add a Windows CI job on GitHub Actions using Meson +* `#15564 `__: DOC: stray backticks +* `#15565 `__: DOC: incorrect underline lenght in section. +* `#15567 `__: ENH: stats.pareto fit improvement for parameter combinations +* `#15569 `__: DOC: pip quickstart: setup.py -> meson +* `#15570 `__: MAINT: bump test tolerance in test_linprog +* `#15571 `__: DOC: Wrong underline length +* `#15578 `__: Make Windows Python setup more standard +* `#15581 `__: MAINT: clarify deprecation warning spatial.transform.rotation +* `#15583 `__: DOC: clarify O(N) SO(N) in random rotations +* `#15586 `__: ENH: stats: Add 'alternative' and confidence interval to pearsonr +* `#15590 `__: DOC: factorialk docstring inconsistent with code +* `#15597 `__: DOC: update \`hyp2f1\` docstring example based on doctest +* `#15598 `__: BUG/ENH: \`lsq_linear\`: fixed incorrect \`lsmr_tol\` in first... +* `#15603 `__: BENCH: optimize: milp: add MILP benchmarks +* `#15606 `__: MAINT: allow multiplication sign \`×\` +* `#15611 `__: BUG:signal: Fix median bias in csd(..., average="median") +* `#15616 `__: CI: pin asv to avoid slowdowns in 0.5/0.5.1 +* `#15619 `__: DOC: stats: update interval and moment method signatures +* `#15625 `__: MAINT: Clean up \`type: ignore\` comments related to third-party... +* `#15626 `__: TST, MAINT: ignore np distutils dep +* `#15629 `__: MAINT: stats: fix \`trim1\` \`axis\` behavior +* `#15632 `__: ENH: stats.wilcoxon: return z-statistic (as requested) +* `#15634 `__: CI: Improve concurrency to cancel running jobs on PR update +* `#15645 `__: DOC: Add code example to the documentation of \`sparse.linalg.cg\`. 
+* `#15646 `__: DOC: stats.ks_1samp: correct examples +* `#15647 `__: ENH: add variable bits to \`stats.qmc.Sobol\` +* `#15648 `__: DOC: Add examples to documentation for \`scipy.special.ellipr{c,d,f,g,j}\` +* `#15649 `__: DEV/DOC: remove latex/pdf documentation +* `#15651 `__: DOC: stats.ks_2samp/stats.kstest: correct examples +* `#15652 `__: DOC: stats.circstd: add reference, notes, comments +* `#15655 `__: REL: fix small issue in pavement.py for release note writing +* `#15656 `__: DOC: Fix example for subset_by_index in eigh doc +* `#15661 `__: DOC: Additional examples for optimize user guide +* `#15662 `__: DOC: stats.fit: fix intermittent failure in doctest +* `#15663 `__: DOC: stats.burr12: fix typo +* `#15664 `__: BENCH: Add benchmarks for special.factorial/factorial2/factorialk +* `#15673 `__: DOC: fix intersphinx links +* `#15682 `__: MAINT: sparse.linalg: Clear up unnecessary modules imported in... +* `#15684 `__: DOC: add formula and documentation improvements for scipy.special.chndtr... +* `#15690 `__: ENH: add uarray multimethods for fast Hankel transforms +* `#15694 `__: MAINT,CI: signal: fix failing refguide check +* `#15699 `__: DOC: stats.ttest_1samp: update example +* `#15701 `__: BUG: Fix dual_annealing bounds test +* `#15703 `__: BUG: fix test fail in test_propack.py (loosen atol) +* `#15710 `__: MAINT: sparse.linalg: \`bnorm\` only calculate once +* `#15712 `__: ENH: \`scipy.stats.qmc.Sobol\`: allow 32 or 64 bit computation +* `#15715 `__: ENH: stats: add _axis_nan_policy_factory to moment +* `#15718 `__: ENH: Migration of \`write_release_and_log\` into standalone script +* `#15723 `__: TST: stats: make \`check_sample_var\` two-sided +* `#15724 `__: TST: stats: simplify \`check_sample_mean\` +* `#15725 `__: DEV: Try to detect scipy from dev installed path +* `#15728 `__: ENH: changed vague exception messages to a more descriptive ones... +* `#15729 `__: ENH: stats: add weighted power mean +* `#15763 `__: ENH: stats: replace ncf with Boost non_central_f distribution +* `#15766 `__: BUG: improve exceptions for private attributes in refactored... 
+* `#15768 `__: [DOC] fix typo in cython optimize help example +* `#15769 `__: MAINT: stats: check integrality in \`_argcheck\` as needed +* `#15771 `__: MAINT: stats: resolve discrete rvs dtype platform dependency +* `#15774 `__: MAINT: stats: remove deprecated \`median_absolute_deviation\` +* `#15775 `__: DOC: stats.lognorm: rephrase note about parameterization +* `#15776 `__: DOC: stats.powerlaw: more explicit explanation of support +* `#15777 `__: MAINT: stats.shapiro: subtract median from shapiro input +* `#15778 `__: MAINT: stats: more specific error type from \`rv_continuous.fit\` +* `#15779 `__: CI: don't run meson tests on forks and remove skip flags +* `#15782 `__: DEPR: remove k=None in KDTree.query +* `#15783 `__: CI:Pin pytest version to 7.0.1 on Azure +* `#15785 `__: MAINT: stats: remove deprecated itemfreq +* `#15786 `__: DOC: Add examples of integrals to integrate.quadpack +* `#15788 `__: DOC: update macOS and Linux contributor docs to use Python 3.9 +* `#15789 `__: DOC, MAINT: Remove numpydoc submodule +* `#15791 `__: MAINT: add ShapeInfo to continuous distributions in scipy.stats +* `#15795 `__: DEP: remove n_jobs from cKDTree +* `#15797 `__: scipy/_lib/boost: Update to d8626c9d2d937abf6a38a844522714ad72e63281 +* `#15799 `__: DEP: add warning for documented-as-deprecated extradoc +* `#15802 `__: DOC: Import Error in examples +* `#15803 `__: DOC: error in TransferFunctionDiscrete example +* `#15804 `__: DEP: sharpen warning message on >1-dim for optimize.minimize +* `#15805 `__: DEP: specify version to remove dual_annealing argument 'local_search_options' +* `#15809 `__: DOC,MAINT: remove \`quad_explain\` that has become irrelevant. +* `#15810 `__: DOC: stats.mood: validity only when observations are unique +* `#15811 `__: DOC: fix evaluate_all_bspl example. +* `#15812 `__: DOC: Couple of single to double backticks +* `#15813 `__: DOC: information about skip on CircleCI +* `#15817 `__: MAINT: stats.fisher_exact: improve docs and fix bugs +* `#15819 `__: DEP: docstring-related deprecations (#15757) +* `#15821 `__: DEP: add actual DeprecationWarning for sym_pos-keyword of scipy.linalg.solve +* `#15822 `__: DEP: remove \`right\` from interpolate.PPoly.extend +* `#15823 `__: DOC: Interpolative tutorial - wrong matrix fill var +* `#15824 `__: BUG: Handle base case for scipy.integrate.simpson when span along... +* `#15825 `__: TST: stats: xfail_on_32bit studentized_range moment test +* `#15827 `__: DOC: change docs that specify the SNR ratio definition for find_peaks_cwt(). +* `#15828 `__: DEP: raise value error for object arrays +* `#15830 `__: MAINT: stats: collocate bootstrap/permutation_test/monte_carlo_test +* `#15831 `__: MAINT: stats.rv_generic: fix unnecessary call to \`_munp\` in... +* `#15835 `__: FIX: Incorect boschloo pvalue +* `#15837 `__: DOC: Simplify conda command +* `#15840 `__: DOC: special: Add 'Examples' for wrightomega. +* `#15842 `__: DOC: Add examples for \`CGS\`, \`GCROTMK\` and \`BiCGSTAB\` iterative... +* `#15846 `__: DOC: Add efficiency condition for CSC sparse matrix and remove... +* `#15847 `__: BUG: adds warning to scipy.stats.brunnermunzel +* `#15848 `__: DOC: fix interp2d docs showing wrong Z array ordering. 
+* `#15850 `__: MAINT: sparse.linalg: Missing tfqmr in the re-entrancy test +* `#15853 `__: DEP: remove the keyword debug from linalg.solve +* `#15855 `__: ENH: stats.rv_continuous.expect: split interval to improve reliability +* `#15867 `__: CI: fix python version matrix in linux workflow +* `#15868 `__: CI: fix Azure workflows +* `#15872 `__: DEP: remove mlab from dendrogram +* `#15874 `__: DEP: remove py_vq2 +* `#15875 `__: DEP: remove old BSR methods +* `#15876 `__: DEP: remove _ppform +* `#15881 `__: DEP: remove signal.windows.hanning +* `#15882 `__: DEP: enforced radius in spherical voronoi +* `#15885 `__: DOC: stats: clarify truncnorm shape parameter definition +* `#15886 `__: BUG: check that iirfilter argument Wn satisfies Wn[0] < Wn[1] +* `#15887 `__: DEP: remove ftol/xtol from neldermead +* `#15894 `__: [BUG] make p-values consistent with the literature +* `#15895 `__: CI: remove pin on Jinja2 +* `#15898 `__: DOC: stats: correct documentation of \`wilcoxon\`'s behavior... +* `#15900 `__: DOC: fix import in example in _morestats +* `#15905 `__: MAINT: stats._moment: warn when catastrophic cancellation occurs +* `#15909 `__: DEP: deal with deprecation of ndim >1 in bspline +* `#15911 `__: MAINT: stats: fix \`gibrat\` name +* `#15914 `__: MAINT: special: Clean up C style in ndtr.c +* `#15916 `__: MAINT: stats: adjust tolerance of failing TestTruncnorm +* `#15917 `__: MAINT: stats: remove support for \`_rvs\` without \`size\` parameter +* `#15920 `__: ENH: stats.mannwhitneyu: add iterative implementation +* `#15923 `__: MAINT: stats: attempt to consolidate warnings and errors +* `#15932 `__: MAINT: stats: fix and thoroughly test \`rv_sample\` at non-integer... +* `#15933 `__: TST: test_nodata respect endianness +* `#15938 `__: DOC: sparse.linalg: add citations for COLAMD +* `#15939 `__: Update _dual_annealing.py +* `#15945 `__: BUG/ENH: \`MultinomialQMC.random\` shape to (n, pvals) +* `#15946 `__: DEP: remove inheritance to \`QMCEngine\` in \`MultinomialQMC\`... +* `#15947 `__: DOC: Revamp contributor setup guides +* `#15953 `__: DOC: Add meson docs to use gcc, clang build in parallel and optimization... +* `#15955 `__: BUG Fix signature of D_IIR_forback(1,2) +* `#15959 `__: ENH: Developer CLI for SciPy +* `#15965 `__: MAINT: stats: ensure that \`rv_continuous._fitstart\` shapes... +* `#15968 `__: BUG: Fix debug and coverage arguments with dev.py +* `#15970 `__: BLD: specify \`cython_lapack\` dependency for \`matfuncs_expm\` +* `#15973 `__: DOC: Add formula renderings to integrate.nquad. +* `#15981 `__: ENH: optimize: Add Newton-TFQMR method and some tests for Newton-Krylov +* `#15982 `__: BENCH: stats: Distribution memory and CDF/PPF round trip benchmarks +* `#15983 `__: TST: sparse.linalg: Add tests for the parameter \`show\` +* `#15991 `__: TST: fix for np.kron matrix issue. +* `#15992 `__: DOC: Fixed \`degrees\` parameter in return section +* `#15997 `__: MAINT: integrate: add \`recursive\` to QUADPACK Fortran sources +* `#15998 `__: BUG: Fix yeojohnson when transformed data has zero variance +* `#15999 `__: MAINT: Adds doit.db.db to gitignore +* `#16004 `__: MAINT: rename MaximumFlowResult.residual to flow +* `#16005 `__: DOC: sparse.linalg: Fixed the description of input matrix of... +* `#16010 `__: MAINT: Add a check to verify all \`.pyi\` files are installed... +* `#16012 `__: DOC: Fix broken link and add python headers to contributing guide +* `#16015 `__: DEP: bump version for deprecating residual to flow. 
+* `#16018 `__: Doc: fix arch linux building from source local dependencies instructions +* `#16019 `__: DOC: fix conda env name in quickstart guide [skip ci] +* `#16021 `__: DOC: typos in basinhopping documentation +* `#16024 `__: CI: unpin pytest and pytest-xdist +* `#16026 `__: BUG: Allow \`spsolve_triangular\` to work with matrices with... +* `#16029 `__: BUG: Fix meson-info file errors and add more informative exception +* `#16030 `__: MAINT: stats: more accurate error message for \`multivariate_normal\` +* `#16032 `__: FIX: show warning when passing NAN into input of convolve method +* `#16037 `__: MAINT: fix missing \`f\` prefix on f-strings +* `#16042 `__: MAINT: stats.dirichlet: fix interface inconsistency +* `#16044 `__: DEV: do.py, adoption of pkg pydevtool (removed non SciPy specific... +* `#16045 `__: ENH: Use circleci-artifacts-redirector-action +* `#16051 `__: MAINT: Miscellaneous small changes to filter_design +* `#16053 `__: Mark fitpack sources as \`recursive\` +* `#16055 `__: MAINT: stats: replace \`np.var\` with \`_moment(..., 2)\` to... +* `#16058 `__: DEV: Fix meson debian python build +* `#16060 `__: MAINT: Allow all Latin-1 Unicode letters in the source code. +* `#16062 `__: DOC: Document QUADPACK routines used in \`\*quad\` +* `#16067 `__: DEP: remove spatial.distance.matching +* `#16070 `__: ENH: interpolate: handle length-1 grid axes in RegularGridInterpolator +* `#16073 `__: DOC: expand RegularGridInterpolator docstring +* `#16075 `__: CI: Fix refguidecheck failures; unpin Sphinx +* `#16077 `__: BUG: interpolate: RGI(nan) is nan +* `#16078 `__: DEV,BLD: Use Meson in Gitpod builds +* `#16082 `__: BUG: refguide-check: allow multiline namedtuples +* `#16083 `__: DOC: fixing a sign issue in FFTlog function documentation +* `#16092 `__: ENH: interpolate: Add functionality to accept descending points... +* `#16095 `__: MAINT: Remove old filtered warnings +* `#16100 `__: MAINT: Fix a couple compiler warnings. +* `#16104 `__: DOC: stats: symmetry not checked for (inv)wishart distributions +* `#16111 `__: BUG: Fix norm for sparse arrays +* `#16115 `__: MAINT: merge \`environment.yml\` and \`environment_meson.yml\` +* `#16117 `__: MAINT: stats.wilcoxon: return \`zstatistic\` only when \`method='approx'\` +* `#16118 `__: Download openblas binary from GH repo +* `#16122 `__: CI: Speed up ci build that keeps timing out +* `#16125 `__: DOC: interpolate: fix typos "the the" -> "the" +* `#16126 `__: DOC: interpolate: details rectilinear grids in docstrings +* `#16128 `__: BUG: interpolate: fix extrapolation behaviors of \`previous\`... +* `#16130 `__: Increase time to timeout on azure +* `#16134 `__: BUG: signal: Fix calculation of extended image indices in convolve2d. +* `#16135 `__: MAINT: sparse.linalg: A minor improvement with zero initial guess +* `#16137 `__: Clean up fitpack smoke tests +* `#16138 `__: TST: interpolate: mark rbf chunking tests as slow +* `#16141 `__: DOC: Plot poles as x and zeros as o in signal +* `#16144 `__: DEP: Execute deprecation for squeezing input vectors in spatial.distance +* `#16145 `__: ENH: Fix signal.iircomb w0 bugs, add support for both frequency... +* `#16150 `__: Add typing info for Rotation.concatenate +* `#16165 `__: BUG: fix extension module initialization, needs use of \`PyMODINIT_FUNC\` +* `#16166 `__: MAINT:linalg: Expose Cython functions for generic use +* `#16167 `__: ENH: Tweak theilslopes and siegelslopes to return a tuple_bunch +* `#16168 `__: BUG: special: Fix the test 'test_d' that is run when SCIPY_XSLOW... 
+* `#16173 `__: Adds note to the curve_fit() docstring to use float64. +* `#16176 `__: MAINT: remove questionable uses of \`Py_FatalError\` in module... +* `#16177 `__: MAINT: Cleanup unused code in meson-files +* `#16180 `__: DEV: do.py build. On setup checks if intro-buildoptions.json... +* `#16181 `__: BUG: stats: fix multivariate_hypergeom.rvs method +* `#16183 `__: ENH: Simplify return names in stats.theil/siegelslopes (and fix... +* `#16184 `__: DEP: raise if fillvalue cannot be cast to output type in signal.convolve2d +* `#16185 `__: BUG: stats: Fix handling of float32 inputs for the boost-based... +* `#16187 `__: BLD: default to Meson in pyproject.toml +* `#16194 `__: BLD: add a build option to force use of the g77 ABI with Meson +* `#16198 `__: DEP: sharpen deprecation in NumericalInverseHermite +* `#16206 `__: CI: Test NumPy main branch also with Python 3.11 +* `#16220 `__: Create a new spline from a partial derivative of a bivariate... +* `#16223 `__: MAINT: interpolate: move RGI to a separate file +* `#16228 `__: TST: interpolate: move test_spalde_scalar to other fitpack tests +* `#16229 `__: REL: DOC: fix documentation URLs +* `#16230 `__: BUG: fix extension module initialization, needs use of PyMODINIT_FUNC,... +* `#16239 `__: MAINT: tools: Add more output to a refguide-check error message. +* `#16241 `__: DOC: stats: update roadmap +* `#16242 `__: BUG: Make KDTree more robust against nans. +* `#16245 `__: DEP: Execute deprecation of pinv2 +* `#16247 `__: DOC:linalg: Remove references to removed pinv2 function +* `#16248 `__: DOC: prep 1.9.0 release notes +* `#16249 `__: Refguide check verbosity abs names +* `#16257 `__: DEP: Deprecation follow-ups +* `#16259 `__: Revert "CI: pin Pip to 22.0.4 to avoid issues with \`--no-build-isolation\`" +* `#16261 `__: DEP: add deprecation warning to maxiter kwarg in _minimize_tnc +* `#16264 `__: DOC: update the RegularGridInterpolator docstring +* `#16265 `__: DEP: deprecate spatial.distance.kulsinski +* `#16267 `__: DOC: broken donation link on GitHub +* `#16273 `__: DOC: remove deprecated functions from refguide +* `#16276 `__: MAINT: sparse.linalg: Update some docstrings. +* `#16279 `__: MAINT: stats: override \`loguniform.fit\` to resolve overparameterization +* `#16282 `__: BUG: special: DECREF scipy_special object before exiting sf_error(). +* `#16283 `__: Corrections To Docs +* `#16287 `__: BLD: sync pyproject.toml changes from oldest-supported-numpy +* `#16289 `__: MAINT: stats: remove function-specific warning messages +* `#16290 `__: BLD: fix issue with \`python setup.py install\` and \`_directmodule\` +* `#16295 `__: MAINT: move \`import_array\` before module creation in module... +* `#16296 `__: DOC: REL: fix \`make dist\` issue with missing dependencies +* `#16303 `__: MAINT: revert addition of multivariate_beta +* `#16304 `__: MAINT: add a more informative error message for broken installs +* `#16309 `__: BLD: CI: fix issue in wheel metadata, and add basic "build in... 
+* `#16316 `__: REL: update version switcher for 1.8.1 +* `#16321 `__: DOC: fix incorrect formatting of deprecation tags +* `#16326 `__: REL: update version switcher for 1.9 +* `#16329 `__: MAINT: git security shim for 1.9.x +* `#16339 `__: MAINT, TST: bump tol for _axis_nan_policy_test +* `#16341 `__: BLD: update Pythran requirement to 0.11.0, to support Clang >=13 +* `#16353 `__: MAINT: version bounds 1.9.0rc1 +* `#16360 `__: MAINT, TST: sup warning for theilslopes +* `#16361 `__: MAINT: SCIPY_USE_PROPACK +* `#16370 `__: MAINT: update Boost submodule to include Cygwin fix +* `#16374 `__: MAINT: update pydata-sphinx-theme +* `#16379 `__: DOC: dark theme css adjustments +* `#16390 `__: TST, MAINT: adjust 32-bit xfails for HiGHS +* `#16393 `__: MAINT: use correct type for element wise comparison +* `#16414 `__: BUG: spatial: Handle integer arrays in HalfspaceIntersection. +* `#16420 `__: MAINT: next round of 1.9.0 backports +* `#16422 `__: TST: fix test issues with casting-related warnings with numpy... +* `#16427 `__: MAINT: stats.shapiro: don't modify input in place +* `#16429 `__: MAINT: stats.mode: revert gh-15423 +* `#16436 `__: DOC: optimize: Mark deprecated linprog methods explicitly +* `#16444 `__: BUG: fix fail to open tempfile in messagestream.pyx (#8850) +* `#16451 `__: MAINT: few more 1.9.0 backports +* `#16453 `__: DOC: Copy-edit 1.9.0-notes.rst +* `#16457 `__: TST: skip 32-bit test_pdist_correlation_iris_nonC +* `#16458 `__: MAINT: 1.9.0 backports +* `#16473 `__: REL: update 1.9.0 release notes +* `#16482 `__: DOC: Update Returns section of optimize.linprog. +* `#16484 `__: MAINT: remove raw html from README.rst +* `#16485 `__: BLD: fix warnings from f2py templating parsing +* `#16493 `__: BLD: clean up unwanted files in sdist, via \`.gitattributes\` +* `#16507 `__: REL: more tweaks to sdist contents +* `#16512 `__: [1.9] MAINT: skip complex128 propack tests on windows +* `#16514 `__: DOC: reflect correctly where windows wheels are built +* `#16526 `__: MAINT: 1.9.0rc2 backports +* `#16530 `__: MAINT: fix umfpack test failure with numpy 1.23 +* `#16539 `__: MAINT: more 1.9.0rc2 backports +* `#16541 `__: BLD: fix regression in building _lsap with symbol visibility +* `#16549 `__: BLD: fix an outdated requirement for macOS arm64 in pyproject.toml +* `#16551 `__: BLD: fix \`__STDC_VERSION__\` check in \`special/_round.h\` +* `#16553 `__: BLD: raise an error with clear message for too-new Python version +* `#16556 `__: DOC: small tweaks to 1.9.0 release notes +* `#16563 `__: DOC: Reflect MSVC minimum toolchain requirement +* `#16570 `__: MAINT: backports before 1.9.0rc3 +* `#16572 `__: MAINT: update bundled licenses for removal of scipy-sphinx-theme +* `#16581 `__: MAINT: stats: fix skew/kurtosis empty 1d input +* `#16586 `__: MAINT: stats.truncnorm: improve CDF accuracy/speed +* `#16593 `__: TST: stats: replace TestTruncnorm::test_moments +* `#16599 `__: MAINT: stats.truncnorm.rvs: improve performance +* `#16605 `__: MAINT: stats.truncnorm: simplify remaining methods +* `#16622 `__: ENH: FIX: update HiGHS submodule to resolve MIP infeasibility... +* `#16638 `__: DOC: update docs on building with Meson +* `#16664 `__: MAINT: stats._axis_nan_policy: preserve dtype of masked arrays... +* `#16671 `__: BLD: update \`meson\` and \`meson-python\` versions for 1.9.0... +* `#16684 `__: MAINT: optimize.linprog: ensure integrality can be an array +* `#16688 `__: DOC: a few mailmap updates +* `#16719 `__: MAINT: stats: Work around Cython bug. 
+* `#16721 `__: MAINT: stats.monte_carlo_test: used biased estimate of p-value diff --git a/libraries/scipy/releasenotes/1.9.1-notes.rst b/libraries/scipy/releasenotes/1.9.1-notes.rst new file mode 100644 index 0000000..3cea88e --- /dev/null +++ b/libraries/scipy/releasenotes/1.9.1-notes.rst @@ -0,0 +1,53 @@ +========================== +SciPy 1.9.1 Release Notes +========================== + +.. contents:: + +SciPy 1.9.1 is a bug-fix release with no new features +compared to 1.9.0. Notably, some important meson build +fixes are included. + +Authors +======= + +* Anirudh Dagar (1) +* Ralf Gommers (12) +* Matt Haberland (2) +* Andrew Nelson (1) +* Tyler Reddy (14) +* Atsushi Sakai (1) +* Eli Schwartz (1) +* Warren Weckesser (2) + +A total of 8 people contributed to this release. +People with a "+" by their names contributed a patch for the first time. +This list of names is automatically generated, and may not be fully complete. + +Issues closed for 1.9.1 +----------------------- + +* `#14517 `__: scipy/linalg/tests/test_decomp.py::TestSchur::test_sort test... +* `#16765 `__: DOC: \`scipy.stats.skew\` no longer returns 0 on constant input +* `#16787 `__: BUG: Can't build 1.10 with mingw-w64 toolchain and numpy 1.21.6... +* `#16813 `__: BUG: scipy.interpolate interp1d extrapolate behaviour change... +* `#16878 `__: BUG: optimize.milp fails to execute when given exactly 3 constraints + + +Pull requests for 1.9.1 +----------------------- + +* `#16709 `__: BLD: make the way we count commits for version numbering more... +* `#16736 `__: REL: prep for SciPy 1.9.1 +* `#16749 `__: BLD: install missing \`.pxd\` files, and update TODOs/FIXMEs... +* `#16750 `__: BLD: make OpenBLAS detection work with CMake +* `#16755 `__: TST: sparse.linalg: Loosen tolerance for the lobpcg test 'test_tolerance_float32' +* `#16760 `__: BLD: use a bit more idiomatic approach to constructing paths... +* `#16768 `__: DOC: stats.skew/kurtosis: returns NaN when input has only one... +* `#16794 `__: BLD/REL: on Windows use numpy 1.22.3 as the version to build... +* `#16822 `__: BUG/TST: linalg: Check the results of 'schur' more carefully. +* `#16825 `__: BUG: interpolate: fix "previous" and "next" extrapolate logic... +* `#16862 `__: BUG, DOC: Fix sphinx autosummary generation for \`odr\` and \`czt\` +* `#16881 `__: MAINT: optimize.milp: fix input validation when three constraints... +* `#16901 `__: MAINT: 1.9.1 backports +* `#16904 `__: BLD: update dependency ranges for meson-python and pybind11 for... diff --git a/libraries/scipy/releasenotes/1.9.2-notes.rst b/libraries/scipy/releasenotes/1.9.2-notes.rst new file mode 100644 index 0000000..f5a0cb4 --- /dev/null +++ b/libraries/scipy/releasenotes/1.9.2-notes.rst @@ -0,0 +1,71 @@ +========================== +SciPy 1.9.2 Release Notes +========================== + +.. contents:: + +SciPy 1.9.2 is a bug-fix release with no new features +compared to 1.9.1. It also provides wheel for Python 3.11 +on several platforms. + +Authors +======= + +* Hood Chatham (1) +* Thomas J. Fan (1) +* Ralf Gommers (22) +* Matt Haberland (5) +* Julien Jerphanion (1) +* Loïc Estève (1) +* Nicholas McKibben (2) +* Naoto Mizuno (1) +* Andrew Nelson (3) +* Tyler Reddy (28) +* Pamphile Roy (1) +* Ewout ter Hoeven (2) +* Warren Weckesser (1) +* Meekail Zain (1) + + +A total of 14 people contributed to this release. +People with a "+" by their names contributed a patch for the first time. +This list of names is automatically generated, and may not be fully complete. 
+ +Issues closed for 1.9.2 +----------------------- + +* `#16545 `__: BUG: 1.9.0rc1: \`OptimizeResult\` not populated when \`optimize.milp\`... +* `#16569 `__: BUG: \`sparse.hstack\` returns incorrect result when the stack... +* `#16898 `__: BUG: optimize.minimize backwards compatability in scipy 1.9 +* `#16935 `__: BUG: using msvc + meson to build scipy --> cl cannot be used... +* `#16952 `__: BUG: error from \`scipy.stats.mode\` with \`NaN\`s, \`axis !=... +* `#16964 `__: BUG: scipy 1.7.3 wheels on PyPI require numpy<1.23 in contradiction... +* `#17026 `__: BUG: ncf_gen::ppf(..) causes segfault +* `#17050 `__: Pearson3 PPF does not function properly with negative skew. +* `#17124 `__: BUG: OSX-64 Test failure test_ppf_against_tables getting NaN + + +Pull requests for 1.9.2 +----------------------- + +* `#16628 `__: FIX: Updated dtype resolution in \`_stack_along_minor_axis\` +* `#16814 `__: FIX: milp: return feasible solutions if available on time out +* `#16842 `__: ENH: cibuildwheel infrastructure +* `#16909 `__: MAINT: minimize, restore squeezed ((1.0)) addresses #16898 +* `#16911 `__: REL: prep for SciPy 1.9.2 +* `#16922 `__: DOC: update version switcher for 1.9.1 and pin theme to 0.9 +* `#16934 `__: MAINT: cast \`linear_sum_assignment\` to PyCFunction +* `#16943 `__: BLD: use compiler flags in a more portable way +* `#16954 `__: MAINT: stats.mode: fix bug with \`axis!=1\`, \`nan_policy='omit'\`,... +* `#16966 `__: MAINT: fix NumPy upper bound +* `#16969 `__: BLD: fix usage of \`get_install_data\`, which defaults to purelib +* `#16975 `__: DOC: Update numpy supported versions for 1.9.2 +* `#16991 `__: BLD: fixes for building with MSVC and Intel Fortran +* `#17011 `__: Rudimentary test for manylinux_aarch64 with cibuildwheel +* `#17013 `__: BLD: make MKL detection a little more robust, add notes on TODOs +* `#17046 `__: CI: Update cibuildwheel to 2.10.1 +* `#17055 `__: MAINT: stats.pearson3: fix ppf for negative skew +* `#17064 `__: BUG: Fix numerical precision error of \`truncnorm.logcdf\` when... +* `#17096 `__: FIX: ensure a hold on GIL before raising warnings/errors +* `#17127 `__: TST: stats.studentized_range: fix incorrect test +* `#17131 `__: MAINT: pyproject.toml: Update build system requirements +* `#17132 `__: MAINT: 1.9.2 backports diff --git a/libraries/scipy/releasenotes/1.9.3-notes.rst b/libraries/scipy/releasenotes/1.9.3-notes.rst new file mode 100644 index 0000000..dfc4dae --- /dev/null +++ b/libraries/scipy/releasenotes/1.9.3-notes.rst @@ -0,0 +1,122 @@ +========================== +SciPy 1.9.3 Release Notes +========================== + +.. contents:: + +SciPy 1.9.3 is a bug-fix release with no new features +compared to 1.9.2. + +Authors +======= + +* Jelle Aalbers (1) +* Peter Bell (1) +* Jake Bowhay (3) +* Matthew Brett (3) +* Evgeni Burovski (5) +* drpeteb (1) + +* Sebastian Ehlert (1) + +* GavinZhang (1) + +* Ralf Gommers (2) +* Matt Haberland (15) +* Lakshaya Inani (1) + +* Joseph T. Iosue (1) +* Nathan Jacobi (1) + +* jmkuebler (1) + +* Nikita Karetnikov (1) + +* Lechnio (1) + +* Nicholas McKibben (1) +* Andrew Nelson (1) +* o-alexandre-felipe (1) + +* Tirth Patel (1) +* Tyler Reddy (51) +* Martin Reinecke (1) +* Marie Roald (1) + +* Pamphile Roy (2) +* Eli Schwartz (1) +* serge-sans-paille (1) +* ehsan shirvanian (1) + +* Mamoru TASAKA (1) + +* Samuel Wallan (1) +* Warren Weckesser (7) +* Gavin Zhang (1) + + +A total of 31 people contributed to this release. +People with a "+" by their names contributed a patch for the first time. 
+This list of names is automatically generated, and may not be fully complete. + +Issues closed for 1.9.3 +----------------------- + +* `#3691 `__: scipy.interpolate.UnivariateSpline segfault +* `#5286 `__: BUG: multivariate_normal returns a pdf for values outside its... +* `#6551 `__: BUG: stats: inconsistency in docs and behavior of gmean and hmean +* `#9245 `__: running scipy.interpolate.tests.test_fitpack::test_bisplev_integer_overflow... +* `#12471 `__: test_bisplev_integer_overflow: Segmentation fault (core dumped) +* `#13321 `__: Bug: setting iprint=0 hides all output from fmin_l_bfgs_b, but... +* `#13730 `__: \`scipy.stats.mood\` does not correct for ties +* `#14019 `__: ks_2samp throws \`RuntimeWarning: overflow encountered in double_scalars\` +* `#14589 `__: \`shgo\` error since scipy 1.8.0.dev0+1529.803e52d +* `#14591 `__: Input data validation for RectSphereBivariateSpline +* `#15101 `__: BUG: binom.pmf - RuntimeWarning: divide by zero +* `#15342 `__: BUG: scipy.optimize.minimize: Powell's method function evaluated... +* `#15964 `__: BUG: lombscargle fails if argument is a view +* `#16211 `__: BUG: Possible bug when using winsorize on pandas data instead... +* `#16459 `__: BUG: stats.ttest_ind returns wrong p-values with permutations +* `#16500 `__: odr.Model default meta value fails with __getattr__ +* `#16519 `__: BUG: Error in error message for incorrect sample dimension in... +* `#16527 `__: BUG: dimension of isuppz in syevr is mistranslated +* `#16600 `__: BUG: \`KDTree\`'s optional argument \`eps\` seems to have no... +* `#16656 `__: dtype not preserved with operations on sparse arrays +* `#16751 `__: BUG: \`stats.fit\` on \`boltzmann\` expects \`bound\` for \`lambda\`,... +* `#17012 `__: BUG: Small oversight in sparse.linalg.lsmr? +* `#17020 `__: BUG: Build failure due to problems with shebang line in cythoner.py +* `#17088 `__: BUG: stats.rayleigh.fit: returns \`loc\` that is inconsistent... +* `#17104 `__: BUG? Incorrect branch in \`LAMV\` / \`_specfunc.lamv\` +* `#17196 `__: DOC: keepdims in stats.mode is incorrectly documented + + +Pull requests for 1.9.3 +----------------------- + +* `#5288 `__: BUG: multivariate_normal returns a pdf for values outside its... +* `#13322 `__: Bug: setting iprint=0 hides all output from fmin_l_bfgs_b, but... +* `#13349 `__: BUG: stats: Reformulate loggamma._rvs to handle c << 1. +* `#13411 `__: BUG: fix out-of-bound evaluations in optimize.minimize, powell... +* `#15363 `__: BUG: fix powell evaluated outside limits +* `#15381 `__: BUG: fix stats.rv_histogram for non-uniform bins +* `#16212 `__: stats.mood: correct for when ties are present +* `#16288 `__: BUG: fix a crash in \`fpknot\` +* `#16318 `__: MAINT: stats: fix _contains_nan on Pandas Series +* `#16460 `__: Fix ttest permutations +* `#16506 `__: MAINT: fix SHGO extra arguments +* `#16521 `__: BUG: Fix error in error message for incorrect sample dimension... +* `#16525 `__: MAINT: stats.ks_2samp: always emit warning when exact method... +* `#16528 `__: BUG: fix syevr series segfault by explicitly specifying operator... +* `#16562 `__: BUG: optimize: Fix differential_evolution error message. 
+* `#16573 `__: FIX: \`odr.Model\` error with default \`meta\` value +* `#16588 `__: FIX: stats: ignore divide-by-zero warnings from Boost binom impl +* `#16590 `__: MAINT: stats.vonmises: wrap rvs to -pi, pi interval +* `#16630 `__: BUG: eps param no effect fixed +* `#16645 `__: MAINT: Ensure Pythran input for lombscargle are contiguous +* `#16649 `__: Detect integer overflow in bivariate splines in fitpackmodule.c,... +* `#16657 `__: BUG: sparse: Fix indexing sparse matrix with empty index arguments. +* `#16669 `__: FIX: spurious divide error with \`gmean\` +* `#16701 `__: BUG: fix mutable data types as default arguments in \`ord.{Data,RealData}\` +* `#16752 `__: MAINT: stats.boltzmann: correct _shape_info typo +* `#16780 `__: BUG: interpolate: sanity check x and y in make_interp_spline(x,... +* `#16836 `__: MAINT: avoid \`func_data\`, it conflicts with system header on... +* `#16872 `__: BUG: interpolate: work array sizes for RectSphereBivariateSpline +* `#16965 `__: BUG: linalg: Fix the XSLOW test test_sgesdd_lwork_bug_workaround() +* `#17043 `__: MAINT: fix small LSMR problem +* `#17090 `__: MAINT: stats.rayleigh: enforce constraint on location +* `#17105 `__: FIX: special: use intended branching for \`lamv\` implementation +* `#17166 `__: MAINT: stats.rv_discrete.pmf: should be zero at non-integer argument +* `#17176 `__: REL: Prep for SciPy 1.9.3 +* `#17190 `__: BUG: special: Fix two XSLOW test failures. +* `#17193 `__: MAINT: update meson.build to make it work on IBM i system +* `#17200 `__: BLD: fix issue with incomplete threads dependency handling +* `#17204 `__: Keepdims incorrectly documneted fix +* `#17209 `__: MAINT: Handle numpy's deprecation of accepting out-of-bound integers. +* `#17210 `__: BLD: fix invalid shebang for build helper script diff --git a/libraries/scipy/requirements.txt b/libraries/scipy/requirements.txt new file mode 100644 index 0000000..24ce15a --- /dev/null +++ b/libraries/scipy/requirements.txt @@ -0,0 +1 @@ +numpy diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..ab4b023 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,7 @@ +[tool.pytest.ini_options] +addopts = [ + "--import-mode=importlib" +] +pythonpath = [ + "src", "src/upgraider" +] \ No newline at end of file diff --git a/ql/README.md b/ql/README.md new file mode 100644 index 0000000..28faae6 --- /dev/null +++ b/ql/README.md @@ -0,0 +1,5 @@ +# CodeQL queries to find API usages + +This folder contains a CodeQL query that searches for additional examples of deprecated/problematic API usages for each library in the `libraries//examples` folder. These QL queries can be run on a set of checked-out local repositories or through [Multi-repository variant analysis](https://codeql.github.com/docs/codeql-for-visual-studio-code/running-codeql-queries-at-scale-with-mrva/). + +See the [CodeQL documentation](https://codeql.github.com/docs/) for more information. \ No newline at end of file diff --git a/ql/qlpack.yml b/ql/qlpack.yml new file mode 100644 index 0000000..c14e707 --- /dev/null +++ b/ql/qlpack.yml @@ -0,0 +1,7 @@ +name: python-snippet-search +version: 0.1.0 +extractor: python +dependencies: + # This uses the latest version of the codeql/python-all library. + # You may want to change to a more precise semver string.
+ codeql/python-all: "*" diff --git a/ql/queries/all-apis.ql b/ql/queries/all-apis.ql new file mode 100644 index 0000000..8f46ec1 --- /dev/null +++ b/ql/queries/all-apis.ql @@ -0,0 +1,46 @@ +/** + * @name misc-api-calls + * @description Find calls to various APIs from diff libraries + * @kind problem + */ + +import python +import semmle.python.ApiGraphs + +from API::CallNode call, API::Node targetFunction +where + ( + // networkx (https://networkx.github.io/documentation/stable/reference/index.html) + targetFunction = API::moduleImport("networkx").getMember("OrderedGraph") + or + targetFunction = API::moduleImport("networkx").getMember("from_numpy_matrix") + or + targetFunction = API::moduleImport("networkx").getMember("to_numpy_matrix") + or + // numpy (https://numpy.org/doc/stable/reference/index.html) + targetFunction = API::moduleImport("numpy").getMember("fastCopyAndTranspose") + or + targetFunction = API::moduleImport("numpy").getMember("msort") + or + // pandas (https://pandas.pydata.org/docs/reference/index.html) + targetFunction = + API::moduleImport("pandas").getMember("Categorical").getReturn().getMember("to_dense") + or + targetFunction = + API::moduleImport("pandas").getMember("ExcelWriter").getReturn().getMember("save") + or + targetFunction = + API::moduleImport("pandas").getMember("Index").getReturn().getMember("is_boolean") + or + targetFunction = + API::moduleImport("pandas").getMember("Index").getReturn().getMember("is_mixed") + or + targetFunction = API::moduleImport("pandas").getMember("to_datetime") and + exists(call.getParameter(-1, "infer_datetime_format")) + or + targetFunction = API::moduleImport("pandas").getMember("factorize") and + exists(call.getParameter(-1, "na_sentinel")) + ) and + call = targetFunction.getACall() and + exists(call.getLocation().getFile().getRelativePath()) +select targetFunction.toString(), call.getLocation() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4e40d45 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,12 @@ +pandas +plotly +markdownify +openai==0.27.2 +sqlalchemy==2.0.4 +requests +jsonpickle +python-dotenv +stackapi +tiktoken +dataclasses_json +pytest diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ce2a6bc --- /dev/null +++ b/setup.py @@ -0,0 +1,24 @@ +from setuptools import setup, find_packages + +with open('requirements.txt') as f: + requirements = f.read().splitlines() + +setup( + name="upgraider", + version="0.1.0", + description="LLM-based update of code examples", + author="GitHub Next", + author_email="nadi@ualberta.ca", + packages=find_packages("src"), + package_dir={"": "src"}, + package_data={"upgraider": ["resources/**/*"]}, + python_requires="==3.10.6", + url="https://github.com/githubnext/upgraider", + install_requires=requirements, + entry_points={ + "console_scripts": [ + "upgraider_brush = upgraider.update_brushes_code:main", + "explore_api= apiexploration.run_api_diff:main" + ], + }, +) diff --git a/src/apiexploration/Library.py b/src/apiexploration/Library.py new file mode 100644 index 0000000..0067cbb --- /dev/null +++ b/src/apiexploration/Library.py @@ -0,0 +1,166 @@ +import subprocess +from dataclasses_json import dataclass_json +import jsonpickle +import enum +from collections import OrderedDict +import inspect +import os +from dataclasses import dataclass, field + +@dataclass +class Parameter(): + name: str + type: str + default: str + +@dataclass +class Function(): + name: str + parameters: OrderedDict[Parameter] + return_annotation: object + +class 
DiffType(enum.Enum): + ''' + Enum for the type of difference between two functions. + Currently only supports added and removed functions and general parameter changes (Without the specific type of change) + ''' + UNKNOWN = -1 + ADDED = 1 + REMOVED = 2 + PARAMETERS_CHANGED = 3 + +@dataclass +class FunctionDiff(): + old_function: Function | None + new_function: Function | None + diff_type: DiffType = DiffType.UNKNOWN + +@dataclass_json +@dataclass +class Library(): + name: str + ghurl: str + baseversion: str + currentversion: str + path: str = field(default_factory=str) + +@dataclass +class LibraryDiff(): + library: Library + baseapi: list[Function] = field(default_factory=list) + currentapi: list[Function] = field(default_factory=list) + api_diff: list[FunctionDiff] = field(default_factory=list) + + +def load_api(library: str, filename: str): + with open(os.path.join(os.path.dirname(__file__), f"../../libraries/{library}/api/", filename), 'r') as jsonfile: + api = jsonpickle.decode(jsonfile.read()) + return api + +def diff_api_versions(library: Library): + ''' + Finds differences betweeen a library's base and current api + @param library: the library to analyze + @return: a list of FunctionDiff objects. + ''' + base_api = load_api(library.name, f"{library.name}_{library.baseversion}.json") + current_api = load_api(library.name, f"{library.name}_{library.currentversion}.json") + differences = [] + + #compare functions in baseapi and currentapi + for old_fn_name in base_api.keys(): + if old_fn_name not in current_api.keys(): + print(f"Function {old_fn_name} has been removed") + diff = FunctionDiff(base_api[old_fn_name], None, DiffType.REMOVED) + differences.append(diff) + else: + old_function = base_api[old_fn_name] + new_function = current_api[old_fn_name] + + if old_function['parameters'].keys() != new_function['parameters'].keys(): + print(f"Function {old_fn_name} has changed parameters") + diff = FunctionDiff(old_function, new_function, DiffType.PARAMETERS_CHANGED) + differences.append(diff) + + + #find new added functions in currentapi + new_functions = set(current_api.keys()).difference(set(base_api.keys())) + for new_fn_name in new_functions: + print(f"Function {new_fn_name} has been added") + diff = FunctionDiff(None, current_api[new_fn_name], DiffType.ADDED) + differences.append(diff) + + return differences + +def analyze_signature(callable, fqn: str) -> dict: + ''' + Analyze the signature of a function or class constructor (must be a callable object) + @param callable: the callable object + @param fqn: the fully qualified name of the callable object + @return: a dictionary with a single function; the key is the fqn and the value is the Function object + ''' + + try: + signature = inspect.signature(callable) + except ValueError: + print("WARN: Could not get signature for function: ", fqn) + return {} + + parameters = OrderedDict() + for param_name, param in signature.parameters.items(): + parameter = Parameter(param_name, param.annotation, param.default) + parameters.update({param_name: parameter}) + + function = Function(fqn, parameters, signature.return_annotation) + return {fqn: function} + +def get_functions(python_class) -> dict: + ''' + Get all member functions of a class + @param python_class: the class to analyze + @return: a dictionary of all functions in the class; key is the fqn and the value is the Function object + ''' + api = {} + for name, data in inspect.getmembers(python_class, inspect.isfunction): + if name.startswith("_"): + continue + fqn = 
".".join([python_class.__module__, python_class.__name__, name]) + api.update(analyze_signature(data, fqn)) + return api + +def analyze_class(python_class, fqn): + ''' + Analyze a class and return a dictionary of all functions in the class, including the constructor + @param python_class: the class to analyze + @param fqn: the fully qualified name of the class + @return: a dictionary of all functions in the class, including constructors; key is the fqn and the value is the Function object + ''' + api = {} + api.update(analyze_signature(python_class, fqn)) + api.update(get_functions(python_class)) + return api + +def analyze_module(module): + ''' + Analyze a module and return a dictionary of all functions in the module and its classes + @param module: the module to analyze + @return: a dictionary of all functions in the module and its classes; key is the fqn and the value is the Function object + ''' + api = {} + + for name, data in inspect.getmembers(module): + if inspect.isclass(data): + fqn = ".".join([module.__name__, name]) + api.update(analyze_class(data,fqn) ) + + elif inspect.isfunction(data): + fqn = ".".join([module.__name__, name]) + api.update(analyze_signature(data, fqn)) + + + return api + + + + + diff --git a/src/apiexploration/__init__.py b/src/apiexploration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/apiexploration/explore_api.py b/src/apiexploration/explore_api.py new file mode 100644 index 0000000..f31b81b --- /dev/null +++ b/src/apiexploration/explore_api.py @@ -0,0 +1,22 @@ + +import importlib +import argparse +import jsonpickle +from Library import analyze_module + +# This script is used to explore the API of a module and save it to a json file +# It is triggered by load_module.sh in a separate venv, so we can import the desired version of the module + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--module_name", help="The name of the module to explore") + parser.add_argument("--module_version", help="The version of the module to explore") + parser.add_argument("--main_venv_path", help="The path to the folder where the main venv is running from") + args = parser.parse_args() + + module = importlib.import_module(args.module_name) + api = analyze_module(module) + + with open(f"{args.main_venv_path}/libraries/{args.module_name}/api/{args.module_name}_{args.module_version}.json", "w") as file: + json_obj = jsonpickle.encode(api, unpicklable=False, indent=3) + file.write(json_obj) \ No newline at end of file diff --git a/src/apiexploration/load_module.sh b/src/apiexploration/load_module.sh new file mode 100755 index 0000000..f7d5a0c --- /dev/null +++ b/src/apiexploration/load_module.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +module_name=$1 +module_version=$2 + +MAIN_VENV=`pwd` + +cd $SCRATCH_VENV + +source .venv/bin/activate + +pip install jsonpickle + +pip uninstall -y $module_name + +pip install -q -v "$module_name==$module_version" > /dev/null + +pip show $module_name | grep Version + +python $MAIN_VENV/src/apiexploration/explore_api.py --module_name=$module_name --module_version=$module_version --main_venv=$MAIN_VENV + +deactivate diff --git a/src/apiexploration/run_api_diff.py b/src/apiexploration/run_api_diff.py new file mode 100644 index 0000000..7c82463 --- /dev/null +++ b/src/apiexploration/run_api_diff.py @@ -0,0 +1,50 @@ +from Library import Library, diff_api_versions +import subprocess +import jsonpickle +import os +from os import environ as env +import csv +import pandas as pd + +from dotenv 
import load_dotenv + +load_dotenv() + +def main(): + script_dir = os.path.dirname(__file__) + + for lib_dir in os.listdir(os.path.join(script_dir, "../../libraries")): + + if lib_dir.startswith('.'): + continue + + lib_path = os.path.join(script_dir, f"../../libraries/{lib_dir}") + + with open(os.path.join(lib_path, "library.json"), 'r') as jsonfile: + libinfo = jsonpickle.decode(jsonfile.read()) + library = Library(libinfo['name'], libinfo['ghurl'], libinfo['baseversion'], libinfo['currentversion']) + + if not os.path.exists(os.path.join(script_dir, f"../../libraries/{lib_dir}/api/{library.name}_{library.baseversion}.json")): + subprocess.run([f"{script_dir}/load_module.sh", library.name, library.baseversion], check=True) + else: + print(f"Skipping analyzing API of {library.name} {library.baseversion} because it already exists") + + if not os.path.exists(os.path.join(script_dir, f"../../libraries/{lib_dir}/api/{library.name}_{library.currentversion}.json")): + subprocess.run([f"{script_dir}/load_module.sh", library.name, library.currentversion], check=True) + else: + print(f"Skipping analyzing API of {library.name} {library.currentversion} because it already exists") + + differences = diff_api_versions(library) + library.api_diff = differences + jsondata = jsonpickle.encode(differences,unpicklable=False, indent=3) + + # write differences to json file + output_json_file = os.path.join(script_dir, f"../../libraries/{lib_dir}/api/", f"{library.name}_{library.baseversion}_{library.currentversion}_diff.json") + with open(output_json_file, 'w') as jsonfile: + jsonfile.write(jsondata) + + df = pd.read_json(jsondata) + df.to_csv(os.path.join(script_dir, f"../../libraries/{lib_dir}/api/{library.name}_{library.baseversion}_{library.currentversion}_diff.csv")) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/benchmark/__init__.py b/src/benchmark/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/benchmark/list_libraries.py b/src/benchmark/list_libraries.py new file mode 100644 index 0000000..6d2fd90 --- /dev/null +++ b/src/benchmark/list_libraries.py @@ -0,0 +1,39 @@ +import os +import logging as log +from apiexploration.Library import Library +import json +import jsonpickle + +def list_libraries(): + libraries = [] + script_dir = os.path.dirname(__file__) + libraries_folder = os.path.join(script_dir, "../../libraries") + + for lib_dir in os.listdir(libraries_folder): + if lib_dir.startswith('.'): + continue + + lib_path = os.path.join(libraries_folder, f"{lib_dir}") + with open(os.path.join(lib_path, "library.json"), 'r') as jsonfile: + libinfo = json.load(jsonfile) + library = Library( + name=libinfo['name'], + ghurl=libinfo['ghurl'], + baseversion=libinfo['baseversion'], + currentversion=libinfo['currentversion'], + path=lib_path + ) + + libraries.append(library) + + return libraries + +if __name__ == "__main__": + libraries = list_libraries() + + # the Library class is not json serializable because its reliance on OrderedDict + # so we use jsonpickle to encode the libraries + lib_json = jsonpickle.encode(libraries, unpicklable=False) + + print(lib_json) + \ No newline at end of file diff --git a/src/benchmark/parse_reports.py b/src/benchmark/parse_reports.py new file mode 100644 index 0000000..2ad0321 --- /dev/null +++ b/src/benchmark/parse_reports.py @@ -0,0 +1,210 @@ +import os +from upgraider.Report import Report, DBSource, FixStatus +import jsonpickle +import argparse + +def percentage(num, denom): + if denom == 0: + return "--" + + 
return f"{(num/denom)*100:.2f}%" + +def display_perc(percent): + # if percent is a number + if isinstance(percent, float): + if percent is None: + return "--" + + return f"{percent:.2f}%" + + return f"{percent}%" + +def get_display_status(status): + + if status == None: + return "N/A" + if status == FixStatus.FIXED: + return ":white_check_mark:" + elif status == None or status == FixStatus.NOT_FIXED: + return ":x:" + elif status == FixStatus.NEW_ERROR: + return ":warning:" + else: + return ":question:" + +def get_total_display(total_fixed_model, total_fixed_doc, total_examples): + largest_total = max(total_fixed_model, total_fixed_doc) + model_display = f"{total_fixed_model} ({percentage(total_fixed_model, total_examples)})" + doc_display = f"{total_fixed_doc} ({percentage(total_fixed_doc, total_examples)})" + + if largest_total == total_fixed_model: + model_display = f"**{model_display}**" + + if largest_total == total_fixed_doc: + doc_display = f"**{doc_display}**" + + return f"Total | {total_examples} | {model_display}| {doc_display} |" + +def display_detailed_stats(title, reports: dict): + + print(f"# {title}") + print(f"Lib | Example | Model Only | Model + Doc |") + print("----|---------|------------:|------------:|") + total_examples = 0 + total_fixed_model = 0 + total_fixed_doc = 0 + + for lib, report in reports.items(): + if report == {}: + continue + + doc_report = report.get(DBSource.documentation.value) + model_report = report.get("modelonly") + + for example in doc_report.snippets.keys(): + modelonly_status = model_report.snippets[example]['fix_status'] if model_report else None + doc_status = doc_report.snippets[example]['fix_status'] if doc_report else None + + print(f"{lib} | {example} | {get_display_status(modelonly_status)} | {get_display_status(doc_status)} ") + + total_examples += len(doc_report.snippets.keys()) + total_fixed_model += model_report.num_fixed if model_report else 0 + total_fixed_doc += doc_report.num_fixed if doc_report else 0 + + print(get_total_display(total_fixed_model, total_fixed_doc, total_examples)) + + +def display_report(title, reports: dict): + print(f"# {title}") + print(f"| Library | # Snippets | Unique APIs | # (%) Updated | # (%) Use Ref | # (%) Fixed |") + print(f"| --- | --: | --: | --: | --: | --: |") + + + for lib, report in reports.items(): + if report == {}: #report might be empty in case of baseline comparisons where baseline doesn't have this lib + continue + + try: + doc_report = report[DBSource.documentation.value] + + doc_display = f"{doc_report.num_updated} ({display_perc(doc_report.percent_updated)}) | {doc_report.num_updated_w_refs} ({display_perc(doc_report.percent_updated_w_refs)}) | {doc_report.num_fixed} ({display_perc(doc_report.percent_fixed)})" + num_snippets = doc_report.num_snippets + num_apis = doc_report.num_apis + except KeyError: + doc_display = "N/A | N/A | N/A" + + + print(f"| {lib} | {num_snippets} | {num_apis} | {doc_display} | ") + +def parse_json_report(report_path: str): + with open(report_path, 'r') as f: + report_data = jsonpickle.decode(f.read()) + report_data['percent_updated'] = (report_data['num_updated']/report_data['num_snippets']) * 100 if report_data['num_snippets'] > 0 else 0 + report_data['percent_fixed'] = (report_data['num_fixed']/ report_data['num_updated']) * 100 if report_data['num_updated'] > 0 else 0 + report_data['percent_updated_w_refs'] = (report_data['num_updated_w_refs']/report_data['num_updated']) * 100 if report_data['num_updated'] > 0 else 0 + report = Report(**report_data) + + # 
sort list of snippets alphabetically + report.snippets = {k: v for k, v in sorted(report.snippets.items(), key=lambda item: item[0])} + return report + +def parse_reports(output_dir: str): + results = {} + for lib_dir in os.listdir(output_dir): + if lib_dir.startswith('.'): + continue + + results[lib_dir] = {} + doc_results_path = os.path.join(output_dir, lib_dir, DBSource.documentation.value, "report.json") + if os.path.exists(doc_results_path): + results[lib_dir][DBSource.documentation.value] = parse_json_report(doc_results_path) + + modelonly_results_path = os.path.join(output_dir, lib_dir, "modelonly", "report.json") + if os.path.exists(modelonly_results_path): + results[lib_dir]['modelonly'] = parse_json_report(modelonly_results_path) + + # sort results by lib alphabetically + results = dict(sorted(results.items(), key=lambda item: item[0])) + return results + +def pp_diff(diff, percent: bool = False, lower_is_better: bool = False): + + if (percent): + diff = round(diff, 2) + + if (diff > 0): + display = f"+{diff}" + elif (diff == 0): + display = "±0" + else: + display = f"{diff}" + + if (lower_is_better): + if (diff < 0): + return f"**{display}**" + else: #higher is better + if (diff > 0): + return f"**{display}**" + + return display + + +def compare_to_baseline(curr_results: dict, baseline: dict): + diff_stats = {} + for lib_name, lib_reports in curr_results.items(): + try: + diff_stats[lib_name] = {} + for source in [DBSource.modelonly.value, DBSource.documentation.value]: + if (source not in lib_reports): + #ignore missing db sources (current run may have not used them) + continue + + if (lib_name not in baseline or source not in baseline[lib_name]): + #ignore missing db sources (previous run may have not used them) + continue + + curr_report = lib_reports[source] + baseline_report = baseline[lib_name][source] + + diff_stats[lib_name][source] = Report( + library = curr_report.library, + num_snippets= curr_report.num_snippets, + num_apis= curr_report.num_apis, + num_updated= pp_diff(curr_report.num_updated - baseline_report.num_updated), + num_updated_w_refs= pp_diff(curr_report.num_updated_w_refs - baseline_report.num_updated_w_refs), + num_fixed= pp_diff(curr_report.num_fixed - baseline_report.num_fixed), + percent_updated= pp_diff(curr_report.percent_updated - baseline_report.percent_updated, True), + percent_updated_w_refs= pp_diff(curr_report.percent_updated_w_refs - baseline_report.percent_updated_w_refs, True), + percent_fixed= pp_diff(curr_report.percent_fixed - baseline_report.percent_fixed, True) + ) + except KeyError: + print(f"Skipping {lib_name} as it is not in baseline") + #library not in baseline + continue + + return diff_stats + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Parse and display reports') + parser.add_argument('--outputdir', type=str, help='output folder containing results') + parser.add_argument('--baselinedir', type=str, help='optional baseline results folder to compare to', default=None) + + + args = parser.parse_args() + + print(f"## Interpreting stats") + print(f"- **# (%) Updated**: Num of snippets that the model said should be updated. % in relation to total snippets") + print(f"- **# (%) Updated w/ Refs**: Num of snippets that the model used a reference for updating. % in relation to num of updated snippets") + print(f"- **# (%) Fixed**: Num of snippets the model was able to fix. 
% in relation to num of updated snippets (Reglardless of used refs)") + print("\n") + + results = parse_reports(args.outputdir) + display_report("Fixed Snippets Stats", results) + + display_detailed_stats("Per example results", results) + + if (args.baselinedir is not None): + baseline = parse_reports(args.baselinedir) + diff_stats = compare_to_baseline(results, baseline) + display_report("Comparison to Baseline", diff_stats) + diff --git a/src/upgraider/Database.py b/src/upgraider/Database.py new file mode 100644 index 0000000..ea4bf34 --- /dev/null +++ b/src/upgraider/Database.py @@ -0,0 +1,52 @@ +from sqlalchemy import create_engine +from sqlalchemy.orm import declarative_base, sessionmaker +from sqlalchemy import Column, Integer, String, Text +import json +import os + +script_path = os.path.dirname(os.path.realpath(__file__)) +Base = declarative_base() +engine = create_engine( + f"sqlite:///{script_path}/resources/database/releasenotes.db", + echo=False, +) +Session = sessionmaker(bind=engine) + +class LibReleaseNote(Base): + __tablename__ = "lib_release_notes" + id = Column(Integer, primary_key=True) + library = Column(String) + version = Column(String) + filename = Column(String) + +class DeprecationComment(Base): + __tablename__ = "deprecation_comments" + id = Column(Integer, primary_key=True) + lib_release_note = Column(Integer) + content = Column(String) + embedding = Column(Text) + + +def get_embedded_doc_sections() -> list[DeprecationComment]: + session = Session() + sections = ( + session.query(DeprecationComment) + .filter(DeprecationComment.embedding != "NULL") + .filter(DeprecationComment.embedding != None) + .all() + ) + + session.close() + return sections + +def load_embeddings( + sections: list[DeprecationComment], +) -> dict[int, list[float]]: + """ + Read the section embeddings and their keys from the database + """ + + return { + section.id: json.loads(section.embedding) + for section in sections + } \ No newline at end of file diff --git a/src/upgraider/Model.py b/src/upgraider/Model.py new file mode 100644 index 0000000..c63bc70 --- /dev/null +++ b/src/upgraider/Model.py @@ -0,0 +1,354 @@ +# some code in this script is based off https://github.com/openai/openai-cookbook/blob/main/examples/Question_answering_using_embeddings.ipynb + +import numpy as np +import openai +import tiktoken +from os import environ as env +from dotenv import load_dotenv +from string import Template +import os +import re +from upgraider.Database import load_embeddings, get_embedded_doc_sections +from upgraider.Report import UpdateStatus, ModelResponse, DBSource +import logging as log +import requests +import json + +load_dotenv(override=True) + +EMBEDDING_MODEL = "text-embedding-ada-002" + +#TODO: use token length +MAX_SECTION_LEN = 500 +SEPARATOR = "\n* " + +ENCODING = "cl100k_base" # encoding for text-embedding-ada-002 + +encoding = tiktoken.get_encoding(ENCODING) +separator_len = len(encoding.encode(SEPARATOR)) + +COMPLETIONS_API_PARAMS = { + "temperature": 0.0, + "model": "code-cushman-001" +} + +GPT_3_5_TURBO_API_PARAMS = { + "temperature": 0.0, + "max_tokens": 300, + "model": "gpt-3.5-turbo" +} + +def get_update_status(update_status: str) -> UpdateStatus: + if update_status == "Update": + return UpdateStatus.UPDATE + elif update_status == "No update": + return UpdateStatus.NO_UPDATE + else: + print(f"WARNING: unknown update status {update_status}") + return UpdateStatus.UNKNOWN + +def strip_python_keyword(code: str) -> str: + """ + The model sometimes adds a python keyword to the 
beginning of the code snippet.
+    This function removes that keyword.
+    """
+    if code.startswith("python"):
+        return "\n".join(code.splitlines()[1:])
+    else:
+        return code
+
+def find_reason_in_response(model_response: str) -> str:
+
+    reason = None
+
+    prefixes = ["Reason for update:"]
+    # try first the case where the model respects the enumeration
+    reason_matches = re.search(r"^2\.(.*)", model_response, re.MULTILINE)
+    reason = reason_matches.group(1).strip() if reason_matches else None
+
+    if reason is not None:
+        # check if reason starts with any of the prefixes and strip out the prefix
+        for prefix in prefixes:
+            if prefix in reason:
+                reason = reason[len(prefix):].strip()
+                break
+    else:
+        # did not have enumeration so let's try to search in the response
+        for prefix in prefixes:
+            reason_matches = re.search(r"^.*" + prefix + r"(.*)", model_response, re.MULTILINE)
+            if reason_matches:
+                matched_value = reason_matches.group(1).strip()
+                # if the group is empty, then it just matched the prefix
+                # then it still didn't capture the reasons (could be list)
+                if matched_value != '':
+                    reason = matched_value
+                    break
+
+            multi_reason_matches = re.search(r"^.*" + prefix + "\n*(?P<reasons>(-(.*)\n)+)", model_response, re.MULTILINE)
+            if multi_reason_matches:
+                reason = multi_reason_matches.group("reasons").strip()
+                if len(reason.splitlines()) == 1 and reason.startswith("-"):
+                    # if it's a single reason, remove the - since it's not
+                    # really a list
+                    reason = reason[1:].strip()
+                break
+
+    if reason == 'None':
+        reason = None
+
+    return reason
+
+def find_references_in_response(model_response: str) -> str:
+    references = None
+    reference_keywords = ['Reference used:', 'Reference number:', 'References used:', 'Reference numbers used:', 'List of reference numbers used:']
+    reference_matches = re.search(r"^3\.(.*)\n", model_response, re.MULTILINE)
+    references = reference_matches.group(1).strip() if reference_matches else None
+
+    # response did not follow enumerated format
+    if references == None:
+        for keyword in reference_keywords:
+            if keyword in model_response:
+                references = model_response.split(keyword)[1].strip()
+
+                if references.strip('.') == 'No references used':
+                    references = None
+
+                break
+
+    return references
+
+
+def parse_model_response(model_response: str) -> ModelResponse:
+
+    # match the updated code by looking for the fenced code block, even without the correct enumeration
+    updated_code_response = re.search(r"\s*(```)\s*([\s\S]*?)(```|$)", model_response)
+    updated_code = None
+    if updated_code_response:
+        updated_code = strip_python_keyword(updated_code_response.group(2).strip())
+        if updated_code != "" and "No changes needed" not in updated_code:
+            update_status = UpdateStatus.UPDATE
+        else:
+            update_status = UpdateStatus.NO_UPDATE
+    else:
+        if "No update" in model_response:
+            update_status = UpdateStatus.NO_UPDATE
+        else:
+            update_status = UpdateStatus.NO_RESPONSE
+
+    reason = find_reason_in_response(model_response)
+    references = find_references_in_response(model_response)
+
+    response = ModelResponse(
+        update_status = update_status,
+        references = references,
+        updated_code = updated_code,
+        reason = reason
+    )
+
+    return response
+
+
+def get_embedding(text: str, model: str = EMBEDDING_MODEL) -> list[float]:
+    """
+    Returns the embedding for the supplied text. 
+    """
+    openai.api_key = env['OPENAI_API_KEY']
+
+    try:
+        result = openai.Embedding.create(model=model, input=text)
+    except openai.error.InvalidRequestError as e:
+        print(f"ERROR: {e}")
+        return None
+
+    return result["data"][0]["embedding"]
+
+
+def vector_similarity(x: list[float], y: list[float]) -> float:
+    """
+    Returns the similarity between two vectors.
+
+    Because OpenAI Embeddings are normalized to length 1, the cosine similarity is the same as the dot product.
+    """
+    if x is None or y is None:
+        return 0.0
+    return np.dot(np.array(x), np.array(y))
+
+def get_reference_list(
+    original_code: str,
+    sections: list[DeprecationWarning],
+    threshold: float = 0.0,
+):
+    chosen_sections = []
+    chosen_sections_len = 0
+    ref_count = 0
+
+    context_embeddings = load_embeddings(sections)
+
+    most_relevant_document_sections = order_document_sections_by_query_similarity(
+        original_code, context_embeddings, threshold
+    )
+
+    for similarity, section_index in most_relevant_document_sections:
+
+        if chosen_sections_len > MAX_SECTION_LEN:
+            break
+
+        # Add sections as context, until we run out of space.
+        section_content = [
+            section for section in sections if section.id == section_index][0].content
+
+        section_tokens = section_content.split(" ")
+
+        if len(section_tokens) < 3:
+            continue # skip one or two word references
+
+        len_if_added = chosen_sections_len + len(section_tokens) + separator_len
+
+        # if current section will exceed max length, truncate it
+        if len_if_added > MAX_SECTION_LEN:
+            section_content = " ".join(
+                section_tokens[: MAX_SECTION_LEN - chosen_sections_len]
+            )
+
+        chosen_sections_len = len_if_added
+        ref_count += 1
+
+        chosen_sections.append(
+            "\n" + str(ref_count) + ". " + section_content.replace("\n", " ")
+        )
+
+    return chosen_sections
+
+def get_readycontext_refs_list(
+    ready_context: str
+):
+    chosen_sections = []
+    current_length = 0
+    ref_count = 1
+    for context in ready_context:
+        if current_length < MAX_SECTION_LEN:
+            chosen_sections.append(
+                "\n" + str(ref_count) + ". " + context.replace("\n", " ")
+            )
+            ref_count += 1
+            current_length += len(context.split(" "))
+
+    return chosen_sections
+
+def order_document_sections_by_query_similarity(
+    query: str,
+    contexts: dict[(int, int), np.array],
+    threshold: float = None
+) -> list[(float, (int, int))]:
+    """
+    Find the query embedding for the supplied query, and compare it against all of the pre-calculated document embeddings
+    to find the most relevant sections.
+
+    Return the list of document sections, sorted by relevance in descending order. 
+ """ + query_embedding = get_embedding(query) + + if query_embedding is None: + return [] + + document_similarities = sorted( + [ + (vector_similarity(query_embedding, doc_embedding), doc_index) + for doc_index, doc_embedding in contexts.items() + ], + reverse=True, + ) + + if threshold: + document_similarities = [sim for sim in document_similarities if sim[0] > threshold] + + + return document_similarities + +def construct_fixing_prompt( + original_code: str, + sections: list[DeprecationWarning], + ready_context: str = None, + threshold: float = None, +): + # print("constructing prompt...") + + if not ready_context: + references = get_reference_list(original_code=original_code, sections=sections, threshold=threshold) + else: + references = get_readycontext_refs_list(ready_context=ready_context) + + script_dir = os.path.dirname(__file__) + with open(os.path.join(script_dir, "resources/chat_template.txt"), "r") as file: + chat_template = Template(file.read()) + prompt_text = chat_template.substitute(original_code=original_code, references="".join(references)) + + return prompt_text, len(references) + +def display_conversation(messages: list[dict[str, str]]): + for message in messages: + print(message["role"] + ": " + message["content"] + '\n') + +def fix_suggested_code( + query: str, + show_prompt: bool = False, + db_source: str = DBSource.documentation, + model: str = "gpt-3.5", + threshold: float = None, + ready_context: str = None, +) : + + sections = None + if not ready_context: + if db_source == DBSource.documentation: + sections = get_embedded_doc_sections() + elif db_source == DBSource.modelonly: + sections = [] + else: + raise ValueError(f"Invalid db_source {db_source}") + + prompt_text, ref_count = construct_fixing_prompt(original_code=query, sections=sections, ready_context=ready_context, threshold=threshold) + + if model == "gpt-3.5": + prompt = [ + {"role": "system", "content":"You are a smart code reviewer who can spot code that uses a non-existent or deprecated API."}, + {"role": "user", "content": prompt_text} + ] + model_response, parsed_response = fix_suggested_code_chat(prompt) + elif model == "gpt-4": + model_response, parsed_response = fix_suggested_code_completion(prompt_text) + + return prompt_text, model_response, parsed_response, ref_count + + + +def fix_suggested_code_chat( + prompt: list[str] +) : + # print("Fixing code with chat API....") + + openai.api_key = env['OPENAI_API_KEY'] + + response = openai.ChatCompletion.create(messages=prompt, **GPT_3_5_TURBO_API_PARAMS) + response_text = response['choices'][0]['message']['content'] + + return response_text, parse_model_response(response_text) + +def fix_suggested_code_completion( + prompt: str +) -> str: + gpt4_endpoint = env['GPT4_ENDPOINT'] + auth_headers = env['GPT4_AUTH_HEADERS'] + headers = { + "Content-Type": "application/json", + **json.loads(auth_headers), + } + json_data = { + 'prompt': prompt, + 'temperature': 0, + 'best_of': 1, + 'max_tokens': 300 + } + + response = requests.post(gpt4_endpoint, headers=headers, data=json.dumps(json_data)).json() + response_text = response['choices'][0]['text'].strip(" \n") + return response_text, parse_model_response(response_text) diff --git a/src/upgraider/Report.py b/src/upgraider/Report.py new file mode 100644 index 0000000..de9f6d2 --- /dev/null +++ b/src/upgraider/Report.py @@ -0,0 +1,89 @@ +from enum import Enum +from apiexploration.Library import Library, FunctionDiff +from dataclasses import dataclass +from dataclasses_json import dataclass_json + +class 
DBSource(Enum): + documentation = "doc" + modelonly = "modelonly" + + def __eq__(self, other): + if isinstance(other, str): + return self.value == other + return super().__eq__(other) + +class ProblemType(Enum): + DEPRECATION_WARNING = "DEPRECATION_WARNING" + ERROR = "ERROR" + +@dataclass +class RunProblem: + type: str + name: str + element_name: str + target_obj: str | None = None + +@dataclass +class RunResult: + problem_free: bool # true if no error or warning, false otherwise + problem: RunProblem = None + msg: str = None + +class UpdateStatus(Enum): + UPDATE = "UPDATE" + NO_UPDATE = "NO_UPDATE" + UNKNOWN = "UNKNOWN" + NO_RESPONSE = "NO_RESPONSE" + + def __eq__(self, other): + if isinstance(other, str): + return self.value == other + return super().__eq__(other) + +class FixStatus(Enum): + FIXED = "FIXED" + NOT_FIXED = "NOT_FIXED" + NEW_ERROR = "NEW_ERROR" + + def __eq__(self, other): + if isinstance(other, str): + return self.value == other + return super().__eq__(other) + +@dataclass_json +@dataclass +class ModelResponse: + update_status: UpdateStatus + references: str + updated_code: str + reason: str + +@dataclass_json +@dataclass +class SnippetReport: + original_file: str + api: str + prompt_file: str + original_run: RunResult + model_response: ModelResponse + model_reponse_file: str + num_references: int + modified_file: str + modified_run: RunResult + fix_status: FixStatus + diff: str = None + +@dataclass_json +@dataclass +class Report: + library: Library + num_snippets: int = None + num_apis: int = None + db_source: str = None + num_fixed: int = None + num_updated: int = None + num_updated_w_refs: int = None + snippets: list[SnippetReport] = None + percent_updated: float = None + percent_updated_w_refs: float = None + percent_fixed: float = None \ No newline at end of file diff --git a/src/upgraider/__init__.py b/src/upgraider/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/upgraider/fix_lib_examples.py b/src/upgraider/fix_lib_examples.py new file mode 100644 index 0000000..6e04fe7 --- /dev/null +++ b/src/upgraider/fix_lib_examples.py @@ -0,0 +1,236 @@ + +import argparse +from upgraider.Report import Report, SnippetReport, UpdateStatus, RunResult, FixStatus +from upgraider.run_code import run_code +from upgraider.Model import fix_suggested_code +import os +import json +import difflib +from apiexploration.Library import Library +from enum import Enum +import ast +from collections import namedtuple, defaultdict +import time + +class ResultType(Enum): + PROMPT = 1 + RESPONSE = 2 + +Import = namedtuple("Import", ["module", "name", "alias"]) + +def _write_result(result: str, result_type:ResultType, output_dir: str, example_file: str): + result_file_root = os.path.splitext(example_file)[0] + + if result_type == ResultType.RESPONSE: + result_file = os.path.join(output_dir, f"responses/{result_file_root}_response.txt") + elif result_type == ResultType.PROMPT: + result_file = os.path.join(output_dir, f"prompts/{result_file_root}_prompt.txt") + else: + print(f"Invalid result type: {result_type}") + return + + os.makedirs(os.path.dirname(result_file), exist_ok=True) + with open(result_file, 'w') as f: + f.write(result) + return result_file + +def _determine_fix_status(original_code_result: RunResult, final_code_result: RunResult) -> FixStatus: + # original status is always an error or warning + if final_code_result.problem_free == True: + return FixStatus.FIXED + else: + if original_code_result.problem != final_code_result.problem: + return FixStatus.NEW_ERROR + 
else: + return FixStatus.NOT_FIXED + +#https://stackoverflow.com/questions/845276/how-to-print-the-comparison-of-two-multiline-strings-in-unified-diff-format +#https://stackoverflow.com/posts/845432/, Andrea Francia +def _unidiff(expected, actual): + """ + Helper function. Returns a string containing the unified diff of two multiline strings. + """ + expected=expected.splitlines(1) + actual=actual.splitlines(1) + + diff=difflib.unified_diff(expected, actual) + + return ''.join(diff) + +def _format_import(import_stmt: Import) -> str: + if import_stmt.module: + if import_stmt.alias is not None: + return f"from {'.'.join(import_stmt.module)} import {'.'.join(import_stmt.name)} as {import_stmt.alias}" + else: + return f"from {'.'.join(import_stmt.module)} import {'.'.join(import_stmt.name)}" + else: + if import_stmt.alias is not None: + return f"import {'.'.join(import_stmt.name)} as {import_stmt.alias}" + else: + return f"import {'.'.join(import_stmt.name)}" + +# GaretJax, https://stackoverflow.com/questions/9008451/python-easy-way-to-read-all-import-statements-from-py-module +def _get_imports(code: str) -> list[Import]: + try: + ast_root = ast.parse(code) + + for node in ast.iter_child_nodes(ast_root): + if isinstance(node, ast.Import): + module = [] + elif isinstance(node, ast.ImportFrom): + module = node.module.split('.') + else: + continue + + for n in node.names: + yield Import(module, n.name.split('.'), n.asname) + except: + return None + +def _fix_imports(old_code: str, updated_code: str) -> str: + old_imports = _get_imports(old_code) + updated_imports = _get_imports(updated_code) + + if old_imports is None or updated_imports is None: + print("WARNING: could not parse imports for either old or updated code") + return updated_code + + # if there is an old import that is not in the updated code, add it + for old_import in old_imports: + if old_import not in updated_imports: + updated_code = f"{_format_import(old_import)}\n{updated_code}" + + return updated_code + + +def fix_example(library: Library, + example_file: str, + examples_path: str, + requirements_file: str, + output_dir: str, + db_source: str, + model: str = 'gpt3-5', + threshold:float = None): + + example_file_path = os.path.join(examples_path, example_file) + + print(f"Fixing {example_file_path}...") + with open(example_file_path, 'r') as f: + original_code = f.read() + + original_code_result = run_code(library, example_file_path, requirements_file) + + prompt_text, model_response, parsed_response, ref_count = fix_suggested_code(original_code, show_prompt=False, db_source=db_source, model=model, threshold=threshold) + + print("Writing prompt to file...") + prompt_file = _write_result(prompt_text, ResultType.PROMPT, output_dir, example_file) + + print("Writing model response to file...") + model_response_file = _write_result(model_response, ResultType.RESPONSE, output_dir, example_file) + + final_code_result = None # will stay as None if no update occurs + updated_code_file = None + diff = None + updated_code = None + example_file_root = os.path.splitext(example_file)[0] + + if parsed_response.update_status == UpdateStatus.UPDATE: + updated_code = parsed_response.updated_code + if updated_code is None: + print(f"WARNING: update occurred for {example_file} but could not retrieve updated code") + else: + updated_code = _fix_imports(old_code=original_code, updated_code=updated_code) + updated_code_file = os.path.join(output_dir, f"updated/{example_file_root}_updated.py") + os.makedirs(os.path.dirname(updated_code_file), 
exist_ok=True) + with open(updated_code_file, 'w') as f: + f.write(updated_code) + + final_code_result = run_code(library, updated_code_file, requirements_file) + diff = _unidiff(original_code, updated_code) + + snippet_results = SnippetReport( + original_file=example_file, + api=example_file_root, # for now, file name is in format .py + prompt_file=prompt_file, + num_references=ref_count, + modified_file=updated_code_file, + original_run=original_code_result, + model_response=parsed_response, + model_reponse_file=model_response_file, + modified_run=final_code_result, + fix_status=_determine_fix_status(original_code_result, final_code_result) if final_code_result is not None else FixStatus.NOT_FIXED, + diff=diff + ) + + return snippet_results + + +def fix_examples(library: Library, output_dir: str, db_source: str, model:str, threshold: float = None): + print(f"=== Fixing examples for {library.name} with model {model}") + + report = Report(library) + snippets = {} + examples_path = os.path.join(library.path, "examples") + + if os.path.exists(examples_path): + requirements_file = os.path.join(library.path, "requirements.txt") + + if not os.path.exists(requirements_file): + requirements_file = None + + for example_file in os.listdir(examples_path): + if example_file.startswith('.'): + continue + + snippet_results = fix_example(library=library, example_file=example_file, examples_path=examples_path, requirements_file=requirements_file, output_dir=output_dir, db_source=db_source, model=model, threshold=threshold) + + print(f"Finished fixing {example_file}...") + snippets[example_file] = snippet_results + + # wait 30 seconds between each example + time.sleep(30) + + report.snippets = snippets + report.num_snippets = len(snippets) + report.db_source = db_source + report.num_fixed = len([s for s in snippets.values() if s.fix_status == FixStatus.FIXED]) + report.num_updated = len([s for s in snippets.values() if s.model_response.update_status == UpdateStatus.UPDATE]) + report.num_updated_w_refs = len([s for s in snippets.values() if s.model_response.update_status == UpdateStatus.UPDATE and s.model_response.references is not None and 'No references used' not in s.model_response.references]) + report.num_apis = len(set([s.api for s in snippets.values()])) + + output_json_file = os.path.join(output_dir, "report.json") + jsondata = report.to_json(indent=4) + os.makedirs(os.path.dirname(output_json_file), exist_ok=True) + with open(output_json_file, 'w') as jsonfile: + jsonfile.write(jsondata) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Fix example(s) for a given library') + parser.add_argument('--libpath', type=str, help='absolute path of target library folder', required=True) + parser.add_argument('--outputDir', type=str, help='absolute path of directory to write output to', required=True) + parser.add_argument('--dbsource', type=str, help='Which database to use for retrieval, doc (documentation) or modelonly to not augment with retrieval', required=True) + parser.add_argument('--threshold', type=float, help='Similarity Threshold for retrieval') + parser.add_argument('--examplefile', type=str, help='Specific example file to run on (optional). 
Only name of example file needed.', required=False)
+    parser.add_argument("--model", type=str, help="Which model to use for fixing", default="gpt-3.5", choices=["gpt-3.5", "gpt-4"])
+
+    args = parser.parse_args()
+    script_dir = os.path.dirname(__file__)
+
+    with open(os.path.join(args.libpath, "library.json"), 'r') as jsonfile:
+        libinfo = json.loads(jsonfile.read())
+        library = Library(
+            name=libinfo['name'],
+            ghurl=libinfo['ghurl'],
+            baseversion=libinfo['baseversion'],
+            currentversion=libinfo['currentversion'],
+            path=args.libpath
+        )
+    output_dir = os.path.join(script_dir, args.outputDir)
+
+    if args.examplefile is not None:
+        # fix a specific example
+        fix_example(library=library, example_file=args.examplefile, examples_path=os.path.join(library.path, "examples"), requirements_file=os.path.join(library.path, "requirements.txt"), output_dir=output_dir, db_source=args.dbsource, model=args.model, threshold=args.threshold)
+    else:
+        # fix all examples for this library
+        fix_examples(library=library, output_dir=output_dir, model=args.model, db_source=args.dbsource, threshold=args.threshold)
diff --git a/src/upgraider/populate_doc_db.py b/src/upgraider/populate_doc_db.py
new file mode 100644
index 0000000..487fa36
--- /dev/null
+++ b/src/upgraider/populate_doc_db.py
@@ -0,0 +1,125 @@
+
+from Model import get_embedding
+from docutils.utils import Reporter
+from docutils.core import publish_file
+from docutils.parsers.rst import roles, nodes
+from bs4 import BeautifulSoup
+import os
+from upgraider.Database import Session, DeprecationComment, LibReleaseNote
+import re
+import json
+
+def parse_html(html_file: str):
+    deprecation_items = []
+
+    with open(html_file, 'r') as f:
+        html = f.read()
+        soup = BeautifulSoup(html, 'html.parser')
+
+        for section in soup.find_all("div"):
+            section_id = section.get('id')
+
+            if section_id and ('deprecat' in section_id.lower() or 'api' in section_id.lower()):
+                for list_item in section.find_all("li"):
+                    deprecation_items.append(list_item.text)
+
+                for paragraph in section.find_all("p"):
+                    text = paragraph.text
+
+                    next_sibling = paragraph.find_next_sibling("pre")
+                    if next_sibling is not None:
+                        text += "\n" + next_sibling.text
+
+                    deprecation_items.append(text)
+
+
+    return deprecation_items
+
+def save_items(dep_items: list[str], session, release_id):
+    for item in dep_items:
+        embedding = json.dumps(get_embedding(item))
+        session.add(DeprecationComment(
+            content=item,
+            lib_release_note=release_id,
+            embedding=embedding
+        ))
+
+    session.commit()
+
+def get_version_from_filename(filename: str):
+    result = re.search(r"(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)?(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?", filename)
+    if result is not None:
+        version = result.group('major') + "." + result.group('minor')
+
+        if result.group('patch') is not None:
+            version += "." 
+ result.group('patch') + + return version + + return None + +def main(): + script_dir = os.path.dirname(__file__) + roles.register_generic_role('issue', nodes.emphasis) + roles.register_generic_role('ref', nodes.emphasis) + roles.register_generic_role('meth', nodes.emphasis) + roles.register_generic_role('class', nodes.emphasis) + roles.register_generic_role('func', nodes.emphasis) + roles.register_generic_role('attr', nodes.emphasis) + + libraries_folder = os.path.join(script_dir, "../../libraries") + for lib_dir in os.listdir(libraries_folder): + if lib_dir.startswith("."): + continue + + print(f"Populating DB with release note data for {lib_dir}...") + + session = Session() + + lib_path = os.path.join(script_dir, f"../../libraries/{lib_dir}") + + for note in os.listdir(os.path.join(lib_path, "releasenotes")): + version = get_version_from_filename(note) + print(f"Processing release note {note} for version {version}...") + if note.startswith(".") or not note.endswith(".rst"): + continue + + lib_release = session.query(LibReleaseNote).filter(LibReleaseNote.library == lib_dir).filter(LibReleaseNote.filename == note).first() + + if lib_release is not None: + continue # Release note already exists in DB + + lib_release = LibReleaseNote( + library=lib_dir, + filename=note, + version=version + ) + + session.add(lib_release) + session.commit() + + release_id = lib_release.id + + base_name = os.path.splitext(note)[0] + output_html_file = os.path.join(lib_path, "releasenotes", f"{base_name}.html") + + source_path = os.path.join(lib_path, "releasenotes", note) + + publish_file(source_path=source_path, writer_name='html', destination_path=output_html_file, settings_overrides={'report_level':Reporter.SEVERE_LEVEL}) + deprecated_items = parse_html(output_html_file) + + + print(f"Found {len(deprecated_items)} deprecated items for {note}") + save_items(deprecated_items, session=session, release_id=release_id) + os.remove(output_html_file) + print("Finished embedding and saving items") + + session.commit() + session.close() + + + + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/upgraider/resources/chat_template.txt b/src/upgraider/resources/chat_template.txt new file mode 100644 index 0000000..c5ae127 --- /dev/null +++ b/src/upgraider/resources/chat_template.txt @@ -0,0 +1,18 @@ +Given the provided numbered reference information, decide if the provided code needs to be updated. +Focus only on updates that do not change the code's functionality and are related to outdated, deprecated, or non-existent APIs. +You must reply in the following exact numbered format. +1. ```The full updated code snippet in a fenced code block``` or an empty fenced code block if you don't want to update the code +2. Reason for update (if any) +3. List of reference numbers used (if any) to update the code. 
If none of the references below were useful, say 'No references used' + +Provided code: + +``` +$original_code +``` + +Provided reference information: + +$references + +Your Response: \ No newline at end of file diff --git a/src/upgraider/resources/database/.gitattributes b/src/upgraider/resources/database/.gitattributes new file mode 100644 index 0000000..8ada963 --- /dev/null +++ b/src/upgraider/resources/database/.gitattributes @@ -0,0 +1 @@ +*.db filter=lfs diff=lfs merge=lfs -text diff --git a/src/upgraider/resources/database/releasenotes.db b/src/upgraider/resources/database/releasenotes.db new file mode 100644 index 0000000..25841f7 --- /dev/null +++ b/src/upgraider/resources/database/releasenotes.db @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d6440ced3968a37ec3dd346f36884c8282698cf2c6f84fc5c34e83cee902882 +size 562692096 diff --git a/src/upgraider/run_code.py b/src/upgraider/run_code.py new file mode 100644 index 0000000..34edb5d --- /dev/null +++ b/src/upgraider/run_code.py @@ -0,0 +1,69 @@ +import subprocess +import os +import re +import argparse +from Report import RunResult, RunProblem, ProblemType +from apiexploration.Library import Library + +from dotenv import load_dotenv + +load_dotenv() +script_dir = os.path.dirname(__file__) + +def find_attribute_error(error_msg: str): + attribute_err = re.search(r"AttributeError: (.*) object has no attribute (.*)\n",error_msg) + if attribute_err is not None: + return RunProblem(type=ProblemType.ERROR, name="AttributeError", element_name=attribute_err.group(2), target_obj=attribute_err.group(1)) + + attribute_err = re.search(r"AttributeError: module (.*) has no attribute (.*)\n", error_msg) + if attribute_err is not None: + return RunProblem(type=ProblemType.ERROR, name="AttributeError", element_name=attribute_err.group(2), target_obj=attribute_err.group(1)) + +def find_type_error(error_msg: str): + typeerror = re.search(r"TypeError: (.*) got an unexpected keyword argument (.*)\n", error_msg) + if typeerror is not None: + return RunProblem(type=ProblemType.ERROR, name="TypeError", element_name=typeerror.group(2), target_obj=typeerror.group(1)) + +def run_code(library: Library, file: str, requirements_file: str) -> RunResult: + print(f"Running {file}...") + + problem_free = True + run_result = RunResult(problem_free) + + try: + if requirements_file is not None: + result = subprocess.run([f"{script_dir}/run_code.sh", file, library.name, library.currentversion, requirements_file], check=True, stderr=subprocess.PIPE) + else: + result = subprocess.run([f"{script_dir}/run_code.sh", file, library.name, library.currentversion], check=True, stderr=subprocess.PIPE) + + error_msg = result.stderr.decode('utf-8') + + #usually DeprecationWarning or FutureWarning + future_res = re.search(r"(.*\.py):(\d*): (.*)Warning: (.*) (is|has been) deprecated (.*)\n",result.stderr.decode('utf-8')) + if future_res is not None: + warning = RunProblem(type=ProblemType.DEPRECATION_WARNING, name=future_res.group(3), element_name=future_res.group(4)) + run_result.problem = warning + run_result.problem_free = False + run_result.msg = error_msg + + except subprocess.CalledProcessError as e: + error_msg = e.stderr.decode('utf-8') + run_result.problem_free = False + run_result.msg = error_msg + + # look for more specific errors + if "AttributeError" in error_msg: + run_result.problem = find_attribute_error(error_msg) + elif "TypeError" in error_msg: + run_result.problem = find_type_error(error_msg) + + + return run_result + +if __name__ == 
"__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--file", help="The full path of the python file to run") + + args = parser.parse_args() + + print(run_code(f"{script_dir}/../../data/{args.file}")) \ No newline at end of file diff --git a/src/upgraider/run_code.sh b/src/upgraider/run_code.sh new file mode 100755 index 0000000..f15030f --- /dev/null +++ b/src/upgraider/run_code.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e + +filename=$1 +libname=$2 +libversion=$3 +reqfile=$4 + +echo "SCRATCH_VENV: $SCRATCH_VENV" + +cd $SCRATCH_VENV + +source .venv/bin/activate + +pip install --disable-pip-version-check $libname==$libversion + +if [[ ! -z "$reqfile" ]] ; then + pip install --disable-pip-version-check -r $reqfile +fi + +echo "Running $filename in venv" + +python $filename + +deactivate \ No newline at end of file diff --git a/src/upgraider/run_experiment.py b/src/upgraider/run_experiment.py new file mode 100644 index 0000000..f0ac13c --- /dev/null +++ b/src/upgraider/run_experiment.py @@ -0,0 +1,58 @@ +import os +import logging as log +from fix_lib_examples import fix_examples +from apiexploration.Library import Library +from upgraider.Report import DBSource +import json +import argparse + + +def main(): + threshold = 0.5 + print("Starting experiment...") + script_dir = os.path.dirname(__file__) + + parser = argparse.ArgumentParser(description='Run upgraider on all library examples') + parser.add_argument('--outputDir', type=str, help='directory to write output to', required=True) + parser.add_argument("--model", type=str, help="Which model to use for fixing", default="gpt-3.5", choices=["gpt-3.5", "gpt-4"]) + + args = parser.parse_args() + + libraries_folder = os.path.join(script_dir, "../../libraries") + output_dir = args.outputDir + model = args.model + + for lib_dir in os.listdir(libraries_folder): + if lib_dir.startswith('.'): + continue + lib_path = os.path.join(libraries_folder, lib_dir) + with open(os.path.join(libraries_folder, f"{lib_dir}/library.json"), 'r') as jsonfile: + libinfo = json.loads(jsonfile.read()) + library = Library( + name=libinfo['name'], + ghurl=libinfo['ghurl'], + baseversion=libinfo['baseversion'], + currentversion=libinfo['currentversion'], + path=lib_path + ) + + print(f"Fixing examples for {library.name} with no references...") + fix_examples( + library=library, + output_dir=os.path.join(output_dir, lib_dir, DBSource.modelonly.value), + db_source=DBSource.modelonly.value, + threshold=threshold, + model = model + ) + + print(f"Fixing examples for {library.name} with documentation...") + fix_examples( + library=library, + output_dir=os.path.join(output_dir, lib_dir, DBSource.documentation.value), + db_source=DBSource.documentation.value, + threshold=threshold, + model = model + ) + +if __name__ == "__main__": + main() diff --git a/src/upgraider/update_brushes_code.py b/src/upgraider/update_brushes_code.py new file mode 100644 index 0000000..d9ff093 --- /dev/null +++ b/src/upgraider/update_brushes_code.py @@ -0,0 +1,25 @@ +from upgraider.Model import fix_suggested_code, UpdateStatus +import sys +import textwrap + +def create_comment(reason: str) -> str: + lines = textwrap.wrap(reason, width=80) + lines = ["# " + line for line in lines] + return "\n".join(lines) + +def main(): + code = sys.stdin.read() + prompt_text, model_response, parsed_response, ref_count= fix_suggested_code(code, model="gpt-4") + + # stdout will be empty if there is no update + if (parsed_response.update_status == UpdateStatus.NO_UPDATE): + return + + if 
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..cb9f46b
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,8 @@
+import os
+import sys
+
+
+# Insert the root of the project into the path so we can import from
+# the `tests` package.
+root_path = os.path.abspath(os.path.join(__file__, "..", ".."))
+sys.path.insert(0, root_path)
diff --git a/tests/test_Model.py b/tests/test_Model.py
new file mode 100644
index 0000000..06813e0
--- /dev/null
+++ b/tests/test_Model.py
@@ -0,0 +1,139 @@
+from upgraider.Model import UpdateStatus, parse_model_response
+
+
+def test_correctly_formatted_response():
+    reference = "32639"
+    updated_code = """
+import numpy as np
+
+import pandas as pd
+
+cat = pd.Categorical(["a", "b", "c", "a"], ordered=True)
+dense_cat = np.asarray(cat)
+print(dense_cat)
+"""
+    reason = "The method Categorical.to_dense() has been deprecated and replaced with np.asarray(cat)."
+
+    response = f"""
+1. ```{updated_code}```
+2. {reason}
+3. {reference}
+    """
+
+    result = parse_model_response(response)
+    assert result.update_status == UpdateStatus.UPDATE
+    assert result.references == reference
+    assert result.updated_code == updated_code.strip()
+    assert result.reason == reason
+
+def test_incorrect_short_response():
+    reference = "No references used"
+
+    response = f"""
+{reference}
+    """
+
+    result = parse_model_response(response)
+    assert result.update_status == UpdateStatus.NO_RESPONSE
+
+def test_incorrect_long_response():
+    response = """
+    No updates needed.
+Reason: The code is using valid and up-to-date numpy APIs to create an array and sort it. No deprecated or non-existent APIs are being used.
+References used: No references used.
+"""
+    result = parse_model_response(response)
+
+    assert result.update_status == UpdateStatus.NO_UPDATE
+    assert result.references is None
+
+def test_reason_not_enumerated():
+    reason = "Here is the model's reason."
+    references = "3"
+    response = f"""
+Possible response:
+
+```
+some code
+```
+
+Reason for update: {reason}
+
+List of reference numbers used: {references}
+"""
+
+    result = parse_model_response(response)
+    assert result.update_status == UpdateStatus.UPDATE
+    assert result.reason == reason
+    assert result.references == references
+
+def test_reason_enumerated():
+    reason = "Here is the model's reason."
+    reference = "3"
+    response = f"""
+Possible response:
+
+```
+some code
+```
+
+2. {reason}
+3. {reference}
+"""
+    result = parse_model_response(response)
+    assert result.update_status == UpdateStatus.UPDATE
+    assert result.reason == reason
+    assert result.references == reference
+
+def test_no_reason():
+    response = f"""
+```
+some code
+```
+
+- Reason for update: None
+- List of reference numbers used: No references used
+"""
+    result = parse_model_response(response)
+    assert result.update_status == UpdateStatus.UPDATE
+    assert result.reason is None
+    assert result.references is None
+
+def test_codeexists_but_not_updated():
+    response = f"""
+Possible response:
+
+```
+# No changes needed
+import pandas as pd
+some code
+```
+
+- Reason for update: None
+- List of reference numbers used: No references used
+"""
+    result = parse_model_response(response)
+    assert result.update_status == UpdateStatus.NO_UPDATE
+    assert result.reason is None
+    assert result.references is None
+
+def test_enumerated_reason():
+    reason1 = "reason 1"
+    reason2 = "reason 2"
+    response = f"""
+```
+some code
+```
+
+Reason for update:
+
+- {reason1}
+- {reason2}
+
+List of reference numbers used:
+
+- 6
+"""
+    result = parse_model_response(response)
+    assert result.update_status == UpdateStatus.UPDATE
+    assert result.reason == f"- {reason1}\n- {reason2}"
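The response layout exercised by test_correctly_formatted_response above (numbered items for the updated code, the reason, and the references) can be sketched as follows. The updated code, reason text, and reference number are made up for illustration, and the snippet assumes the upgraider package is installed (for example via `python setup.py develop`, as the workflow does).

```
from upgraider.Model import UpdateStatus, parse_model_response

# Invented updated code, reason, and reference number.
updated_code = """
import math

print(math.dist((0, 0), (3, 4)))
"""
reason = "The old helper is deprecated in this made-up example, so the call was rewritten."
reference = "42"

# Mirrors the three-part response format used in test_correctly_formatted_response.
response = f"""
1. ```{updated_code}```
2. {reason}
3. {reference}
"""

result = parse_model_response(response)
assert result.update_status == UpdateStatus.UPDATE
assert result.updated_code == updated_code.strip()
assert result.reason == reason
assert result.references == reference
```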
diff --git a/tests/test_fix_lib_examples.py b/tests/test_fix_lib_examples.py
new file mode 100644
index 0000000..bab5fe2
--- /dev/null
+++ b/tests/test_fix_lib_examples.py
@@ -0,0 +1,42 @@
+from upgraider.fix_lib_examples import _fix_imports
+
+def test_basic_fix_imports():
+    old_code = """
+import pandas
+
+idx = pandas.Index([0,'1',3, 'fooo'])
+if idx.is_mixed():
+    print('mixed type')
+    """
+
+    new_code = """
+import pandas.api.types as pdtypes
+
+idx = pandas.Index([0,'1',3, 'fooo'])
+if pdtypes.is_any_real_numeric_dtype(idx):
+    print('mixed type')
+    """
+
+    expected_code = "import pandas\n" + new_code
+
+    assert _fix_imports(old_code, new_code) == expected_code
+
+def test_module_fix_imports():
+    old_code = """
+from pandas import Index
+from modulex import y as z
+
+print("hello")
+    """
+
+    new_code = """
+import pandas.api.types as pdtypes
+
+print("hello there")
+    """
+
+    # because we always add imports to the top of the file, they will be added in reverse order
+    # so just make sure the missing imports are there
+    fixed_code = _fix_imports(old_code, new_code)
+    assert "from modulex import y as z" in fixed_code
+    assert "from pandas import Index" in fixed_code
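A small usage sketch of _fix_imports, mirroring the tests above: an import that appears in the original snippet but was dropped from the updated snippet is re-added at the top of the result. The numpy before/after snippets are invented for illustration, and the snippet again assumes the upgraider package is installed.

```
from upgraider.fix_lib_examples import _fix_imports

# Invented before/after snippets; np.float was removed in newer numpy releases.
old_code = """
import numpy as np

print(np.float(3))
"""

new_code = """
print(float(3))
"""

fixed_code = _fix_imports(old_code, new_code)
# The import from the original snippet is carried over into the updated snippet,
# matching the behaviour asserted in the tests above.
assert "import numpy as np" in fixed_code
print(fixed_code)
```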