feat: implement the sliding tile puzzle env (#189)

Co-authored-by: Sasha <[email protected]> Co-authored-by: Clement Bonnet <[email protected]>
instadeepai · Mar 14, 2024 · a903c4f · a903c4f
1 parent bae3ab8
commit a903c4f
Show file tree

Hide file tree

Showing 24 changed files with 1,242 additions and 11 deletions.
diff --git a/docs/api/environments/sliding_tile_puzzle.md b/docs/api/environments/sliding_tile_puzzle.md
@@ -0,0 +1,8 @@
+::: jumanji.environments.logic.sliding_tile_puzzle.env.SlidingTilePuzzle
+    selection:
+      members:
+        - __init__
+        - reset
+        - step
+        - observation_spec
+        - action_spec
diff --git a/docs/env_anim/sliding_tile_puzzle.gif b/docs/env_anim/sliding_tile_puzzle.gif
diff --git a/docs/env_img/sliding_tile_puzzle.png b/docs/env_img/sliding_tile_puzzle.png
diff --git a/docs/environments/sliding_tile_puzzle.md b/docs/environments/sliding_tile_puzzle.md
@@ -0,0 +1,52 @@
+# Sliding Tile Puzzle Environment
+
+<p align="center">
+    <img src="../env_anim/sliding_tile_puzzle.gif" width="500"/>
+</p>
+
+This is a Jax JIT-able implementation of the classic [Sliding Tile Puzzle game](https://en.wikipedia.org/wiki/Sliding_puzzle).
+
+The Sliding Tile Puzzle game is a classic puzzle that challenges a player to slide (typically flat) pieces along certain routes (usually on a board) to establish a certain end-configuration. The pieces to be moved may consist of simple shapes, or they may be imprinted with colors, patterns, sections of a larger picture (like a jigsaw puzzle), numbers, or letters.
+
+The puzzle is often 3×3, 4×4 or 5×5 in size and made up of square tiles that are slid into a square base, larger than the tiles by one tile space, in a specific large configuration. Tiles are moved/arranged by sliding an adjacent tile into a position occupied by the missing tile, which creates a new space. The sliding puzzle is mechanical and requires the use of no other equipment or tools.
+
+## Observation
+
+The observation in the Sliding Tile Puzzle game includes information about the puzzle, the position of the empty tile, the action mask, and the step count.
+
+- `puzzle`: jax array (int32) of shape `(grid_size, grid_size)`, representing the current game state. Each element in the array corresponds to a puzzle tile. The tile represented by 0 is the empty tile.
+
+  - Here is an example of a random observation of the game board:
+
+        ```
+        [[ 1  2  3  4]
+         [ 5  6  7  8]
+         [ 9 10  0 12]
+         [13 14 15 11]]
+        ```
+  - In this array, the tile represented by 0 is the empty tile that can be moved.
+
+- `empty_tile_position`: jax array (int32) of shape `(2,)`, holding the (row, column) position of the empty tile in the grid. For example, `(2, 2)` represents the third row and the third column of a zero-indexed grid.
+
+- `action_mask`: jax array (bool) of shape `(4,)`, indicating which actions are valid in the current state of the environment. The actions include moving the empty tile up, right, down, or left. For example, an action mask `[True, False, True, False]` means that the valid actions are to move the empty tile upward or downward.
+
+- `step_count`: jax array (int32) of shape `()`, current number of steps in the episode.
+
+## Action
+
+The action space is a `DiscreteArray` of integer values in `[0, 1, 2, 3]`. Specifically, these four actions correspond to moving the empty tile: up (0), right (1), down (2), or left (3).
+
+## Reward
+
+The reward could be either:
+
+- **DenseRewardFn**: This reward function provides a dense reward based on the change in the number of correctly placed tiles between the current state and the next state. The reward is positive for each newly correctly placed tile and negative for each newly incorrectly placed tile.
+
+- **SparseRewardFn**: This reward function provides a sparse reward, only rewarding when the puzzle is solved.
+The reward is 1 if the puzzle is solved, and 0 otherwise.
+
+The goal in all cases is to solve the puzzle in a way that maximizes the reward.
+
+## Registered Versions 📖
+
+- `SlidingTilePuzzle-v0`, the Sliding Tile Puzzle with a grid size of 5x5.
diff --git a/jumanji/__init__.py b/jumanji/__init__.py
@@ -134,3 +134,7 @@
 register(id="Sokoban-v0", entry_point="jumanji.environments:Sokoban")
 # Pacman - minimal version of Atari Pacman game
 register(id="PacMan-v0", entry_point="jumanji.environments:PacMan")
+# SlidingTilePuzzle - A sliding tile puzzle environment with the default grid size of 5x5.
+register(
+    id="SlidingTilePuzzle-v0", entry_point="jumanji.environments:SlidingTilePuzzle"
+)
diff --git a/jumanji/environments/__init__.py b/jumanji/environments/__init__.py
@@ -14,12 +14,20 @@
 
 import sys
 
-from jumanji.environments.logic import game_2048, minesweeper, rubiks_cube
+from jumanji.environments.logic import (
+    game_2048,
+    graph_coloring,
+    minesweeper,
+    rubiks_cube,
+    sliding_tile_puzzle,
+    sudoku,
+)
 from jumanji.environments.logic.game_2048.env import Game2048
 from jumanji.environments.logic.graph_coloring.env import GraphColoring
-from jumanji.environments.logic.minesweeper import Minesweeper
-from jumanji.environments.logic.rubiks_cube import RubiksCube
-from jumanji.environments.logic.sudoku import Sudoku
+from jumanji.environments.logic.minesweeper.env import Minesweeper
+from jumanji.environments.logic.rubiks_cube.env import RubiksCube
+from jumanji.environments.logic.sliding_tile_puzzle.env import SlidingTilePuzzle
+from jumanji.environments.logic.sudoku.env import Sudoku
 from jumanji.environments.packing import bin_pack, flat_pack, job_shop, knapsack, tetris
 from jumanji.environments.packing.bin_pack.env import BinPack
 from jumanji.environments.packing.flat_pack.env import FlatPack
@@ -44,7 +52,7 @@
 from jumanji.environments.routing.cvrp.env import CVRP
 from jumanji.environments.routing.maze.env import Maze
 from jumanji.environments.routing.mmst.env import MMST
-from jumanji.environments.routing.multi_cvrp import MultiCVRP
+from jumanji.environments.routing.multi_cvrp.env import MultiCVRP
 from jumanji.environments.routing.pac_man.env import PacMan
 from jumanji.environments.routing.robot_warehouse.env import RobotWarehouse
 from jumanji.environments.routing.snake.env import Snake

diff --git a/jumanji/environments/logic/sliding_tile_puzzle/__init__.py b/jumanji/environments/logic/sliding_tile_puzzle/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2022 InstaDeep Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from jumanji.environments.logic.sliding_tile_puzzle.env import SlidingTilePuzzle
+from jumanji.environments.logic.sliding_tile_puzzle.types import Observation, State
diff --git a/jumanji/environments/logic/sliding_tile_puzzle/conftest.py b/jumanji/environments/logic/sliding_tile_puzzle/conftest.py
@@ -0,0 +1,42 @@
+# Copyright 2022 InstaDeep Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import jax
+import jax.numpy as jnp
+import pytest
+
+from jumanji.environments.logic.sliding_tile_puzzle import SlidingTilePuzzle
+from jumanji.environments.logic.sliding_tile_puzzle.generator import RandomWalkGenerator
+from jumanji.environments.logic.sliding_tile_puzzle.types import State
+
+
+@pytest.fixture
+def sliding_tile_puzzle() -> SlidingTilePuzzle:
+    """Instantiates a SlidingTilePuzzle environment on a 3x3 grid.
+
+    The environment is built from a `RandomWalkGenerator` with `grid_size=3`, so the
+    board is smaller than the 5x5 default of the registered `SlidingTilePuzzle-v0`.
+    """
+    generator = RandomWalkGenerator(grid_size=3)
+    return SlidingTilePuzzle(generator=generator)
+
+
+@pytest.fixture
+def state() -> State:
+    """Returns a fixed 3x3 state with the empty tile (0) in the top-left corner."""
+    key = jax.random.PRNGKey(0)
+    # Position of the empty tile; matches the location of the 0 entry in `puzzle` below.
+    empty_pos = jnp.array([0, 0])
+    puzzle = jnp.array(
+        [
+            [0, 1, 3],
+            [4, 2, 5],
+            [7, 8, 6],
+        ]
+    )
+    return State(puzzle=puzzle, empty_tile_position=empty_pos, key=key, step_count=0)
diff --git a/jumanji/environments/logic/sliding_tile_puzzle/constants.py b/jumanji/environments/logic/sliding_tile_puzzle/constants.py
@@ -0,0 +1,24 @@
+# Copyright 2022 InstaDeep Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import jax.numpy as jnp
+
+# Value that marks the empty tile in the puzzle array.
+EMPTY_TILE = 0
+# Presumably the step count a fresh episode starts from; usage is outside this file.
+INITIAL_STEP_COUNT = 0
+
+# Direction offsets written as [row, column] deltas (negative row = up, positive
+# column = right). Presumably added to the empty tile position; confirm in the env.
+UP = [-1, 0]
+RIGHT = [0, 1]
+DOWN = [1, 0]
+LEFT = [0, -1]
+
+# Offsets stacked in the order up, right, down, left, i.e. the same order as the
+# documented action ids 0, 1, 2, 3.
+MOVES = jnp.array([UP, RIGHT, DOWN, LEFT])