import random

import numpy as np
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from dataclasses import dataclass
from typing import Tuple, Dict, List
from torch.utils.data import Dataset, DataLoader

try:
    from tqdm.auto import tqdm
except ImportError:
    # Fallback: no-op progress wrapper when tqdm is not installed.
    def tqdm(x, **kwargs):
        return x

# Seed every RNG we use so runs are reproducible.
SEED = 7
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)

# `device` was referenced below but never defined in the original source.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Let cuDNN pick the fastest kernels for fixed-size inputs.
    torch.backends.cudnn.benchmark = True
@dataclass
class WorldConfig:
    """Static configuration for the grid-world environment and its renderer."""
    grid_size: int = 8      # grid is grid_size x grid_size cells
    cell_px: int = 14       # rendered size of one cell, in pixels
    max_steps: int = 45     # episode is truncated after this many steps
    n_obstacles: int = 8    # number of impassable cells placed at reset
    spawn_margin: int = 1   # keep spawns this many cells away from the border
Class GridWorldRGBNoPIL
ACTIONS = {0:(0,-1),1:(0,1),2:(-1,0),3:(1,0),4:(0,0)}
ACTION_NAMES = {0:"UP",1:"DOWN",2:"LEFT",3:"RIGHT",4:"STAY"}
def __init__(self, cfg: WorldConfig):
Self.cfg = Cfg
self.reset()
def reset(self) -> Dict:
Grid Size g = Self.cfg.grid_size
self.steps = 0
def sample_empty(exclude=set()):
While True
x = random.randint(self.cfg.spawn_margin, g-1-self.cfg.spawn_margin)
y = random.randint(self.cfg.spawn_margin, g-1-self.cfg.spawn_margin)
Return (xyz) if (xyz) is not includedReturn
self.obstacles = set()
ax, ay = sample_empty()
gx, gy = sample_empty(exclude={(ax,ay)})
used = {(ax,ay),(gx,gy)}
for _ in range(self.cfg.n_obstacles):
ox, oy = sample_empty(exclude=used)
self.obstacles.add((ox,oy))
used.add((ox,oy))
self.agent = (ax,ay)
self.goal = (gx,gy)
return {"image": self._render_u8()}
def _in_bounds(self, x, y):
Returning 0 to float:
x,y = pos; gx,gy = self.goal
Return abs (x-gx), plus abs (y-gy).
def _state_vector(self) -> np.ndarray:
g = grid_size.cfg - 1.
ax,ay = self.agent; gx,gy = self.goal
Return np.array ([ax/g, ay/g, gx/g, gy/g], dtype=np.float32)
def step(self, action: int):
self.steps += 1
Self.ACTIONS = dx + dyThe word is pronounced as ob = '".info =[int(action)]
x,y = self.agent
nx, ny = x+dx, y+dy
if self._in_bounds(nx,ny) and (nx,ny) not in self.obstacles:
self.agent = (nx,ny)
done = (self.agent == self.goal) or (self.steps >= self.cfg.max_steps)
d_prev = self._dist_to_goal((x,y))
d_now = self._dist_to_goal(self.agent)
reward = 0.1*(d_prev - d_now) + (1.0 if self.agent == self.goal else 0.0)
obs = {"image": self._render_u8()}
info = {"state": self._state_vector()}
Information about return, reward, and bool (done).
def _render_u8(self) -> np.ndarray:
g, s = self.cfg.grid_size, self.cfg.cell_px
H = W = g*s
Bg = array(np)[245,245,245], np.uint8)
Gridline = array(np)[220,220,220], np.uint8)
obstacle_c = np.array([220,70,70], np.uint8)
goal_c = np.array([60,180,75], np.uint8)
agent_c = np.array([65,105,225], np.uint8)
Img = (H,W3, np.uint8) img[...] = bg
Image[::s,:,:] Gridline
Image[:,::s,:] Gridline
def paint_cell(x,y,color):
y0,y1 = y*s,(y+1)*s
x0,x1 = x*s,(x+1)*s
Img[y0+1:y1-1, x0+1:x1-1] Color
Paint_cell (ox_oy_obstacle_c, ox_oy_obstacle_c, ox_oy_obstacle_c).
gx,gy = self.goal; paint_cell(gx,gy, goal_c)
ax,ay = self.agent; paint_cell(ax,ay, agent_c)
Return image
# Quick visual sanity check: render one freshly-reset observation.
cfg = WorldConfig()
env = GridWorldRGBNoPIL(cfg)
plt.figure(figsize=(3,3))
plt.imshow(env.reset()["image"]); plt.axis("off"); plt.title("No-Pillow observation"); plt.show()
def to_tensor_img_u8(img_u8: np.ndarray) -> torch.Tensor:
    """Convert an (H, W, 3) uint8 image into a float32 (3, H, W) tensor in [0, 1]."""
    chw = torch.from_numpy(img_u8).permute(2, 0, 1)
    return chw.to(torch.float32).div(255.0)
# NOTE(review): the lines below appear to be scraped website navigation text
# ("Trending" headlines) accidentally pasted into this source file. Preserved
# here as comments so the file remains valid Python; safe to remove.
# Trending
# - How to Build a Lightweight Vision-Language-Action-Inspired Embodied Agent with Latent World Modeling and Model Predictive Control
# - Meet Talkie-1930, a 13B Open-Weight LLM Trained on Pre-1931 English Texts for Historical Reasoning and Generalization Research
# - Google tests AI chatbot for YouTube
# - Some Musk v. Altman jurors don't like Elon Musk
# - OpenMOSS Releases MOSS Audio: An Open-Source Foundation Model for Sound, Speech, Music and Time-Aware Reasoning
# - Elon Musk Boosts New Yorker's Sam Altman Exposé on X as Trial Begins
# - Create a reinforcement-learning-powered agent that learns to retrieve relevant long-term memories for accurate LLM question answering
# - The AlphaGo Man Thinks AI's Going the Wrong Way

