Skip to content

Commit f225b1f

Browse files
committed
Add basic mujoco env
1 parent 9bf2e24 commit f225b1f

File tree

1 file changed

+80
-0
lines changed

1 file changed

+80
-0
lines changed

mujoco_env.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# This defines an openai gym environment to walk with the wolfgang robot in mujoco.
2+
3+
import os
4+
import mujoco
5+
import gym
6+
import cv2
7+
from tqdm import tqdm
8+
9+
class WolfgangMujocoEnv(gym.Env):
10+
def __init__(self):
11+
# Load the model
12+
self.model = mujoco.MjModel.from_xml_path("/home/florian/Projekt/bitbots/bitbots_main/bitbots_wolfgang/wolfgang_description/urdf/scene.xml")
13+
self.data = mujoco.MjData(self.model)
14+
self.renderer = mujoco.Renderer(self.model, 415, 415)
15+
16+
# Extract joint names from the model (they are non unique!)
17+
# We do this by reading a null terminated string at the address of the joint name in the joint names buffer
18+
self.joint_ids_to_names = [self.model.names[self.model.name_jntadr[i]:].decode('utf-8').split('\0')[0] for i in range(self.model.njnt)]
19+
20+
# Get which values of the observation belong to which joint
21+
self.joint_id_observation_start = self.model.jnt_qposadr
22+
23+
# Get the actuator names (they are also not unique)
24+
self.actuator_ids_to_names = [self.model.names[self.model.name_actuatoradr[i]:].decode('utf-8').split('\0')[0] for i in range(self.model.nu)]
25+
26+
27+
# Define the action space
28+
self.action_space = gym.spaces.Box(low=-1, high=1, shape=(20,))
29+
# Define the observation space
30+
self.observation_space = gym.spaces.Box(low=-1, high=1, shape=(20,))
31+
32+
def reset(self):
33+
super().reset()
34+
35+
# Reset the simulation
36+
mujoco.mj_resetData(self.model, self.data)
37+
return self.data.qpos, self.data.qvel
38+
39+
def step(self, action):
40+
41+
# Apply the action
42+
for i in range(len(action)):
43+
self.data.ctrl[i] = action[i]
44+
45+
# Perform an action
46+
mujoco.mj_step(self.model, self.data)
47+
48+
# Get the observation
49+
observation = self.data.qpos[self.joint_id_observation_start[1]:]
50+
51+
# Calculate the reward
52+
reward = self.data.xpos[1][2]
53+
return observation, reward, False, {}
54+
55+
def render(self):
56+
# Render the simulation
57+
self.renderer.update_scene(self.data)
58+
return self.renderer.render()
59+
60+
def close(self):
61+
super().close()
62+
# Explicitly delete the objects to free memory and get a more deterministic behavior
63+
del self.data
64+
del self.renderer
65+
del self.model
66+
67+
68+
if __name__ == "__main__":
69+
env = WolfgangMujocoEnv()
70+
env.reset()
71+
last_frame = -1000
72+
for _ in tqdm(range(1000)):
73+
print(env.step(env.action_space.sample()))
74+
time = env.data.time
75+
if time - last_frame > 1/60:
76+
last_frame = time
77+
cv2.imshow("Wolfgang", cv2.cvtColor(env.render(), cv2.COLOR_RGB2BGR))
78+
cv2.waitKey(1)
79+
cv2.destroyAllWindows()
80+
env.close()

0 commit comments

Comments
 (0)