Commit 061af960 authored by DAAVVE

ADD: openAI tests

parent df87f9c8
import numpy as np
import gym

env = gym.make('FetchReach-v1')
obs = env.reset()
done = False

def policy(observation, desired_goal):
    # Here you would implement your smarter policy. In this case,
    # we just sample random actions.
    return env.action_space.sample()

while not done:
    action = policy(obs['observation'], obs['desired_goal'])
    obs, reward, done, info = env.step(action)

    # If we want, we can substitute a goal here and re-compute
    # the reward. For instance, we can just pretend that the desired
    # goal was what we achieved all along.
    substitute_goal = obs['achieved_goal'].copy()
    substitute_reward = env.compute_reward(
        obs['achieved_goal'], substitute_goal, info)
    print('reward is {}, substitute_reward is {}'.format(
        reward, substitute_reward))
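
The goal substitution above is the core trick behind Hindsight Experience Replay (HER): even a failed episode yields useful training signal once its transitions are relabeled with a goal that was actually reached. Below is a minimal sketch of that relabeling step; relabel_with_achieved_goal and the transition-tuple layout are hypothetical placeholders, and only env.compute_reward comes from gym's goal-env API.

import gym

env = gym.make('FetchReach-v1')

def relabel_with_achieved_goal(transitions):
    # Hedged HER-style sketch. Each transition is assumed to be a
    # (obs, action, reward, next_obs) tuple of dict observations, as
    # produced by the loop above; this layout is illustrative only.
    final_achieved = transitions[-1][3]['achieved_goal']
    relabeled = []
    for obs, action, reward, next_obs in transitions:
        # Keep the original transition with its real desired goal.
        relabeled.append((obs, action, reward, next_obs,
                          obs['desired_goal']))
        # Add a hindsight copy: recompute the sparse reward as if the
        # finally-achieved goal had been the desired goal all along.
        hindsight_reward = env.compute_reward(
            next_obs['achieved_goal'], final_achieved, None)
        relabeled.append((obs, action, hindsight_reward, next_obs,
                          final_achieved))
    return relabeled
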
import gym

env = gym.make('FetchReach-v1')
env.reset()
for _ in range(1000):
    env.render()
    env.step(env.action_space.sample())  # take a random action
env.close()