Commit 061af960 authored by DAAVVE

ADD: openAI tests

parent df87f9c8
import numpy as np
import gym

env = gym.make('FetchReach-v1')
obs = env.reset()
done = False

def policy(observation, desired_goal):
    # Here you would implement your smarter policy. In this case,
    # we just sample random actions.
    return env.action_space.sample()

while not done:
    action = policy(obs['observation'], obs['desired_goal'])
    obs, reward, done, info = env.step(action)

    # If we want, we can substitute a goal here and re-compute
    # the reward. For instance, we can just pretend that the desired
    # goal was what we achieved all along.
    substitute_goal = obs['achieved_goal'].copy()
    substitute_reward = env.compute_reward(
        obs['achieved_goal'], substitute_goal, info)
    print('reward is {}, substitute_reward is {}'.format(
        reward, substitute_reward))
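
The goal substitution above is the core trick behind Hindsight Experience Replay (HER): even a failed episode yields useful training signal once its transitions are relabeled with a goal that was actually reached. Below is a minimal sketch of that relabeling step; relabel_with_achieved_goal and the transition-tuple layout are hypothetical placeholders, and only env.compute_reward comes from gym's goal-env API.

import gym

env = gym.make('FetchReach-v1')

def relabel_with_achieved_goal(transitions):
    # Hedged HER-style sketch. Each transition is assumed to be a
    # (obs, action, reward, next_obs) tuple of dict observations, as
    # produced by the loop above; this layout is illustrative only.
    final_achieved = transitions[-1][3]['achieved_goal']
    relabeled = []
    for obs, action, reward, next_obs in transitions:
        # Keep the original transition with its real desired goal.
        relabeled.append((obs, action, reward, next_obs,
                          obs['desired_goal']))
        # Add a hindsight copy: recompute the sparse reward as if the
        # finally-achieved goal had been the desired goal all along.
        hindsight_reward = env.compute_reward(
            next_obs['achieved_goal'], final_achieved, None)
        relabeled.append((obs, action, hindsight_reward, next_obs,
                          final_achieved))
    return relabeled
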
import gym

env = gym.make('FetchReach-v1')
env.reset()
for _ in range(1000):
    env.render()
    env.step(env.action_space.sample())  # take a random action
env.close()