# Test code
# Bootstrap: silence TF logging via set_env, extend sys.path to reach the
# gym-multigrid test package, and configure a rotating-file logger for "drlhp".
import set_env
import logging
import multiprocessing as mp
import sys
import time
from pathlib import Path
from d2py.utils.log_config import config_logging
# Resolve the notebook's working directory; parents[2] climbs three levels up
# to the repository root that contains tests/gym-multigrid — TODO confirm layout.
root_dir = Path(".").resolve()
sys.path.extend([str(root_dir.parents[2]/"tests/gym-multigrid")])
# Scratch directory for log files (created if missing).
logger_dir = root_dir/".temp"
logger_dir.mkdir(parents=True, exist_ok=True)
# Output directory for rendered images (created if missing).
temp_dir = root_dir/"images"
temp_dir.mkdir(parents=True, exist_ok=True)
# Rotating log: ~50 kB per file, keeping 2 backups.
logger_name = "drlhp"
logger = logging.getLogger(logger_name)
config_logging(f'{logger_dir}/{logger_name}.log', logger_name, maxBytes=50000, backupCount=2)
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 1
----> 1 import set_env
2 import logging
3 import multiprocessing as mp
File ~/work/pybook/pybook/doc/libs/drlhp/set_env.py:8
6 os.environ["GLOG_minloglevel"] = "true"
7 import logging
----> 8 import tensorflow as tf
9 tf.get_logger().setLevel(logging.ERROR)
10 tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
ModuleNotFoundError: No module named 'tensorflow'
import argparse
import random
import numpy as np
import torch
import gymnasium as gym
# import pybullet_envs
# import matplotlib.pyplot as plt
# model
from utils.model.ppo import PPO
from utils.config import get_config
# wrappers
from utils.human_feedback_wrapper import HumanFeedback, SyntheticFeedback
from utils.reward_wrapper import FeedbackReward
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[2], line 10
5 import gymnasium as gym
6 # import pybullet_envs
7 # import matplotlib.pyplot as plt
8
9 # model
---> 10 from utils.model.ppo import PPO
11 from utils.config import get_config
12 # wrappers
File ~/work/pybook/pybook/doc/libs/drlhp/utils/model/ppo.py:8
6 from .network_utils import np2torch #build_mlp, device,
7 # from .policy import CategoricalPolicy, GaussianPolicy
----> 8 from .policy_gradient import PolicyGradient
10 class PPO(PolicyGradient):
12 def __init__(self, env, eval_env, config, seed, logger=None):
File ~/work/pybook/pybook/doc/libs/drlhp/utils/model/policy_gradient.py:3
1 import numpy as np
2 import torch
----> 3 import gym
4 import os
5 from .general import get_logger, export_plot
ModuleNotFoundError: No module named 'gym'
from dataclasses import dataclass
@dataclass
class EnvConfig:
    """Hyper-parameter bundle for a preference-learning run.

    After field assignment, ``__post_init__`` materializes the full
    experiment configuration into ``self.config`` via ``get_config``.
    """
    env_name: str               # one of ["cartpole", "pendulum", "cheetah"]
    seed: int = 1
    entropy: float = 0.1        # swept over [0.0, 0.01, 0.05, 0.1]
    synthetic: bool = False     # use synthetic (oracle) feedback instead of human
    constant_ask: int = 1000    # swept over [100, 1000, 10000]
    collect_initial: int = 0    # swept over [0, 50, 200]
    num_batches: int = 100

    def __post_init__(self):
        # Derive the concrete experiment config from the declared fields.
        params = (
            self.env_name,
            self.seed,
            self.entropy,
            self.constant_ask,
            self.collect_initial,
            self.num_batches,
        )
        self.config = get_config(*params)
# Build the training environment: FeedbackReward swaps the env reward for the
# learned one, then a feedback wrapper collects preferences (synthetic oracle
# or human, depending on the config flag).
env_config = EnvConfig("pendulum")
feedback_cls = SyntheticFeedback if env_config.synthetic else HumanFeedback
env = feedback_cls(FeedbackReward(gym.make(env_config.config.env_name)), config=env_config.config)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[4], line 1
----> 1 env_config = EnvConfig("pendulum")
2 if env_config.synthetic:
3 env = SyntheticFeedback(FeedbackReward(gym.make(env_config.config.env_name)), config=env_config.config)
File <string>:10, in __init__(self, env_name, seed, entropy, synthetic, constant_ask, collect_initial, num_batches)
Cell In[3], line 14, in EnvConfig.__post_init__(self)
13 def __post_init__(self):
---> 14 self.config = get_config(
15 self.env_name,
16 self.seed,
17 self.entropy,
18 self.constant_ask,
19 self.collect_initial,
20 self.num_batches
21 )
NameError: name 'get_config' is not defined
# Separate evaluation environment without the feedback/reward wrappers.
eval_env = gym.make(env_config.config.env_name)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[5], line 1
----> 1 eval_env = gym.make(env_config.config.env_name)
NameError: name 'env_config' is not defined
# train model
# NOTE(review): eval_env is reset twice in a row and the first call's
# observation is captured but never used — confirm whether this is intentional
# (e.g. to advance the env's RNG state) or leftover debugging.
observation = eval_env.reset()
eval_env.reset()
model = PPO(env, eval_env, env_config.config, env_config.seed)
model.train()
# Report how many preference labels were gathered during training.
print (f"{env.pref_db.total_labeled} preference collected total")
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[6], line 2
1 # train model
----> 2 observation = eval_env.reset()
3 eval_env.reset()
4 model = PPO(env, eval_env, env_config.config, env_config.seed)
NameError: name 'eval_env' is not defined
# Sanity check: adding a leading batch axis to an HxWxC image array.
obs = np.arange(100 * 100 * 3).reshape(100, 100, 3)
# obs[np.newaxis] is the idiomatic, copy-free equivalent of the original
# np.array([np.array(obs)]) (which made two needless copies); same shape result.
obs[np.newaxis].shape
(1, 100, 100, 3)