测试代码

测试代码

import set_env
import logging
import multiprocessing as mp
import sys
import time
from pathlib import Path
from d2py.utils.log_config import config_logging
# Absolute path of the current working directory; the other paths derive from it.
root_dir = Path(".").resolve()
# Put the gym-multigrid directory (under tests/, three levels above root_dir)
# on the import path.
sys.path.append(str(root_dir.parents[2] / "tests/gym-multigrid"))

# Create the directory for log files and the directory for images up front.
logger_dir = root_dir / ".temp"
temp_dir = root_dir / "images"
for _directory in (logger_dir, temp_dir):
    _directory.mkdir(parents=True, exist_ok=True)

# Obtain the named logger first, then configure it to write to a file
# (50000-byte limit, 2 backups, as passed to config_logging).
logger_name = "drlhp"
logger = logging.getLogger(logger_name)
_log_file = f'{logger_dir}/{logger_name}.log'
config_logging(_log_file, logger_name, maxBytes=50000, backupCount=2)
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 1
----> 1 import set_env
      2 import logging
      3 import multiprocessing as mp

File ~/work/pybook/pybook/doc/libs/drlhp/set_env.py:8
      6 os.environ["GLOG_minloglevel"] = "true"
      7 import logging
----> 8 import tensorflow as tf
      9 tf.get_logger().setLevel(logging.ERROR)
     10 tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

ModuleNotFoundError: No module named 'tensorflow'
import argparse
import random
import numpy as np
import torch
import gymnasium as gym
# import pybullet_envs
# import matplotlib.pyplot as plt

# model
from utils.model.ppo import PPO
from utils.config import get_config
# wrappers
from utils.human_feedback_wrapper import HumanFeedback, SyntheticFeedback
from utils.reward_wrapper import FeedbackReward
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[2], line 10
      5 import gymnasium as gym
      6 # import pybullet_envs
      7 # import matplotlib.pyplot as plt
      8 
      9 # model
---> 10 from utils.model.ppo import PPO
     11 from utils.config import get_config
     12 # wrappers

File ~/work/pybook/pybook/doc/libs/drlhp/utils/model/ppo.py:8
      6 from .network_utils import np2torch #build_mlp, device, 
      7 # from .policy import CategoricalPolicy, GaussianPolicy
----> 8 from .policy_gradient import PolicyGradient
     10 class PPO(PolicyGradient):
     12     def __init__(self, env, eval_env, config, seed, logger=None):

File ~/work/pybook/pybook/doc/libs/drlhp/utils/model/policy_gradient.py:3
      1 import numpy as np
      2 import torch
----> 3 import gym
      4 import os
      5 from .general import get_logger, export_plot

ModuleNotFoundError: No module named 'gym'
from dataclasses import dataclass

@dataclass
class EnvConfig:
    """Settings for one experiment run, plus the config object built from them.

    The bracketed lists are the candidate values noted by the original author.

    Attributes:
        env_name: Environment key, one of ["cartpole", "pendulum", "cheetah"].
        seed: Seed forwarded to ``get_config``.
        entropy: One of [0.0, 0.01, 0.05, 0.1].
        synthetic: Flag stored on the instance; it is not forwarded to
            ``get_config``.
        constant_ask: One of [100, 1000, 10000].
        collect_initial: One of [0, 50, 200].
        num_batches: Forwarded to ``get_config``.

    After construction, ``self.config`` holds the value returned by
    ``get_config``.
    """

    env_name: str
    seed: int = 1
    entropy: float = 0.1
    synthetic: bool = False
    constant_ask: int = 1000
    collect_initial: int = 0
    num_batches: int = 100

    def __post_init__(self):
        """Build ``self.config`` from the fields, passed positionally."""
        # Order matters: get_config receives these as positional arguments.
        positional_settings = (
            self.env_name,
            self.seed,
            self.entropy,
            self.constant_ask,
            self.collect_initial,
            self.num_batches,
        )
        self.config = get_config(*positional_settings)
env_config = EnvConfig("pendulum")
# Pick the feedback wrapper from the `synthetic` flag; both wrappers take the
# same arguments, so the environment is constructed in one place.
feedback_wrapper = SyntheticFeedback if env_config.synthetic else HumanFeedback
env = feedback_wrapper(
    FeedbackReward(gym.make(env_config.config.env_name)),
    config=env_config.config,
)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[4], line 1
----> 1 env_config = EnvConfig("pendulum")
      2 if env_config.synthetic:
      3     env = SyntheticFeedback(FeedbackReward(gym.make(env_config.config.env_name)), config=env_config.config)

File <string>:10, in __init__(self, env_name, seed, entropy, synthetic, constant_ask, collect_initial, num_batches)

Cell In[3], line 14, in EnvConfig.__post_init__(self)
     13 def __post_init__(self):
---> 14     self.config = get_config(
     15         self.env_name, 
     16         self.seed, 
     17         self.entropy, 
     18         self.constant_ask, 
     19         self.collect_initial, 
     20         self.num_batches
     21     )

NameError: name 'get_config' is not defined
# A second, separately constructed environment created from the same
# environment name as `env`, without the feedback/reward wrappers.
eval_env_name = env_config.config.env_name
eval_env = gym.make(eval_env_name)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[5], line 1
----> 1 eval_env = gym.make(env_config.config.env_name)

NameError: name 'env_config' is not defined
# Train a PPO model with the feedback-wrapped env and the separate eval env,
# then print the total number of labeled preferences recorded by env.pref_db.
# NOTE(review): eval_env is reset twice in a row and `observation` is not used
# again in this cell — confirm whether both reset calls are needed.
observation = eval_env.reset()
eval_env.reset()
model = PPO(env, eval_env, env_config.config, env_config.seed)
model.train()
print (f"{env.pref_db.total_labeled} preference collected total")
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[6], line 2
      1 # train model
----> 2 observation = eval_env.reset()
      3 eval_env.reset()
      4 model = PPO(env, eval_env, env_config.config, env_config.seed)

NameError: name 'eval_env' is not defined
# Dummy 3-D array of consecutive integers with shape (100, 100, 3).
height, width, channels = 100, 100, 3
obs = np.arange(height * width * channels).reshape(height, width, channels)
# Wrapping the array in a one-element list adds a leading axis of length 1.
np.array([np.array(obs)]).shape
(1, 100, 100, 3)