drlhp.reward_predictor.RewardPredictorNetwork 测试

# drlhp.reward_predictor.RewardPredictorNetwork 测试

import os

# Quiet TensorFlow's native (C++) logging; level 3 filters INFO, WARNING and ERROR output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Suppress the initialization log warnings from the low-level libraries behind Gemini (gRPC and Abseil).
os.environ["GRPC_VERBOSITY"] = "ERROR"
# glog minimum severity: 0 = INFO, 1 = WARNING, 2 = ERROR, 3 = FATAL.
# The value has to be numeric, so it is assigned exactly once; a follow-up
# assignment of "true" to this same key would clobber the level set here.
os.environ["GLOG_minloglevel"] = "3"
import logging

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all.
# This has to be written into this process's environment: a notebook
# `!export ...` line runs in a throw-away subshell, never reaches the Python
# process, and is not valid Python syntax outside IPython. It is set before
# TensorFlow is imported so it is in place before any GPU initialization.
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

import tensorflow as tf

# Limit TensorFlow's Python-side loggers (TF2 logger and the compat.v1 logger) to errors.
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import logging
import sys
from pathlib import Path
from d2py.utils.log_config import config_logging

# Resolve the working directory once; every other path is derived from it.
root_dir = Path(".").resolve()
# Hidden scratch directory, created eagerly.
temp_dir = root_dir/".temp"
temp_dir.mkdir(parents=True, exist_ok=True)
# Put the gym-multigrid directory (under tests/, three levels above the
# working directory) on the import path.
sys.path.extend([str(root_dir.parents[2]/"tests/gym-multigrid")])
# From here on temp_dir refers to the images directory, which is also where
# the log file below is written. Create it as well, so configuring the log
# file cannot fail on a missing directory.
temp_dir = root_dir/"images"
temp_dir.mkdir(parents=True, exist_ok=True)

logger_name = "drlhp"
logger = logging.getLogger(logger_name)
# NOTE(review): maxBytes/backupCount suggest config_logging attaches a
# rotating file handler to the named logger - confirm against d2py.
config_logging(f'{temp_dir}/{logger_name}.log', logger_name, maxBytes=50000, backupCount=2)
from dataclasses import dataclass
from typing import Any
import tensorflow as tf
import tensorflow.experimental.numpy as tnp
tnp.experimental_enable_numpy_behavior()
# import os.path as osp
import time
import logging
import numpy as np
from numpy.testing import assert_equal

from drlhp.utils import RunningStat, batch_iter
from drlhp.reward_predictor_core_network import RewardPredictorNetwork, net_cnn
# batchnorm = False
# dropout = 0.5
# training = True
# reuse = False
# model = Model(batchnorm, dropout, training, reuse)
# x = tnp.arange(36, dtype="float32").reshape((1, 3, 4, 3))
# y = model(x)
# h, w, c = 10, 20, 3
# batchnorm = True
# dropout = 0.5
# inputs = tf.keras.Input(shape=(None, h, w, c))
# outputs = head(batchnorm, dropout)(inputs)
# model = tf.keras.Model(inputs=inputs, outputs=outputs)
import tensorflow as tf

# Wrap the CNN core in the reward-predictor network.
model = RewardPredictorNetwork(net_cnn)

# Collect the training configuration first, then hand it to compile() in one go.
_compile_kwargs = dict(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)
model.compile(**_compile_kwargs)

# Smoke test: one random 5-D batch is used for both inputs.
# NOTE(review): the axes are presumably (batch, steps, height, width,
# channels) - confirm against RewardPredictorNetwork.
x = tf.random.uniform(shape=(4, 7, 20, 30, 3))
s1 = s2 = x
r1, r2, rs1, rs2, pred = model(s1, s2)
model.summary()
import torch