记录智能体行为#

在训练期间或评估智能体时,记录一个剧集中的智能体行为并记录累积的总奖励可能会很有趣。这可以通过两个包装器实现:RecordEpisodeStatistics 和 RecordVideo,第一个跟踪剧集(Episode)数据,如总奖励、剧集长度和所用时间,第二个使用环境渲染生成智能体的 mp4 视频。

我们展示了如何为两种类型的问题应用这些包装器;第一种是记录每个剧集的数据(通常是评估),第二种是定期记录数据(用于正常训练)。

记录每个剧集#

给定训练有素的智能体,您可能希望在评估期间记录几个剧集以了解智能体的行为。下面,我们提供了示例脚本,演示如何使用 RecordEpisodeStatistics 和 RecordVideo 进行此操作。

import gymnasium as gym
from gymnasium.wrappers import RecordEpisodeStatistics, RecordVideo

num_eval_episodes = 4

# Build the evaluation environment in one expression: the base environment
# renders RGB frames, RecordVideo writes a video for every episode, and
# RecordEpisodeStatistics buffers the statistics of the evaluated episodes.
env = RecordEpisodeStatistics(
    RecordVideo(
        gym.make("CartPole-v1", render_mode="rgb_array"),  # replace with your environment
        video_folder="cartpole-agent",
        name_prefix="eval",
        episode_trigger=lambda episode_id: True,  # trigger on every episode
    ),
    buffer_length=num_eval_episodes,
)

for _ in range(num_eval_episodes):
    obs, info = env.reset()

    # Step until the episode ends, either by termination or by truncation.
    while True:
        action = env.action_space.sample()  # replace with actual agent
        obs, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            break
env.close()

# The statistics queues are read once, after all evaluation episodes are done.
print(f'Episode time taken: {env.time_queue}')
print(f'Episode total rewards: {env.return_queue}')
print(f'Episode lengths: {env.length_queue}')
Moviepy - Building video /home/runner/work/pybook/pybook/doc/libs/gym/cartpole-agent/eval-episode-0.mp4.
Moviepy - Writing video /home/runner/work/pybook/pybook/doc/libs/gym/cartpole-agent/eval-episode-0.mp4
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[1], line 12
      9 env = RecordEpisodeStatistics(env, buffer_length=num_eval_episodes)
     11 for episode_num in range(num_eval_episodes):
---> 12     obs, info = env.reset()
     14     episode_over = False
     15     while not episode_over:

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/gymnasium/wrappers/common.py:543, in RecordEpisodeStatistics.reset(self, seed, options)
    539 def reset(
    540     self, *, seed: int | None = None, options: dict[str, Any] | None = None
    541 ) -> tuple[ObsType, dict[str, Any]]:
    542     """Resets the environment using seed and options and resets the episode rewards and lengths."""
--> 543     obs, info = super().reset(seed=seed, options=options)
    545     self.episode_start_time = time.perf_counter()
    546     self.episode_returns = 0.0

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/gymnasium/core.py:328, in Wrapper.reset(self, seed, options)
    324 def reset(
    325     self, *, seed: int | None = None, options: dict[str, Any] | None = None
    326 ) -> tuple[WrapperObsType, dict[str, Any]]:
    327     """Uses the :meth:`reset` of the :attr:`env` that can be overwritten to change the returned data."""
--> 328     return self.env.reset(seed=seed, options=options)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/gymnasium/wrappers/rendering.py:335, in RecordVideo.reset(self, seed, options)
    332 self.episode_id += 1
    334 if self.recording and self.video_length == float("inf"):
--> 335     self.stop_recording()
    337 if self.episode_trigger and self.episode_trigger(self.episode_id):
    338     self.start_recording(f"{self.name_prefix}-episode-{self.episode_id}")

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/gymnasium/wrappers/rendering.py:407, in RecordVideo.stop_recording(self)
    405     moviepy_logger = None if self.disable_logger else "bar"
    406     path = os.path.join(self.video_folder, f"{self._video_name}.mp4")
--> 407     clip.write_videofile(path, logger=moviepy_logger)
    409 self.recorded_frames = []
    410 self.recording = False

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/decorator.py:232, in decorate.<locals>.fun(*args, **kw)
    230 if not kwsyntax:
    231     args, kw = fix(args, kw, sig)
--> 232 return caller(func, *(extras + args), **kw)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/moviepy/decorators.py:54, in requires_duration(f, clip, *a, **k)
     52     raise ValueError("Attribute 'duration' not set")
     53 else:
---> 54     return f(clip, *a, **k)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/decorator.py:232, in decorate.<locals>.fun(*args, **kw)
    230 if not kwsyntax:
    231     args, kw = fix(args, kw, sig)
--> 232 return caller(func, *(extras + args), **kw)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/moviepy/decorators.py:135, in use_clip_fps_by_default(f, clip, *a, **k)
    130 new_a = [fun(arg) if (name=='fps') else arg
    131          for (arg, name) in zip(a, names)]
    132 new_kw = {k: fun(v) if k=='fps' else v
    133          for (k,v) in k.items()}
--> 135 return f(clip, *new_a, **new_kw)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/decorator.py:232, in decorate.<locals>.fun(*args, **kw)
    230 if not kwsyntax:
    231     args, kw = fix(args, kw, sig)
--> 232 return caller(func, *(extras + args), **kw)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/moviepy/decorators.py:22, in convert_masks_to_RGB(f, clip, *a, **k)
     20 if clip.ismask:
     21     clip = clip.to_RGB()
---> 22 return f(clip, *a, **k)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/moviepy/video/VideoClip.py:300, in VideoClip.write_videofile(self, filename, fps, codec, bitrate, audio, audio_fps, preset, audio_nbytes, audio_codec, audio_bitrate, audio_bufsize, temp_audiofile, rewrite_audio, remove_temp, write_logfile, verbose, threads, ffmpeg_params, logger)
    292 if make_audio:
    293     self.audio.write_audiofile(audiofile, audio_fps,
    294                                audio_nbytes, audio_bufsize,
    295                                audio_codec, bitrate=audio_bitrate,
    296                                write_logfile=write_logfile,
    297                                verbose=verbose,
    298                                logger=logger)
--> 300 ffmpeg_write_video(self, filename, fps, codec,
    301                    bitrate=bitrate,
    302                    preset=preset,
    303                    write_logfile=write_logfile,
    304                    audiofile=audiofile,
    305                    verbose=verbose, threads=threads,
    306                    ffmpeg_params=ffmpeg_params,
    307                    logger=logger)
    309 if remove_temp and make_audio:
    310     if os.path.exists(audiofile):

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/moviepy/video/io/ffmpeg_writer.py:213, in ffmpeg_write_video(clip, filename, fps, codec, bitrate, preset, withmask, write_logfile, audiofile, verbose, threads, ffmpeg_params, logger)
    211     logfile = None
    212 logger(message='Moviepy - Writing video %s\n' % filename)
--> 213 with FFMPEG_VideoWriter(filename, clip.size, fps, codec = codec,
    214                             preset=preset, bitrate=bitrate, logfile=logfile,
    215                             audiofile=audiofile, threads=threads,
    216                             ffmpeg_params=ffmpeg_params) as writer:
    218     nframes = int(clip.duration*fps)
    220     for t,frame in clip.iter_frames(logger=logger, with_times=True,
    221                                     fps=fps, dtype="uint8"):

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/moviepy/video/io/ffmpeg_writer.py:88, in FFMPEG_VideoWriter.__init__(self, filename, size, fps, codec, audiofile, preset, bitrate, withmask, logfile, threads, ffmpeg_params)
     77 self.ext = self.filename.split(".")[-1]
     79 # order is important
     80 cmd = [
     81     get_setting("FFMPEG_BINARY"),
     82     '-y',
     83     '-loglevel', 'error' if logfile == sp.PIPE else 'info',
     84     '-f', 'rawvideo',
     85     '-vcodec', 'rawvideo',
     86     '-s', '%dx%d' % (size[0], size[1]),
     87     '-pix_fmt', 'rgba' if withmask else 'rgb24',
---> 88     '-r', '%.02f' % fps,
     89     '-an', '-i', '-'
     90 ]
     91 if audiofile is not None:
     92     cmd.extend([
     93         '-i', audiofile,
     94         '-acodec', 'copy'
     95     ])

TypeError: must be real number, not NoneType

在上述脚本中,对于 RecordVideo 包装器,我们指定了三个不同的变量:video_folder 用于指定应保存视频的文件夹(根据您的问题进行更改),name_prefix 用于视频本身的前缀,最后是 episode_trigger 以便记录每个剧集。这意味着对于环境的每个剧集,都将录制一个视频并保存为“cartpole-agent/eval-episode-x.mp4”样式。

对于 RecordEpisodeStatistics,我们只需要指定缓冲区长度,这是内部 time_queue、return_queue 和 length_queue 的最大长度。与其为每个剧集单独收集数据,我们可以使用数据队列在评估结束时打印信息。

为了加快评估环境的速度,可以使用向量环境同时并行评估 N 个剧集,而不是按顺序评估。

在训练期间记录智能体行为#

在训练期间,一个智能体会进行数百或数千个剧集,因此您不能为每个剧集录制视频,但开发人员可能仍然希望了解智能体在训练的不同阶段的行为,定期在训练期间记录剧集。而对于剧集统计,知道每一集的数据会更有帮助。以下脚本提供了一个例子,展示如何在记录每一集的统计数据的同时,定期记录智能体的剧集(我们使用 Python 的 logger,但 tensorboard、wandb 等模块也可用)。

import logging

import gymnasium as gym
from gymnasium.wrappers import RecordEpisodeStatistics, RecordVideo

training_period = 250  # record the agent's episode every 250
num_training_episodes = 10_000  # total number of training episodes

env = gym.make("CartPole-v1", render_mode="rgb_array")  # replace with your environment
# Only episodes whose index is a multiple of `training_period` are recorded.
env = RecordVideo(env, video_folder="cartpole-agent", name_prefix="training",
                  episode_trigger=lambda x: x % training_period == 0)
env = RecordEpisodeStatistics(env)

for episode_num in range(num_training_episodes):
    obs, info = env.reset()

    episode_over = False
    while not episode_over:
        action = env.action_space.sample()  # replace with actual agent
        obs, reward, terminated, truncated, info = env.step(action)

        episode_over = terminated or truncated

    # Log the statistics of the episode that just finished. The values are
    # passed as lazy %-style arguments with matching placeholders; passing
    # `info["episode"]` as an extra argument to a message without placeholders
    # makes the logging module report a formatting error instead of the data.
    # NOTE: the root logger defaults to WARNING, so configure the level (e.g.
    # logging.basicConfig(level=logging.INFO)) to actually see these messages.
    logging.info("episode-%d: %s", episode_num, info["episode"])
env.close()
Moviepy - Building video /home/runner/work/pybook/pybook/doc/libs/gym/cartpole-agent/training-episode-0.mp4.
Moviepy - Writing video /home/runner/work/pybook/pybook/doc/libs/gym/cartpole-agent/training-episode-0.mp4
/opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/gymnasium/wrappers/rendering.py:283: UserWarning: WARN: Overwriting existing videos at /home/runner/work/pybook/pybook/doc/libs/gym/cartpole-agent folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)
  logger.warn(
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[2], line 15
     12 env = RecordEpisodeStatistics(env)
     14 for episode_num in range(num_training_episodes):
---> 15     obs, info = env.reset()
     17     episode_over = False
     18     while not episode_over:

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/gymnasium/wrappers/common.py:543, in RecordEpisodeStatistics.reset(self, seed, options)
    539 def reset(
    540     self, *, seed: int | None = None, options: dict[str, Any] | None = None
    541 ) -> tuple[ObsType, dict[str, Any]]:
    542     """Resets the environment using seed and options and resets the episode rewards and lengths."""
--> 543     obs, info = super().reset(seed=seed, options=options)
    545     self.episode_start_time = time.perf_counter()
    546     self.episode_returns = 0.0

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/gymnasium/core.py:328, in Wrapper.reset(self, seed, options)
    324 def reset(
    325     self, *, seed: int | None = None, options: dict[str, Any] | None = None
    326 ) -> tuple[WrapperObsType, dict[str, Any]]:
    327     """Uses the :meth:`reset` of the :attr:`env` that can be overwritten to change the returned data."""
--> 328     return self.env.reset(seed=seed, options=options)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/gymnasium/wrappers/rendering.py:335, in RecordVideo.reset(self, seed, options)
    332 self.episode_id += 1
    334 if self.recording and self.video_length == float("inf"):
--> 335     self.stop_recording()
    337 if self.episode_trigger and self.episode_trigger(self.episode_id):
    338     self.start_recording(f"{self.name_prefix}-episode-{self.episode_id}")

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/gymnasium/wrappers/rendering.py:407, in RecordVideo.stop_recording(self)
    405     moviepy_logger = None if self.disable_logger else "bar"
    406     path = os.path.join(self.video_folder, f"{self._video_name}.mp4")
--> 407     clip.write_videofile(path, logger=moviepy_logger)
    409 self.recorded_frames = []
    410 self.recording = False

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/decorator.py:232, in decorate.<locals>.fun(*args, **kw)
    230 if not kwsyntax:
    231     args, kw = fix(args, kw, sig)
--> 232 return caller(func, *(extras + args), **kw)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/moviepy/decorators.py:54, in requires_duration(f, clip, *a, **k)
     52     raise ValueError("Attribute 'duration' not set")
     53 else:
---> 54     return f(clip, *a, **k)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/decorator.py:232, in decorate.<locals>.fun(*args, **kw)
    230 if not kwsyntax:
    231     args, kw = fix(args, kw, sig)
--> 232 return caller(func, *(extras + args), **kw)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/moviepy/decorators.py:135, in use_clip_fps_by_default(f, clip, *a, **k)
    130 new_a = [fun(arg) if (name=='fps') else arg
    131          for (arg, name) in zip(a, names)]
    132 new_kw = {k: fun(v) if k=='fps' else v
    133          for (k,v) in k.items()}
--> 135 return f(clip, *new_a, **new_kw)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/decorator.py:232, in decorate.<locals>.fun(*args, **kw)
    230 if not kwsyntax:
    231     args, kw = fix(args, kw, sig)
--> 232 return caller(func, *(extras + args), **kw)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/moviepy/decorators.py:22, in convert_masks_to_RGB(f, clip, *a, **k)
     20 if clip.ismask:
     21     clip = clip.to_RGB()
---> 22 return f(clip, *a, **k)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/moviepy/video/VideoClip.py:300, in VideoClip.write_videofile(self, filename, fps, codec, bitrate, audio, audio_fps, preset, audio_nbytes, audio_codec, audio_bitrate, audio_bufsize, temp_audiofile, rewrite_audio, remove_temp, write_logfile, verbose, threads, ffmpeg_params, logger)
    292 if make_audio:
    293     self.audio.write_audiofile(audiofile, audio_fps,
    294                                audio_nbytes, audio_bufsize,
    295                                audio_codec, bitrate=audio_bitrate,
    296                                write_logfile=write_logfile,
    297                                verbose=verbose,
    298                                logger=logger)
--> 300 ffmpeg_write_video(self, filename, fps, codec,
    301                    bitrate=bitrate,
    302                    preset=preset,
    303                    write_logfile=write_logfile,
    304                    audiofile=audiofile,
    305                    verbose=verbose, threads=threads,
    306                    ffmpeg_params=ffmpeg_params,
    307                    logger=logger)
    309 if remove_temp and make_audio:
    310     if os.path.exists(audiofile):

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/moviepy/video/io/ffmpeg_writer.py:213, in ffmpeg_write_video(clip, filename, fps, codec, bitrate, preset, withmask, write_logfile, audiofile, verbose, threads, ffmpeg_params, logger)
    211     logfile = None
    212 logger(message='Moviepy - Writing video %s\n' % filename)
--> 213 with FFMPEG_VideoWriter(filename, clip.size, fps, codec = codec,
    214                             preset=preset, bitrate=bitrate, logfile=logfile,
    215                             audiofile=audiofile, threads=threads,
    216                             ffmpeg_params=ffmpeg_params) as writer:
    218     nframes = int(clip.duration*fps)
    220     for t,frame in clip.iter_frames(logger=logger, with_times=True,
    221                                     fps=fps, dtype="uint8"):

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/moviepy/video/io/ffmpeg_writer.py:88, in FFMPEG_VideoWriter.__init__(self, filename, size, fps, codec, audiofile, preset, bitrate, withmask, logfile, threads, ffmpeg_params)
     77 self.ext = self.filename.split(".")[-1]
     79 # order is important
     80 cmd = [
     81     get_setting("FFMPEG_BINARY"),
     82     '-y',
     83     '-loglevel', 'error' if logfile == sp.PIPE else 'info',
     84     '-f', 'rawvideo',
     85     '-vcodec', 'rawvideo',
     86     '-s', '%dx%d' % (size[0], size[1]),
     87     '-pix_fmt', 'rgba' if withmask else 'rgb24',
---> 88     '-r', '%.02f' % fps,
     89     '-an', '-i', '-'
     90 ]
     91 if audiofile is not None:
     92     cmd.extend([
     93         '-i', audiofile,
     94         '-acodec', 'copy'
     95     ])

TypeError: must be real number, not NoneType